(beam) branch master updated (92c5d57d529 -> e2b8acb8bbf)

2024-07-24 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from 92c5d57d529 Dedup Lineage and getTableToExtract call in 
BigQuerySourceBase (#31960)
 add e2b8acb8bbf Adds ordered list user state support to fnapi accessor 
cache. (#31923)

No new revisions were added by this update.

Summary of changes:
 .../java/org/apache/beam/fn/harness/state/FnApiStateAccessor.java | 8 +---
 1 file changed, 1 insertion(+), 7 deletions(-)



(beam) branch master updated: Expand test coverage (#31957)

2024-07-23 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new eadb81fd01f Expand test coverage (#31957)
eadb81fd01f is described below

commit eadb81fd01f281755f18a8a3095e3c2c8194e9fc
Author: Francis O'Hara 
AuthorDate: Tue Jul 23 19:18:45 2024 +

Expand test coverage (#31957)

Co-authored-by: Lahari Guduru 
<108150650+laharigud...@users.noreply.github.com>
---
 .../java/org/apache/beam/sdk/io/csv/CsvIO.java |   3 +
 .../apache/beam/sdk/io/csv/CsvIOParseHelpers.java  |   4 +
 .../beam/sdk/io/csv/CsvIOStringToCsvRecord.java|  17 +-
 .../beam/sdk/io/csv/CsvIOParseHelpersTest.java |  12 +
 .../sdk/io/csv/CsvIOStringToCsvRecordTest.java | 445 ++---
 5 files changed, 433 insertions(+), 48 deletions(-)

diff --git 
a/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIO.java 
b/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIO.java
index 6d940f7d96d..04141e5c677 100644
--- a/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIO.java
+++ b/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIO.java
@@ -74,6 +74,9 @@ import org.apache.commons.csv.CSVFormat;
 *   {@code boolean} <a href="https://javadoc.io/static/org.apache.commons/commons-csv/1.8/org/apache/commons/csv/CSVFormat.html#withIgnoreHeaderCase--">ignoreHeaderCase</a>
 *   - must be false.
+ *   {@code boolean} <a href="https://javadoc.io/static/org.apache.commons/commons-csv/1.8/org/apache/commons/csv/CSVFormat.html#withSkipHeaderRecord--">skipHeaderRecord</a>
+ *   - must be false. The header is already accounted for during parsing.
  * 
  *
  * Ignored CSVFormat parameters
diff --git 
a/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOParseHelpers.java
 
b/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOParseHelpers.java
index 856ccf42d84..4e4102f0efb 100644
--- 
a/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOParseHelpers.java
+++ 
b/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOParseHelpers.java
@@ -62,6 +62,10 @@ final class CsvIOParseHelpers {
   "Illegal %s: column name is required",
   CSVFormat.class);
 }
+checkArgument(
+!format.getSkipHeaderRecord(),
+"Illegal %s: cannot skip header record because the header is already 
accounted for",
+CSVFormat.class);
   }
 
   /**
diff --git 
a/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOStringToCsvRecord.java
 
b/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOStringToCsvRecord.java
index c92961f94a9..b5ce6a0fec2 100644
--- 
a/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOStringToCsvRecord.java
+++ 
b/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOStringToCsvRecord.java
@@ -20,6 +20,9 @@ package org.apache.beam.sdk.io.csv;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import org.apache.beam.sdk.coders.ListCoder;
+import org.apache.beam.sdk.coders.NullableCoder;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
 import org.apache.beam.sdk.transforms.DoFn;
 import org.apache.beam.sdk.transforms.PTransform;
 import org.apache.beam.sdk.transforms.ParDo;
@@ -47,14 +50,21 @@ final class CsvIOStringToCsvRecord
*/
   @Override
   public PCollection> expand(PCollection input) {
-return input.apply(ParDo.of(new ProcessLineToRecordFn()));
+return input
+.apply(ParDo.of(new ProcessLineToRecordFn()))
+.setCoder(ListCoder.of(NullableCoder.of(StringUtf8Coder.of(;
   }
 
   /** Processes each line in order to convert it to a {@link CSVRecord}. */
   private class ProcessLineToRecordFn extends DoFn> {
+private final String headerLine = headerLine(csvFormat);
+
 @ProcessElement
 public void process(@Element String line, OutputReceiver> 
receiver)
 throws IOException {
+  if (headerLine.equals(line)) {
+return;
+  }
   for (CSVRecord record : CSVParser.parse(line, csvFormat).getRecords()) {
 receiver.output(csvRecordtoList(record));
   }
@@ -69,4 +79,9 @@ final class CsvIOStringToCsvRecord
 }
 return cells;
   }
+
+  /** Returns a formatted line of the CSVFormat header. */
+  static String headerLine(CSVFormat csvFormat) {
+return String.join(String.valueOf(csvFormat.getDelimiter()), 
csvFormat.getHeader());
+  }
 }
diff --git 
a/sdks/java/io/csv/src/test/java/org/apache/beam/sdk/io/csv/CsvIOParseHelpersTest.java
 
b/sdks/java/io/csv/src/test/java/org/apache/beam/sdk/io/csv/CsvIOParseHelpersTest.java
index 5a387652022..97374cf52fe 100644
--- 
a/sdks/java/io/csv/src/test/java/org/apache/beam/sdk/io/csv/CsvIOParseHelpersTest.java
+++ 
b/sdks/java/io/csv/src/test/java/org/apache/beam/sdk/io/csv/CsvIOParseHelpersTe

(beam) branch master updated: [Prism] Implement PrismPipelineResult (#31937)

2024-07-19 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new bdd5fff78c8 [Prism] Implement PrismPipelineResult (#31937)
bdd5fff78c8 is described below

commit bdd5fff78c84e45e6cc95d9dc4a1871bc39cf20f
Author: Damon 
AuthorDate: Fri Jul 19 15:22:25 2024 -0700

[Prism] Implement PrismPipelineResult (#31937)

* Implement PrismPipelineResult

* Add isAlive checks
---
 runners/prism/java/build.gradle|   2 +
 .../apache/beam/runners/prism/PrismExecutor.java   |   8 ++
 .../beam/runners/prism/PrismPipelineResult.java| 109 +
 .../runners/prism/PrismPipelineResultTest.java | 130 +
 4 files changed, 249 insertions(+)

diff --git a/runners/prism/java/build.gradle b/runners/prism/java/build.gradle
index dfc863e8f63..93d151f3e05 100644
--- a/runners/prism/java/build.gradle
+++ b/runners/prism/java/build.gradle
@@ -29,10 +29,12 @@ dependencies {
 implementation project(path: ":sdks:java:core", configuration: "shadow")
 implementation project(":runners:portability:java")
 
+implementation library.java.joda_time
 implementation library.java.slf4j_api
 implementation library.java.vendored_guava_32_1_2_jre
 
 testImplementation library.java.junit
+testImplementation library.java.mockito_core
 testImplementation library.java.truth
 }
 
diff --git 
a/runners/prism/java/src/main/java/org/apache/beam/runners/prism/PrismExecutor.java
 
b/runners/prism/java/src/main/java/org/apache/beam/runners/prism/PrismExecutor.java
index fba2eec99c5..620d5508f22 100644
--- 
a/runners/prism/java/src/main/java/org/apache/beam/runners/prism/PrismExecutor.java
+++ 
b/runners/prism/java/src/main/java/org/apache/beam/runners/prism/PrismExecutor.java
@@ -87,6 +87,14 @@ abstract class PrismExecutor {
 }
   }
 
+  /** Reports whether the Prism executable {@link Process#isAlive()}. */
+  boolean isAlive() {
+if (process == null) {
+  return false;
+}
+return process.isAlive();
+  }
+
   /**
* Execute the {@link ProcessBuilder} that starts the Prism service. 
Redirects output to STDOUT.
*/
diff --git 
a/runners/prism/java/src/main/java/org/apache/beam/runners/prism/PrismPipelineResult.java
 
b/runners/prism/java/src/main/java/org/apache/beam/runners/prism/PrismPipelineResult.java
new file mode 100644
index 000..a551196c9b6
--- /dev/null
+++ 
b/runners/prism/java/src/main/java/org/apache/beam/runners/prism/PrismPipelineResult.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.prism;
+
+import java.io.IOException;
+import org.apache.beam.sdk.PipelineResult;
+import org.apache.beam.sdk.metrics.MetricResults;
+import org.checkerframework.checker.nullness.qual.Nullable;
+import org.joda.time.Duration;
+
+/**
+ * The {@link PipelineResult} of executing a {@link 
org.apache.beam.sdk.Pipeline} using the {@link
+ * PrismRunner} and an internal {@link PipelineResult} delegate.
+ */
+class PrismPipelineResult implements PipelineResult {
+
+  static PrismPipelineResult of(PipelineResult delegate, PrismExecutor 
executor) {
+return new PrismPipelineResult(delegate, executor::stop);
+  }
+
+  private final PipelineResult delegate;
+  private final Runnable cancel;
+  private @Nullable MetricResults terminalMetrics;
+  private @Nullable State terminalState;
+
+  /**
+   * Instantiate the {@link PipelineResult} from the {@param delegate} and a 
{@param cancel} to be
+   * called when stopping the underlying executable Job management service.
+   */
+  PrismPipelineResult(PipelineResult delegate, Runnable cancel) {
+this.delegate = delegate;
+this.cancel = cancel;
+  }
+
+  /** Forwards the result of the delegate {@link PipelineResult#getState}. */
+  @Override
+  public State getState() {
+if (terminalState != null) {
+  return terminalState;
+}
+return delegate.getState();
+  }
+
+  /**
+   * Forwards the result of the deleg

(beam) branch master updated (f3e6c66c0a5 -> ff15999dfc4)

2024-07-17 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from f3e6c66c0a5 Improve performance of BigQueryIO connector when 
withPropagateSuccessfulStorageApiWrites(true) is used (#31840)
 add ff15999dfc4 [CsvIO] Change method signature of CsvIOStringToCsvRecord 
class (#31920)

No new revisions were added by this update.

Summary of changes:
 .../beam/sdk/io/csv/CsvIOStringToCsvRecord.java | 21 -
 1 file changed, 16 insertions(+), 5 deletions(-)



(beam) branch master updated: [CsvIO] Implemented CsvIOParseHelpers:parseCell (#31802)

2024-07-11 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new e646c28d2ac [CsvIO] Implemented CsvIOParseHelpers:parseCell (#31802)
e646c28d2ac is described below

commit e646c28d2ac82535221e135d503b3c3becb6eace
Author: lahariguduru <108150650+laharigud...@users.noreply.github.com>
AuthorDate: Thu Jul 11 16:27:23 2024 +

[CsvIO] Implemented CsvIOParseHelpers:parseCell (#31802)

* Created CsvIOHelpers method

* Created CsvIOHelpers:parseCell method

* deleted ExamplePojo class, created CsvIOParseHelpers::parseCell method

* Changed IllegalArgumentException to UnsupportedOperationException in 
parseCell() method

-

Co-authored-by: Lahari Guduru 
---
 .../apache/beam/sdk/io/csv/CsvIOParseHelpers.java  |  36 +-
 .../beam/sdk/io/csv/CsvIOParseHelpersTest.java | 373 +
 2 files changed, 407 insertions(+), 2 deletions(-)

diff --git 
a/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOParseHelpers.java
 
b/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOParseHelpers.java
index 042e284cd52..df99807cfea 100644
--- 
a/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOParseHelpers.java
+++ 
b/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOParseHelpers.java
@@ -17,6 +17,8 @@
  */
 package org.apache.beam.sdk.io.csv;
 
+import java.math.BigDecimal;
+import java.time.Instant;
 import java.util.ArrayList;
 import java.util.List;
 import org.apache.beam.sdk.schemas.Schema;
@@ -48,8 +50,38 @@ final class CsvIOParseHelpers {
* Parse the given {@link String} cell of the CSV record based on the given 
field's {@link
* Schema.FieldType}.
*/
-  // TODO(https://github.com/apache/beam/issues/31719): implement method.
   static Object parseCell(String cell, Schema.Field field) {
-return "";
+Schema.FieldType fieldType = field.getType();
+try {
+  switch (fieldType.getTypeName()) {
+case STRING:
+  return cell;
+case INT16:
+  return Short.parseShort(cell);
+case INT32:
+  return Integer.parseInt(cell);
+case INT64:
+  return Long.parseLong(cell);
+case BOOLEAN:
+  return Boolean.parseBoolean(cell);
+case BYTE:
+  return Byte.parseByte(cell);
+case DECIMAL:
+  return new BigDecimal(cell);
+case DOUBLE:
+  return Double.parseDouble(cell);
+case FLOAT:
+  return Float.parseFloat(cell);
+case DATETIME:
+  return Instant.parse(cell);
+default:
+  throw new UnsupportedOperationException(
+  "Unsupported type: " + fieldType + ", consider using 
withCustomRecordParsing");
+  }
+
+} catch (IllegalArgumentException e) {
+  throw new IllegalArgumentException(
+  e.getMessage() + " field " + field.getName() + " was received -- 
type mismatch");
+}
   }
 }
diff --git 
a/sdks/java/io/csv/src/test/java/org/apache/beam/sdk/io/csv/CsvIOParseHelpersTest.java
 
b/sdks/java/io/csv/src/test/java/org/apache/beam/sdk/io/csv/CsvIOParseHelpersTest.java
new file mode 100644
index 000..d6129055ae3
--- /dev/null
+++ 
b/sdks/java/io/csv/src/test/java/org/apache/beam/sdk/io/csv/CsvIOParseHelpersTest.java
@@ -0,0 +1,373 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.csv;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThrows;
+
+import java.math.BigDecimal;
+import java.time.DateTimeException;
+import java.time.Instant;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.commons.collections.keyvalue.DefaultMapEntry;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Tests for {@link CsvIOParseHelpers}. */
+@RunWith(JUnit4.class)
+public class CsvIOParseHelpersTest {
+
+  @Test
+  public void ignoresCaseForm

(beam) branch master updated: Locate and download Prism binary (#31796)

2024-07-10 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 8d5c3b5ee2c Locate and download Prism binary (#31796)
8d5c3b5ee2c is described below

commit 8d5c3b5ee2c4d1ad62aa09adde7b3cce99b79cd3
Author: Damon 
AuthorDate: Wed Jul 10 15:47:25 2024 -0700

Locate and download Prism binary (#31796)

* Stage PrismRunner implementation and dependencies

* Locate and download Prism binary

* Sync with head

* Remove redundant check

* Remove sha verification; delete files in test setup

* Remove destination dir; check exists

* Add tests for 404 and tag prefix
---
 .../apache/beam/runners/prism/PrismLocator.java| 221 +
 .../beam/runners/prism/PrismLocatorTest.java   | 125 
 2 files changed, 346 insertions(+)

diff --git 
a/runners/prism/java/src/main/java/org/apache/beam/runners/prism/PrismLocator.java
 
b/runners/prism/java/src/main/java/org/apache/beam/runners/prism/PrismLocator.java
new file mode 100644
index 000..f32e4d88f42
--- /dev/null
+++ 
b/runners/prism/java/src/main/java/org/apache/beam/runners/prism/PrismLocator.java
@@ -0,0 +1,221 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.prism;
+
+import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull;
+import static 
org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument;
+import static 
org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.attribute.PosixFilePermission;
+import java.nio.file.attribute.PosixFilePermissions;
+import java.util.Set;
+import java.util.function.BiConsumer;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+import org.apache.beam.sdk.util.ReleaseInfo;
+import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Strings;
+import 
org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.io.ByteStreams;
+
+/**
+ * Locates a Prism executable based on a user's default operating system and 
architecture
+ * environment or a {@link PrismPipelineOptions#getPrismLocation()} override. 
Handles the download,
+ * unzip, {@link PosixFilePermissions}, as needed. For {@link 
#GITHUB_DOWNLOAD_PREFIX} sources,
+ * additionally performs a SHA512 verification.
+ */
+class PrismLocator {
+  static final String OS_NAME_PROPERTY = "os.name";
+  static final String ARCH_PROPERTY = "os.arch";
+  static final String USER_HOME_PROPERTY = "user.home";
+
+  private static final String ZIP_EXT = "zip";
+  private static final ReleaseInfo RELEASE_INFO = ReleaseInfo.getReleaseInfo();
+  private static final String PRISM_BIN_PATH = ".apache_beam/cache/prism/bin";
+  private static final Set PERMS =
+  PosixFilePermissions.fromString("rwxr-xr-x");
+  private static final String GITHUB_DOWNLOAD_PREFIX =
+  "https://github.com/apache/beam/releases/download";
+  private static final String GITHUB_TAG_PREFIX = 
+"https://github.com/apache/beam/releases/tag";
+
+  private final PrismPipelineOptions options;
+
+  PrismLocator(PrismPipelineOptions options) {
+this.options = options;
+  }
+
+  /**
+   * Downloads and prepares a Prism executable for use with the {@link 
PrismRunner}. The returned
+   * {@link String} is the absolute path to the Prism executable.
+   */
+  String resolve() throws IOException {
+
+String from =
+String.format("%s/v%s/%s.zip", GITHUB_DOWNLOAD_PREFIX, 
getSDKVersion(), buildFileName());
+
+if (!Strings.isNullOrEmpty(options.getPrismLocation())) {
+  checkArgument(
+  !options.getPrismLocation().startsWith(GITHUB_TAG_PREFIX),
+  "Provided --prismLocation 

(beam) branch master updated: Remove CsvIOParseResult (#31819)

2024-07-09 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new f72f6ce0e81 Remove CsvIOParseResult (#31819)
f72f6ce0e81 is described below

commit f72f6ce0e813ce1189f723714ec319a5a0431419
Author: Damon 
AuthorDate: Tue Jul 9 17:26:59 2024 -0700

Remove CsvIOParseResult (#31819)
---
 .../apache/beam/sdk/io/csv/CsvIOParseResult.java   | 86 --
 .../org/apache/beam/sdk/io/csv/CsvIOReadFiles.java | 20 ++---
 2 files changed, 7 insertions(+), 99 deletions(-)

diff --git 
a/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOParseResult.java
 
b/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOParseResult.java
deleted file mode 100644
index 5d4d4c8c02e..000
--- 
a/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOParseResult.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.sdk.io.csv;
-
-import java.util.Map;
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.transforms.PTransform;
-import org.apache.beam.sdk.values.PCollection;
-import org.apache.beam.sdk.values.PCollectionTuple;
-import org.apache.beam.sdk.values.PInput;
-import org.apache.beam.sdk.values.POutput;
-import org.apache.beam.sdk.values.PValue;
-import org.apache.beam.sdk.values.TupleTag;
-import 
org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap;
-
-/**
- * The {@link T} and {@link org.apache.beam.sdk.io.csv.CsvIOParseError} {@link 
PCollection} results
- * of parsing CSV records. Use {@link #getOutput()} and {@link #getErrors()} 
to apply these results
- * in a pipeline.
- */
-public class CsvIOParseResult implements POutput {
-
-  static  CsvIOParseResult of(
-  TupleTag outputTag, TupleTag errorTag, 
PCollectionTuple pct) {
-return new CsvIOParseResult<>(outputTag, errorTag, pct);
-  }
-
-  private final Pipeline pipeline;
-  private final TupleTag outputTag;
-  private final PCollection output;
-  private final TupleTag errorTag;
-  private final PCollection errors;
-
-  private CsvIOParseResult(
-  TupleTag outputTag, TupleTag errorTag, 
PCollectionTuple pct) {
-this.outputTag = outputTag;
-this.errorTag = errorTag;
-this.pipeline = pct.getPipeline();
-this.output = pct.get(outputTag);
-this.errors = pct.get(errorTag);
-  }
-
-  /** The {@link T} {@link PCollection} as a result of successfully parsing 
CSV records. */
-  public PCollection getOutput() {
-return output;
-  }
-
-  /**
-   * The {@link org.apache.beam.sdk.io.csv.CsvIOParseError} {@link 
PCollection} as a result of
-   * errors associated with parsing CSV records.
-   */
-  public PCollection getErrors() {
-return errors;
-  }
-
-  @Override
-  public Pipeline getPipeline() {
-return pipeline;
-  }
-
-  @Override
-  public Map, PValue> expand() {
-return ImmutableMap.of(
-outputTag, output,
-errorTag, errors);
-  }
-
-  @Override
-  public void finishSpecifyingOutput(
-  String transformName, PInput input, PTransform transform) {}
-}
diff --git 
a/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOReadFiles.java 
b/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOReadFiles.java
index 0f6267c6b34..3e0b36b85c2 100644
--- 
a/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOReadFiles.java
+++ 
b/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOReadFiles.java
@@ -17,20 +17,19 @@
  */
 package org.apache.beam.sdk.io.csv;
 
-import org.apache.beam.sdk.Pipeline;
 import org.apache.beam.sdk.io.FileIO;
+import org.apache.beam.sdk.transforms.DoFn;
 import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.ParDo;
 import org.apache.beam.sdk.values.PCollection;
-import org.apache.beam.sdk.values.PCollectionTuple;
-import org.apache.beam.sdk.values.TupleTag;
 
 /**
  * Skeleton for error handling in CsvIO that transforms a {@link 
FileIO.ReadableFile} into the
  * result of parsing.
  */
 // T

(beam) branch master updated: Support class executes the Prism binary (#31795)

2024-07-09 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 81538672cfe Support class executes the Prism binary (#31795)
81538672cfe is described below

commit 81538672cfea253d4965063ac4ca12233203aa06
Author: Damon 
AuthorDate: Tue Jul 9 16:09:17 2024 -0700

Support class executes the Prism binary (#31795)

* Stage PrismRunner implementation and dependencies

* A Java support class executes the Prism binary

* Sync with head

* Remove pid
---
 .../apache/beam/runners/prism/PrismExecutor.java   | 160 +
 .../beam/runners/prism/PrismExecutorTest.java  |  99 +
 2 files changed, 259 insertions(+)

diff --git 
a/runners/prism/java/src/main/java/org/apache/beam/runners/prism/PrismExecutor.java
 
b/runners/prism/java/src/main/java/org/apache/beam/runners/prism/PrismExecutor.java
new file mode 100644
index 000..fba2eec99c5
--- /dev/null
+++ 
b/runners/prism/java/src/main/java/org/apache/beam/runners/prism/PrismExecutor.java
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.prism;
+
+import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull;
+
+import com.google.auto.value.AutoValue;
+import java.io.File;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Optional;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import 
org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.io.ByteStreams;
+import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * {@link PrismExecutor} builds and executes a {@link ProcessBuilder} for use 
by the {@link
+ * PrismRunner}. Prism is a {@link 
org.apache.beam.runners.portability.PortableRunner} maintained at
+ * <a href="https://github.com/apache/beam/tree/master/sdks/go/cmd/prism">sdks/go/cmd/prism</a>.
+ */
+@AutoValue
+abstract class PrismExecutor {
+
+  private static final Logger LOG = 
LoggerFactory.getLogger(PrismExecutor.class);
+
+  protected @MonotonicNonNull Process process;
+  protected ExecutorService executorService = 
Executors.newSingleThreadExecutor();
+  protected @MonotonicNonNull Future future = null;
+
+  static Builder builder() {
+return new AutoValue_PrismExecutor.Builder();
+  }
+
+  /** The command to execute the Prism binary. */
+  abstract String getCommand();
+
+  /**
+   * Additional arguments to pass when invoking the Prism binary. Defaults to 
an {@link
+   * Collections#emptyList()}.
+   */
+  abstract List getArguments();
+
+  /** Stops the execution of the {@link Process}, created as a result of 
{@link #execute}. */
+  void stop() {
+LOG.info("Stopping Prism...");
+if (future != null) {
+  future.cancel(true);
+}
+executorService.shutdown();
+try {
+  boolean ignored = executorService.awaitTermination(1000L, 
TimeUnit.MILLISECONDS);
+} catch (InterruptedException ignored) {
+}
+if (process == null) {
+  return;
+}
+if (!process.isAlive()) {
+  return;
+}
+process.destroy();
+try {
+  process.waitFor();
+} catch (InterruptedException ignored) {
+}
+  }
+
+  /**
+   * Execute the {@link ProcessBuilder} that starts the Prism service. 
Redirects output to STDOUT.
+   */
+  void execute() throws IOException {
+execute(createProcessBuilder().inheritIO());
+  }
+
+  /**
+   * Execute the {@link ProcessBuilder} that starts the Prism service. 
Redirects output to the
+   * {@param outputStream}.
+   */
+  void execute(OutputStream outputStream) throws IOException {
+execute(createProcessBuilder().redirectErrorStream(true));
+this.future =
+executorService.submit(
+() -> {
+  try {
+ByteSt

(beam) branch master updated (ac423af5699 -> 631d40d0e79)

2024-07-08 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from ac423af5699 Pass-through IcebergIO catalog properties (#31726)
 add 631d40d0e79 Stage PrismRunner implementation and dependencies (#31794)

No new revisions were added by this update.

Summary of changes:
 runners/prism/build.gradle |  1 +
 .../sbe => runners/prism/java}/build.gradle| 24 --
 .../beam/runners/prism/PrismPipelineOptions.java   | 44 +++
 .../org/apache/beam/runners/prism/PrismRunner.java | 86 ++
 .../apache/beam/runners/prism}/package-info.java   |  4 +-
 .../apache/beam/runners/prism/PrismRunnerTest.java | 86 ++
 settings.gradle.kts|  1 +
 7 files changed, 236 insertions(+), 10 deletions(-)
 copy {sdks/java/extensions/sbe => runners/prism/java}/build.gradle (68%)
 create mode 100644 
runners/prism/java/src/main/java/org/apache/beam/runners/prism/PrismPipelineOptions.java
 create mode 100644 
runners/prism/java/src/main/java/org/apache/beam/runners/prism/PrismRunner.java
 copy 
{examples/java/cdap/hubspot/src/main/java/org/apache/beam/examples/complete/cdap/hubspot
 => 
runners/prism/java/src/main/java/org/apache/beam/runners/prism}/package-info.java
 (90%)
 create mode 100644 
runners/prism/java/src/test/java/org/apache/beam/runners/prism/PrismRunnerTest.java



(beam) branch master updated (d1df1d7ecc9 -> 02600f55d21)

2024-07-08 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from d1df1d7ecc9 Bump cloud.google.com/go/storage from 1.41.0 to 1.43.0 in 
/sdks (#31772)
 add 02600f55d21 Set Snowflake escape char to backslash since it is the 
default used by CSVParser (fixes #24467) (#31779)

No new revisions were added by this update.

Summary of changes:
 .../beam/sdk/io/snowflake/services/SnowflakeBatchServiceImpl.java   | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)



(beam) branch master updated: Updates Dataflow service API version. (#31751)

2024-07-02 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 8a4a250076b Updates Dataflow service API version. (#31751)
8a4a250076b is described below

commit 8a4a250076bde59fadb89b2e580e6e98d064b26d
Author: Andrew Crites 
AuthorDate: Tue Jul 2 13:38:04 2024 -0700

Updates Dataflow service API version. (#31751)
---
 buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy 
b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
index c0ac274a657..6f040c00cd1 100644
--- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
+++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
@@ -736,7 +736,7 @@ class BeamModulePlugin implements Plugin {
 google_api_common   : 
"com.google.api:api-common", // google_cloud_platform_libraries_bom sets version
 google_api_services_bigquery: 
"com.google.apis:google-api-services-bigquery:v2-rev20240323-2.0.0",  // 
[bomupgrader] sets version
 google_api_services_cloudresourcemanager: 
"com.google.apis:google-api-services-cloudresourcemanager:v1-rev20240310-2.0.0",
  // [bomupgrader] sets version
-google_api_services_dataflow: 
"com.google.apis:google-api-services-dataflow:v1b3-rev20240218-$google_clients_version",
+google_api_services_dataflow: 
"com.google.apis:google-api-services-dataflow:v1b3-rev20240624-$google_clients_version",
 google_api_services_healthcare  : 
"com.google.apis:google-api-services-healthcare:v1-rev20240130-$google_clients_version",
 google_api_services_pubsub  : 
"com.google.apis:google-api-services-pubsub:v1-rev20220904-$google_clients_version",
 google_api_services_storage : 
"com.google.apis:google-api-services-storage:v1-rev20240319-2.0.0",  // 
[bomupgrader] sets version



(beam) branch master updated: Solace Read connector: adding Basic Authentication support (#31541)

2024-07-01 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 3b8ddda10a0 Solace Read connector: adding Basic Authentication support 
(#31541)
3b8ddda10a0 is described below

commit 3b8ddda10a01ff640ed4cf1ce746d0c19e003180
Author: Bartosz Zablocki 
AuthorDate: Mon Jul 1 23:24:57 2024 +0200

Solace Read connector: adding Basic Authentication support (#31541)

* Add support for BasicAuth to Solace

* Address PR comments

* Use `checkStateNotNull`
---
 .../broker/BasicAuthJcsmpSessionService.java   | 148 +
 .../BasicAuthJcsmpSessionServiceFactory.java   |  74 +++
 .../io/solace/broker/SolaceMessageReceiver.java|  72 ++
 3 files changed, 294 insertions(+)

diff --git 
a/sdks/java/io/solace/src/main/java/org/apache/beam/sdk/io/solace/broker/BasicAuthJcsmpSessionService.java
 
b/sdks/java/io/solace/src/main/java/org/apache/beam/sdk/io/solace/broker/BasicAuthJcsmpSessionService.java
new file mode 100644
index 000..7863dbd129c
--- /dev/null
+++ 
b/sdks/java/io/solace/src/main/java/org/apache/beam/sdk/io/solace/broker/BasicAuthJcsmpSessionService.java
@@ -0,0 +1,148 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.solace.broker;
+
+import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull;
+
+import com.solacesystems.jcsmp.ConsumerFlowProperties;
+import com.solacesystems.jcsmp.EndpointProperties;
+import com.solacesystems.jcsmp.FlowReceiver;
+import com.solacesystems.jcsmp.InvalidPropertiesException;
+import com.solacesystems.jcsmp.JCSMPException;
+import com.solacesystems.jcsmp.JCSMPFactory;
+import com.solacesystems.jcsmp.JCSMPProperties;
+import com.solacesystems.jcsmp.JCSMPSession;
+import com.solacesystems.jcsmp.Queue;
+import java.io.IOException;
+import javax.annotation.Nullable;
+import org.apache.beam.sdk.io.solace.RetryCallableManager;
+import 
org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableSet;
+
+/**
+ * A class that manages a connection to a Solace broker using basic 
authentication.
+ *
+ * This class provides a way to connect to a Solace broker and receive 
messages from a queue. The
+ * connection is established using basic authentication.
+ */
+public class BasicAuthJcsmpSessionService implements SessionService {
+  private final String queueName;
+  private final String host;
+  private final String username;
+  private final String password;
+  private final String vpnName;
+  @Nullable private JCSMPSession jcsmpSession;
+  private final RetryCallableManager retryCallableManager = 
RetryCallableManager.create();
+
+  /**
+   * Creates a new {@link BasicAuthJcsmpSessionService} with the given 
parameters.
+   *
+   * @param queueName The name of the queue to receive messages from.
+   * @param host The host name or IP address of the Solace broker. Format: 
Host[:Port]
+   * @param username The username to use for authentication.
+   * @param password The password to use for authentication.
+   * @param vpnName The name of the VPN to connect to.
+   */
+  public BasicAuthJcsmpSessionService(
+  String queueName, String host, String username, String password, String 
vpnName) {
+this.queueName = queueName;
+this.host = host;
+this.username = username;
+this.password = password;
+this.vpnName = vpnName;
+  }
+
+  @Override
+  public void connect() {
+retryCallableManager.retryCallable(this::connectSession, 
ImmutableSet.of(JCSMPException.class));
+  }
+
+  @Override
+  public void close() {
+if (isClosed()) {
+  return;
+}
+retryCallableManager.retryCallable(
+() -> {
+  checkStateNotNull(jcsmpSession).closeSession();
+  return 0;
+},
+ImmutableSet.of(IOException.class));
+  }
+
+  @Override
+  public MessageReceiver createReceiver() {
+return retryCallableManager.retryCallable(
+this::createFlowReceiver, ImmutableSet.of(JCSMPException.class));
+  }
+
+  @Override
+  

(beam) branch master updated: created CsvIOReadFiles class (#31738)

2024-07-01 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 957a7cd22b6 created CsvIOReadFiles class (#31738)
957a7cd22b6 is described below

commit 957a7cd22b6974a4c565846ad32b9fb9fc9c8122
Author: lahariguduru <108150650+laharigud...@users.noreply.github.com>
AuthorDate: Mon Jul 1 20:20:15 2024 +

created CsvIOReadFiles class (#31738)

Co-authored-by: Lahari Guduru 
---
 .../org/apache/beam/sdk/io/csv/CsvIOReadFiles.java | 54 ++
 .../apache/beam/sdk/io/csv/CsvIOReadFilesTest.java | 32 +
 2 files changed, 86 insertions(+)

diff --git 
a/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOReadFiles.java 
b/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOReadFiles.java
new file mode 100644
index 000..0f6267c6b34
--- /dev/null
+++ 
b/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOReadFiles.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.csv;
+
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.io.FileIO;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PCollectionTuple;
+import org.apache.beam.sdk.values.TupleTag;
+
+/**
+ * Skeleton for error handling in CsvIO that transforms a {@link 
FileIO.ReadableFile} into the
+ * result of parsing.
+ */
+// TODO(https://github.com/apache/beam/issues/31736): Plan completion in 
future PR after
+// dependencies are completed.
+class CsvIOReadFiles extends PTransform, 
CsvIOParseResult> {
+  /** Stores required parameters for parsing. */
+  private final CsvIOParseConfiguration.Builder configBuilder;
+
+  CsvIOReadFiles(CsvIOParseConfiguration.Builder configBuilder) {
+this.configBuilder = configBuilder;
+  }
+
+  /** {@link PTransform} that parses and relays the filename associated with 
each error. */
+  // TODO: complete expand method to ensure parsing from FileIO.ReadableFile 
to CsvIOParseResult.
+  @Override
+  public CsvIOParseResult expand(PCollection input) {
+// TODO(https://github.com/apache/beam/issues/31736): Needed to prevent 
check errors, will
+// remove with future PR.
+configBuilder.build();
+TupleTag outputTag = new TupleTag<>();
+TupleTag errorTag = new TupleTag<>();
+Pipeline p = input.getPipeline();
+PCollectionTuple tuple = PCollectionTuple.empty(p);
+return CsvIOParseResult.of(outputTag, errorTag, tuple);
+  }
+}
diff --git 
a/sdks/java/io/csv/src/test/java/org/apache/beam/sdk/io/csv/CsvIOReadFilesTest.java
 
b/sdks/java/io/csv/src/test/java/org/apache/beam/sdk/io/csv/CsvIOReadFilesTest.java
new file mode 100644
index 000..c4a62f84eae
--- /dev/null
+++ 
b/sdks/java/io/csv/src/test/java/org/apache/beam/sdk/io/csv/CsvIOReadFilesTest.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.csv;
+
+import org.apache.beam.sdk.util.SerializableUtils;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Tests for {@link CsvIOReadFiles}. */
+@RunWith(JUnit4.class)
+public clas

(beam) branch master updated: Move CsvIOParseResult (#31722)

2024-06-28 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 5a090958992 Move CsvIOParseResult (#31722)
5a090958992 is described below

commit 5a09095899289155edf977c65b962f414ba24c48
Author: lahariguduru <108150650+laharigud...@users.noreply.github.com>
AuthorDate: Sat Jun 29 00:04:01 2024 +

Move CsvIOParseResult (#31722)

Co-authored-by: Lahari Guduru 
---
 .../beam/sdk/io/csv/{providers => }/CsvIOParseResult.java| 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git 
a/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/providers/CsvIOParseResult.java
 
b/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOParseResult.java
similarity index 86%
rename from 
sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/providers/CsvIOParseResult.java
rename to 
sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOParseResult.java
index 3a6299e1591..5d4d4c8c02e 100644
--- 
a/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/providers/CsvIOParseResult.java
+++ 
b/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOParseResult.java
@@ -15,11 +15,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.beam.sdk.io.csv.providers;
+package org.apache.beam.sdk.io.csv;
 
 import java.util.Map;
 import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.io.csv.CsvIOParseError;
 import org.apache.beam.sdk.transforms.PTransform;
 import org.apache.beam.sdk.values.PCollection;
 import org.apache.beam.sdk.values.PCollectionTuple;
@@ -30,8 +29,9 @@ import org.apache.beam.sdk.values.TupleTag;
 import 
org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap;
 
 /**
- * The {@link T} and {@link CsvIOParseError} {@link PCollection} results of 
parsing CSV records. Use
- * {@link #getOutput()} and {@link #getErrors()} to apply these results in a 
pipeline.
+ * The {@link T} and {@link org.apache.beam.sdk.io.csv.CsvIOParseError} {@link 
PCollection} results
+ * of parsing CSV records. Use {@link #getOutput()} and {@link #getErrors()} 
to apply these results
+ * in a pipeline.
  */
 public class CsvIOParseResult implements POutput {
 
@@ -61,8 +61,8 @@ public class CsvIOParseResult implements POutput {
   }
 
   /**
-   * The {@link CsvIOParseError} {@link PCollection} as a result of errors 
associated with parsing
-   * CSV records.
+   * The {@link org.apache.beam.sdk.io.csv.CsvIOParseError} {@link 
PCollection} as a result of
+   * errors associated with parsing CSV records.
*/
   public PCollection getErrors() {
 return errors;



(beam) branch master updated (2f81e16f3ea -> 9bdcb672d08)

2024-06-28 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from 2f81e16f3ea [#31403] Python wrapper to download, use, or build and run 
prism. (#31583)
 add 9bdcb672d08 Create CsvIOParseConfiguration class (#31714)

No new revisions were added by this update.

Summary of changes:
 .../beam/sdk/io/csv/CsvIOParseConfiguration.java   | 69 ++
 .../sdk/io/csv/CsvIOParseConfigurationTest.java| 12 ++--
 2 files changed, 75 insertions(+), 6 deletions(-)
 create mode 100644 
sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOParseConfiguration.java
 copy learning/katas/java/IO/Built-in IOs/Built-in 
IOs/test/org/apache/beam/learning/katas/io/builtinios/TaskTest.java => 
sdks/java/io/csv/src/test/java/org/apache/beam/sdk/io/csv/CsvIOParseConfigurationTest.java
 (72%)



(beam) branch master updated: Create CsvIOParseResult (#31706)

2024-06-28 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new f8b63ff3d55 Create CsvIOParseResult (#31706)
f8b63ff3d55 is described below

commit f8b63ff3d55aeefab07020e463882225cf2350cf
Author: Damon 
AuthorDate: Fri Jun 28 15:04:53 2024 -0700

Create CsvIOParseResult (#31706)
---
 .../sdk/io/csv/providers/CsvIOParseResult.java | 86 ++
 1 file changed, 86 insertions(+)

diff --git 
a/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/providers/CsvIOParseResult.java
 
b/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/providers/CsvIOParseResult.java
new file mode 100644
index 000..3a6299e1591
--- /dev/null
+++ 
b/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/providers/CsvIOParseResult.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.csv.providers;
+
+import java.util.Map;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.io.csv.CsvIOParseError;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PCollectionTuple;
+import org.apache.beam.sdk.values.PInput;
+import org.apache.beam.sdk.values.POutput;
+import org.apache.beam.sdk.values.PValue;
+import org.apache.beam.sdk.values.TupleTag;
+import 
org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap;
+
+/**
+ * The {@link T} and {@link CsvIOParseError} {@link PCollection} results of 
parsing CSV records. Use
+ * {@link #getOutput()} and {@link #getErrors()} to apply these results in a 
pipeline.
+ */
+public class CsvIOParseResult implements POutput {
+
+  static  CsvIOParseResult of(
+  TupleTag outputTag, TupleTag errorTag, 
PCollectionTuple pct) {
+return new CsvIOParseResult<>(outputTag, errorTag, pct);
+  }
+
+  private final Pipeline pipeline;
+  private final TupleTag outputTag;
+  private final PCollection output;
+  private final TupleTag errorTag;
+  private final PCollection errors;
+
+  private CsvIOParseResult(
+  TupleTag outputTag, TupleTag errorTag, 
PCollectionTuple pct) {
+this.outputTag = outputTag;
+this.errorTag = errorTag;
+this.pipeline = pct.getPipeline();
+this.output = pct.get(outputTag);
+this.errors = pct.get(errorTag);
+  }
+
+  /** The {@link T} {@link PCollection} as a result of successfully parsing 
CSV records. */
+  public PCollection getOutput() {
+return output;
+  }
+
+  /**
+   * The {@link CsvIOParseError} {@link PCollection} as a result of errors 
associated with parsing
+   * CSV records.
+   */
+  public PCollection getErrors() {
+return errors;
+  }
+
+  @Override
+  public Pipeline getPipeline() {
+return pipeline;
+  }
+
+  @Override
+  public Map, PValue> expand() {
+return ImmutableMap.of(
+outputTag, output,
+errorTag, errors);
+  }
+
+  @Override
+  public void finishSpecifyingOutput(
+  String transformName, PInput input, PTransform transform) {}
+}



(beam) branch master updated: Create CsvIOParseError data class (#31700)

2024-06-27 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new bb296e46c06 Create CsvIOParseError data class (#31700)
bb296e46c06 is described below

commit bb296e46c06082098bc8cb0757356002d0fc8bee
Author: Damon 
AuthorDate: Thu Jun 27 12:04:16 2024 -0700

Create CsvIOParseError data class (#31700)
---
 sdks/java/io/csv/build.gradle  |   1 +
 .../apache/beam/sdk/io/csv/CsvIOParseError.java|  75 +++
 .../beam/sdk/io/csv/CsvIOParseErrorTest.java   | 101 +
 3 files changed, 177 insertions(+)

diff --git a/sdks/java/io/csv/build.gradle b/sdks/java/io/csv/build.gradle
index 2be8f59d1f3..92c66ff0140 100644
--- a/sdks/java/io/csv/build.gradle
+++ b/sdks/java/io/csv/build.gradle
@@ -28,6 +28,7 @@ dependencies {
 implementation project(path: ":sdks:java:core", configuration: "shadow")
 implementation library.java.commons_csv
 implementation library.java.vendored_guava_32_1_2_jre
+implementation library.java.joda_time
 testImplementation project(path: ":sdks:java:core", configuration: 
"shadowTest")
 testImplementation library.java.junit
 testRuntimeOnly project(path: ":runners:direct-java", configuration: 
"shadow")
diff --git 
a/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOParseError.java
 
b/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOParseError.java
new file mode 100644
index 000..ad7d05912fa
--- /dev/null
+++ 
b/sdks/java/io/csv/src/main/java/org/apache/beam/sdk/io/csv/CsvIOParseError.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.csv;
+
+import com.google.auto.value.AutoValue;
+import org.apache.beam.sdk.schemas.AutoValueSchema;
+import org.apache.beam.sdk.schemas.annotations.DefaultSchema;
+import org.checkerframework.checker.nullness.qual.Nullable;
+import org.joda.time.Instant;
+
+/**
+ * {@link CsvIOParseError} is a data class to store errors from CSV record 
processing. It is {@link
+ * org.apache.beam.sdk.schemas.Schema} mapped for compatibility with writing 
to Beam Schema-aware
+ * I/O connectors.
+ */
+@DefaultSchema(AutoValueSchema.class)
+@AutoValue
+public abstract class CsvIOParseError {
+
+  static Builder builder() {
+return new AutoValue_CsvIOParseError.Builder();
+  }
+
+  /** The caught {@link Exception#getMessage()}. */
+  public abstract String getMessage();
+
+  /**
+   * The CSV record associated with the caught {@link Exception}. Annotated 
{@link Nullable} as not
+   * all processing errors are associated with a CSV record.
+   */
+  public abstract @Nullable String getCsvRecord();
+
+  /**
+   * The filename associated with the caught {@link Exception}. Annotated 
{@link Nullable} as not
+   * all processing errors are associated with a file.
+   */
+  public abstract @Nullable String getFilename();
+
+  /** The date and time when the {@link Exception} occurred. */
+  public abstract Instant getObservedTimestamp();
+
+  /** The caught {@link Exception#getStackTrace()}. */
+  public abstract String getStackTrace();
+
+  @AutoValue.Builder
+  abstract static class Builder {
+
+abstract Builder setMessage(String message);
+
+abstract Builder setCsvRecord(String csvRecord);
+
+abstract Builder setFilename(String filename);
+
+abstract Builder setObservedTimestamp(Instant observedTimestamp);
+
+abstract Builder setStackTrace(String stackTrace);
+
+public abstract CsvIOParseError build();
+  }
+}
diff --git 
a/sdks/java/io/csv/src/test/java/org/apache/beam/sdk/io/csv/CsvIOParseErrorTest.java
 
b/sdks/java/io/csv/src/test/java/org/apache/beam/sdk/io/csv/CsvIOParseErrorTest.java
new file mode 100644
index 000..8e746c00605
--- /dev/null
+++ 
b/sdks/java/io/csv/src/test/java/org/apache/beam/sdk/io/csv/CsvIOParseErrorTest.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor licens

(beam) branch master updated (cbc480eba20 -> 970a3f52680)

2024-06-25 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from cbc480eba20 Bump cloud.google.com/go/bigtable from 1.22.0 to 1.25.0 in 
/sdks (#31681)
 add 970a3f52680 Add RetryCallable mechanism for remote calls (#31539)

No new revisions were added by this update.

Summary of changes:
 sdks/java/io/solace/build.gradle   |   5 +-
 .../beam/sdk/io/solace/RetryCallableManager.java   | 130 
 .../sdk/io/solace/RetryCallableManagerTest.java| 169 +
 3 files changed, 303 insertions(+), 1 deletion(-)
 create mode 100644 
sdks/java/io/solace/src/main/java/org/apache/beam/sdk/io/solace/RetryCallableManager.java
 create mode 100644 
sdks/java/io/solace/src/test/java/org/apache/beam/sdk/io/solace/RetryCallableManagerTest.java



(beam) branch master updated: Solace Read connector (#31476)

2024-06-21 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 18af8c837fa Solace Read connector (#31476)
18af8c837fa is described below

commit 18af8c837faf63aaee66f7071aef63835a9b9dc0
Author: Bartosz Zablocki 
AuthorDate: Fri Jun 21 21:47:56 2024 +0200

Solace Read connector (#31476)

* wip solace connector

* wip solace connector

* some checker errors resolved

* all checker errors resolved

* improving unit tests

* respond to pr comments

* Documentation

* Small refactor - move data classes out of the client

* refactor

* Add github action for integration test of Solace

* testing github workflow

* bump testcontainers to 1.19.7 - Solace SEMP was updated with an admin 
user access

* Use FlowHandle to acknowledge messages to make SolaceCheckpointMark's 
fields serializable.

* Handle StaleSessionException error

* Add @Internal annotation to mark the SolaceIO API beta and subject to 
change.

* Improve documentation

* Back to ack based on bytesxmlmessages. Deduplicate default to false.

* update changes.md with Solace read connector

* remove ack by id code

* remove todo comment

* Add licenses to package-info.java files

* Restructure documentation

* update aws test after upgrading testcontainers version.

* Disable publishing docs until the first pass on the master branch

* Remove files from this branch to split PR into smaller chunks

* refactor tests for readability

* revert upgrade of testcontainers - not needed in this PR chunk

* revert upgrade of testcontainers - not needed in this PR chunk

* spotless

* remove IT tests from this pr

* Tech Writer review

* Add a field to Solace.Record mapped from 
BytesXMLMessage.getAttachmentByteBuffer()

* Add and fix some documentation

* Remove CheckpointMark's reference to the UnboundedSolaceReader - 
unnecessary.

* Revert "Remove CheckpointMark's reference to the UnboundedSolaceReader - 
unnecessary."

This reverts commit 2e1c10e0b4c0f124af779ee4f284fcc79ccc8fc9.

* Solace project init - github workflow file, gradle module

* Splitting the #31476 - Leaving only PTransform AutoValue configurations

* remove unnecessary dependencies

* remove info from CHANGES.md

* Add watermark-related code

* Remove excessive @Nullable annotations on Solace.Record class

* Remove entry from CHANGES.md

* Make Record builder package-private

* Refactor SolaceIO - the constructor of Read takes a configuration builder 
as an argument

* Change payload and attachments type to immutable ByteString

* Downgrade Record builders access modifiers to package private

* Add documentation

* Add documentation to classes and methods in Solace.java

* typo

* Add SolaceCheckpointMark.java

* Make SolaceCheckpointMark visible for testing

* Remove SolaceRecordCoder and take advantage of @DefaultSchema
---
 sdks/java/io/solace/build.gradle   |   8 +
 .../org/apache/beam/sdk/io/solace/SolaceIO.java| 415 ++--
 .../io/solace/broker/SessionServiceFactory.java|  19 +
 .../org/apache/beam/sdk/io/solace/data/Solace.java |   4 +
 .../sdk/io/solace/MockEmptySessionService.java}|  37 +-
 .../apache/beam/sdk/io/solace/MockSempClient.java  |  87 +++
 .../beam/sdk/io/solace/MockSempClientFactory.java} |  29 +-
 .../beam/sdk/io/solace/MockSessionService.java |  88 +++
 .../sdk/io/solace/MockSessionServiceFactory.java}  |  29 +-
 .../apache/beam/sdk/io/solace/SolaceIOTest.java| 597 +
 .../beam/sdk/io/solace/data/SolaceDataUtils.java   | 708 +
 11 files changed, 1945 insertions(+), 76 deletions(-)

diff --git a/sdks/java/io/solace/build.gradle b/sdks/java/io/solace/build.gradle
index c317b566618..506145f3529 100644
--- a/sdks/java/io/solace/build.gradle
+++ b/sdks/java/io/solace/build.gradle
@@ -35,8 +35,16 @@ dependencies {
 implementation library.java.slf4j_api
 implementation library.java.joda_time
 implementation library.java.solace
+implementation library.java.vendored_guava_32_1_2_jre
 implementation project(":sdks:java:extensions:avro")
 implementation library.java.vendored_grpc_1_60_1
 implementation library.java.avro
 permitUnusedDeclared library.java.avro
+implementation library.java.vendored_grpc_1_60_1
+
+testImplementation library.java.junit
+testImplementation project(path: ":sdks:java:io:common", configu

(beam) branch master updated: Solace Read connector: data classes and mapper (#31637)

2024-06-20 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 3914ad0754d Solace Read connector: data classes and mapper (#31637)
3914ad0754d is described below

commit 3914ad0754dbd4904ddb079044672f6c1df830a0
Author: Bartosz Zablocki 
AuthorDate: Fri Jun 21 05:23:09 2024 +0200

Solace Read connector: data classes and mapper (#31637)

* Add data classes

* Remove obvious tests of AutoValue classes

* Remove @DefaultSchema and @SchemaFieldNumber annotations
---
 sdks/java/io/solace/build.gradle   |   1 +
 .../org/apache/beam/sdk/io/solace/data/Solace.java | 319 +++--
 2 files changed, 302 insertions(+), 18 deletions(-)

diff --git a/sdks/java/io/solace/build.gradle b/sdks/java/io/solace/build.gradle
index b33b8fb1802..c317b566618 100644
--- a/sdks/java/io/solace/build.gradle
+++ b/sdks/java/io/solace/build.gradle
@@ -36,6 +36,7 @@ dependencies {
 implementation library.java.joda_time
 implementation library.java.solace
 implementation project(":sdks:java:extensions:avro")
+implementation library.java.vendored_grpc_1_60_1
 implementation library.java.avro
 permitUnusedDeclared library.java.avro
 }
diff --git 
a/sdks/java/io/solace/src/main/java/org/apache/beam/sdk/io/solace/data/Solace.java
 
b/sdks/java/io/solace/src/main/java/org/apache/beam/sdk/io/solace/data/Solace.java
index 076a16b96ce..79057445a4e 100644
--- 
a/sdks/java/io/solace/src/main/java/org/apache/beam/sdk/io/solace/data/Solace.java
+++ 
b/sdks/java/io/solace/src/main/java/org/apache/beam/sdk/io/solace/data/Solace.java
@@ -17,26 +17,23 @@
  */
 package org.apache.beam.sdk.io.solace.data;
 
+import com.google.auto.value.AutoValue;
+import com.solacesystems.jcsmp.BytesXMLMessage;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import org.apache.beam.vendor.grpc.v1p60p1.com.google.protobuf.ByteString;
+import org.checkerframework.checker.nullness.qual.Nullable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 /**
- * A record to be written to a Solace topic.
- *
- * You need to transform to {@link Solace.Record} to be able to write to 
Solace. For that, you
- * can use the {@link Solace.Record.Builder} provided with this class.
- *
- * For instance, to create a record, use the following code:
- *
- * {@code
- * Solace.Record record = Solace.Record.builder()
- * .setMessageId(messageId)
- * .setSenderTimestamp(timestampMillis)
- * .setPayload(payload)
- * .build();
- * }
- *
- * Setting the message id and the timestamp is mandatory.
+ * Provides core data models and utilities for working with Solace messages in 
the context of Apache
+ * Beam pipelines. This class includes representations for Solace topics, 
queues, destinations, and
+ * message records, as well as a utility for converting Solace messages into 
Beam-compatible
+ * records.
  */
 public class Solace {
-
+  /** Represents a Solace queue. */
   public static class Queue {
 private final String name;
 
@@ -52,7 +49,7 @@ public class Solace {
   return name;
 }
   }
-
+  /** Represents a Solace topic. */
   public static class Topic {
 private final String name;
 
@@ -68,4 +65,290 @@ public class Solace {
   return name;
 }
   }
+  /** Represents a Solace destination type. */
+  public enum DestinationType {
+TOPIC,
+QUEUE,
+UNKNOWN
+  }
+
+  /** Represents a Solace message destination (either a Topic or a Queue). */
+  @AutoValue
+  public abstract static class Destination {
+/**
+ * Gets the name of the destination.
+ *
+ * @return The destination name.
+ */
+public abstract String getName();
+
+/**
+ * Gets the type of the destination (TOPIC, QUEUE or UNKNOWN).
+ *
+ * @return The destination type.
+ */
+public abstract DestinationType getType();
+
+static Builder builder() {
+  return new AutoValue_Solace_Destination.Builder();
+}
+
+@AutoValue.Builder
+abstract static class Builder {
+  abstract Builder setName(String name);
+
+  abstract Builder setType(DestinationType type);
+
+  abstract Destination build();
+}
+  }
+
+  /** Represents a Solace message record with its associated metadata. */
+  @AutoValue
+  public abstract static class Record {
+/**
+ * Gets the unique identifier of the message, a string for an 
application-specific message
+ * identifier.
+ *
+ * Mapped from {@link BytesXMLMessage#getApplicationMessageId()}
+ *
+ * @return The message ID, or null if not available.
+ */
+public abstract @Nullable String getMessageId();
+
+/**
+ * Gets the payload of the message as a ByteString.
+ *
+ * Mapped from {@link BytesXMLMessag

(beam) branch master updated: Handle MultimapKeysSideInput in State GetRequests (#31632)

2024-06-20 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new c6c3fd0490a Handle MultimapKeysSideInput in State GetRequests (#31632)
c6c3fd0490a is described below

commit c6c3fd0490ad73af1f4775d84de18c4ba8fb7af0
Author: Damon 
AuthorDate: Thu Jun 20 20:21:02 2024 -0700

Handle MultimapKeysSideInput in State GetRequests (#31632)

* Handle MultimapKeysSideInput in State GetRequests

* Assign data to keys

* Fix test name

* Fix import sort
---
 .../beam/runners/prism/internal/worker/worker.go   | 15 
 .../runners/prism/internal/worker/worker_test.go   | 95 ++
 2 files changed, 110 insertions(+)

diff --git a/sdks/go/pkg/beam/runners/prism/internal/worker/worker.go 
b/sdks/go/pkg/beam/runners/prism/internal/worker/worker.go
index 47fc2cccfc5..d8eb4c96149 100644
--- a/sdks/go/pkg/beam/runners/prism/internal/worker/worker.go
+++ b/sdks/go/pkg/beam/runners/prism/internal/worker/worker.go
@@ -468,6 +468,21 @@ func (wk *W) State(state fnpb.BeamFnState_StateServer) 
error {
 
data = winMap[w]
 
+   case *fnpb.StateKey_MultimapKeysSideInput_:
+   mmkey := key.GetMultimapKeysSideInput()
+   wKey := mmkey.GetWindow()
+   var w typex.Window = 
window.GlobalWindow{}
+   if len(wKey) > 0 {
+   w, err = 
exec.MakeWindowDecoder(coder.NewIntervalWindow()).DecodeSingle(bytes.NewBuffer(wKey))
+   if err != nil {
+   
panic(fmt.Sprintf("error decoding multimap side input window key %v: %v", wKey, 
err))
+   }
+   }
+   winMap := 
b.MultiMapSideInputData[SideInputKey{TransformID: mmkey.GetTransformId(), 
Local: mmkey.GetSideInputId()}]
+   for k := range winMap[w] {
+   data = append(data, []byte(k))
+   }
+
case *fnpb.StateKey_MultimapSideInput_:
mmkey := key.GetMultimapSideInput()
wKey := mmkey.GetWindow()
diff --git a/sdks/go/pkg/beam/runners/prism/internal/worker/worker_test.go 
b/sdks/go/pkg/beam/runners/prism/internal/worker/worker_test.go
index b87667eef38..e5b03214ae0 100644
--- a/sdks/go/pkg/beam/runners/prism/internal/worker/worker_test.go
+++ b/sdks/go/pkg/beam/runners/prism/internal/worker/worker_test.go
@@ -18,12 +18,16 @@ package worker
 import (
"bytes"
"context"
+   "github.com/google/go-cmp/cmp"
"net"
+   "sort"
"sync"
"testing"
"time"
 
+   "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/coder"
"github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/window"
+   "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime/exec"
"github.com/apache/beam/sdks/v2/go/pkg/beam/core/typex"
fnpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/fnexecution_v1"

"github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/engine"
@@ -97,6 +101,23 @@ func serveTestWorker(t *testing.T) (context.Context, *W, 
*grpc.ClientConn) {
return ctx, w, clientConn
 }
 
+type closeSend func()
+
+func serveTestWorkerStateStream(t *testing.T) (*W, 
fnpb.BeamFnState_StateClient, closeSend) {
+   ctx, wk, clientConn := serveTestWorker(t)
+
+   stateCli := fnpb.NewBeamFnStateClient(clientConn)
+   stateStream, err := stateCli.State(ctx)
+   if err != nil {
+   t.Fatal("couldn't create state client:", err)
+   }
+   return wk, stateStream, func() {
+   if err := stateStream.CloseSend(); err != nil {
+   t.Errorf("stateStream.CloseSend() = %v", err)
+   }
+   }
+}
+
 func TestWorker_Logging(t *testing.T) {
ctx, _, clientConn := serveTestWorker(t)
 
@@ -291,3 +312,77 @@ func TestWorker_State_Iterable(t *testing.T) {
t.Errorf("stateStream.CloseSend() = %v", err)
}
 }
+
+func TestWorker_State_MultimapKeysSideInput(t *testing.T) {
+   for _, tt := range []struct {
+   name string
+   wtypex.Window
+   }{
+   {
+   name: "global window",

(beam) branch master updated: [Prism] Terminate Job with CancelFn instead of panic (#31599)

2024-06-14 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new e2d62462cd6 [Prism] Terminate Job with CancelFn instead of panic 
(#31599)
e2d62462cd6 is described below

commit e2d62462cd622db8b008e7e36e2343b57e90d4f1
Author: Damon 
AuthorDate: Fri Jun 14 20:12:47 2024 -0700

[Prism] Terminate Job with CancelFn instead of panic (#31599)

* Refactor elementmanager Bundles with upstream CancelCauseFunc

* Fix minor edits
---
 .../prism/internal/engine/elementmanager.go| 23 --
 .../prism/internal/engine/elementmanager_test.go   | 12 +++
 sdks/go/pkg/beam/runners/prism/internal/execute.go |  2 +-
 3 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/sdks/go/pkg/beam/runners/prism/internal/engine/elementmanager.go 
b/sdks/go/pkg/beam/runners/prism/internal/engine/elementmanager.go
index 76c60e810d4..2c4e08bcd09 100644
--- a/sdks/go/pkg/beam/runners/prism/internal/engine/elementmanager.go
+++ b/sdks/go/pkg/beam/runners/prism/internal/engine/elementmanager.go
@@ -34,6 +34,7 @@ import (
"github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/window"
"github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime/exec"
"github.com/apache/beam/sdks/v2/go/pkg/beam/core/typex"
+   "github.com/apache/beam/sdks/v2/go/pkg/beam/internal/errors"
"golang.org/x/exp/maps"
"golang.org/x/exp/slog"
 )
@@ -290,7 +291,7 @@ func (rb RunBundle) LogValue() slog.Value {
 // Bundles is the core execution loop. It produces a sequences of bundles able 
to be executed.
 // The returned channel is closed when the context is canceled, or there are 
no pending elements
 // remaining.
-func (em *ElementManager) Bundles(ctx context.Context, nextBundID func() 
string) <-chan RunBundle {
+func (em *ElementManager) Bundles(ctx context.Context, upstreamCancelFn 
context.CancelCauseFunc, nextBundID func() string) <-chan RunBundle {
runStageCh := make(chan RunBundle)
ctx, cancelFn := context.WithCancelCause(ctx)
go func() {
@@ -384,7 +385,9 @@ func (em *ElementManager) Bundles(ctx context.Context, 
nextBundID func() string)
}
}
}
-   em.checkForQuiescence(advanced)
+   if err := em.checkForQuiescence(advanced); err != nil {
+   upstreamCancelFn(err)
+   }
}
}()
return runStageCh
@@ -400,11 +403,11 @@ func (em *ElementManager) Bundles(ctx context.Context, 
nextBundID func() string)
 // executing off the next TestStream event.
 //
 // Must be called while holding em.refreshCond.L.
-func (em *ElementManager) checkForQuiescence(advanced set[string]) {
+func (em *ElementManager) checkForQuiescence(advanced set[string]) error {
defer em.refreshCond.L.Unlock()
if len(em.inprogressBundles) > 0 {
// If there are bundles in progress, then there may be 
watermark refreshes when they terminate.
-   return
+   return nil
}
if len(em.watermarkRefreshes) > 0 {
// If there are watermarks to refresh, we aren't yet stuck.
@@ -414,12 +417,12 @@ func (em *ElementManager) checkForQuiescence(advanced 
set[string]) {
slog.Int("refreshCount", len(em.watermarkRefreshes)),
slog.Int64("pendingElementCount", v),
)
-   return
+   return nil
}
if em.testStreamHandler == nil && len(em.processTimeEvents.events) > 0 {
// If there's no test stream involved, and processing time 
events exist, then
// it's only a matter of time.
-   return
+   return nil
}
// The job has quiesced!
 
@@ -433,12 +436,12 @@ func (em *ElementManager) checkForQuiescence(advanced 
set[string]) {
// Note: it's a prism bug if test stream never causes a refresh 
to occur for a given event.
// It's not correct to move to the next event if no refreshes 
would occur.
if len(em.watermarkRefreshes) > 0 {
-   return
+   return nil
} else if _, ok := nextEvent.(tsProcessingTimeEvent); ok {
// It's impossible to fully control processing time SDK 
side handling for processing time
// Runner side, so we specialize refresh handling here 
to avoid spuriously getting stuck.
em.watermarkRefreshes.insert(em.testStreamHandler.ID)
- 

(beam) branch master updated (fe54c211512 -> 7a0ad41935f)

2024-06-07 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from fe54c211512 Implement Hugging Face Image Embedding MLTransform (#31536)
 add 7a0ad41935f Escape literal tab. (#31548)

No new revisions were added by this update.

Summary of changes:
 sdks/python/apache_beam/transforms/external_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)



(beam) branch master updated: retry test_big_query_legacy_sql (#31417)

2024-06-06 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 03d553e1f9c retry test_big_query_legacy_sql (#31417)
03d553e1f9c is described below

commit 03d553e1f9c58eb8a1cd29352e9281427cd8669e
Author: liferoad 
AuthorDate: Thu Jun 6 16:43:01 2024 -0400

retry test_big_query_legacy_sql (#31417)

* retry test_big_query_legacy_sql

* fix lint

* fixed lint
---
 sdks/python/apache_beam/io/gcp/big_query_query_to_table_it_test.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sdks/python/apache_beam/io/gcp/big_query_query_to_table_it_test.py 
b/sdks/python/apache_beam/io/gcp/big_query_query_to_table_it_test.py
index e8cd8884219..052790c4a20 100644
--- a/sdks/python/apache_beam/io/gcp/big_query_query_to_table_it_test.py
+++ b/sdks/python/apache_beam/io/gcp/big_query_query_to_table_it_test.py
@@ -30,6 +30,8 @@ import unittest
 
 import pytest
 from hamcrest.core.core.allof import all_of
+from tenacity import retry
+from tenacity import stop_after_attempt
 
 from apache_beam.io.gcp import big_query_query_to_table_pipeline
 from apache_beam.io.gcp.bigquery_tools import BigQueryWrapper
@@ -155,6 +157,7 @@ class BigQueryQueryToTableIT(unittest.TestCase):
 self.assertTrue(passed, 'Error in BQ setup: %s' % errors)
 
   @pytest.mark.it_postcommit
+  @retry(reraise=True, stop=stop_after_attempt(3))
   def test_big_query_legacy_sql(self):
 verify_query = DIALECT_OUTPUT_VERIFY_QUERY % self.output_table
 expected_checksum = test_utils.compute_hash(DIALECT_OUTPUT_EXPECTED)



(beam) branch master updated: fix typescript syntax (#31493)

2024-06-04 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 195dc3f0146 fix typescript syntax (#31493)
195dc3f0146 is described below

commit 195dc3f0146f8e21cf90d7ce5347166babc5b02e
Author: Yi Hu 
AuthorDate: Tue Jun 4 18:53:28 2024 -0400

fix typescript syntax (#31493)
---
 sdks/typescript/src/apache_beam/runners/flink.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdks/typescript/src/apache_beam/runners/flink.ts 
b/sdks/typescript/src/apache_beam/runners/flink.ts
index bedcd726628..ad4339b431f 100644
--- a/sdks/typescript/src/apache_beam/runners/flink.ts
+++ b/sdks/typescript/src/apache_beam/runners/flink.ts
@@ -28,7 +28,7 @@ import { JavaJarService } from "../utils/service";
 const MAGIC_HOST_NAMES = ["[local]", "[auto]"];
 
 // These should stay in sync with gradle.properties.
-const PUBLISHED_FLINK_VERSIONS = ["1.15", "1.16", "1.17, "1.18""];
+const PUBLISHED_FLINK_VERSIONS = ["1.15", "1.16", "1.17", "1.18"];
 
 const defaultOptions = {
   flinkMaster: "[local]",



(beam) branch master updated (bb4ad994304 -> fbe9427b0a9)

2024-06-03 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from bb4ad994304 Fix one small typo in build script. (#31413)
 add fbe9427b0a9 Add Trigger_Never to Prepare features check (#31472)

No new revisions were added by this update.

Summary of changes:
 sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go | 8 +++-
 sdks/go/pkg/beam/runners/prism/internal/unimplemented_test.go | 2 +-
 2 files changed, 8 insertions(+), 2 deletions(-)



(beam) branch master updated (c90fd8320a5 -> 5454489e29b)

2024-05-30 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from c90fd8320a5 Use bytes instead of mibs. (#31457)
 add 5454489e29b Report prism changes. (#31433)

No new revisions were added by this update.

Summary of changes:
 CHANGES.md | 4 
 1 file changed, 4 insertions(+)



(beam) branch master updated: flink portable client configurations (#31188)

2024-05-30 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 3cadc83d348 flink portable client configurations (#31188)
3cadc83d348 is described below

commit 3cadc83d348605d07837532701c0fb4de0feb116
Author: Marc hurabielle 
AuthorDate: Fri May 31 01:46:20 2024 +0900

flink portable client configurations (#31188)

* add support for FlinkJobServer configuration

* remove hardcoded timeout for FlinkPortableClient
---
 .../flink/FlinkPortableClientEntryPoint.java   | 47 +-
 .../runners/jobsubmission/JobServerDriver.java | 12 ++
 2 files changed, 39 insertions(+), 20 deletions(-)

diff --git 
a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPortableClientEntryPoint.java
 
b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPortableClientEntryPoint.java
index 47d3959ad18..8f0ecf3efbd 100644
--- 
a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPortableClientEntryPoint.java
+++ 
b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPortableClientEntryPoint.java
@@ -73,14 +73,10 @@ import org.slf4j.LoggerFactory;
 public class FlinkPortableClientEntryPoint {
   private static final Logger LOG = 
LoggerFactory.getLogger(FlinkPortableClientEntryPoint.class);
   private static final String JOB_ENDPOINT_FLAG = "--job_endpoint";
-  private static final Duration JOB_INVOCATION_TIMEOUT = 
Duration.ofSeconds(30);
-  private static final Duration JOB_SERVICE_STARTUP_TIMEOUT = 
Duration.ofSeconds(30);
-
   private final String driverCmd;
   private FlinkJobServerDriver jobServer;
   private Thread jobServerThread;
   private DetachedJobInvokerFactory jobInvokerFactory;
-  private int jobPort = 0; // pick any free port
 
   public FlinkPortableClientEntryPoint(String driverCmd) {
 Preconditions.checkState(
@@ -96,8 +92,8 @@ public class FlinkPortableClientEntryPoint {
 FlinkPortableClientEntryPoint runner =
 new FlinkPortableClientEntryPoint(configuration.driverCmd);
 try {
-  runner.startJobService();
-  runner.runDriverProgram();
+  runner.startJobService(configuration);
+  
runner.runDriverProgram(Duration.ofSeconds(configuration.jobInvocationTimeoutSeconds));
 } catch (Exception e) {
   throw new RuntimeException(String.format("Job %s failed.", 
configuration.driverCmd), e);
 } finally {
@@ -107,7 +103,8 @@ public class FlinkPortableClientEntryPoint {
 LOG.info("Job submitted successfully.");
   }
 
-  private static class EntryPointConfiguration {
+  private static class EntryPointConfiguration
+  extends FlinkJobServerDriver.FlinkServerConfiguration {
 @Option(
 name = "--driver-cmd",
 required = true,
@@ -115,6 +112,16 @@ public class FlinkPortableClientEntryPoint {
 "Command that launches the Python driver program. "
 + "(The job service endpoint will be appended as 
--job_endpoint=localhost:.)")
 private String driverCmd;
+
+@Option(
+name = "--job-service-startup-timeout-seconds",
+usage = "Timeout for the job service start in seconds")
+private long jobServiceStartupTimeoutSeconds = 30;
+
+@Option(
+name = "--job-invocation-timeout-seconds",
+usage = "Timeout for the job submission in seconds")
+private long jobInvocationTimeoutSeconds = 30;
   }
 
   private static EntryPointConfiguration parseArgs(String[] args) {
@@ -127,20 +134,20 @@ public class FlinkPortableClientEntryPoint {
   parser.printUsage(System.err);
   throw new IllegalArgumentException("Unable to parse command line 
arguments.", e);
 }
+configuration.setPort(0);
+configuration.setArtifactPort(0);
+configuration.setExpansionPort(0);
 return configuration;
   }
 
-  private void startJobService() throws Exception {
+  private void startJobService(EntryPointConfiguration configuration) throws 
Exception {
 jobInvokerFactory = new DetachedJobInvokerFactory();
-jobServer =
-FlinkJobServerDriver.fromConfig(
-FlinkJobServerDriver.parseArgs(
-new String[] {"--job-port=" + jobPort, "--artifact-port=0", 
"--expansion-port=0"}),
-jobInvokerFactory);
+jobServer = FlinkJobServerDriver.fromConfig(configuration, 
jobInvokerFactory);
 jobServerThread = new Thread(jobServer);
 jobServerThread.start();
 
-Deadline deadline = Deadline.fromNow(JOB_SERVICE_STARTUP_TIMEOUT);
+Deadline deadline =
+
Deadline.fromNow(Duration.ofSeconds(configuration.jobServiceStartupTimeoutSeconds));
 while (jobServer.getJobServerUrl() == null && deadline.hasTimeLeft()) {

(beam) branch master updated: Don't re-encode byte[] values in SortValues transform (#31025)

2024-05-30 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 9f3f1c93a60 Don't re-encode byte[] values in SortValues transform 
(#31025)
9f3f1c93a60 is described below

commit 9f3f1c93a605bd14409cae141967129f67cd8bb6
Author: Claire McGinty 
AuthorDate: Thu May 30 12:45:16 2024 -0400

Don't re-encode byte[] values in SortValues transform (#31025)

* Don't re-encode byte[] values in SortValues transform

* checkstyle

* Apply code review comments
---
 .../beam/sdk/extensions/sorter/SortValues.java |  26 +++-
 .../beam/sdk/extensions/sorter/SortValuesTest.java | 144 ++---
 2 files changed, 149 insertions(+), 21 deletions(-)

diff --git 
a/sdks/java/extensions/sorter/src/main/java/org/apache/beam/sdk/extensions/sorter/SortValues.java
 
b/sdks/java/extensions/sorter/src/main/java/org/apache/beam/sdk/extensions/sorter/SortValues.java
index bc9fb2f8955..e7618681e1b 100644
--- 
a/sdks/java/extensions/sorter/src/main/java/org/apache/beam/sdk/extensions/sorter/SortValues.java
+++ 
b/sdks/java/extensions/sorter/src/main/java/org/apache/beam/sdk/extensions/sorter/SortValues.java
@@ -20,7 +20,9 @@ package org.apache.beam.sdk.extensions.sorter;
 import java.io.IOException;
 import java.util.Iterator;
 import javax.annotation.Nonnull;
+import org.apache.beam.sdk.coders.ByteArrayCoder;
 import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.CoderException;
 import org.apache.beam.sdk.coders.IterableCoder;
 import org.apache.beam.sdk.coders.KvCoder;
 import org.apache.beam.sdk.transforms.DoFn;
@@ -131,6 +133,20 @@ public class SortValues
 return getSecondaryKeyValueCoder(inputCoder).getValueCoder();
   }
 
+  private static  T elementOf(Coder coder, byte[] bytes) throws 
CoderException {
+if (coder instanceof ByteArrayCoder) {
+  return (T) bytes;
+}
+return CoderUtils.decodeFromByteArray(coder, bytes);
+  }
+
+  private static  byte[] bytesOf(Coder coder, T element) throws 
CoderException {
+if (element instanceof byte[]) {
+  return (byte[]) element;
+}
+return CoderUtils.encodeToByteArray(coder, element);
+  }
+
   private static class SortValuesDoFn
   extends DoFn<
   KV>>,
@@ -156,9 +172,7 @@ public class SortValues
 Sorter sorter = BufferedExternalSorter.create(sorterOptions);
 for (KV record : records) {
   sorter.add(
-  KV.of(
-  CoderUtils.encodeToByteArray(keyCoder, record.getKey()),
-  CoderUtils.encodeToByteArray(valueCoder, 
record.getValue(;
+  KV.of(bytesOf(keyCoder, record.getKey()), bytesOf(valueCoder, 
record.getValue(;
 }
 
 c.output(KV.of(c.element().getKey(), new 
DecodingIterable(sorter.sort(;
@@ -197,9 +211,9 @@ public class SortValues
   public KV next() {
 KV next = iterator.next();
 try {
-  return KV.of(
-  CoderUtils.decodeFromByteArray(keyCoder, next.getKey()),
-  CoderUtils.decodeFromByteArray(valueCoder, next.getValue()));
+  SecondaryKeyT secondaryKey = elementOf(keyCoder, next.getKey());
+  ValueT value = elementOf(valueCoder, next.getValue());
+  return KV.of(secondaryKey, value);
 } catch (IOException e) {
   throw new RuntimeException(e);
 }
diff --git 
a/sdks/java/extensions/sorter/src/test/java/org/apache/beam/sdk/extensions/sorter/SortValuesTest.java
 
b/sdks/java/extensions/sorter/src/test/java/org/apache/beam/sdk/extensions/sorter/SortValuesTest.java
index fa3fbe6ae16..451583a9ba9 100644
--- 
a/sdks/java/extensions/sorter/src/test/java/org/apache/beam/sdk/extensions/sorter/SortValuesTest.java
+++ 
b/sdks/java/extensions/sorter/src/test/java/org/apache/beam/sdk/extensions/sorter/SortValuesTest.java
@@ -22,7 +22,10 @@ import static org.hamcrest.Matchers.contains;
 import static org.hamcrest.Matchers.containsInAnyOrder;
 import static org.hamcrest.Matchers.is;
 
+import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
 import org.apache.beam.sdk.testing.PAssert;
 import org.apache.beam.sdk.testing.TestPipeline;
 import org.apache.beam.sdk.transforms.Create;
@@ -67,30 +70,141 @@ public class SortValuesTest {
 grouped.apply(SortValues.create(BufferedExternalSorter.options()));
 
 PAssert.that(groupedAndSorted)
-.satisfies(new 
AssertThatHasExpectedContentsForTestSecondaryKeySorting());
+.satisfies(
+new AssertThatHasExpectedContentsForTestSecondaryKeySorting<>(
+Arrays.asList(
+KV.of(
+"key1",
+Arrays.asList(
+

(beam) branch master updated: Refactor RowMutationInformation to use string type (#31323)

2024-05-29 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new df8bead5945 Refactor RowMutationInformation to use string type (#31323)
df8bead5945 is described below

commit df8bead5945c801854f07dce6e708b1241c94696
Author: Damon 
AuthorDate: Wed May 29 10:49:24 2024 -0700

Refactor RowMutationInformation to use string type (#31323)

* Refactor RowMutationInformation to use string type

* Remove unnecessary test

* Add javadoc

* Add segment too large test cases

* Add hex based test cases to integration test
---
 .../beam/sdk/io/gcp/bigquery/AppendClientInfo.java |   2 +-
 .../AvroGenericRecordToStorageApiProto.java|  17 ++-
 .../io/gcp/bigquery/BeamRowToStorageApiProto.java  |  16 ++-
 .../beam/sdk/io/gcp/bigquery/RowMutation.java  |  27 +++--
 .../io/gcp/bigquery/RowMutationInformation.java| 111 -
 .../beam/sdk/io/gcp/bigquery/StorageApiCDC.java|   9 ++
 .../StorageApiDynamicDestinationsBeamRow.java  |   4 +-
 ...StorageApiDynamicDestinationsGenericRecord.java |   7 +-
 .../StorageApiDynamicDestinationsTableRow.java |   4 +-
 .../io/gcp/bigquery/TableRowToStorageApiProto.java |  40 +--
 .../sdk/io/gcp/testing/FakeDatasetService.java |   3 +-
 .../AvroGenericRecordToStorageApiProtoTest.java|   3 +-
 .../gcp/bigquery/BeamRowToStorageApiProtoTest.java |   4 +-
 .../sdk/io/gcp/bigquery/BigQueryIOWriteTest.java   |  88 --
 .../gcp/bigquery/RowMutationInformationTest.java   | 132 +
 .../io/gcp/bigquery/StorageApiSinkRowUpdateIT.java |  63 +-
 .../bigquery/TableRowToStorageApiProtoTest.java|   3 +-
 17 files changed, 457 insertions(+), 76 deletions(-)

diff --git 
a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/AppendClientInfo.java
 
b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/AppendClientInfo.java
index 3094af5855e..211027c12b0 100644
--- 
a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/AppendClientInfo.java
+++ 
b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/AppendClientInfo.java
@@ -145,7 +145,7 @@ abstract class AppendClientInfo {
 true,
 null,
 null,
--1);
+null);
 return msg.toByteString();
   }
 
diff --git 
a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/AvroGenericRecordToStorageApiProto.java
 
b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/AvroGenericRecordToStorageApiProto.java
index 7141869b228..519f9391db6 100644
--- 
a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/AvroGenericRecordToStorageApiProto.java
+++ 
b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/AvroGenericRecordToStorageApiProto.java
@@ -162,6 +162,19 @@ public class AvroGenericRecordToStorageApiProto {
 return builder.build();
   }
 
+  /**
+   * Forwards {@param changeSequenceNum} to {@link 
#messageFromGenericRecord(Descriptor,
+   * GenericRecord, String, String)} via {@link Long#toHexString}.
+   */
+  public static DynamicMessage messageFromGenericRecord(
+  Descriptor descriptor,
+  GenericRecord record,
+  @Nullable String changeType,
+  long changeSequenceNum) {
+return messageFromGenericRecord(
+descriptor, record, changeType, Long.toHexString(changeSequenceNum));
+  }
+
   /**
* Given an Avro {@link GenericRecord} object, returns a protocol-buffer 
message that can be used
* to write data using the BigQuery Storage streaming API.
@@ -174,7 +187,7 @@ public class AvroGenericRecordToStorageApiProto {
   Descriptor descriptor,
   GenericRecord record,
   @Nullable String changeType,
-  long changeSequenceNum) {
+  @Nullable String changeSequenceNum) {
 Schema schema = record.getSchema();
 DynamicMessage.Builder builder = DynamicMessage.newBuilder(descriptor);
 for (Schema.Field field : schema.getFields()) {
@@ -195,7 +208,7 @@ public class AvroGenericRecordToStorageApiProto {
   builder.setField(
   org.apache.beam.sdk.util.Preconditions.checkStateNotNull(
   descriptor.findFieldByName(StorageApiCDC.CHANGE_SQN_COLUMN)),
-  changeSequenceNum);
+  
org.apache.beam.sdk.util.Preconditions.checkStateNotNull(changeSequenceNum));
 }
 return builder.build();
   }
diff --git 
a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BeamRowToStorageApiProto.java
 
b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BeamRowToStorageApiProto.java
index

(beam) branch master updated: Avoid json Unmarshal on proto CancelJobRequest (#31178)

2024-05-03 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new e64c3596253 Avoid json Unmarshal on proto CancelJobRequest (#31178)
e64c3596253 is described below

commit e64c3596253c80ade845bf8765e6f7e04b8c57f1
Author: Damon 
AuthorDate: Fri May 3 15:30:55 2024 -0700

Avoid json Unmarshal on proto CancelJobRequest (#31178)
---
 sdks/go/pkg/beam/runners/prism/internal/web/web.go | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/sdks/go/pkg/beam/runners/prism/internal/web/web.go 
b/sdks/go/pkg/beam/runners/prism/internal/web/web.go
index baa14428aaa..f0002d850d9 100644
--- a/sdks/go/pkg/beam/runners/prism/internal/web/web.go
+++ b/sdks/go/pkg/beam/runners/prism/internal/web/web.go
@@ -373,8 +373,12 @@ type jobCancelHandler struct {
Jobcli jobpb.JobServiceClient
 }
 
+type cancelJobRequest struct {
+   JobId string `json:"job_id"`
+}
+
 func (h *jobCancelHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
-   var cancelRequest *jobpb.CancelJobRequest
+   var cancelRequest *cancelJobRequest
if r.Method != http.MethodPost {
http.Error(w, http.StatusText(http.StatusMethodNotAllowed), 
http.StatusMethodNotAllowed)
return
@@ -395,7 +399,10 @@ func (h *jobCancelHandler) ServeHTTP(w 
http.ResponseWriter, r *http.Request) {
return
}
 
-   resp, err := h.Jobcli.Cancel(r.Context(), cancelRequest)
+   // Forward JobId from POST body avoids direct json Unmarshall on 
composite types containing protobuf message types.
+   resp, err := h.Jobcli.Cancel(r.Context(), {
+   JobId: cancelRequest.JobId,
+   })
if err != nil {
statusCode := status.Code(err)
httpCode := http.StatusInternalServerError



(beam) branch master updated: Add a workaround for BEAM-20873 for optimized list side inputs. (#31163)

2024-05-03 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new afe0793302c Add a workaround for BEAM-20873 for optimized list side 
inputs. (#31163)
afe0793302c is described below

commit afe0793302cbe89d57adbcbabbe958f0d5c7d8e2
Author: Robert Bradshaw 
AuthorDate: Fri May 3 10:25:59 2024 -0700

Add a workaround for BEAM-20873 for optimized list side inputs. (#31163)

This triggered a case with Dataflow Runner v1 and PTransformOverrides
that exposed https://github.com/apache/beam/issues/20873 .
---
 .../beam/runners/dataflow/worker/IsmSideInputReader.java|  4 +++-
 .../src/main/java/org/apache/beam/sdk/transforms/View.java  | 13 ++---
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git 
a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/IsmSideInputReader.java
 
b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/IsmSideInputReader.java
index 30602444153..c2e8c334a09 100644
--- 
a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/IsmSideInputReader.java
+++ 
b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/IsmSideInputReader.java
@@ -70,6 +70,7 @@ import org.apache.beam.sdk.util.WindowedValue;
 import org.apache.beam.sdk.values.KV;
 import org.apache.beam.sdk.values.PCollectionView;
 import org.apache.beam.sdk.values.PCollectionViews.HasDefaultValue;
+import org.apache.beam.sdk.values.PCollectionViews.IterableBackedListViewFn;
 import org.apache.beam.sdk.values.PCollectionViews.IterableViewFn;
 import org.apache.beam.sdk.values.PCollectionViews.IterableViewFn2;
 import org.apache.beam.sdk.values.PCollectionViews.ListViewFn;
@@ -354,7 +355,8 @@ public class IsmSideInputReader implements SideInputReader {
   if (viewFn instanceof IterableViewFn
   || viewFn instanceof IterableViewFn2
   || viewFn instanceof ListViewFn
-  || viewFn instanceof ListViewFn2) {
+  || viewFn instanceof ListViewFn2
+  || viewFn instanceof IterableBackedListViewFn) {
 @SuppressWarnings("unchecked")
 ViewT viewT = (ViewT) getListForWindow(tag, window);
 return viewT;
diff --git 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/View.java 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/View.java
index ca04542b372..16084c569f9 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/View.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/View.java
@@ -286,11 +286,18 @@ public class View {
 }
 
 private PCollectionView> expandWithoutRandomAccess(PCollection 
input) {
+  Coder inputCoder = input.getCoder();
+  // HACK to work around https://github.com/apache/beam/issues/20873:
+  // There are bugs in "composite" vs "primitive" transform distinction
+  // in TransformHierachy. This noop transform works around them and 
should be zero
+  // cost.
+  PCollection materializationInput =
+  input.apply(MapElements.via(new SimpleFunction(x -> x) {}));
   PCollectionView> view =
   PCollectionViews.listView(
-  input,
-  (TypeDescriptorSupplier) 
input.getCoder()::getEncodedTypeDescriptor,
-  input.getWindowingStrategy());
+  materializationInput,
+  (TypeDescriptorSupplier) inputCoder::getEncodedTypeDescriptor,
+  materializationInput.getWindowingStrategy());
   input.apply(CreatePCollectionView.of(view));
   return view;
 }



(beam) branch master updated (ee329ef08b9 -> 69956669cbe)

2024-05-02 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from ee329ef08b9 add support for int gauge metrics (#31136)
 add 69956669cbe [Prism] Enable Java validatesRunner tests on Prism (#31075)

No new revisions were added by this update.

Summary of changes:
 runners/portability/java/build.gradle | 11 --
 runners/prism/build.gradle| 65 +++
 settings.gradle.kts   |  1 +
 3 files changed, 75 insertions(+), 2 deletions(-)
 create mode 100644 runners/prism/build.gradle



(beam) branch master updated: Terraform module to provision a kafka proxy (#31142)

2024-05-02 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 25191d5187d Terraform module to provision a kafka proxy (#31142)
25191d5187d is described below

commit 25191d5187dbb4dd8dd75a62d3cff8f7297fff2e
Author: Damon 
AuthorDate: Thu May 2 11:15:24 2024 -0700

Terraform module to provision a kafka proxy (#31142)

* IaC to provision a kafka proxy

* Install numpy
---
 .test-infra/kafka/proxy/.terraform.lock.hcl | 40 +
 .test-infra/kafka/proxy/README.md   | 56 +
 .test-infra/kafka/proxy/common.tfvars   | 21 +++
 .test-infra/kafka/proxy/compute.tf  | 88 +++
 .test-infra/kafka/proxy/prerequisites.tf| 75 +++
 .test-infra/kafka/proxy/provider.tf | 22 +++
 .test-infra/kafka/proxy/variables.tf| 93 +
 7 files changed, 395 insertions(+)

diff --git a/.test-infra/kafka/proxy/.terraform.lock.hcl 
b/.test-infra/kafka/proxy/.terraform.lock.hcl
new file mode 100644
index 000..fd927e0920a
--- /dev/null
+++ b/.test-infra/kafka/proxy/.terraform.lock.hcl
@@ -0,0 +1,40 @@
+# This file is maintained automatically by "terraform init".
+# Manual edits may be lost in future updates.
+
+provider "registry.terraform.io/hashicorp/google" {
+  version = "5.27.0"
+  hashes = [
+"h1:chdLlH3DV0DXSfV40ZeiJQ+mB+OYt0RMRlpeTsbhHiw=",
+"zh:08301af898c1a78e78ad547482d50c95a43ef65d09fd5058800cf32cd9c8cd53",
+"zh:1a4f9e5134e990132978e78ea15431d32e06bf8024fd6733a98faa811ae03efb",
+"zh:383e66659d69dc4b4a1ad5d7cbc6aa4ce75015f380cfb5f47beaeb506c9e2e1c",
+"zh:3aa4aff7dd9240fb387271dc791e084d010044dc58336a7a690b0f1a8890ab68",
+"zh:4084b9a61e662bdd79d1304432dffc6cd3cf00021b937b01001ae9fee5727b12",
+"zh:448f5d281cab53caacb8759fcd3309c7aa1ba5a210d1866b28e8bd77fd4634ab",
+"zh:75457a1f0b77bc7477efe58e7b223649340147fd735ed8b8fe57a06ec8459c95",
+"zh:7648c6ea04d5b1d1413cce880ed77bd7373aef1a58cd5a26394edf64dc6cac11",
+"zh:b43630367e29a4c185d3eab8b3f84f818e8a91f16007f0e81d876ab96af4ee43",
+"zh:b478e7d36c5e99f0c026cb05c06047ce1f24fc07284692a10e74214a853e7139",
+"zh:e6f349125299401049f64e608b3d73236b139e960816fffdd208d1ba405e1804",
+"zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c",
+  ]
+}
+
+provider "registry.terraform.io/hashicorp/random" {
+  version = "3.6.1"
+  hashes = [
+"h1:1OlP753r4lOKlBprL0HdZGWerm5DCabD5Mli8k8lWAg=",
+"zh:2a0ec154e39911f19c8214acd6241e469157489fc56b6c739f45fbed5896a176",
+"zh:57f4e553224a5e849c99131f5e5294be3a7adcabe2d867d8a4fef8d0976e0e52",
+"zh:58f09948c608e601bd9d0a9e47dcb78e2b2c13b4bda4d8f097d09152ea9e91c5",
+"zh:5c2a297146ed6fb3fe934c800e78380f700f49ff24dbb5fb5463134948e3a65f",
+"zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3",
+"zh:7ce41e26f0603e31cdac849085fc99e5cd5b3b73414c6c6d955c0ceb249b593f",
+"zh:8c9e8d30c4ef08ee8bcc4294dbf3c2115cd7d9049c6ba21422bd3471d92faf8a",
+"zh:93e91be717a7ffbd6410120eb925ebb8658cc8f563de35a8b53804d33c51c8b0",
+"zh:982542e921970d727ce10ed64795bf36c4dec77a5db0741d4665230d12250a0d",
+"zh:b9d1873f14d6033e216510ef541c891f44d249464f13cc07d3f782d09c7d18de",
+"zh:cfe27faa0bc9556391c8803ade135a5856c34a3fe85b9ae3bdd515013c0c87c1",
+"zh:e4aabf3184bbb556b89e4b195eab1514c86a2914dd01c23ad9813ec17e863a8a",
+  ]
+}
diff --git a/.test-infra/kafka/proxy/README.md 
b/.test-infra/kafka/proxy/README.md
new file mode 100644
index 000..c101b0e3b39
--- /dev/null
+++ b/.test-infra/kafka/proxy/README.md
@@ -0,0 +1,56 @@
+
+
+# Kafka proxy
+
+Provisions a private IP bastion host on Google Cloud for use as a proxy to a 
private IP Kafka instance.
+
+# Prerequisites
+
+- Kafka cluster (See [.test-infra/kafka](..) for available solutions.)
+
+# Usage
+
+## Acquire bootstrap server hosts
+
+One of the variables requires a mapping of bootstrap server hosts to the 
desired proxy exposed port. See
+the variable description for `bootstrap_endpoint_mapping` found in the 
[variables.tf](variables.tf) file.
+
+## Apply module
+
+Follows typical terraform workflow without the use of a
+[backend](https://developer.hashicorp.com/terraform/language/settings/backends/configuration).
+
+```
+DIR=.test-infra/kafka/proxy
+terraform -chdir=$DIR init
+```
+
+```
+terraform -chdir=$DIR apply -var-file=common.tfvars 
-var-file=name_of_your_specific.tfvars
+```
+
+## Invoke gcloud ssh tunneling command
+
+Successful applicati

(beam) branch master updated: [Prism] Connect Web UI cancel requests with backend (#31028)

2024-05-02 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new ac83276dbce [Prism] Connect Web UI cancel requests with backend 
(#31028)
ac83276dbce is described below

commit ac83276dbce9d27ad4795cb850582006b109f920
Author: Damon 
AuthorDate: Thu May 2 10:06:03 2024 -0700

[Prism] Connect Web UI cancel requests with backend (#31028)

* Connect Web UI requests with backend

* Remove artificial setting of cancelled state

* Only lock when acquiring job
---
 sdks/go/pkg/beam/runners/prism/internal/execute.go |   2 +-
 .../prism/internal/jobservices/management_test.go  |  42 +---
 .../prism/internal/jobservices/server_test.go  |   8 +-
 .../prism/internal/web/assets/job-action.js| 120 -
 .../runners/prism/internal/web/jobdetails.html |   2 +-
 sdks/go/pkg/beam/runners/prism/internal/web/web.go |  21 +++-
 6 files changed, 169 insertions(+), 26 deletions(-)

diff --git a/sdks/go/pkg/beam/runners/prism/internal/execute.go 
b/sdks/go/pkg/beam/runners/prism/internal/execute.go
index 504125a2bd6..b218d84b891 100644
--- a/sdks/go/pkg/beam/runners/prism/internal/execute.go
+++ b/sdks/go/pkg/beam/runners/prism/internal/execute.go
@@ -69,7 +69,7 @@ func RunPipeline(j *jobservices.Job) {
j.SendMsg("running " + j.String())
j.Running()
 
-   if err := executePipeline(j.RootCtx, wks, j); err != nil {
+   if err := executePipeline(j.RootCtx, wks, j); err != nil && 
!errors.Is(err, jobservices.ErrCancel) {
j.Failed(err)
return
}
diff --git 
a/sdks/go/pkg/beam/runners/prism/internal/jobservices/management_test.go 
b/sdks/go/pkg/beam/runners/prism/internal/jobservices/management_test.go
index 176abb8543a..5aad58b4a86 100644
--- a/sdks/go/pkg/beam/runners/prism/internal/jobservices/management_test.go
+++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/management_test.go
@@ -46,9 +46,13 @@ func TestServer(t *testing.T) {
 
cmpOpts := []cmp.Option{protocmp.Transform(), cmpopts.EquateEmpty()}
tests := []struct {
-   name   string
+   name string
+   postRunState jobpb.JobState_Enum
+   // noJobsCheck tests in the setting that the Job doesn't exist
+   // postPrepCheck tests after Server Prepare invoked
noJobsCheck, postPrepCheck func(context.Context, *testing.T, 
*Server)
-   postRunCheck   func(context.Context, *testing.T, 
*Server, string)
+   // postRunCheck tests after Server Run invoked
+   postRunCheck func(context.Context, *testing.T, *Server, string)
}{
{
name: "GetJobs",
@@ -170,36 +174,38 @@ func TestServer(t *testing.T) {
},
},
{
-   name: "Canceling",
+   name: "Canceling",
+   postRunState: jobpb.JobState_RUNNING,
noJobsCheck: func(ctx context.Context, t *testing.T, 
undertest *Server) {
-   resp, err := undertest.Cancel(ctx, 
{JobId: "job-001"})
-   if resp != nil {
-   t.Errorf("Canceling(\"job-001\") = %s, 
want nil", resp)
-   }
+   id := "job-001"
+   _, err := undertest.Cancel(ctx, 
{JobId: id})
+   // Cancel currently returns nil, nil when Job 
not found
if err != nil {
-   t.Errorf("Canceling(\"job-001\") = %v, 
want nil", err)
+   t.Errorf("Cancel(%q) = %v, want not 
found error", id, err)
}
},
postPrepCheck: func(ctx context.Context, t *testing.T, 
undertest *Server) {
-   resp, err := undertest.Cancel(ctx, 
{JobId: "job-001"})
+   id := "job-001"
+   resp, err := undertest.Cancel(ctx, 
{JobId: id})
if err != nil {
-   t.Errorf("Canceling(\"job-001\") = %v, 
want nil", err)
+   t.Errorf("Cancel(%q) = %v, want not 
found error", id, err)
}
if diff := cmp.Diff({
State: jobpb.JobState_CANCELLING

(beam) branch master updated: remove atomic and wait from strimzi helm terraform (#31132)

2024-04-30 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 5f71e6a592c remove atomic and wait from strimzi helm terraform (#31132)
5f71e6a592c is described below

commit 5f71e6a592ce10c7390f2bdb4f824c1bd59cc00e
Author: Vlado Djerek 
AuthorDate: Tue Apr 30 17:30:56 2024 +0200

remove atomic and wait from strimzi helm terraform (#31132)
---
 .test-infra/kafka/strimzi/01-strimzi-operator/kafka.tf | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.test-infra/kafka/strimzi/01-strimzi-operator/kafka.tf 
b/.test-infra/kafka/strimzi/01-strimzi-operator/kafka.tf
index a51158c1389..7a8ad1d5f24 100644
--- a/.test-infra/kafka/strimzi/01-strimzi-operator/kafka.tf
+++ b/.test-infra/kafka/strimzi/01-strimzi-operator/kafka.tf
@@ -24,8 +24,7 @@ resource "helm_release" "strimzi-helm-release" {
   chart= var.chart_name
   version  = var.chart_version
 
-  atomic  = "true"
-  timeout = 500
+  wait = false
 
   set {
 name  = "watchAnyNamespace"



(beam) branch master updated (68f6b551541 -> 28a2682d54c)

2024-04-26 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from 68f6b551541 One more macos change (#31123)
 add 28a2682d54c add terraform for utility cluster. Add name override to 
gke (#30847)

No new revisions were added by this update.

Summary of changes:
 .gitignore |   11 +-
 .../01-strimzi-operator/.terraform.lock.hcl|   21 +
 .../kafka/strimzi/01-strimzi-operator/README.md|   28 -
 .../strimzi/01-strimzi-operator/common.tfvars  |   12 +-
 .../kafka/strimzi/01-strimzi-operator/kafka.tf |   22 +-
 .../strimzi/01-strimzi-operator/kustomization.yaml |   23 -
 .../strimzi/01-strimzi-operator/namespace.yaml |   22 -
 .../kafka/strimzi/01-strimzi-operator/provider.tf  |6 +-
 ...10-ServiceAccount-strimzi-cluster-operator.yaml |   23 -
 ...-ClusterRole-strimzi-cluster-operator-role.yaml |  170 -
 .../020-RoleBinding-strimzi-cluster-operator.yaml  |   31 -
 ...-ClusterRole-strimzi-cluster-operator-role.yaml |   55 -
 ...lusterRoleBinding-strimzi-cluster-operator.yaml |   31 -
 ...-ClusterRole-strimzi-cluster-operator-role.yaml |   48 -
 .../022-RoleBinding-strimzi-cluster-operator.yaml  |   31 -
 ...-ClusterRole-strimzi-cluster-operator-role.yaml |   82 -
 .../023-RoleBinding-strimzi-cluster-operator.yaml  |   31 -
 .../030-ClusterRole-strimzi-kafka-broker.yaml  |   32 -
 ...i-cluster-operator-kafka-broker-delegation.yaml |   33 -
 .../031-ClusterRole-strimzi-entity-operator.yaml   |   61 -
 ...luster-operator-entity-operator-delegation.yaml |   33 -
 .../033-ClusterRole-strimzi-kafka-client.yaml  |   33 -
 ...i-cluster-operator-kafka-client-delegation.yaml |   34 -
 .../01-strimzi-operator/v0.33.2/040-Crd-kafka.yaml | 6208 
 .../v0.33.2/041-Crd-kafkaconnect.yaml  | 1952 --
 .../v0.33.2/042-Crd-strimzipodset.yaml |  135 -
 .../v0.33.2/043-Crd-kafkatopic.yaml|  270 -
 .../v0.33.2/044-Crd-kafkauser.yaml |  702 ---
 .../v0.33.2/045-Crd-kafkamirrormaker.yaml  | 1228 
 .../v0.33.2/046-Crd-kafkabridge.yaml   | 1178 
 .../v0.33.2/047-Crd-kafkaconnector.yaml|  146 -
 .../v0.33.2/048-Crd-kafkamirrormaker2.yaml | 1992 ---
 .../v0.33.2/049-Crd-kafkarebalance.yaml|  159 -
 .../050-ConfigMap-strimzi-cluster-operator.yaml|   52 -
 .../060-Deployment-strimzi-cluster-operator.yaml   |  155 -
 .../01-strimzi-operator/v0.33.2/kustomization.yaml |   46 -
 .../strimzi/01-strimzi-operator}/variables.tf  |   25 +-
 .../kafka/strimzi/02-kafka-persistent/README.md|   17 +-
 .../gke-internal-load-balanced/kustomization.yaml  |4 +-
 .test-infra/kafka/strimzi/README.md|  133 +-
 kafka-workflows.apache-beam-testing.tfbackend} |1 +
 .../google-kubernetes-engine/.terraform.lock.hcl   |   42 +
 .../google-kubernetes-engine/README.md |   84 +-
 .../google-kubernetes-engine/cluster.tf|   15 +-
 .../copyme.apache-beam-testing.tfbackend}  |1 +
 .../google-kubernetes-engine/iam.tf}   |   28 +-
 ... => kafka-workflows.apache-beam-testing.tfvars} |7 +-
 .../google-kubernetes-engine/outputs.tf}   |5 +-
 .../google-kubernetes-engine/prerequisites.tf  |   24 +
 .../google-kubernetes-engine/provider.tf   |6 +
 .../us-central1.apache-beam-testing.tfvars |4 +-
 .../us-west1.apache-beam-testing.tfvars|4 +-
 .../google-kubernetes-engine/variables.tf  |   16 +
 build.gradle.kts   |3 +
 54 files changed, 283 insertions(+), 15232 deletions(-)
 create mode 100644 
.test-infra/kafka/strimzi/01-strimzi-operator/.terraform.lock.hcl
 delete mode 100644 .test-infra/kafka/strimzi/01-strimzi-operator/README.md
 copy 
playground/kafka-emulator/src/main/java/org/apache/beam/playground/package-info.java
 => .test-infra/kafka/strimzi/01-strimzi-operator/common.tfvars (81%)
 copy 
examples/multi-language/src/main/java/org/apache/beam/examples/multilanguage/JavaPrefixConfiguration.java
 => .test-infra/kafka/strimzi/01-strimzi-operator/kafka.tf (69%)
 delete mode 100644 
.test-infra/kafka/strimzi/01-strimzi-operator/kustomization.yaml
 delete mode 100644 .test-infra/kafka/strimzi/01-strimzi-operator/namespace.yaml
 copy playground/terraform/infrastructure/memorystore/outputs.tf => 
.test-infra/kafka/strimzi/01-strimzi-operator/provider.tf (91%)
 delete mode 100644 
.test-infra/kafka/strimzi/01-strimzi-operator/v0.33.2/010-ServiceAccount-strimzi-cluster-operator.yaml
 delete mode 100644 
.test-infra/kafka/strimzi/01-strimzi-operator/v0.33.2/020-ClusterRole-strimzi-cluster-operator-role.yaml
 delete mode 100644 
.test-infra/kafka/strimzi/01-strimzi-operator/v0.33.2/020-RoleBinding-strimzi-cluster-operator.yaml
 delete mode 100644 

(beam) branch master updated: Fix test failure due to locale sensitivity. (#31069)

2024-04-22 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new f8b81211049 Fix test failure due to locale sensitivity. (#31069)
f8b81211049 is described below

commit f8b81211049e1b2c07b300e8ea283e763c5fe330
Author: Robert Bradshaw 
AuthorDate: Mon Apr 22 13:51:43 2024 -0700

Fix test failure due to locale sensitivity. (#31069)
---
 .../harness/src/test/java/org/apache/beam/fn/harness/CachesTest.java | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git 
a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/CachesTest.java 
b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/CachesTest.java
index b004ba90508..61b62a21083 100644
--- a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/CachesTest.java
+++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/CachesTest.java
@@ -18,6 +18,7 @@
 package org.apache.beam.fn.harness;
 
 import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.anyOf;
 import static org.hamcrest.Matchers.containsString;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNull;
@@ -199,7 +200,9 @@ public class CachesTest {
 }
 
 assertThat(cache.describeStats(), containsString("used/max 600/1000 MB"));
-assertThat(cache.describeStats(), containsString("hit 50.00%"));
+assertThat(
+// Locale sensitive.
+cache.describeStats(), anyOf(containsString("hit 50.00%"), 
containsString("hit 50,00%")));
 assertThat(cache.describeStats(), containsString("lookups 200"));
 assertThat(cache.describeStats(), containsString("avg load time"));
 assertThat(cache.describeStats(), containsString("loads 100"));



(beam) branch master updated (a207e4e8bbe -> 05b7146656a)

2024-04-17 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from a207e4e8bbe Revert "Docker - Remove Deprecated Types (#30826)" (#31010)
 add 05b7146656a Add /job/cancel endpoint to prism web server. (#30825)

No new revisions were added by this update.

Summary of changes:
 sdks/go/pkg/beam/runners/prism/internal/web/web.go | 57 ++
 1 file changed, 57 insertions(+)



(beam) branch master updated: [bug30870]: make consumer polling timeout configurable for KafkaIO.Read (#30877)

2024-04-09 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 3f4b2561c58 [bug30870]: make consumer polling timeout configurable for 
KafkaIO.Read (#30877)
3f4b2561c58 is described below

commit 3f4b2561c58c9c0bf76f4144fb9f06d424f5
Author: xianhualiu <122747878+xianhua...@users.noreply.github.com>
AuthorDate: Tue Apr 9 19:44:06 2024 -0400

[bug30870]: make consumer polling timeout configurable for KafkaIO.Read 
(#30877)

* [bug30870]: make consumer polling timeout configurable for KafkaIO.Read

* fixed spotless complains

* fixed unit tests

* added logs and increased default polling timeout from 1 to 2 seconds.

* spotless apply changes

* Update CHANGES.md

updated changes.md with changes to make consumer polling timeout 
configurable for KafkaIO.Read

* Update CHANGES.md

* Update CHANGES.md

added breaking changes

* Update CHANGES.md
---
 CHANGES.md |  1 +
 .../java/org/apache/beam/sdk/io/kafka/KafkaIO.java | 35 +-
 .../KafkaIOReadImplementationCompatibility.java|  1 +
 .../beam/sdk/io/kafka/ReadFromKafkaDoFn.java   | 16 --
 .../org/apache/beam/sdk/io/kafka/KafkaIOTest.java  | 12 
 .../beam/sdk/io/kafka/ReadFromKafkaDoFnTest.java   | 14 +
 6 files changed, 75 insertions(+), 4 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index 5824c71a98d..941ba23a757 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -73,6 +73,7 @@
 ## Breaking Changes
 
 * X behavior was changed ([#X](https://github.com/apache/beam/issues/X)).
+* Default consumer polling timeout for KafkaIO.Read was increased from 1 
second to 2 seconds. Use KafkaIO.read().withConsumerPollingTimeout(Duration 
duration) to configure this timeout value when necessary 
([#30870](https://github.com/apache/beam/issues/30870)).
 
 ## Deprecations
 
diff --git 
a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java 
b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java
index 231a1b9e49e..c56071e85ad 100644
--- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java
+++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java
@@ -587,6 +587,7 @@ public class KafkaIO {
 .setCommitOffsetsInFinalizeEnabled(false)
 .setDynamicRead(false)
 .setTimestampPolicyFactory(TimestampPolicyFactory.withProcessingTime())
+.setConsumerPollingTimeout(Duration.standardSeconds(2L))
 .build();
   }
 
@@ -706,6 +707,9 @@ public class KafkaIO {
 @Pure
 public abstract @Nullable ErrorHandler 
getBadRecordErrorHandler();
 
+@Pure
+public abstract @Nullable Duration getConsumerPollingTimeout();
+
 abstract Builder toBuilder();
 
 @AutoValue.Builder
@@ -762,6 +766,8 @@ public class KafkaIO {
 return 
setCheckStopReadingFn(CheckStopReadingFnWrapper.of(checkStopReadingFn));
   }
 
+  abstract Builder setConsumerPollingTimeout(Duration 
consumerPollingTimeout);
+
   abstract Read build();
 
   static  void setupExternalBuilder(
@@ -1334,6 +1340,17 @@ public class KafkaIO {
   return 
toBuilder().setBadRecordErrorHandler(badRecordErrorHandler).build();
 }
 
+/**
+ * Sets the timeout time for Kafka consumer polling request in the {@link 
ReadFromKafkaDoFn}.
+ * The default is 2 second.
+ */
+public Read withConsumerPollingTimeout(Duration duration) {
+  checkState(
+  duration == null || duration.compareTo(Duration.ZERO) > 0,
+  "Consumer polling timeout must be greater than 0.");
+  return toBuilder().setConsumerPollingTimeout(duration).build();
+}
+
 /** Returns a {@link PTransform} for PCollection of {@link KV}, dropping 
Kafka metatdata. */
 public PTransform>> withoutMetadata() {
   return new TypedWithoutMetadata<>(this);
@@ -1596,7 +1613,8 @@ public class KafkaIO {
 
.withValueDeserializerProvider(kafkaRead.getValueDeserializerProvider())
 .withManualWatermarkEstimator()
 
.withTimestampPolicyFactory(kafkaRead.getTimestampPolicyFactory())
-.withCheckStopReadingFn(kafkaRead.getCheckStopReadingFn());
+.withCheckStopReadingFn(kafkaRead.getCheckStopReadingFn())
+
.withConsumerPollingTimeout(kafkaRead.getConsumerPollingTimeout());
 if (kafkaRead.isCommitOffsetsInFinalizeEnabled()) {
   readTransform = readTransform.commitOffsets();
 }
@@ -2036,6 +2054,9 @@ public class KafkaIO {
 @Pure
 abstract ErrorHandler getBadRecordErrorHandler();
 
+@Pure
+abstract @Nullable Duration getConsumerPollingTimeout();
+

(beam) branch master updated: Randomize object path for validation tasks (#30719)

2024-03-24 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new cb18dd3a0f7 Randomize object path for validation tasks (#30719)
cb18dd3a0f7 is described below

commit cb18dd3a0f7221a96ddedf95a65c19f8ec9205e3
Author: Damon 
AuthorDate: Sat Mar 23 23:06:04 2024 -0700

Randomize object path for validation tasks (#30719)
---
 buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy 
b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
index a03b1940e4a..e9c0fae5cc6 100644
--- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
+++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
@@ -2504,7 +2504,7 @@ class BeamModulePlugin implements Plugin {
 argsNeeded.add("--gcpRegion=${config.gcpRegion}")
   }
   if (config.gcsBucket) {
-argsNeeded.add("--gcsBucket=${config.gcsBucket}")
+
argsNeeded.add("--gcsBucket=${config.gcsBucket}/${randomUUID().toString()}")
   }
   if (config.bqDataset) {
 argsNeeded.add("--bqDataset=${config.bqDataset}")



(beam) branch master updated: Revert "[BEAM-30531] Automatically execute unbounded pipelines in streaming mode. (#30533)" (#30706)

2024-03-22 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new e207a147c55 Revert "[BEAM-30531] Automatically execute unbounded 
pipelines in streaming mode. (#30533)" (#30706)
e207a147c55 is described below

commit e207a147c55ae7251f116183270f92e53b9dc350
Author: Damon 
AuthorDate: Fri Mar 22 09:43:22 2024 -0700

Revert "[BEAM-30531] Automatically execute unbounded pipelines in streaming 
mode. (#30533)" (#30706)

This reverts commit 1c55117fc3349508edf8af07b1b361313c4b8a33.
---
 CHANGES.md |  1 -
 .../runners/dataflow/dataflow_runner.py| 20 ---
 .../runners/dataflow/dataflow_runner_test.py   | 61 --
 3 files changed, 82 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index af92752d331..2064d2387ae 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -112,7 +112,6 @@
 * Merged sdks/java/fn-execution and runners/core-construction-java into the 
main SDK. These artifacts were never meant for users, but noting
   that they no longer exist. These are steps to bring portability into the 
core SDK alongside all other core functionality.
 * Added Vertex AI Feature Store handler for Enrichment transform (Python) 
([#30388](https://github.com/apache/beam/pull/30388))
-* Python Dataflow users no longer need to manually specify --streaming for 
pipelines using unbounded sources such as ReadFromPubSub.
 
 ## Breaking Changes
 
diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py 
b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
index aae8e2e04ee..db6a5235ac9 100644
--- a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
+++ b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
@@ -42,7 +42,6 @@ from apache_beam.options.pipeline_options import TestOptions
 from apache_beam.options.pipeline_options import TypeOptions
 from apache_beam.options.pipeline_options import WorkerOptions
 from apache_beam.portability import common_urns
-from apache_beam.portability.api import beam_runner_api_pb2
 from apache_beam.runners.common import group_by_key_input_visitor
 from apache_beam.runners.dataflow.internal.clients import dataflow as 
dataflow_api
 from apache_beam.runners.runner import PipelineResult
@@ -415,12 +414,6 @@ class DataflowRunner(PipelineRunner):
   self.proto_pipeline, self.proto_context = pipeline.to_runner_api(
   return_context=True, default_environment=self._default_environment)
 
-if any(pcoll.is_bounded == beam_runner_api_pb2.IsBounded.UNBOUNDED
-   for pcoll in self.proto_pipeline.components.pcollections.values()):
-  options.view_as(StandardOptions).streaming = True
-if options.view_as(StandardOptions).streaming:
-  _check_and_add_missing_streaming_options(options)
-
 # Dataflow can only handle Docker environments.
 for env_id, env in self.proto_pipeline.components.environments.items():
   self.proto_pipeline.components.environments[env_id].CopyFrom(
@@ -478,7 +471,6 @@ class DataflowRunner(PipelineRunner):
 if test_options.dry_run:
   result = PipelineResult(PipelineState.DONE)
   result.wait_until_finish = lambda duration=None: None
-  result.job = self.job
   return result
 
 # Get a Dataflow API client and set its options
@@ -604,21 +596,9 @@ def _check_and_add_missing_options(options):
 "an SDK preinstalled in the default Dataflow dev runtime environment "
 "or in a custom container image, use --sdk_location=container.")
 
-
-def _check_and_add_missing_streaming_options(options):
-  # Type: (PipelineOptions) -> None
-
-  """Validates and adds missing pipeline options depending on options set.
-
-  Must be called after it has been determined whether we're running in
-  streaming mode.
-
-  :param options: PipelineOptions for this pipeline.
-  """
   # Streaming only supports using runner v2 (aka unified worker).
   # Runner v2 only supports using streaming engine (aka windmill service)
   if options.view_as(StandardOptions).streaming:
-debug_options = options.view_as(DebugOptions)
 google_cloud_options = options.view_as(GoogleCloudOptions)
 if (not google_cloud_options.enable_streaming_engine and
 (debug_options.lookup_experiment("enable_windmill_service") or
diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_runner_test.py 
b/sdks/python/apache_beam/runners/dataflow/dataflow_runner_test.py
index b5568305ce6..bef184c45c4 100644
--- a/sdks/python/apache_beam/runners/dataflow/dataflow_runner_test.py
+++ b/sdks/python/apache_beam/runners/dataflow/dataflow_runner_test.py
@@ -39,7 +39,6 @@ from apache_beam.runners import create_runner
 from apache_beam.runners.dataflow.dataflow_runn

(beam) branch master updated: Add unique ID to Dataflow BOM tests (#30709)

2024-03-21 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 924c6a29840 Add unique ID to Dataflow BOM tests (#30709)
924c6a29840 is described below

commit 924c6a29840d2692b315b4499f3eec17c465485f
Author: Damon 
AuthorDate: Thu Mar 21 16:12:06 2024 -0700

Add unique ID to Dataflow BOM tests (#30709)
---
 release/src/main/groovy/mobilegaming-java-dataflowbom.groovy | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/release/src/main/groovy/mobilegaming-java-dataflowbom.groovy 
b/release/src/main/groovy/mobilegaming-java-dataflowbom.groovy
index 87944588e35..a0fd0f13c75 100644
--- a/release/src/main/groovy/mobilegaming-java-dataflowbom.groovy
+++ b/release/src/main/groovy/mobilegaming-java-dataflowbom.groovy
@@ -18,7 +18,6 @@
  */
 
 t = new TestScripts(args)
-mobileGamingCommands = new MobileGamingCommands(testScripts: t)
 
 /*
  * Run the mobile game examples on Dataflow.
@@ -36,6 +35,8 @@ String command_output_text
  *  Run the UserScore example on DataflowRunner
  * */
 
+mobileGamingCommands = new MobileGamingCommands(testScripts: t, testRunId: 
UUID.randomUUID().toString())
+
 t.intent("Running: UserScore example with Beam GCP BOM on DataflowRunner")
 t.run(mobileGamingCommands.createPipelineCommand("UserScore", runner))
 command_output_text = t.run "gsutil cat 
gs://${t.gcsBucket()}/${mobileGamingCommands.getUserScoreOutputName(runner)}* | 
grep user19_BananaWallaby"
@@ -48,6 +49,8 @@ t.run "gsutil rm 
gs://${t.gcsBucket()}/${mobileGamingCommands.getUserScoreOutput
  * Run the HourlyTeamScore example on DataflowRunner
  * */
 
+mobileGamingCommands = new MobileGamingCommands(testScripts: t, testRunId: 
UUID.randomUUID().toString())
+
 t.intent("Running: HourlyTeamScore example with Beam GCP BOM on 
DataflowRunner")
 t.run(mobileGamingCommands.createPipelineCommand("HourlyTeamScore", runner))
 command_output_text = t.run "gsutil cat 
gs://${t.gcsBucket()}/${mobileGamingCommands.getHourlyTeamScoreOutputName(runner)}*
 | grep AzureBilby "



(beam) branch master updated: Use unique GCS object path (#30690)

2024-03-20 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 4a3b6c55fd8 Use unique GCS object path (#30690)
4a3b6c55fd8 is described below

commit 4a3b6c55fd8a02911b99d7ef8b5fa6f006c23fb2
Author: Damon 
AuthorDate: Wed Mar 20 16:35:59 2024 -0700

Use unique GCS object path (#30690)
---
 release/src/main/groovy/MobileGamingCommands.groovy   | 5 +++--
 release/src/main/groovy/mobilegaming-java-dataflow.groovy | 5 -
 release/src/main/groovy/mobilegaming-java-direct.groovy   | 5 -
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/release/src/main/groovy/MobileGamingCommands.groovy 
b/release/src/main/groovy/MobileGamingCommands.groovy
index 2b38943067a..39f526d7da8 100644
--- a/release/src/main/groovy/MobileGamingCommands.groovy
+++ b/release/src/main/groovy/MobileGamingCommands.groovy
@@ -21,6 +21,7 @@
 class MobileGamingCommands {
 
   private TestScripts testScripts
+  private String testRunId
 
   private static final INPUT_GAMING_DATA = 
"gs://dataflow-samples/game/5000_gaming_data.csv"
 
@@ -58,11 +59,11 @@ class MobileGamingCommands {
 "BattleshipGrey"))
 
   public String getUserScoreOutputName(String runner){
-return "java-userscore-result-${RUNNERS[runner]}.txt"
+return "java-userscore-result-${RUNNERS[runner]}/${testRunId}/output"
   }
 
   public String getHourlyTeamScoreOutputName(String runner){
-return "java-hourlyteamscore-result-${RUNNERS[runner]}.txt"
+return "java-hourlyteamscore-result-${RUNNERS[runner]}/${testRunId}/output"
   }
 
   public String createPipelineCommand(String exampleName, String runner, 
String jobName='', String className=null){
diff --git a/release/src/main/groovy/mobilegaming-java-dataflow.groovy 
b/release/src/main/groovy/mobilegaming-java-dataflow.groovy
index 462b3d2cea0..bb0b76bd675 100644
--- a/release/src/main/groovy/mobilegaming-java-dataflow.groovy
+++ b/release/src/main/groovy/mobilegaming-java-dataflow.groovy
@@ -18,7 +18,6 @@
  */
 
 t = new TestScripts(args)
-mobileGamingCommands = new MobileGamingCommands(testScripts: t)
 
 /*
  * Run the mobile game examples on Dataflow.
@@ -36,6 +35,8 @@ String command_output_text
  *  Run the UserScore example on DataflowRunner
  * */
 
+mobileGamingCommands = new MobileGamingCommands(testScripts: t, testRunId: 
UUID.randomUUID().toString())
+
 t.intent("Running: UserScore example on DataflowRunner")
 t.run(mobileGamingCommands.createPipelineCommand("UserScore", runner))
 command_output_text = t.run "gsutil cat 
gs://${t.gcsBucket()}/${mobileGamingCommands.getUserScoreOutputName(runner)}* | 
grep user19_BananaWallaby"
@@ -48,6 +49,8 @@ t.run "gsutil rm 
gs://${t.gcsBucket()}/${mobileGamingCommands.getUserScoreOutput
  * Run the HourlyTeamScore example on DataflowRunner
  * */
 
+mobileGamingCommands = new MobileGamingCommands(testScripts: t, testRunId: 
UUID.randomUUID().toString())
+
 t.intent("Running: HourlyTeamScore example on DataflowRunner")
 t.run(mobileGamingCommands.createPipelineCommand("HourlyTeamScore", runner))
 command_output_text = t.run "gsutil cat 
gs://${t.gcsBucket()}/${mobileGamingCommands.getHourlyTeamScoreOutputName(runner)}*
 | grep AzureBilby "
diff --git a/release/src/main/groovy/mobilegaming-java-direct.groovy 
b/release/src/main/groovy/mobilegaming-java-direct.groovy
index 9ff24cefaf9..3c6f4ca01a6 100644
--- a/release/src/main/groovy/mobilegaming-java-direct.groovy
+++ b/release/src/main/groovy/mobilegaming-java-direct.groovy
@@ -18,7 +18,6 @@
  */
 
 t = new TestScripts(args)
-mobileGamingCommands = new MobileGamingCommands(testScripts: t)
 
 /*
  * Run the mobile game examples on DirectRunner.
@@ -36,6 +35,8 @@ String command_output_text
  * Run the UserScore example with DirectRunner
  * */
 
+mobileGamingCommands = new MobileGamingCommands(testScripts: t, testRunId: 
UUID.randomUUID().toString())
+
 t.intent("Running: UserScore example on DirectRunner")
 t.run(mobileGamingCommands.createPipelineCommand("UserScore", runner))
 command_output_text = t.run "grep user19_BananaWallaby 
${mobileGamingCommands.getUserScoreOutputName(runner)}* "
@@ -47,6 +48,8 @@ t.success("UserScore successfully run on DirectRunners.")
  * Run the HourlyTeamScore example with DirectRunner
  * */
 
+mobileGamingCommands = new MobileGamingCommands(testScripts: t, testRunId: 
UUID.randomUUID().toString())
+
 t.intent("Running: HourlyTeamScore example on DirectRunner")
 t.run(mobileGamingCommands.createPipelineCommand("HourlyTeamScore", runner))
 command_output_text = t.run "grep AzureBilby 
${mobileGamingCommands.getHourlyTeamScoreOutputName(runner)}* "



(beam) branch weekly_update_python_dependencies_1708820977 updated (3b0a585ccb9 -> fb7ba65e223)

2024-03-19 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch 
weekly_update_python_dependencies_1708820977
in repository https://gitbox.apache.org/repos/asf/beam.git


 discard 3b0a585ccb9 Update Python Dependencies
 add 436f3b223f0 [runners-core-java] restore original 
SerializablePipelineOptions
 add 4982f6ac6e4 [sdks-java-core] add serialVersionUID to PTransform
 add 4f13a4e1ea2 [runners-core] add jackson
 add 907a4514386 Merge pull request #30403: [flink] #30402 restore 
upgradability
 add d5a2e3b3c9b Bump github.com/aws/aws-sdk-go-v2/config from 1.26.2 to 
1.27.4 in /sdks (#30415)
 add ffe2dba5320 Implementing lull reporting at bundle level processing 
(#29882)
 add 6406cfe3c9b Remove some uses of ClassLoadingStrategy.Default.INJECTION 
(#30367)
 add 2ae4a28e6fc Force downgrade mpmath to avoid test breakages (#30418)
 add 6c3e8ad1af1 [Dataflow Streaming] Start to refactor persistence layer 
to prepare for direct path (#30265)
 add 2a84a20236b [Python] Add doc comment for WriteToText.skip_if_empty 
(#30409)
 add b7a58bf8b2d Ensure flatten windows match (#30410)
 add 8fd24b76a19 Bump google.golang.org/grpc from 1.61.0 to 1.62.0 in /sdks 
(#30434)
 add 0a184f449a5 Bump orjson from 3.9.14 to 3.9.15 in 
/sdks/python/container/py310 (#30426)
 add a1d87549f78 Make Monitoring build method public (#30431)
 add 5af76b0de1a Remove unused code (#30432)
 add 549faba9b98 Roll forward "Read API Source v2 (#25392)" fix data loss 
(#28778)
 add 2b9d958807d Wire error handling into PubSubIO and add initial tests 
(#30372)
 add 2bebb27a64f Fix Python PostCommit BigQuery JSON (#30438)
 add 183cc9a2962 [yaml] fix providers doc
 add 61c6d09b153 Merge pull request #30433 [yaml] Fix providers doc.
 add 09a6fc5c5f3 Fix updating of annotations for explicitly named 
transforms.
 add e6e91b99542 Expand on comment.
 add 80d4c852f36 Merge pull request #30380 Fix updating of annotations for 
explicitly named transforms.
 add 2d85be72f31 Fix Avro Version PostCommit
 add fbde0ce32c9 Merge pull request #30442: Fix Avro Version PostCommit
 add 4f966a85884 Add yaml to programming guide. (#30269)
 add 53cae786126 Revise documentation on managing dependencies. (#30450)
 add c5e1d106cb5 Support passing credentials from pipeline options into 
SpannerIO.readChangeStream (#30361)
 add 4ea3898e589 Bump cryptography from 42.0.3 to 42.0.4 in 
/sdks/python/container/py39 (#30373)
 add aebb34a94c3 [Dataflow] ActiveWorkRefresh (#30390)
 add a20705c20d1 Fix issue creation for grafana alerts (#30424)
 add 6cae6b874a5 make the flaky_test_detection script read only be default 
(#30461)
 add 168d06bade5 [Go SDK] filter out TestTestStreamInt16Sequence from flink 
VR tests. (#30462)
 add 935ca9c58d6 fix alerting path for grafana provisioning (#30464)
 add be1526b7b57 Register streaming specific options with 
DataflowPipelineRegistrar (#30474)
 add 770c6fe39cc Make windmill service stream max backoff configurable 
(#30475)
 add 4ad8d530916 Add last error time to stream error message (#30476)
 add e1fb9ce99a8 Unskip test_user_agent_insert_all (#30455)
 add 15d16f706f3 Add instructions on reviewing Python version updates 
(#30481)
 add 2b8a737ebd0 Add test code to overwrite SQL in Beam Python JDBC (#30417)
 add 9c3f209aa69 Initial prototype of using Beam transforms directly in a 
Flink pipeline. (#30332)
 add fb58cfc0cad Duet AI data encoding prompts (no links) (#30420)
 add ddb9161c412 Update Python Dependencies (#30470)
 add 264493bd2d7 adding github workflow for running nightly playground ci 
(#30454)
 add 0e3e796a15e Move credential rotation and gate on beam repo (#30479)
 add b776d705997 Duet AI ML data processing prompts (no links) (#30421)
 add 6a03f9ba062 Update triggering frequency doc (#30457)
 add fa43f82f8a2 Disable BigQueryStorageStreamSource.splitAtFraction when 
read api v2 used (#30443)
 add 6f8f12acd03 Cleanup bigtable open resources on teardown (#30453)
 add 94a51619f3d Make the TransformService test suite not pass trivially.
 add 1c74cebda6c Merge pull request #30491: Make the TransformService test 
suite not pass trivially.
 add 04c7be3e18e Fix grafana url for alerts and GA Post-Commits Status 
dashboard (#30468)
 add 9ec2d49af98 Duet AI Prompt - Beam YAML (no links) (#30440)
 add 407d5077b9f Remove JsonIgore for usePublicIps
 add 89366bbb4aa Merge pull request #30484: Remove JsonIgore for 
usePublicIps
 add 3c46415a3ce Document yaml pipeline options (#30490)
 add 0440587898c Update Go Version for artifacts to Go v1.21.8 (#30501)
 add 3f474535632 Expose JmsIO read receive timeout (#30485)
 add b6301b52058 fix: support reading arrays of structs from bigquery with 
schemas (#30448)
 add bc1a458bb7c Bigtable: provide a way to override BigtableDa

(beam) branch master updated: Update header bannder (#30600)

2024-03-13 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new da26f5aa6ba Update header bannder (#30600)
da26f5aa6ba is described below

commit da26f5aa6badb7c1d00de8a11a4fa0f9a394c2ac
Author: Damon 
AuthorDate: Wed Mar 13 12:33:17 2024 -0700

Update header bannder (#30600)
---
 website/www/site/assets/js/sliders/top-banners.js | 10 +++---
 website/www/site/layouts/partials/header.html |  8 ++--
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/website/www/site/assets/js/sliders/top-banners.js 
b/website/www/site/assets/js/sliders/top-banners.js
index 4474198990f..628dee2b457 100644
--- a/website/www/site/assets/js/sliders/top-banners.js
+++ b/website/www/site/assets/js/sliders/top-banners.js
@@ -11,10 +11,14 @@
 // the License.
 
 new Swiper('.top-banners', {
-  autoplay: {
-delay: 5000,
+  autoplay: false,
+  effect: 'fade',
+  loop: true,
+  navigation: {
+enabled: true,
+nextEl: '.swiper-button-next',
+prevEl: '.swiper-button-prev',
   },
-  loop: false,
   pagination: {
 el: ".top-banners .swiper-pagination",
 clickable: true,
diff --git a/website/www/site/layouts/partials/header.html 
b/website/www/site/layouts/partials/header.html
index 957e3de2b1e..139ae9bc885 100644
--- a/website/www/site/layouts/partials/header.html
+++ b/website/www/site/layouts/partials/header.html
@@ -207,12 +207,6 @@
 
 
   
-
-  https://beamcollege.dev/;>
-
-
-  
-
 
   https://tour.beam.apache.org;>
 
@@ -227,6 +221,8 @@
 
   
   
+  
+  
 
 {{ $swiperSlider := resources.Get "js/swiper-bundle.min.js" | minify | 
fingerprint }}
 



(beam) branch master updated (afae0215f8f -> a391198b5a6)

2024-03-08 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from afae0215f8f Try fix Dataflow PreCommit on release branch (#30574)
 add a391198b5a6 [RRIO]: Add RequestResponseIO examples and documentation 
on website for the Java SDK (#30430)

No new revisions were added by this update.

Summary of changes:
 examples/java/webapis/build.gradle |  51 ++
 .../beam/examples/webapis/AdditionalSnippets.java  |  50 ++
 .../beam/examples/webapis/GeminiAIClient.java  | 126 +
 .../beam/examples/webapis/GeminiAIExample.java | 183 +++
 .../beam/examples/webapis/GeminiAIOptions.java |  34 ++
 .../webapis/GenerateContentRequestCoder.java   |  48 ++
 .../webapis/GenerateContentResponseCoder.java  |  44 ++
 .../beam/examples/webapis/HttpImageClient.java | 111 +
 .../apache/beam/examples/webapis/ImageRequest.java |  78 +++
 .../beam/examples/webapis/ImageRequestCoder.java   |  48 ++
 .../beam/examples/webapis/ImageResponse.java   |  53 ++
 .../beam/examples/webapis/ImageResponseCoder.java  |  56 +++
 .../org/apache/beam/examples/webapis/Images.java   |  96 
 .../java/org/apache/beam/examples/webapis/Log.java |  56 +++
 .../examples/webapis/UsingHttpClientExample.java   |  68 +++
 .../beam/examples/webapis/GeminiAIExampleTest.java |  52 ++
 .../webapis/UsingHttpClientExampleTest.java|  46 ++
 .../resources/beam/checkstyle/suppressions.xml |   1 +
 settings.gradle.kts|   1 +
 .../en/documentation/io/built-in/webapis.md| 536 +
 .../site/content/en/documentation/io/connectors.md |  13 +
 .../partials/section-menu/en/documentation.html|   1 +
 22 files changed, 1752 insertions(+)
 create mode 100644 examples/java/webapis/build.gradle
 create mode 100644 
examples/java/webapis/src/main/java/org/apache/beam/examples/webapis/AdditionalSnippets.java
 create mode 100644 
examples/java/webapis/src/main/java/org/apache/beam/examples/webapis/GeminiAIClient.java
 create mode 100644 
examples/java/webapis/src/main/java/org/apache/beam/examples/webapis/GeminiAIExample.java
 create mode 100644 
examples/java/webapis/src/main/java/org/apache/beam/examples/webapis/GeminiAIOptions.java
 create mode 100644 
examples/java/webapis/src/main/java/org/apache/beam/examples/webapis/GenerateContentRequestCoder.java
 create mode 100644 
examples/java/webapis/src/main/java/org/apache/beam/examples/webapis/GenerateContentResponseCoder.java
 create mode 100644 
examples/java/webapis/src/main/java/org/apache/beam/examples/webapis/HttpImageClient.java
 create mode 100644 
examples/java/webapis/src/main/java/org/apache/beam/examples/webapis/ImageRequest.java
 create mode 100644 
examples/java/webapis/src/main/java/org/apache/beam/examples/webapis/ImageRequestCoder.java
 create mode 100644 
examples/java/webapis/src/main/java/org/apache/beam/examples/webapis/ImageResponse.java
 create mode 100644 
examples/java/webapis/src/main/java/org/apache/beam/examples/webapis/ImageResponseCoder.java
 create mode 100644 
examples/java/webapis/src/main/java/org/apache/beam/examples/webapis/Images.java
 create mode 100644 
examples/java/webapis/src/main/java/org/apache/beam/examples/webapis/Log.java
 create mode 100644 
examples/java/webapis/src/main/java/org/apache/beam/examples/webapis/UsingHttpClientExample.java
 create mode 100644 
examples/java/webapis/src/test/java/org/apache/beam/examples/webapis/GeminiAIExampleTest.java
 create mode 100644 
examples/java/webapis/src/test/java/org/apache/beam/examples/webapis/UsingHttpClientExampleTest.java
 create mode 100644 
website/www/site/content/en/documentation/io/built-in/webapis.md



(beam) branch master updated: Add vertex AI dependency (#30553)

2024-03-06 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 827e96d671a Add vertex AI dependency (#30553)
827e96d671a is described below

commit 827e96d671afb0f3995aad6d6a06859866b0e628
Author: Damon 
AuthorDate: Wed Mar 6 18:47:07 2024 -0800

Add vertex AI dependency (#30553)
---
 buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy | 1 +
 1 file changed, 1 insertion(+)

diff --git 
a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy 
b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
index fae25d67b1b..c5d7ed8d571 100644
--- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
+++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
@@ -756,6 +756,7 @@ class BeamModulePlugin implements Plugin {
 google_cloud_platform_libraries_bom : 
"com.google.cloud:libraries-bom:26.32.0",
 google_cloud_spanner: 
"com.google.cloud:google-cloud-spanner", // google_cloud_platform_libraries_bom 
sets version
 google_cloud_spanner_test   : 
"com.google.cloud:google-cloud-spanner:$google_cloud_spanner_version:tests",
+google_cloud_vertexai   : 
"com.google.cloud:google-cloud-vertexai", // 
google_cloud_platform_libraries_bom sets version
 google_code_gson: 
"com.google.code.gson:gson:$google_code_gson_version",
 // google-http-client's version is explicitly declared for 
sdks/java/maven-archetypes/examples
 // This version should be in line with the one in 
com.google.cloud:libraries-bom.



(beam) branch master updated: Remove unused code (#30432)

2024-02-27 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 5af76b0de1a Remove unused code (#30432)
5af76b0de1a is described below

commit 5af76b0de1a2fb083e51d6f9be54f0989aabb0d0
Author: Damon 
AuthorDate: Tue Feb 27 10:36:53 2024 -0800

Remove unused code (#30432)
---
 sdks/java/io/rrio/build.gradle |   2 -
 .../apache/beam/io/requestresponse/ApiIOError.java |  25 +-
 .../org/apache/beam/io/requestresponse/Call.java   |   4 +-
 .../org/apache/beam/io/requestresponse/Quota.java  |  69 
 .../beam/io/requestresponse/RequestResponseIO.java |  53 ---
 .../ThrottleWithExternalResource.java  | 418 -
 .../ThrottleWithoutExternalResource.java   |  57 ---
 .../ThrottleWithExternalResourceIT.java| 186 -
 .../ThrottleWithExternalResourceTest.java  |  77 
 9 files changed, 11 insertions(+), 880 deletions(-)

diff --git a/sdks/java/io/rrio/build.gradle b/sdks/java/io/rrio/build.gradle
index 4ecdf4e91df..9d51df4c1dc 100644
--- a/sdks/java/io/rrio/build.gradle
+++ b/sdks/java/io/rrio/build.gradle
@@ -34,8 +34,6 @@ dependencies {
 implementation project(path: ":sdks:java:core", configuration: "shadow")
 implementation library.java.joda_time
 implementation library.java.vendored_guava_32_1_2_jre
-implementation library.java.jackson_core
-implementation library.java.jackson_databind
 implementation "redis.clients:jedis:$jedisVersion"
 
 testImplementation project(path: ":sdks:java:core", configuration: 
"shadowTest")
diff --git 
a/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/ApiIOError.java
 
b/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/ApiIOError.java
index abb25bd33ba..5a3663fb103 100644
--- 
a/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/ApiIOError.java
+++ 
b/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/ApiIOError.java
@@ -17,10 +17,6 @@
  */
 package org.apache.beam.io.requestresponse;
 
-import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull;
-
-import com.fasterxml.jackson.core.JsonProcessingException;
-import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.auto.value.AutoValue;
 import java.util.Optional;
 import org.apache.beam.sdk.schemas.AutoValueSchema;
@@ -36,19 +32,18 @@ import org.joda.time.Instant;
 @AutoValue
 public abstract class ApiIOError {
 
-  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
   /**
* Instantiate an {@link ApiIOError} from an {@link ErrorT} {@link T} 
element. The {@link T}
-   * element is converted to a JSON string.
+   * element is converted to a string by calling {@link Object#toString()}.
*/
-  static  ApiIOError of(ErrorT e, T element)
-  throws JsonProcessingException {
-
-String json = OBJECT_MAPPER.writeValueAsString(checkStateNotNull(element));
+  static  ApiIOError of(ErrorT e, T element) {
+String request = "";
+if (element != null) {
+  request = element.toString();
+}
 
 return ApiIOError.builder()
-.setRequestAsJsonString(json)
+.setRequestAsString(request)
 .setMessage(Optional.ofNullable(e.getMessage()).orElse(""))
 .setObservedTimestamp(Instant.now())
 .setStackTrace(Throwables.getStackTraceAsString(e))
@@ -59,8 +54,8 @@ public abstract class ApiIOError {
 return new AutoValue_ApiIOError.Builder();
   }
 
-  /** The JSON string representation of the request associated with the error. 
*/
-  public abstract String getRequestAsJsonString();
+  /** The string representation of the request associated with the error. */
+  public abstract String getRequestAsString();
 
   /** The observed timestamp of the error. */
   public abstract Instant getObservedTimestamp();
@@ -74,7 +69,7 @@ public abstract class ApiIOError {
   @AutoValue.Builder
   abstract static class Builder {
 
-abstract Builder setRequestAsJsonString(String value);
+abstract Builder setRequestAsString(String value);
 
 abstract Builder setObservedTimestamp(Instant value);
 
diff --git 
a/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/Call.java 
b/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/Call.java
index 65038a8ffa3..b6941f8fcbb 100644
--- 
a/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/Call.java
+++ 
b/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/Call.java
@@ -20,7 +20,6 @@ package org.apache.beam.io.requestresponse;
 import static org.apache.beam.io.requestresponse.Monitoring.incIfPresent;
 import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull;
 
-import com.fasterxml.jackson

(beam) branch master updated (0a184f449a5 -> a1d87549f78)

2024-02-27 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from 0a184f449a5 Bump orjson from 3.9.14 to 3.9.15 in 
/sdks/python/container/py310 (#30426)
 add a1d87549f78 Make Monitoring build method public (#30431)

No new revisions were added by this update.

Summary of changes:
 .../src/main/java/org/apache/beam/io/requestresponse/Monitoring.java| 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)



(beam) branch master updated: [Proposal] Improve DisplayData support in PTransform API (#30115)

2024-02-08 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new ff272db75e6 [Proposal] Improve DisplayData support in PTransform API 
(#30115)
ff272db75e6 is described below

commit ff272db75e685024dcae4cf84f3a979568c8ea62
Author: Claire McGinty 
AuthorDate: Thu Feb 8 12:43:20 2024 -0500

[Proposal] Improve DisplayData support in PTransform API (#30115)

* Improve DisplayData support in PTransform API

* checkstyle
---
 .../org/apache/beam/sdk/transforms/PTransform.java | 30 +-
 .../apache/beam/sdk/transforms/PTransformTest.java | 21 +++
 .../org/apache/beam/sdk/transforms/ParDoTest.java  |  4 +++
 3 files changed, 54 insertions(+), 1 deletion(-)

diff --git 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PTransform.java 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PTransform.java
index c0c3638b28d..939ac043f76 100644
--- 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PTransform.java
+++ 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PTransform.java
@@ -20,14 +20,17 @@ package org.apache.beam.sdk.transforms;
 import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;
 import java.io.Serializable;
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import org.apache.beam.sdk.Pipeline;
 import org.apache.beam.sdk.coders.CannotProvideCoderException;
 import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.options.PipelineOptions;
 import org.apache.beam.sdk.transforms.display.DisplayData;
+import org.apache.beam.sdk.transforms.display.DisplayData.ItemSpec;
 import org.apache.beam.sdk.transforms.display.HasDisplayData;
 import org.apache.beam.sdk.transforms.resourcehints.ResourceHints;
 import org.apache.beam.sdk.util.NameUtils;
@@ -217,6 +220,25 @@ public abstract class PTransform
 return resourceHints;
   }
 
+  /**
+   * Set display data for your PTransform.
+   *
+   * @param displayData a list of {@link ItemSpec} instances.
+   * @return a reference to the same transfrom instance.
+   * For example:
+   * {@code
+   * Pipeline p = ...
+   * ...
+   * p.apply(new 
SomeTransform().setDisplayData(ImmutableList.of(DisplayData.item("userFn", 
userFn.getClass(
+   * ...
+   *
+   * }
+   */
+  public PTransform setDisplayData(@NonNull List> 
displayData) {
+this.displayData = displayData;
+return this;
+  }
+
   /** Returns annotations map to provide additional hints to the runner. */
   public Map getAnnotations() {
 return annotations;
@@ -243,6 +265,8 @@ public abstract class PTransform
 
   protected transient @NonNull Map annotations = new 
HashMap<>();
 
+  protected transient @NonNull List> displayData = new 
ArrayList<>();
+
   protected PTransform() {
 this.name = null;
   }
@@ -346,7 +370,11 @@ public abstract class PTransform
* provide their own display data.
*/
   @Override
-  public void populateDisplayData(DisplayData.Builder builder) {}
+  public void populateDisplayData(DisplayData.Builder builder) {
+if (this.displayData != null) {
+  this.displayData.forEach(builder::add);
+}
+  }
 
   /**
* For a {@code SerializableFunction} {@code fn}, returns a 
{@code PTransform}
diff --git 
a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/PTransformTest.java
 
b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/PTransformTest.java
index 7196b244708..4692d16a605 100644
--- 
a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/PTransformTest.java
+++ 
b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/PTransformTest.java
@@ -17,9 +17,11 @@
  */
 package org.apache.beam.sdk.transforms;
 
+import static 
org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasDisplayItem;
 import static org.apache.beam.sdk.values.TypeDescriptors.integers;
 import static org.hamcrest.MatcherAssert.assertThat;
 import static org.hamcrest.Matchers.empty;
+import static org.hamcrest.Matchers.hasSize;
 import static org.junit.Assert.assertEquals;
 
 import java.io.Serializable;
@@ -29,6 +31,7 @@ import org.apache.beam.sdk.testing.TestPipeline;
 import org.apache.beam.sdk.transforms.display.DisplayData;
 import org.apache.beam.sdk.values.PCollection;
 import org.apache.beam.sdk.values.PCollectionList;
+import 
org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
@@ -54,6 +57,24 @@ public class PTransformTest implements Serializable {
 assertThat(displayData.items(), empty());
   }
 
+  @Test
+  public void testSetDisplayData() {
+PTransform, PCollecti

(beam) branch master updated (fe262b630b3 -> 9361141c31b)

2024-02-08 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from fe262b630b3 [lint] Doc comment exported ErrCancelled. (#30251)
 add 9361141c31b Add Drain to Job Management proto (#30157)

No new revisions were added by this update.

Summary of changes:
 .../model/job_management/v1/beam_job_api.proto |   16 +
 .../beam/model/fnexecution_v1/beam_fn_api.pb.go|  803 ---
 .../jobmanagement_v1/beam_expansion_api.pb.go  |  295 +--
 .../beam/model/jobmanagement_v1/beam_job_api.pb.go |  887 
 .../model/jobmanagement_v1/beam_job_api_grpc.pb.go |   38 +
 .../beam/model/pipeline_v1/beam_runner_api.pb.go   | 2234 ++--
 6 files changed, 2379 insertions(+), 1894 deletions(-)



(beam) branch master updated: [lint] Doc comment exported ErrCancelled. (#30251)

2024-02-08 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new fe262b630b3 [lint] Doc comment exported ErrCancelled. (#30251)
fe262b630b3 is described below

commit fe262b630b3fd51854e667aa61a3ad0155f81634
Author: Robert Burke 
AuthorDate: Thu Feb 8 09:40:13 2024 -0800

[lint] Doc comment exported ErrCancelled. (#30251)
---
 sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go 
b/sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go
index 0da37ef0bd7..1c7e280dcdd 100644
--- a/sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go
+++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go
@@ -32,6 +32,7 @@ import (
 )
 
 var (
+   // ErrCancel represents a pipeline cancellation by the user.
ErrCancel = errors.New("pipeline canceled")
 )
 



(beam) branch master updated: [Prism] Implement jobservices.Server Cancel (#30178)

2024-02-05 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new a47b1faa527 [Prism] Implement jobservices.Server Cancel (#30178)
a47b1faa527 is described below

commit a47b1faa5276cdbf05c356b60ed8c4494ee622aa
Author: Damon 
AuthorDate: Mon Feb 5 17:23:10 2024 +

[Prism] Implement jobservices.Server Cancel (#30178)

* Implement jobservices.Server Cancel

* Small code cleanup

* Fix test err; canceled state after complete
---
 sdks/go/pkg/beam/runners/prism/internal/execute.go |  8 +++
 .../beam/runners/prism/internal/jobservices/job.go | 10 
 .../prism/internal/jobservices/management.go   | 30 +++
 .../prism/internal/jobservices/management_test.go  | 34 
 .../prism/internal/jobservices/server_test.go  | 61 ++
 5 files changed, 143 insertions(+)

diff --git a/sdks/go/pkg/beam/runners/prism/internal/execute.go 
b/sdks/go/pkg/beam/runners/prism/internal/execute.go
index b8bc68dcd1b..1aa95bc6ee1 100644
--- a/sdks/go/pkg/beam/runners/prism/internal/execute.go
+++ b/sdks/go/pkg/beam/runners/prism/internal/execute.go
@@ -17,6 +17,7 @@ package internal
 
 import (
"context"
+   "errors"
"fmt"
"io"
"sort"
@@ -70,6 +71,13 @@ func RunPipeline(j *jobservices.Job) {
j.Failed(err)
return
}
+
+   if errors.Is(context.Cause(j.RootCtx), jobservices.ErrCancel) {
+   j.SendMsg("pipeline canceled " + j.String())
+   j.Canceled()
+   return
+   }
+
j.SendMsg("pipeline completed " + j.String())
 
j.SendMsg("terminating " + j.String())
diff --git a/sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go 
b/sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go
index bb5eb88c919..6cde48ded9a 100644
--- a/sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go
+++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go
@@ -177,6 +177,16 @@ func (j *Job) Done() {
j.sendState(jobpb.JobState_DONE)
 }
 
+// Canceling indicates that the job is canceling.
+func (j *Job) Canceling() {
+   j.sendState(jobpb.JobState_CANCELLING)
+}
+
+// Canceled indicates that the job is canceled.
+func (j *Job) Canceled() {
+   j.sendState(jobpb.JobState_CANCELLED)
+}
+
 // Failed indicates that the job completed unsuccessfully.
 func (j *Job) Failed(err error) {
slog.Error("job failed", slog.Any("job", j), slog.Any("error", err))
diff --git a/sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go 
b/sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go
index 323d8c46efb..0da37ef0bd7 100644
--- a/sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go
+++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go
@@ -17,6 +17,7 @@ package jobservices
 
 import (
"context"
+   "errors"
"fmt"
"sync"
"sync/atomic"
@@ -30,6 +31,10 @@ import (
"google.golang.org/protobuf/types/known/timestamppb"
 )
 
+var (
+   ErrCancel = errors.New("pipeline canceled")
+)
+
 func (s *Server) nextId() string {
	v := atomic.AddUint32(&s.index, 1)
return fmt.Sprintf("job-%03d", v)
@@ -215,6 +220,31 @@ func (s *Server) Run(ctx context.Context, req 
*jobpb.RunJobRequest) (*jobpb.RunJ
}, nil
 }
 
+// Cancel a Job requested by the CancelJobRequest for jobs not in an already 
terminal state.
+// Otherwise, returns nil if Job does not exist or the Job's existing state as 
part of the CancelJobResponse.
+func (s *Server) Cancel(_ context.Context, req *jobpb.CancelJobRequest) 
(*jobpb.CancelJobResponse, error) {
+   s.mu.Lock()
+   job, ok := s.jobs[req.GetJobId()]
+   s.mu.Unlock()
+   if !ok {
+   return nil, nil
+   }
+   state := job.state.Load().(jobpb.JobState_Enum)
+   switch state {
+   case jobpb.JobState_CANCELLED, jobpb.JobState_DONE, 
jobpb.JobState_DRAINED, jobpb.JobState_UPDATED, jobpb.JobState_FAILED:
+   // Already at terminal state.
+   return &jobpb.CancelJobResponse{
+   State: state,
+   }, nil
+   }
+   job.SendMsg("canceling " + job.String())
+   job.Canceling()
+   job.CancelFn(ErrCancel)
+   return &jobpb.CancelJobResponse{
+   State: jobpb.JobState_CANCELLING,
+   }, nil
+}
+
 // GetMessageStream subscribes to a stream of state changes and messages from 
the job. If throughput
 // is high, this may cause losses of messages.
 func (s *Server) GetMessageStream(req *jobpb.JobMessagesRequest, stream 
jobpb.JobService_GetMessa

(beam) branch master updated: Improve varint encoding throughput with unrolled loop (#29689)

2024-01-31 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 4f09e62f520 Improve varint encoding throughput with unrolled loop 
(#29689)
4f09e62f520 is described below

commit 4f09e62f520d8a3bd40f9907e7343814cbd239ae
Author: Steven van Rossum 
AuthorDate: Wed Jan 31 23:23:50 2024 +0100

Improve varint encoding throughput with unrolled loop (#29689)

* Improve varint encoding throughput with unrolled loop

* Change BlackHole to Blackhole

* Add single byte encode tests

* Add missing L

* Remove public modifier

* Remove unused fields
---
 .../apache/beam/sdk/jmh/util/VarIntBenchmark.java  | 303 +
 .../main/java/org/apache/beam/sdk/util/VarInt.java |  62 -
 2 files changed, 358 insertions(+), 7 deletions(-)

diff --git 
a/sdks/java/core/jmh/src/main/java/org/apache/beam/sdk/jmh/util/VarIntBenchmark.java
 
b/sdks/java/core/jmh/src/main/java/org/apache/beam/sdk/jmh/util/VarIntBenchmark.java
new file mode 100644
index 000..964928d5055
--- /dev/null
+++ 
b/sdks/java/core/jmh/src/main/java/org/apache/beam/sdk/jmh/util/VarIntBenchmark.java
@@ -0,0 +1,303 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.jmh.util;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Random;
+import org.apache.beam.sdk.util.ByteStringOutputStream;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.OperationsPerInvocation;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.infra.Blackhole;
+
+/** Benchmarks for {@link org.apache.beam.sdk.util.VarInt} and variants. */
+@OperationsPerInvocation(VarIntBenchmark.VALUES_PER_INVOCATION)
+public class VarIntBenchmark {
+  static final int VALUES_PER_INVOCATION = 2048;
+  private static final Random RNG = new Random(314159);
+
+  /** Output to {@link Blackhole}. Do nothing, assume nothing. */
+  @State(Scope.Benchmark)
+  public static class BlackholeOutput {
+OutputStream stream;
+
+@Setup
+public void setup(Blackhole bh) {
+  stream =
+  new OutputStream() {
+@Override
+public void write(int b) {
+  bh.consume(b);
+}
+
+@Override
+public void write(byte[] b) throws IOException {
+  bh.consume(b);
+}
+
+@Override
+public void write(byte[] b, int off, int len) throws IOException {
+  bh.consume(b);
+}
+  };
+}
+  }
+
+  /** Output to {@link ByteStringOutputStream}. */
+  @State(Scope.Thread)
+  public static class ByteStringOutput {
+final ByteStringOutputStream stream = new ByteStringOutputStream();
+
+// Unfortunately, this needs to be cleaned up after use to avoid OOMs.
+// It's not generally recommended to use Level.Invocation, but there's no 
way around it.
+@TearDown(Level.Invocation)
+public void tearDown(Blackhole bh) {
+  bh.consume(stream.toByteStringAndReset());
+}
+  }
+
+  /** Input from randomly generated bytes. */
+  @State(Scope.Benchmark)
+  public static class Bytes {
+long[] values = new long[VALUES_PER_INVOCATION];
+
+@Setup
+public void setup() {
+  values = new long[VALUES_PER_INVOCATION];
+  byte[] bytes = new byte[VALUES_PER_INVOCATION];
+  RNG.nextBytes(bytes);
+
+  for (int i = 0; i < VALUES_PER_INVOCATION; i++) {
+values[i] = (long) (bytes[i] & 0x7F);
+  }
+}
+  }
+
+  /** Input from randomly generated longs. */
+  @State(Scope.Benchmark)
+  public static class Longs {
+long[] values = new long[VALUES_PER_INVOCATION];
+
+@Setup
+public void setup() {
+  values = new long[VALUES_PER_INVOCATION];
+
+  for (int i = 0; i < VALUES_PER_INVOCATION; i++) {
+/

(beam) branch master updated (b9fd39cb16b -> 22fefebacab)

2024-01-31 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from b9fd39cb16b Fix Dataproc cleanup race condition (#30154)
 add 22fefebacab BigQuery: Decouple clustering from time partitioning when 
writing (#30094)

No new revisions were added by this update.

Summary of changes:
 CHANGES.md |  3 +-
 .../beam/sdk/io/gcp/bigquery/BigQueryIO.java   | 10 +++---
 .../sdk/io/gcp/bigquery/CreateTableHelpers.java| 10 +++---
 .../gcp/bigquery/DynamicDestinationsHelpers.java   | 42 --
 .../io/gcp/bigquery/UpdateSchemaDestination.java   |  9 ++---
 .../sdk/io/gcp/bigquery/BigQueryIOWriteTest.java   | 38 +++-
 6 files changed, 62 insertions(+), 50 deletions(-)



(beam) branch master updated: Fix an edge case of getting duplicated records when using TextIO. (#30026)

2024-01-17 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new b5dc728b677 Fix an edge case of getting duplicated records when using 
TextIO. (#30026)
b5dc728b677 is described below

commit b5dc728b677101cf3968e9f94db0898342343f6e
Author: Shunping Huang 
AuthorDate: Wed Jan 17 13:39:35 2024 -0500

Fix an edge case of getting duplicated records when using TextIO. (#30026)

When processing a CRLF-delimited file and the read buffer has
CR as the last character, startOfNextRecord will be set to the
position after the CR, i.e. the following LF. Let's say the
position of this LF is p.

In the next buffer, even though the actual start of record should be
p+1, startOfRecord is set to startOfNextRecord, which is p.

Then the code processes the next record by skipping the LF and yields
a record starting from p+1. It decides whether the record is valid by
checking if startOfRecord is in the range defined in RangeTracker.

If there is a split right after p, i.e. we have ranges [a, p+1) and [p+1, 
b),
then the above record would be considered as valid in the split [a, p+1),
because its startOfRecord is p <= p+1. However, the record is also
considered valid when split [p+1, b) is processed, resulting into
duplicated records in the output.
---
 .../java/org/apache/beam/sdk/io/TextSource.java|   5 +-
 .../org/apache/beam/sdk/io/TextIOReadTest.java | 113 ++---
 2 files changed, 82 insertions(+), 36 deletions(-)

diff --git 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextSource.java 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextSource.java
index 3d62c677950..8367b38751c 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextSource.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextSource.java
@@ -323,10 +323,13 @@ public class TextSource extends FileBasedSource {
 
 // Consume any LF after CR if it is the first character of the next 
buffer
 if (skipLineFeedAtStart && buffer[bufferPosn] == LF) {
-  ++bytesConsumed;
   ++startPosn;
   ++bufferPosn;
   skipLineFeedAtStart = false;
+
+  // Right now, startOfRecord is pointing at the position of LF, but 
the actual start
+  // position of the new record should be the position after LF.
+  ++startOfRecord;
 }
 
 // Search for the newline
diff --git 
a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOReadTest.java 
b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOReadTest.java
index 84c05ee6c90..253308d1b93 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOReadTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOReadTest.java
@@ -386,53 +386,96 @@ public class TextIOReadTest {
   runTestReadWithData(line.getBytes(UTF_8), expected);
 }
 
+// Placeholder channel that only yields 0- and 1-length buffers.
+private static class SlowReadChannel implements ReadableByteChannel {
+  int readCount = 0;
+  InputStream stream;
+  ReadableByteChannel channel;
+
+  public SlowReadChannel(FileBasedSource source) throws IOException {
+channel =
+FileSystems.open(
+
FileSystems.matchSingleFileSpec(source.getFileOrPatternSpec()).resourceId());
+stream = Channels.newInputStream(channel);
+  }
+
+  // Data is read at most one byte at a time from line parameter.
+  @Override
+  public int read(ByteBuffer dst) throws IOException {
+if (++readCount % 3 == 0) {
+  if (dst.hasRemaining()) {
+int value = stream.read();
+if (value == -1) {
+  return -1;
+}
+dst.put((byte) value);
+return 1;
+  }
+}
+return 0;
+  }
+
+  @Override
+  public boolean isOpen() {
+return channel.isOpen();
+  }
+
+  @Override
+  public void close() throws IOException {
+stream.close();
+  }
+}
+
 @Test
-public void 
testReadLinesWithDefaultDelimiterAndZeroAndOneLengthReturningChannel()
-throws Exception {
+public void testReadLinesWithDefaultDelimiterAndSlowReadChannel() throws 
Exception {
   Path path = tempFolder.newFile().toPath();
   Files.write(path, line.getBytes(UTF_8));
   Metadata metadata = FileSystems.matchSingleFileSpec(path.toString());
   FileBasedSource source =
   getTextSource(path.toString(), null, 0)
   .createForSubrangeOfFile(metadata, 0, metadata.sizeBytes());
+
   FileBasedReader reader =
   source.createSingleFileReader(PipelineOptionsFactory.create());
-  ReadableByteChan

(beam) branch master updated (db0a4deea2f -> c38dc77283f)

2024-01-12 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from db0a4deea2f [IT] Pub/Sub RM should delete subscriptions that were 
created by Beam (#29957)
 add c38dc77283f Upgrade Parquet to 0.13.1 (#29953)

No new revisions were added by this update.

Summary of changes:
 sdks/java/io/file-schema-transform/build.gradle | 2 +-
 sdks/java/io/parquet/build.gradle   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)



(beam) branch master updated: [IT] Pub/Sub RM should delete subscriptions that were created by Beam (#29957)

2024-01-12 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new db0a4deea2f [IT] Pub/Sub RM should delete subscriptions that were 
created by Beam (#29957)
db0a4deea2f is described below

commit db0a4deea2f22ee725d22cb3619b4967aa2b2606
Author: Bruno Volpato 
AuthorDate: Fri Jan 12 12:57:47 2024 -0500

[IT] Pub/Sub RM should delete subscriptions that were created by Beam 
(#29957)

* [IT] Pub/Sub RM should delete subscriptions that were created by Beam

* Add unit tests
---
 .../beam/it/gcp/pubsub/PubsubResourceManager.java  | 14 ++-
 .../it/gcp/pubsub/PubsubResourceManagerTest.java   | 28 +-
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git 
a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/pubsub/PubsubResourceManager.java
 
b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/pubsub/PubsubResourceManager.java
index 738620c15b7..f947d70efd9 100644
--- 
a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/pubsub/PubsubResourceManager.java
+++ 
b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/pubsub/PubsubResourceManager.java
@@ -319,7 +319,19 @@ public final class PubsubResourceManager implements 
ResourceManager {
 
   for (TopicName topic : createdTopics) {
 LOG.info("Deleting topic '{}'", topic);
-Failsafe.with(retryOnDeadlineExceeded()).run(() -> 
topicAdminClient.deleteTopic(topic));
+Failsafe.with(retryOnDeadlineExceeded())
+.run(
+() -> {
+
+  // Delete subscriptions that would be orphaned.
+  for (String topicSub :
+  
topicAdminClient.listTopicSubscriptions(topic).iterateAll()) {
+LOG.info("Deleting subscription '{}'", topicSub);
+subscriptionAdminClient.deleteSubscription(topicSub);
+  }
+
+  topicAdminClient.deleteTopic(topic);
+});
   }
 
   for (SchemaName schemaName : createdSchemas) {
diff --git 
a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/pubsub/PubsubResourceManagerTest.java
 
b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/pubsub/PubsubResourceManagerTest.java
index d531862d960..08b480895ff 100644
--- 
a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/pubsub/PubsubResourceManagerTest.java
+++ 
b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/pubsub/PubsubResourceManagerTest.java
@@ -37,6 +37,8 @@ import com.google.pubsub.v1.SubscriptionName;
 import com.google.pubsub.v1.Topic;
 import com.google.pubsub.v1.TopicName;
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Map;
 import 
org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap;
 import org.junit.Before;
@@ -63,8 +65,11 @@ public final class PubsubResourceManagerTest {
   private static final String VALID_MESSAGE_ID = "abcdef";
 
   @Mock private TopicAdminClient topicAdminClient;
-  @Mock private SubscriptionAdminClient subscriptionAdminClient;
 
+  @Mock
+  private TopicAdminClient.ListTopicSubscriptionsPagedResponse 
listTopicSubscriptionsPagedResponse;
+
+  @Mock private SubscriptionAdminClient subscriptionAdminClient;
   @Mock private SchemaServiceClient schemaServiceClient;
   private Topic topic;
   private Subscription subscription;
@@ -75,6 +80,7 @@ public final class PubsubResourceManagerTest {
 
   @Captor private ArgumentCaptor topicNameCaptor;
   @Captor private ArgumentCaptor subscriptionNameCaptor;
+  @Captor private ArgumentCaptor stringArgumentCaptor;
   @Captor private ArgumentCaptor pubsubMessageCaptor;
 
   @Before
@@ -95,6 +101,8 @@ public final class PubsubResourceManagerTest {
 .setName(SubscriptionName.of(PROJECT_ID, 
SUBSCRIPTION_NAME).toString())
 .build();
 when(publisherFactory.createPublisher(any())).thenReturn(publisher);
+when(topicAdminClient.listTopicSubscriptions(any(TopicName.class)))
+.thenReturn(listTopicSubscriptionsPagedResponse);
   }
 
   @Test
@@ -225,6 +233,7 @@ public final class PubsubResourceManagerTest {
 Topic topic1 = Topic.newBuilder().setName(topicName1.toString()).build();
 Topic topic2 = Topic.newBuilder().setName(topicName2.toString()).build();
 
when(topicAdminClient.createTopic(any(TopicName.class))).thenReturn(topic1, 
topic2);
+when(listTopicSubscriptionsPagedResponse.iterateAll()).thenReturn(new 
ArrayList<>());
 
 testManager.createTopic("topic1");
 testManager.createTopic("topic2");
@@ -235,6 +244,23 @@ public final class PubsubResourceManagerTest {
 assertThat(topicNameCaptor.getAllValues()).containsEx

(beam) branch master updated (bb0a2037970 -> 8fb06da4ce8)

2024-01-09 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from bb0a2037970 Merge pull request #29811: Bump 
org.checkerframework:checkerframework-gradle-plugin from 0.6.34 to 0.6.37
 add 8fb06da4ce8 [RRIO] Build RequestResponseIO and related dependency 
changes (#29710)

No new revisions were added by this update.

Summary of changes:
 .../apache/beam/io/requestresponse/ApiIOError.java |   7 +-
 .../org/apache/beam/io/requestresponse/Cache.java  | 190 +--
 .../org/apache/beam/io/requestresponse/Call.java   | 377 ++
 .../beam/io/requestresponse/CallShouldBackoff.java |   2 +-
 ...llShouldBackoffBasedOnRejectionProbability.java |   2 +-
 ...ava => DefaultSerializableBackoffSupplier.java} |  24 +-
 .../apache/beam/io/requestresponse/Monitoring.java | 362 ++
 .../org/apache/beam/io/requestresponse/Quota.java  |   7 +-
 .../beam/io/requestresponse/RedisClient.java   |   4 +-
 .../apache/beam/io/requestresponse/Repeater.java   |  85 +++-
 .../beam/io/requestresponse/RequestResponseIO.java | 552 +++--
 .../org/apache/beam/io/requestresponse/Result.java |  65 +--
 .../{Caller.java => SerializableSupplier.java} |  13 +-
 .../ThrottleWithExternalResource.java  |  12 +-
 .../UserCodeExecutionException.java|   8 +
 .../io/requestresponse/UserCodeQuotaException.java |   6 +
 .../UserCodeRemoteSystemException.java |   9 +
 .../requestresponse/UserCodeTimeoutException.java  |   9 +
 .../requestresponse/WindowedCallShouldBackoff.java |  76 +++
 .../apache/beam/io/requestresponse/CacheIT.java|  16 +-
 ...ouldBackoffBasedOnRejectionProbabilityTest.java |   2 +-
 .../apache/beam/io/requestresponse/CallTest.java   |   1 -
 .../EchoGRPCCallerWithSetupTeardownIT.java |   2 +-
 .../beam/io/requestresponse/EchoRequestCoder.java  |   1 +
 ...choRequestCoder.java => EchoResponseCoder.java} |  13 +-
 .../beam/io/requestresponse/RedisClientIT.java |   2 +-
 .../beam/io/requestresponse/RepeaterTest.java  |   2 +-
 .../io/requestresponse/RequestResponseIOIT.java|  95 
 .../io/requestresponse/RequestResponseIOTest.java  | 508 +++
 .../ThrottleWithExternalResourceIT.java|   6 +-
 .../WindowedCallShouldBackoffTest.java |  56 +++
 31 files changed, 2225 insertions(+), 289 deletions(-)
 copy 
sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/{CallShouldBackoff.java
 => DefaultSerializableBackoffSupplier.java} (61%)
 create mode 100644 
sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/Monitoring.java
 copy 
.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/conversions/RowConversionResult.java
 => 
sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/Result.java 
(54%)
 copy 
sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/{Caller.java 
=> SerializableSupplier.java} (74%)
 create mode 100644 
sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/WindowedCallShouldBackoff.java
 copy 
sdks/java/io/rrio/src/test/java/org/apache/beam/io/requestresponse/{EchoRequestCoder.java
 => EchoResponseCoder.java} (74%)
 create mode 100644 
sdks/java/io/rrio/src/test/java/org/apache/beam/io/requestresponse/RequestResponseIOIT.java
 create mode 100644 
sdks/java/io/rrio/src/test/java/org/apache/beam/io/requestresponse/RequestResponseIOTest.java
 create mode 100644 
sdks/java/io/rrio/src/test/java/org/apache/beam/io/requestresponse/WindowedCallShouldBackoffTest.java



(beam) branch master updated: Creating a Fully Managed Beam Streaming System with Flink Runner on Kubernetes - Part 2 (#29794)

2023-12-29 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 7342b03ff4e Creating a Fully Managed Beam Streaming System with Flink 
Runner on Kubernetes - Part 2 (#29794)
7342b03ff4e is described below

commit 7342b03ff4e62076344a9b9f7caabb86d2f78d02
Author: Talat UYARER 
AuthorDate: Fri Dec 29 18:19:11 2023 -0800

Creating a Fully Managed Beam Streaming System with Flink Runner on 
Kubernetes - Part 2 (#29794)

* Initial version of Part 2

* Apply suggestions from code review

Co-authored-by: Rebecca Szper <98840847+rsz...@users.noreply.github.com>

* Removed Trailing whitespace

* Apply suggestions from code review

Co-authored-by: Rebecca Szper <98840847+rsz...@users.noreply.github.com>

* Removed more section and a descriptive sentence for checkpointing section

-

Co-authored-by: Talat UYARER 
Co-authored-by: Rebecca Szper <98840847+rsz...@users.noreply.github.com>
---
 .../blog/apache-beam-flink-and-kubernetes-part2.md | 158 +
 .../adaptive-timeout-kafka.png | Bin 0 -> 880831 bytes
 .../checkpoint_monitoring-history-subtasks.png | Bin 0 -> 170772 bytes
 .../flink-backlog-metrics.png  | Bin 0 -> 580860 bytes
 .../flink-partition-assignment.png | Bin 0 -> 66575 bytes
 .../kafkaio-wait-reader.png| Bin 0 -> 399857 bytes
 6 files changed, 158 insertions(+)

diff --git 
a/website/www/site/content/en/blog/apache-beam-flink-and-kubernetes-part2.md 
b/website/www/site/content/en/blog/apache-beam-flink-and-kubernetes-part2.md
new file mode 100644
index 000..ceba71a4401
--- /dev/null
+++ b/website/www/site/content/en/blog/apache-beam-flink-and-kubernetes-part2.md
@@ -0,0 +1,158 @@
+---
+title:  "Build a scalable, self-managed streaming infrastructure with Beam and 
Flink: Tackling Autoscaling Challenges - Part 2"
+date:   2023-12-18 09:00:00 -0400
+categories:
+  - blog
+authors:
+  - talat
+---
+
+
+# Build a scalable, self-managed streaming infrastructure with Flink: Tackling 
Autoscaling Challenges - Part 2
+
+
+Welcome to Part 2 of our in-depth series about building and managing a service 
for Apache Beam Flink on Kubernetes. In this segment, we're taking a closer 
look at the hurdles we encountered while implementing autoscaling. These 
challenges weren't just roadblocks. They were opportunities for us to innovate 
and enhance our system. Let’s break down these issues, understand their 
context, and explore the solutions we developed.
+
+## Understand Apache Beam backlog metrics in the Flink runner environment
+
+**The Challenge:** In our current setup, we are using Apache Flink for 
processing data streams. However, we've encountered a puzzling issue: our Flink 
job isn't showing the backlog metrics from Apache Beam. These metrics are 
critical for understanding the state and performance of our data pipelines.
+
+**What We Found:** Interestingly, we noticed that the metrics are actually 
being generated in `KafkaIO`, which is a part of our data pipeline that handles 
Kafka streams. But when we try to monitor these metrics through the Apache 
Flink Metric system, we can't find them. We suspected that there might be an 
issue with the integration (or 'wiring') between Apache Beam and Apache Flink.
+
+**Digging Deeper:** On closer inspection, we found that the metrics should be 
emitted during the 'Checkpointing' phase of the data stream processing. During 
this crucial step, the system takes a snapshot of the stream's state, and the 
metrics are typically metrics that are generated for unbounded sources. 
Unbounded sources are sources that continuously stream data, like Kafka.
+
+**A Potential Solution:** We believe the root of the problem lies in how the 
metric context is set during the checkpointing phase. A disconnect appears to 
prevent the Beam metrics from being properly captured in the Flink Metric 
system. We proposed a fix for this issue, which you can review and contribute 
to on our GitHub pull request: [Apache Beam PR 
#29793](https://github.com/apache/beam/pull/29793).
+
+
+
+
+
+## Overcoming challenges in checkpoint size reduction for autoscaling Beam jobs
+
+In this section we will discuss strategies for reducing the size of 
checkpoints in autoscaling Apache Beam jobs, focusing on efficient 
checkpointing in Apache Flink and optimizing bundle sizes and PipelineOptions 
to manage frequent checkpoint timeouts and large-scale job requirements.
+
+### Understand the basics of checkpointing in Apache Flink
+In stream processing, maintaining state consistency and fault tolerance is 
crucial. Apache Flink achieves this through a process called *checkpointing*. 
Ch

(beam) branch master updated: Add support to use side inputs with Combine.PerKeyWithHotKeyFanout (#28867)

2023-12-13 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new a9f5ab14d06 Add support to use side inputs with 
Combine.PerKeyWithHotKeyFanout (#28867)
a9f5ab14d06 is described below

commit a9f5ab14d0689568c6822f93bec0c2ca7658cb57
Author: Marc <53709151+marc7...@users.noreply.github.com>
AuthorDate: Wed Dec 13 23:31:01 2023 +0100

Add support to use side inputs with Combine.PerKeyWithHotKeyFanout (#28867)
---
 .../org/apache/beam/sdk/transforms/Combine.java| 24 ++
 .../apache/beam/sdk/transforms/CombineTest.java| 24 ++
 2 files changed, 44 insertions(+), 4 deletions(-)

diff --git 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/Combine.java 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/Combine.java
index ffbfac460dc..f1a964fa5a6 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/Combine.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/Combine.java
@@ -1555,7 +1555,7 @@ public class Combine {
  */
 public PerKeyWithHotKeyFanout withHotKeyFanout(
 SerializableFunction hotKeyFanout) {
-  return new PerKeyWithHotKeyFanout<>(fn, fnDisplayData, hotKeyFanout, 
fewKeys);
+  return new PerKeyWithHotKeyFanout<>(fn, fnDisplayData, hotKeyFanout, 
fewKeys, sideInputs);
 }
 
 /**
@@ -1578,7 +1578,8 @@ public class Combine {
   return hotKeyFanout;
 }
   },
-  fewKeys);
+  fewKeys,
+  sideInputs);
 }
 
 /** Returns the {@link GlobalCombineFn} used by this Combine operation. */
@@ -1624,18 +1625,20 @@ public class Combine {
 private final GlobalCombineFn fn;
 private final DisplayData.ItemSpec> fnDisplayData;
 private final SerializableFunction hotKeyFanout;
-
 private final boolean fewKeys;
+private final List> sideInputs;
 
 private PerKeyWithHotKeyFanout(
 GlobalCombineFn fn,
 DisplayData.ItemSpec> fnDisplayData,
 SerializableFunction hotKeyFanout,
-boolean fewKeys) {
+boolean fewKeys,
+List> sideInputs) {
   this.fn = fn;
   this.fnDisplayData = fnDisplayData;
   this.hotKeyFanout = hotKeyFanout;
   this.fewKeys = fewKeys;
+  this.sideInputs = sideInputs;
 }
 
 @Override
@@ -1928,6 +1931,10 @@ public class Combine {
   fewKeys
   ? Combine.fewKeys(hotPreCombine, fnDisplayData)
   : Combine.perKey(hotPreCombine, fnDisplayData);
+  if (!sideInputs.isEmpty()) {
+hotPreCombineTransform = 
hotPreCombineTransform.withSideInputs(sideInputs);
+  }
+
   PCollection>> precombinedHot =
   split
   .get(hot)
@@ -1975,6 +1982,10 @@ public class Combine {
   fewKeys
   ? Combine.fewKeys(postCombine, fnDisplayData)
   : Combine.perKey(postCombine, fnDisplayData);
+  if (!sideInputs.isEmpty()) {
+postCombineTransform = postCombineTransform.withSideInputs(sideInputs);
+  }
+
   return PCollectionList.of(precombinedHot)
   .and(preprocessedCold)
   .apply(Flatten.pCollections())
@@ -1993,6 +2004,11 @@ public class Combine {
   DisplayData.item("fanoutFn", 
hotKeyFanout.getClass()).withLabel("Fanout Function"));
 }
 
+/** Returns the side inputs used by this Combine operation. */
+public List> getSideInputs() {
+  return sideInputs;
+}
+
 /**
  * Used to store either an input or accumulator value, for flattening the 
hot and cold key
  * paths.
diff --git 
a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/CombineTest.java 
b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/CombineTest.java
index 024fedd177e..f070378a64e 100644
--- 
a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/CombineTest.java
+++ 
b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/CombineTest.java
@@ -1031,6 +1031,30 @@ public class CombineTest implements Serializable {
 
   assertEquals(Collections.singletonList(view), combine.getSideInputs());
 }
+
+@Test
+@Category({ValidatesRunner.class, UsesSideInputs.class})
+public void testHotKeyCombineWithSideInputs() {
+  PCollection> input =
+  createInput(
+  pipeline,
+  Arrays.asList(
+  KV.of("a", 1), KV.of("a", 1), KV.of("a", 4), KV.of("b", 1), 
KV.of("b", 13)));
+  PCollection sum =
+  input.apply(Values.create()).apply("Sum", Combine.globally(new 
SumInts()));
+  PCollectionView sumView = sum.apply(View.asSingleton());
+
+  PCollection> combi

(beam) branch master updated (276aa023903 -> 8be85d29f53)

2023-12-12 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from 276aa023903 Add suffix for vocab files in tft transforms (#29720)
 add 8be85d29f53 fix: optimize segment reader (#29694)

No new revisions were added by this update.

Summary of changes:
 .../sdk/io/gcp/bigtable/BigtableServiceImpl.java   |  9 +++-
 .../io/gcp/bigtable/BigtableServiceImplTest.java   | 63 ++
 2 files changed, 70 insertions(+), 2 deletions(-)



(beam) branch master updated (22ae2e35b98 -> 40685cdd0ee)

2023-12-05 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from 22ae2e35b98 Feature/dead letter queue core (#29164)
 add 40685cdd0ee [RRIO] [Call] Implement the Repeater (#29490)

No new revisions were added by this update.

Summary of changes:
 .../apache/beam/io/requestresponse/Repeater.java   | 140 +
 ...ion.java => UserCodeRemoteSystemException.java} |  15 +-
 .../beam/io/requestresponse/RepeaterTest.java  | 345 +
 3 files changed, 492 insertions(+), 8 deletions(-)
 create mode 100644 
sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/Repeater.java
 copy 
sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/{UserCodeQuotaException.java
 => UserCodeRemoteSystemException.java} (70%)
 create mode 100644 
sdks/java/io/rrio/src/test/java/org/apache/beam/io/requestresponse/RepeaterTest.java



(beam) branch master updated: Remove SuppressWarnings nullness from JsonMatcher (#29486)

2023-11-27 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 109133e38ed Remove SuppressWarnings nullness from JsonMatcher (#29486)
109133e38ed is described below

commit 109133e38edb43f6e267c97a26cd4b1bc8de0461
Author: Damon 
AuthorDate: Mon Nov 27 10:17:32 2023 -0800

Remove SuppressWarnings nullness from JsonMatcher (#29486)
---
 .../main/java/org/apache/beam/sdk/testing/JsonMatcher.java  | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/JsonMatcher.java 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/JsonMatcher.java
index 034e68220e4..2d2359badfd 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/JsonMatcher.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/JsonMatcher.java
@@ -17,12 +17,14 @@
  */
 package org.apache.beam.sdk.testing;
 
+import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull;
 import static org.hamcrest.Matchers.is;
 
 import com.fasterxml.jackson.core.type.TypeReference;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import java.io.IOException;
 import java.util.Map;
+import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
 import org.hamcrest.Description;
 import org.hamcrest.Matcher;
 import org.hamcrest.TypeSafeMatcher;
@@ -35,13 +37,10 @@ import org.hamcrest.TypeSafeMatcher;
  *  jsonStringLike("{\"height\": 80, \"name\": \"person\"}"));
  * 
  */
-@SuppressWarnings({
-  "nullness" // TODO(https://github.com/apache/beam/issues/20497)
-})
 public abstract class JsonMatcher extends TypeSafeMatcher {
-  private Matcher> mapMatcher;
+  private final Matcher> mapMatcher;
   private static final ObjectMapper MAPPER = new ObjectMapper();
-  private Map actualMap;
+  private @MonotonicNonNull Map actualMap;
 
   public JsonMatcher(Map expectedMap) {
 this.mapMatcher = is(expectedMap);
@@ -86,7 +85,7 @@ public abstract class JsonMatcher extends 
TypeSafeMatcher {
 } catch (IOException e) {
   return false;
 }
-return mapMatcher.matches(actualMap);
+return mapMatcher.matches(checkStateNotNull(actualMap));
   }
 
   @Override
@@ -96,6 +95,6 @@ public abstract class JsonMatcher extends 
TypeSafeMatcher {
 
   @Override
   protected void describeMismatchSafely(T item, Description 
mismatchDescription) {
-mapMatcher.describeMismatch(actualMap, mismatchDescription);
+mapMatcher.describeMismatch(checkStateNotNull(actualMap), 
mismatchDescription);
   }
 }



(beam) branch master updated: [RRIO] [Throttle] [Cache] Implement Throttle and Cache using an external resource. (#29401)

2023-11-22 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 1e06d882bdd [RRIO] [Throttle] [Cache] Implement Throttle and Cache 
using an external resource. (#29401)
1e06d882bdd is described below

commit 1e06d882bddfa2eae33a6bcfc462fe10172704bb
Author: Damon 
AuthorDate: Wed Nov 22 11:12:58 2023 -0800

[RRIO] [Throttle] [Cache] Implement Throttle and Cache using an external 
resource. (#29401)

* WIP: Implement CacheSerializer and providers

* wip

* Condense Throttle into one class

* wip

* Implement Throttle and Cache

* Update javadoc

* Edit per PR comments

* Refacter per PR comments
---
 .../configmap.yaml |  30 ++
 .../deployment.yaml|  27 ++
 .../kustomization.yaml |  34 ++
 sdks/java/io/rrio/build.gradle |   1 +
 .../org/apache/beam/io/requestresponse/Cache.java  | 239 
 .../apache/beam/io/requestresponse/CacheRead.java  | 121 --
 .../apache/beam/io/requestresponse/CacheWrite.java | 119 --
 .../org/apache/beam/io/requestresponse/Call.java   |  39 +-
 .../org/apache/beam/io/requestresponse/Quota.java  |  70 
 .../beam/io/requestresponse/RedisClient.java   |  10 +
 .../beam/io/requestresponse/ThrottleDequeue.java   | 101 -
 .../beam/io/requestresponse/ThrottleEnqueue.java   |  61 ---
 .../io/requestresponse/ThrottleRefreshQuota.java   |  55 ---
 .../ThrottleWithExternalResource.java  | 418 +
 .../apache/beam/io/requestresponse/CacheIT.java| 120 ++
 .../apache/beam/io/requestresponse/CacheTest.java  | 132 +++
 .../apache/beam/io/requestresponse/CallTest.java   | 112 +-
 ...java => EchoGRPCCallerWithSetupTeardownIT.java} |  14 +-
 ...HTTPCallerTestIT.java => EchoHTTPCallerIT.java} |  18 +-
 .../beam/io/requestresponse/EchoITOptions.java |   7 +-
 .../beam/io/requestresponse/EchoRequestCoder.java  |  44 +++
 .../{RedisClientTestIT.java => RedisClientIT.java} |  24 +-
 .../ThrottleWithExternalResourceIT.java| 186 +
 .../ThrottleWithExternalResourceTest.java  |  77 
 24 files changed, 1559 insertions(+), 500 deletions(-)

diff --git 
a/.test-infra/mock-apis/infrastructure/kubernetes/refresher/overlays/echo-ThrottleWithExternalResourceTestIT-10-per-1s-quota/configmap.yaml
 
b/.test-infra/mock-apis/infrastructure/kubernetes/refresher/overlays/echo-ThrottleWithExternalResourceTestIT-10-per-1s-quota/configmap.yaml
new file mode 100644
index 000..6a482b21b16
--- /dev/null
+++ 
b/.test-infra/mock-apis/infrastructure/kubernetes/refresher/overlays/echo-ThrottleWithExternalResourceTestIT-10-per-1s-quota/configmap.yaml
@@ -0,0 +1,30 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Configures patch for ../base/configmap.yaml
+# See 
https://kubectl.docs.kubernetes.io/references/kustomize/kustomization/patches/
+
+- op: replace
+  path: /metadata/labels/quota-id
+  value: echo-ThrottleWithExternalResourceTestIT-10-per-1s-quota
+- op: replace
+  path: /data/QUOTA_ID
+  value: echo-ThrottleWithExternalResourceTestIT-10-per-1s-quota
+- op: replace
+  path: /data/QUOTA_SIZE
+  value: "10"
+- op: replace
+  path: /data/QUOTA_REFRESH_INTERVAL
+  value: 1s
diff --git 
a/.test-infra/mock-apis/infrastructure/kubernetes/refresher/overlays/echo-ThrottleWithExternalResourceTestIT-10-per-1s-quota/deployment.yaml
 
b/.test-infra/mock-apis/infrastructure/kubernetes/refresher/overlays/echo-ThrottleWithExternalResourceTestIT-10-per-1s-quota/deployment.yaml
new file mode 100644
index 000..cff2f994cd6
--- /dev/null
+++ 
b/.test-infra/mock-apis/infrastructure/kubernetes/refresher/overlays/echo-ThrottleWithExternalResourceTestIT-10-per-1s-quota/deployment.yaml
@@ -0,0 +1,27 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information re

(beam) branch master updated: Bump grpc_version dependency (#29511)

2023-11-22 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 09a2cfcc2b2 Bump grpc_version dependency (#29511)
09a2cfcc2b2 is described below

commit 09a2cfcc2b2080751d32b3dbe70ee03ed80f9aa8
Author: Damon 
AuthorDate: Wed Nov 22 08:17:58 2023 -0800

Bump grpc_version dependency (#29511)

* Bump grpc_version depencency

https://mvnrepository.com/artifact/com.google.cloud/libraries-bom/26.26.0 
shows the io.grpc:grpc-bom to be 1.59.0

* Change to 1.58.0 per PR comment.
---
 buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy 
b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
index 999474b030b..359aeea55a2 100644
--- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
+++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
@@ -607,7 +607,7 @@ class BeamModulePlugin implements Plugin {
 def google_code_gson_version = "2.10.1"
 def google_oauth_clients_version = "1.34.1"
 // Try to keep grpc_version consistent with gRPC version in 
google_cloud_platform_libraries_bom
-def grpc_version = "1.56.1"
+def grpc_version = "1.58.0"
 def guava_version = "32.1.2-jre"
 def hadoop_version = "2.10.2"
 def hamcrest_version = "2.1"



(beam) branch master updated: Add random chars as the job name suffix by default on IT, avoid name collision (#29441)

2023-11-15 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 9b1040b2ee3 Add random chars as the job name suffix by default on IT, 
avoid name collision (#29441)
9b1040b2ee3 is described below

commit 9b1040b2ee3313a9ca266c5e7b7cd1e2b3cdc221
Author: Bruno Volpato 
AuthorDate: Wed Nov 15 12:47:25 2023 -0500

Add random chars as the job name suffix by default on IT, avoid name 
collision (#29441)
---
 .../src/main/java/org/apache/beam/it/common/PipelineLauncher.java   | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git 
a/it/common/src/main/java/org/apache/beam/it/common/PipelineLauncher.java 
b/it/common/src/main/java/org/apache/beam/it/common/PipelineLauncher.java
index 6d1aeae21dd..f6dcb0a7e64 100644
--- a/it/common/src/main/java/org/apache/beam/it/common/PipelineLauncher.java
+++ b/it/common/src/main/java/org/apache/beam/it/common/PipelineLauncher.java
@@ -115,6 +115,10 @@ public interface PipelineLauncher {
 
   /** Config for starting a Dataflow job. */
   class LaunchConfig {
+
+/** The default number of random characters to use in the generated job 
names. */
+public static final int JOB_NAME_DEFAULT_CHARS_SUFFIX = 8;
+
 private final String jobName;
 private final ImmutableMap parameters;
 private final ImmutableMap environment;
@@ -176,7 +180,7 @@ public interface PipelineLauncher {
 }
 
 public static Builder builder(String testName, String specPath) {
-  return new Builder(createJobName(testName), specPath);
+  return new Builder(createJobName(testName, 
JOB_NAME_DEFAULT_CHARS_SUFFIX), specPath);
 }
 
 public static Builder builder(String jobName) {



(beam) branch master updated: Add damondouglas to REVIEWERS (#29433)

2023-11-15 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 7740284b56d Add damondouglas to REVIEWERS (#29433)
7740284b56d is described below

commit 7740284b56dfad1735e2b9e1ff1f55f12e924dfc
Author: Damon 
AuthorDate: Wed Nov 15 09:13:15 2023 -0800

Add damondouglas to REVIEWERS (#29433)
---
 .github/REVIEWERS.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/REVIEWERS.yml b/.github/REVIEWERS.yml
index 9fd61727f10..84e5b6ecb19 100644
--- a/.github/REVIEWERS.yml
+++ b/.github/REVIEWERS.yml
@@ -41,6 +41,7 @@ labels:
   - robertwb
   - bvolpato
   - m-trieu
+  - damondouglas
 exclusionList: []
   - name: IO
 reviewers:
@@ -49,6 +50,7 @@ labels:
   - Abacn
   - ahmedabu98
   - bvolpato
+  - damondouglas
 exclusionList: []
   - name: spanner
 reviewers:
@@ -68,6 +70,7 @@ fallbackReviewers:
   - AnandInguva
   - chamikaramj
   - damccorm
+  - damondouglas
   - johnjcasey
   - jrmccluskey
   - kennknowles



(beam) branch master updated: Fix closing parenthesis in beam_Metrics_Report.yml (#29432)

2023-11-14 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new a687c7d41df Fix closing parenthesis in beam_Metrics_Report.yml (#29432)
a687c7d41df is described below

commit a687c7d41df5d25917cbef28b437fda8e0ebffb7
Author: Damon 
AuthorDate: Tue Nov 14 15:43:45 2023 -0800

Fix closing parenthesis in beam_Metrics_Report.yml (#29432)
---
 .github/workflows/beam_Metrics_Report.yml | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/beam_Metrics_Report.yml 
b/.github/workflows/beam_Metrics_Report.yml
index 8ed0c66480f..618c8b8d687 100644
--- a/.github/workflows/beam_Metrics_Report.yml
+++ b/.github/workflows/beam_Metrics_Report.yml
@@ -54,8 +54,10 @@ jobs:
 runs-on: [self-hosted, ubuntu-20.04, main]
 timeout-minutes: 100
 if: |
-  ((github.event_name == 'schedule' && github.repository == 'apache/beam') 
||
-  github.event_name == 'workflow_dispatch'
+  (
+(github.event_name == 'schedule' && github.repository == 'apache/beam')
+|| github.event_name == 'workflow_dispatch'
+  )
  
 steps:
   - uses: actions/checkout@v3



(beam) branch master updated: [RRIO] [Call] Define Caller & SetupTeardown interfaces (#29421)

2023-11-14 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 970ebd31407 [RRIO] [Call] Define Caller & SetupTeardown interfaces 
(#29421)
970ebd31407 is described below

commit 970ebd3140796ad1524389d8d2243d029b2a58a2
Author: Damon 
AuthorDate: Tue Nov 14 08:56:21 2023 -0800

[RRIO] [Call] Define Caller & SetupTeardown interfaces (#29421)

* Define Caller & SetupTeardown interfaces

* Fix failing required option parameter

* Skip test when not providing flag

* Fix tox errors
---
 sdks/python/apache_beam/io/requestresponseio.py|  65 
 .../apache_beam/io/requestresponseio_it_test.py| 172 +
 2 files changed, 237 insertions(+)

diff --git a/sdks/python/apache_beam/io/requestresponseio.py 
b/sdks/python/apache_beam/io/requestresponseio.py
new file mode 100644
index 000..912fbb73194
--- /dev/null
+++ b/sdks/python/apache_beam/io/requestresponseio.py
@@ -0,0 +1,65 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""``PTransform`` for reading from and writing to Web APIs."""
+import abc
+from typing import TypeVar
+
+RequestT = TypeVar('RequestT')
+ResponseT = TypeVar('ResponseT')
+
+
+class UserCodeExecutionException(Exception):
+  """Base class for errors related to calling Web APIs."""
+
+
+class UserCodeQuotaException(UserCodeExecutionException):
+  """Extends ``UserCodeExecutionException`` to signal specifically that
+  the Web API client encountered a Quota or API overuse related error.
+  """
+
+
+class UserCodeTimeoutException(UserCodeExecutionException):
+  """Extends ``UserCodeExecutionException`` to signal a user code timeout."""
+
+
+class Caller(metaclass=abc.ABCMeta):
+  """Interfaces user custom code intended for API calls."""
+  @abc.abstractmethod
+  def call(self, request: RequestT) -> ResponseT:
+"""Calls a Web API with the ``RequestT``  and returns a
+``ResponseT``. ``RequestResponseIO`` expects implementations of the
+call method to throw either a ``UserCodeExecutionException``,
+``UserCodeQuotaException``, or ``UserCodeTimeoutException``.
+"""
+pass
+
+
+class SetupTeardown(metaclass=abc.ABCMeta):
+  """Interfaces user custom code to set up and teardown the API clients.
+Called by ``RequestResponseIO`` within its DoFn's setup and teardown
+methods.
+"""
+  @abc.abstractmethod
+  def setup(self) -> None:
+"""Called during the DoFn's setup lifecycle method."""
+pass
+
+  @abc.abstractmethod
+  def teardown(self) -> None:
+"""Called during the DoFn's teardown lifecycle method."""
+pass
diff --git a/sdks/python/apache_beam/io/requestresponseio_it_test.py 
b/sdks/python/apache_beam/io/requestresponseio_it_test.py
new file mode 100644
index 000..608722a2ff6
--- /dev/null
+++ b/sdks/python/apache_beam/io/requestresponseio_it_test.py
@@ -0,0 +1,172 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import base64
+import sys
+import unittest
+from dataclasses im

(beam) branch master updated: Creating a Fully Managed Beam Streaming System with Flink Runner on Kubernetes (#29305)

2023-11-08 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 6bf871ff424 Creating a Fully Managed Beam Streaming System with Flink 
Runner on Kubernetes (#29305)
6bf871ff424 is described below

commit 6bf871ff424e106ac83cc56a6f42d393225289e6
Author: Talat UYARER 
AuthorDate: Wed Nov 8 16:26:37 2023 -0800

Creating a Fully Managed Beam Streaming System with Flink Runner on 
Kubernetes (#29305)

* How to Create a Fully Managed Beam Streaming System with Flink Runner on 
Kubernetes

* Updated Kubernetes Deployment YAML with real code

* Added Palo Alto Networks on Conclusion

* Update 
website/www/site/content/en/blog/apache-beam-flink-and-kubernetes.md

Co-authored-by: Rebecca Szper <98840847+rsz...@users.noreply.github.com>

* Update 
website/www/site/content/en/blog/apache-beam-flink-and-kubernetes.md

Co-authored-by: Rebecca Szper <98840847+rsz...@users.noreply.github.com>

* Update 
website/www/site/content/en/blog/apache-beam-flink-and-kubernetes.md

Co-authored-by: Rebecca Szper <98840847+rsz...@users.noreply.github.com>

* Update 
website/www/site/content/en/blog/apache-beam-flink-and-kubernetes.md

Co-authored-by: Rebecca Szper <98840847+rsz...@users.noreply.github.com>

* Update 
website/www/site/content/en/blog/apache-beam-flink-and-kubernetes.md

Co-authored-by: Rebecca Szper <98840847+rsz...@users.noreply.github.com>

* Update 
website/www/site/content/en/blog/apache-beam-flink-and-kubernetes.md

Co-authored-by: Rebecca Szper <98840847+rsz...@users.noreply.github.com>

* Update 
website/www/site/content/en/blog/apache-beam-flink-and-kubernetes.md

Co-authored-by: Rebecca Szper <98840847+rsz...@users.noreply.github.com>

* Update 
website/www/site/content/en/blog/apache-beam-flink-and-kubernetes.md

Co-authored-by: Rebecca Szper <98840847+rsz...@users.noreply.github.com>

* Update 
website/www/site/content/en/blog/apache-beam-flink-and-kubernetes.md

Co-authored-by: Rebecca Szper <98840847+rsz...@users.noreply.github.com>

* Update 
website/www/site/content/en/blog/apache-beam-flink-and-kubernetes.md

Co-authored-by: Rebecca Szper <98840847+rsz...@users.noreply.github.com>

* Apply suggestions from code review

Co-authored-by: Rebecca Szper <98840847+rsz...@users.noreply.github.com>

* add my username on authors.yml and removed redundant paragraph

* Updated Images with better resolutions

-

Co-authored-by: tuyarer 
Co-authored-by: Rebecca Szper <98840847+rsz...@users.noreply.github.com>
---
 .../en/blog/apache-beam-flink-and-kubernetes.md| 403 +
 website/www/site/data/authors.yml  |   3 +
 .../autoscaling-metrics.png| Bin 0 -> 79199 bytes
 .../backlog-graph.png  | Bin 0 -> 28754 bytes
 .../fko-library.png| Bin 0 -> 176729 bytes
 .../flink-checkpoint-ui.png| Bin 0 -> 413413 bytes
 .../flink-deployment-yaml.png  | Bin 0 -> 39477 bytes
 .../gcs-write-graph.png| Bin 0 -> 228927 bytes
 .../apache-beam-flink-and-kubernetes/image1.png| Bin 0 -> 103228 bytes
 .../job-metrics.png| Bin 0 -> 348117 bytes
 .../job-start-activity-diagram.png | Bin 0 -> 177442 bytes
 .../latency-graph.png  | Bin 0 -> 81918 bytes
 .../stream-service-changes.png | Bin 0 -> 1187027 bytes
 .../watermark-metrics.png  | Bin 0 -> 46059 bytes
 14 files changed, 406 insertions(+)

diff --git 
a/website/www/site/content/en/blog/apache-beam-flink-and-kubernetes.md 
b/website/www/site/content/en/blog/apache-beam-flink-and-kubernetes.md
new file mode 100644
index 000..b50d475ed7b
--- /dev/null
+++ b/website/www/site/content/en/blog/apache-beam-flink-and-kubernetes.md
@@ -0,0 +1,403 @@
+---
+title:  "Build a scalable, self-managed streaming infrastructure with Beam and 
Flink"
+date:   2023-11-03 09:00:00 -0400
+categories:
+  - blog
+authors:
+  - talat
+---
+
+
+In this blog series, [Talat Uyarer (Architect / Senior Principal 
Engineer)](https://www.linkedin.com/in/talatuyarer/), [Rishabh Kedia (Principal 
Engineer)](https://www.linkedin.com/in/rishabhkedia/), and [David He 
(Engineering Director)](https://www.linkedin.com/in/davidqhe/) describe how we 
built a self-managed streaming platform by using Apache Beam and Flink. In this 
part of the series, we 

(beam) branch master updated: [RRIO] [Test] Create test Caller and SetupTeardown (#29262)

2023-11-07 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new aa92afc1f6c [RRIO] [Test] Create test Caller and SetupTeardown (#29262)
aa92afc1f6c is described below

commit aa92afc1f6cf24981848e8930313232f8306c29d
Author: Damon 
AuthorDate: Tue Nov 7 13:15:34 2023 -0800

[RRIO] [Test] Create test Caller and SetupTeardown (#29262)

* [RRIO] [Test] Create test Caller and SetupTeardown

* Fix argument checks; Implement HTTP

* Revert go changes

* Patch code comments

* Add missing flag to documentation

* Patch per PR comments
---
 .test-infra/mock-apis/README.md|   4 -
 .../resources/beam/checkstyle/suppressions.xml |   1 +
 sdks/java/io/rrio/build.gradle |  19 +++-
 .../EchoGRPCCallerWithSetupTeardown.java   |  96 
 .../EchoGRPCCallerWithSetupTeardownTestIT.java | 126 +
 .../beam/io/requestresponse/EchoHTTPCaller.java|  92 +++
 .../io/requestresponse/EchoHTTPCallerTestIT.java   | 120 
 .../beam/io/requestresponse/EchoITOptions.java |  61 ++
 8 files changed, 509 insertions(+), 10 deletions(-)

diff --git a/.test-infra/mock-apis/README.md b/.test-infra/mock-apis/README.md
index ec94eb45a19..e2148d390a0 100644
--- a/.test-infra/mock-apis/README.md
+++ b/.test-infra/mock-apis/README.md
@@ -51,10 +51,6 @@ flowchart LR
 end
 ```
 
-# Writing Integration Tests
-
-TODO: See https://github.com/apache/beam/issues/28859
-
 # Development Dependencies
 
 | Dependency  | Reason 
|
diff --git 
a/sdks/java/build-tools/src/main/resources/beam/checkstyle/suppressions.xml 
b/sdks/java/build-tools/src/main/resources/beam/checkstyle/suppressions.xml
index 3f052508f2d..c30c48f8244 100644
--- a/sdks/java/build-tools/src/main/resources/beam/checkstyle/suppressions.xml
+++ b/sdks/java/build-tools/src/main/resources/beam/checkstyle/suppressions.xml
@@ -88,6 +88,7 @@
   
   
   
+  
 
   
   
diff --git a/sdks/java/io/rrio/build.gradle b/sdks/java/io/rrio/build.gradle
index 52119c91b47..bfd030ce61d 100644
--- a/sdks/java/io/rrio/build.gradle
+++ b/sdks/java/io/rrio/build.gradle
@@ -20,11 +20,15 @@ plugins { id 'org.apache.beam.module' }
 applyJavaNature(
 automaticModuleName: 'org.apache.beam.sdk.io.requestresponse'
 )
+provideIntegrationTestingDependencies()
+enableJavaPerformanceTesting()
 
 description = "Apache Beam :: SDKS :: Java :: IO :: RequestResponseIO (RRIO)"
 ext.summary = "Support to read from and write to Web APIs"
 
 var jedisVersion = "5.0.0"
+var grpcVersion = "1.59.0"
+var protobufVersion = "3.21.5"
 
 dependencies {
 implementation project(path: ":sdks:java:core", configuration: "shadow")
@@ -36,15 +40,18 @@ dependencies {
 
 testImplementation project(path: ":sdks:java:core", configuration: 
"shadowTest")
 testImplementation project(path: ":sdks:java:io:common", configuration: 
"testRuntimeMigration")
+testImplementation project(path: ":beam-test-infra-mock-apis")
+// Vendored grpc library not fully compatible with proto autogenerated code
+testImplementation "io.grpc:grpc-netty-shaded:${grpcVersion}"
+testImplementation "io.grpc:grpc-protobuf:${grpcVersion}"
+testImplementation "io.grpc:grpc-stub:${grpcVersion}"
+testImplementation 
"com.google.protobuf:protobuf-java-util:${protobufVersion}"
+
+testImplementation 
platform(library.java.google_cloud_platform_libraries_bom)
+testImplementation library.java.google_http_client
 testImplementation library.java.junit
 testImplementation library.java.testcontainers_base
 
 testRuntimeOnly project(path: ":runners:direct-java", configuration: 
"shadow")
 testRuntimeOnly library.java.slf4j_jdk14
 }
-
-task integrationTest(type: Test) {
-group = "verification"
-
-include '**/*IT.class'
-}
\ No newline at end of file
diff --git 
a/sdks/java/io/rrio/src/test/java/org/apache/beam/io/requestresponse/EchoGRPCCallerWithSetupTeardown.java
 
b/sdks/java/io/rrio/src/test/java/org/apache/beam/io/requestresponse/EchoGRPCCallerWithSetupTeardown.java
new file mode 100644
index 000..22e2ff9b1a1
--- /dev/null
+++ 
b/sdks/java/io/rrio/src/test/java/org/apache/beam/io/requestresponse/EchoGRPCCallerWithSetupTeardown.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional i

(beam) branch master updated: [RRIO] [Call] Implement PTransform without adaptive throttling (#29144)

2023-11-06 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new aa890ea5621 [RRIO] [Call] Implement PTransform without adaptive 
throttling (#29144)
aa890ea5621 is described below

commit aa890ea562103c8a6040ce8dd238355666724a02
Author: Damon 
AuthorDate: Mon Nov 6 10:50:49 2023 -0800

[RRIO] [Call] Implement PTransform without adaptive throttling (#29144)

* Implement Call PTransform

* Replace use of Coder with JSON

* Remove public modifiers from ApiIOError Builder

* Reference #29248
---
 sdks/java/io/rrio/build.gradle |   2 +
 .../apache/beam/io/requestresponse/ApiIOError.java |  36 +-
 .../org/apache/beam/io/requestresponse/Call.java   | 337 +-
 .../apache/beam/io/requestresponse/CallTest.java   | 493 +
 4 files changed, 845 insertions(+), 23 deletions(-)

diff --git a/sdks/java/io/rrio/build.gradle b/sdks/java/io/rrio/build.gradle
index 6963fcb23dd..52119c91b47 100644
--- a/sdks/java/io/rrio/build.gradle
+++ b/sdks/java/io/rrio/build.gradle
@@ -30,6 +30,8 @@ dependencies {
 implementation project(path: ":sdks:java:core", configuration: "shadow")
 implementation library.java.joda_time
 implementation library.java.vendored_guava_32_1_2_jre
+implementation library.java.jackson_core
+implementation library.java.jackson_databind
 implementation "redis.clients:jedis:$jedisVersion"
 
 testImplementation project(path: ":sdks:java:core", configuration: 
"shadowTest")
diff --git 
a/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/ApiIOError.java
 
b/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/ApiIOError.java
index 5936c5dd84b..cfff3bd8941 100644
--- 
a/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/ApiIOError.java
+++ 
b/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/ApiIOError.java
@@ -17,11 +17,16 @@
  */
 package org.apache.beam.io.requestresponse;
 
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.auto.value.AutoValue;
+import java.util.Optional;
 import org.apache.beam.sdk.schemas.AutoValueSchema;
 import org.apache.beam.sdk.schemas.annotations.DefaultSchema;
 import org.apache.beam.sdk.schemas.annotations.SchemaCaseFormat;
 import 
org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.CaseFormat;
+import 
org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Throwables;
+import org.checkerframework.checker.nullness.qual.NonNull;
 import org.joda.time.Instant;
 
 /** {@link ApiIOError} is a data class for storing details about an error. */
@@ -30,12 +35,31 @@ import org.joda.time.Instant;
 @AutoValue
 public abstract class ApiIOError {
 
+  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+  /**
+   * Instantiate an {@link ApiIOError} from an {@link ErrorT} {@link T} 
element. The {@link T}
+   * element is converted to a JSON string.
+   */
+  static  ApiIOError of(@NonNull ErrorT e, 
@NonNull T element)
+  throws JsonProcessingException {
+
+String json = OBJECT_MAPPER.writeValueAsString(element);
+
+return ApiIOError.builder()
+.setRequestAsJsonString(json)
+.setMessage(Optional.ofNullable(e.getMessage()).orElse(""))
+.setObservedTimestamp(Instant.now())
+.setStackTrace(Throwables.getStackTraceAsString(e))
+.build();
+  }
+
   static Builder builder() {
 return new AutoValue_ApiIOError.Builder();
   }
 
-  /** The encoded UTF-8 string representation of the related processed 
element. */
-  public abstract String getEncodedElementAsUtfString();
+  /** The JSON string representation of the request associated with the error. 
*/
+  public abstract String getRequestAsJsonString();
 
   /** The observed timestamp of the error. */
   public abstract Instant getObservedTimestamp();
@@ -49,13 +73,13 @@ public abstract class ApiIOError {
   @AutoValue.Builder
   abstract static class Builder {
 
-public abstract Builder setEncodedElementAsUtfString(String value);
+abstract Builder setRequestAsJsonString(String value);
 
-public abstract Builder setObservedTimestamp(Instant value);
+abstract Builder setObservedTimestamp(Instant value);
 
-public abstract Builder setMessage(String value);
+abstract Builder setMessage(String value);
 
-public abstract Builder setStackTrace(String value);
+abstract Builder setStackTrace(String value);
 
 abstract ApiIOError build();
   }
diff --git 
a/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/Call.java 
b/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/Call.java
index 4f854ea69c7..5

(beam) branch master updated: [RRIO] [Testing] Mock API integration tests (#29236)

2023-11-01 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 00a55272eaa [RRIO] [Testing] Mock API integration tests (#29236)
00a55272eaa is described below

commit 00a55272eaac451c3f364c4071e4af1ab56b88bd
Author: Damon 
AuthorDate: Wed Nov 1 10:09:14 2023 -0700

[RRIO] [Testing] Mock API integration tests (#29236)

* Update README with integration test instructions

* Replace refresher with human readable ids

* Impl integration tests; refactor code errors

* Add missing code comment

* Fix whitespace
---
 .test-infra/mock-apis/README.md|  56 --
 .test-infra/mock-apis/go.mod   |   6 +-
 .test-infra/mock-apis/go.sum   |  12 +-
 .../configmap.yaml |  24 ---
 .../configmap.yaml |   8 +-
 .../deployment.yaml|   6 +-
 .../kustomization.yaml |   4 +-
 .../configmap.yaml |   7 +-
 .../deployment.yaml|   6 +-
 .../kustomization.yaml |   4 +-
 .../deployment.yaml|  27 ---
 .../kustomization.yaml |  34 
 .../src/main/go/internal/service/echo/echo.go  |  47 +++--
 .../src/main/go/test/integration/echo/echo_test.go | 220 +
 .../src/main/go/test/integration/integration.go|  32 +++
 .../mock-apis/src/main/go/test/integration/vars.go |  49 +
 16 files changed, 402 insertions(+), 140 deletions(-)

diff --git a/.test-infra/mock-apis/README.md b/.test-infra/mock-apis/README.md
index df34757b770..9c4911a0d63 100644
--- a/.test-infra/mock-apis/README.md
+++ b/.test-infra/mock-apis/README.md
@@ -75,7 +75,45 @@ go test ./src/main/go/internal/...
 
 ## Integration
 
-TODO: See https://github.com/apache/beam/issues/28859
+Integration tests require the following values.
+
+### Quota ID
+
+Each allocated quota corresponds to a unique ID known as the Quota ID.
+There exists a one-to-one relationship between the allocated quota and
+the
+[infrastructure/kubernetes/refresher/overlays](infrastructure/kubernetes/refresher/overlays).
+
+To query the Kubernetes cluster for allocated Quota IDs:
+```
+kubectl get deploy --selector=app.kubernetes.io/name=refresher -o 
custom-columns='QUOTA_ID:.metadata.labels.quota-id'
+```
+
+### Service Endpoint
+
+To list available endpoints, run:
+
+```
+kubectl get svc 
-o=custom-columns='NAME:.metadata.name,HOST:.status.loadBalancer.ingress[*].ip,PORT_NAME:.spec.ports[*].name,PORT:.spec.ports[*].port'
+```
+
+You should see something similar to:
+
+```
+NAME HOST  PORT_NAME   PORT
+echo 10.n.n.n  grpc,http   50051,8080
+```
+
+When running tests locally, you will need to first run:
+```
+kubectl port-forward service/echo 50051:50051 8080:8080
+```
+
+which allows you to access the gRPC via `localhost:50051` and the HTTP via
+`http://localhost:8080/v1/echo`.
+
+When running tests on Dataflow, you supply `10.n.n.n:50051` for gRPC and
+`http://10.n.n.n:8080/v1/echo` for HTTP.
 
 # Local Usage
 
@@ -186,24 +224,14 @@ The Refresher service relies on 
[kustomize](https://kustomize.io) overlays
 which are located at 
[infrastructure/kubernetes/refresher/overlays](infrastructure/kubernetes/refresher/overlays).
 
 Each folder contained in 
[infrastructure/kubernetes/refresher/overlays](infrastructure/kubernetes/refresher/overlays)
-corresponds to an individual Refresher instance that is identified by the UUID.
-You will need to deploy each one individually.
+corresponds to an individual Refresher instance that is identified by a unique
+string id. You will need to deploy each one individually.
 
 For example:
 ```
-kubectl kustomize 
infrastructure/kubernetes/refresher/overlays/f588787b-28f8-4e5f-8335-f862379daf59
 | ko resolve -f - | kubectl apply -f -
+kubectl kustomize 
infrastructure/kubernetes/refresher/overlays/echo-should-never-exceed-quota | 
ko resolve -f - | kubectl apply -f -
 ```
 
 Like previously, you may see "Does not have minimum availability" message
 showing on the status. It may take some time for GKE autopilot
 to scale the node pool.
-
-## Additional note for creating a new Refresher service instance
-
-Each Refresher service instance relies on a unique UUID, where
-the [kustomize](https://kustomize.io) overlay replaces in the
-[infrastructure/kubernetes/refresher/base](infrastructure/kubernetes/refresher/base)
-template.
-
-You can copy the entire folder and paste into a new one with a unique UUID
-and then perform a find-replace of the old UUID with the new one.
diff --git a/.test-infra/mock-apis/go.mod b/.test-infra/mock-apis/go

[beam] branch master updated (60c2ba56f60 -> a411a71bc46)

2023-10-26 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from 60c2ba56f60 Set correct markers for the other test scenario. (#29141)
 add a411a71bc46 [RRIO] create RedisClient utility (#29081)

No new revisions were added by this update.

Summary of changes:
 sdks/java/io/rrio/build.gradle |  12 ++
 .../beam/io/requestresponse/RedisClient.java   | 188 ++
 .../beam/io/requestresponse/RedisClientTestIT.java | 209 +
 .../RedisExternalResourcesRule.java|  71 +++
 4 files changed, 480 insertions(+)
 create mode 100644 
sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/RedisClient.java
 create mode 100644 
sdks/java/io/rrio/src/test/java/org/apache/beam/io/requestresponse/RedisClientTestIT.java
 create mode 100644 
sdks/java/io/rrio/src/test/java/org/apache/beam/io/requestresponse/RedisExternalResourcesRule.java



[beam] branch master updated: [RRIO] [Throttle] Stub throttle without external transform (#29059)

2023-10-23 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new f70cbc12053 [RRIO] [Throttle] Stub throttle without external transform 
(#29059)
f70cbc12053 is described below

commit f70cbc12053984b0d74d4c24d8b27de06dedb749
Author: Damon 
AuthorDate: Mon Oct 23 15:05:36 2023 -0700

[RRIO] [Throttle] Stub throttle without external transform (#29059)
---
 .../ThrottleWithoutExternalResource.java   | 57 ++
 1 file changed, 57 insertions(+)

diff --git 
a/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/ThrottleWithoutExternalResource.java
 
b/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/ThrottleWithoutExternalResource.java
new file mode 100644
index 000..0648a86f28e
--- /dev/null
+++ 
b/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/ThrottleWithoutExternalResource.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.io.requestresponse;
+
+import com.google.auto.value.AutoValue;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.values.PCollection;
+
+/**
+ * {@link ThrottleWithoutExternalResource} throttles a {@link RequestT} {@link 
PCollection} emitting
+ * a {@link RequestT} {@link PCollection} at a maximally configured rate, 
without using an external
+ * resource.
+ */
+// TODO(damondouglas): expand what "without external resource" means with 
respect to "with external
+//   resource" when the other throttle transforms implemented.
+//   See: https://github.com/apache/beam/issues/28932
+class ThrottleWithoutExternalResource
+extends PTransform, PCollection> {
+
+  // TODO(damondouglas): remove suppress warnings when finally utilized in a 
future PR.
+  @SuppressWarnings({"unused"})
+  private final Configuration configuration;
+
+  private ThrottleWithoutExternalResource(Configuration 
configuration) {
+this.configuration = configuration;
+  }
+
+  @Override
+  public PCollection expand(PCollection input) {
+// TODO(damondouglas): expand in a future PR.
+return input;
+  }
+
+  @AutoValue
+  abstract static class Configuration {
+
+@AutoValue.Builder
+abstract static class Builder {
+  abstract Configuration build();
+}
+  }
+}



[beam] branch master updated (ca96cb5ea42 -> a07d90eec0e)

2023-10-23 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from ca96cb5ea42 Add Setup environment actions (#29026)
 add a07d90eec0e [RRIO] Define and implement mock quota aware API (#28893)

No new revisions were added by this update.

Summary of changes:
 .test-infra/mock-apis/README.md|  116 ++
 .test-infra/mock-apis/buf.gen.yaml |   40 +
 .test-infra/mock-apis/buf.lock |7 +
 .test-infra/mock-apis/buf.yaml |   20 +
 .test-infra/mock-apis/build.gradle |   44 +
 .test-infra/mock-apis/go.mod   |   58 +
 .test-infra/mock-apis/go.sum   |  214 +++
 .test-infra/mock-apis/proto/echo/v1/echo.proto |   46 +
 .../mock-apis/src/main/go/cmd/service/echo/main.go |  148 ++
 .../src/main/go/cmd/service/refresher/main.go  |  121 ++
 .../mock-apis/src/main/go/internal/cache/cache.go  |  122 ++
 .../mock-apis/src/main/go/internal/cache/doc.go|   17 +
 .../src/main/go/internal/cache/interface.go|   45 +
 .../mock-apis/src/main/go/internal/cache/redis.go  |   59 +
 .../src/main/go/internal/environment/variable.go   |  118 ++
 .../main/go/internal/environment/variable_test.go  |  312 +
 .../src/main/go/internal/logging/logging.go|  137 ++
 .../src/main/go/internal/logging/logging_test.go   |  153 +++
 .../mock-apis/src/main/go/internal/metric/doc.go   |   17 +
 .../mock-apis/src/main/go/internal/metric/gcp.go   |   77 ++
 .../src/main/go/internal/metric/interface.go   |   38 +
 .../src/main/go/internal/proto/echo/v1/echo.pb.go  |  256 
 .../main/go/internal/proto/echo/v1/echo_grpc.pb.go |  107 ++
 .../src/main/go/internal/service/echo/echo.go  |  185 +++
 .../beam/testinfra/mockapis/echo/v1/Echo.java  | 1447 
 .../mockapis/echo/v1/EchoServiceGrpc.java  |  393 ++
 .../testinfra/mockapis/echo/v1/package-info.java   |   20 +
 build.gradle.kts   |4 +
 .../resources/beam/checkstyle/suppressions.xml |1 +
 settings.gradle.kts|2 +
 30 files changed, 4324 insertions(+)
 create mode 100644 .test-infra/mock-apis/README.md
 create mode 100644 .test-infra/mock-apis/buf.gen.yaml
 create mode 100644 .test-infra/mock-apis/buf.lock
 create mode 100644 .test-infra/mock-apis/buf.yaml
 create mode 100644 .test-infra/mock-apis/build.gradle
 create mode 100644 .test-infra/mock-apis/go.mod
 create mode 100644 .test-infra/mock-apis/go.sum
 create mode 100644 .test-infra/mock-apis/proto/echo/v1/echo.proto
 create mode 100644 .test-infra/mock-apis/src/main/go/cmd/service/echo/main.go
 create mode 100644 
.test-infra/mock-apis/src/main/go/cmd/service/refresher/main.go
 create mode 100644 .test-infra/mock-apis/src/main/go/internal/cache/cache.go
 create mode 100644 .test-infra/mock-apis/src/main/go/internal/cache/doc.go
 create mode 100644 
.test-infra/mock-apis/src/main/go/internal/cache/interface.go
 create mode 100644 .test-infra/mock-apis/src/main/go/internal/cache/redis.go
 create mode 100644 
.test-infra/mock-apis/src/main/go/internal/environment/variable.go
 create mode 100644 
.test-infra/mock-apis/src/main/go/internal/environment/variable_test.go
 create mode 100644 
.test-infra/mock-apis/src/main/go/internal/logging/logging.go
 create mode 100644 
.test-infra/mock-apis/src/main/go/internal/logging/logging_test.go
 create mode 100644 .test-infra/mock-apis/src/main/go/internal/metric/doc.go
 create mode 100644 .test-infra/mock-apis/src/main/go/internal/metric/gcp.go
 create mode 100644 
.test-infra/mock-apis/src/main/go/internal/metric/interface.go
 create mode 100644 
.test-infra/mock-apis/src/main/go/internal/proto/echo/v1/echo.pb.go
 create mode 100644 
.test-infra/mock-apis/src/main/go/internal/proto/echo/v1/echo_grpc.pb.go
 create mode 100644 
.test-infra/mock-apis/src/main/go/internal/service/echo/echo.go
 create mode 100644 
.test-infra/mock-apis/src/main/java/org/apache/beam/testinfra/mockapis/echo/v1/Echo.java
 create mode 100644 
.test-infra/mock-apis/src/main/java/org/apache/beam/testinfra/mockapis/echo/v1/EchoServiceGrpc.java
 create mode 100644 
.test-infra/mock-apis/src/main/java/org/apache/beam/testinfra/mockapis/echo/v1/package-info.java



[beam] branch master updated (266721bc001 -> 9fdc59b2c0f)

2023-10-18 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from 266721bc001 [RRIO] [Throttle] stub the refresh quota transform (#29057)
 add 9fdc59b2c0f [RRIO] [Throttle] stub the dequeue transform (#29056)

No new revisions were added by this update.

Summary of changes:
 .../{CacheRead.java => ThrottleDequeue.java}   | 76 --
 1 file changed, 28 insertions(+), 48 deletions(-)
 copy 
sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/{CacheRead.java
 => ThrottleDequeue.java} (52%)



[beam] branch master updated (964710f4f9d -> 266721bc001)

2023-10-18 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from 964710f4f9d [RRIO] [Call] stub the Call transform (#29060)
 add 266721bc001 [RRIO] [Throttle] stub the refresh quota transform (#29057)

No new revisions were added by this update.

Summary of changes:
 ...ottleEnqueue.java => ThrottleRefreshQuota.java} | 38 +-
 1 file changed, 16 insertions(+), 22 deletions(-)
 copy 
sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/{ThrottleEnqueue.java
 => ThrottleRefreshQuota.java} (60%)



[beam] branch master updated (d7039bca9f2 -> 964710f4f9d)

2023-10-18 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from d7039bca9f2 Update `google_api_services_healthcare` [10/23] (#29055)
 add 964710f4f9d [RRIO] [Call] stub the Call transform (#29060)

No new revisions were added by this update.

Summary of changes:
 .../requestresponse/{CacheRead.java => Call.java}  | 41 ++
 1 file changed, 18 insertions(+), 23 deletions(-)
 copy 
sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/{CacheRead.java
 => Call.java} (65%)



[beam] branch master updated: [RRIO] [Throttle] Stub Enqueue PTransform (#29053)

2023-10-18 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 4f7763bf2dc [RRIO] [Throttle] Stub Enqueue PTransform (#29053)
4f7763bf2dc is described below

commit 4f7763bf2dc262436d8333ea00f0c09c4935b2e3
Author: Damon 
AuthorDate: Wed Oct 18 09:13:15 2023 -0700

[RRIO] [Throttle] Stub Enqueue PTransform (#29053)
---
 .../beam/io/requestresponse/ThrottleEnqueue.java   | 61 ++
 1 file changed, 61 insertions(+)

diff --git 
a/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/ThrottleEnqueue.java
 
b/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/ThrottleEnqueue.java
new file mode 100644
index 000..505ef86be48
--- /dev/null
+++ 
b/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/ThrottleEnqueue.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.io.requestresponse;
+
+import com.google.auto.value.AutoValue;
+import org.apache.beam.sdk.transforms.Create;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.TypeDescriptor;
+
+/**
+ * {@link ThrottleEnqueue} enqueues {@link RequestT} elements yielding an 
{@link ApiIOError} {@link
+ * PCollection} of any enqueue errors.
+ */
+class ThrottleEnqueue extends PTransform, 
PCollection> {
+
+  @SuppressWarnings({"unused"})
+  private final Configuration configuration;
+
+  private ThrottleEnqueue(Configuration configuration) {
+this.configuration = configuration;
+  }
+
+  /** Configuration details for {@link ThrottleEnqueue}. */
+  @AutoValue
+  abstract static class Configuration {
+
+static  Builder builder() {
+  return new AutoValue_ThrottleEnqueue_Configuration.Builder<>();
+}
+
+abstract Builder toBuilder();
+
+@AutoValue.Builder
+abstract static class Builder {
+
+  abstract Configuration build();
+}
+  }
+
+  @Override
+  public PCollection expand(PCollection input) {
+// TODO(damondouglas): expand in a future PR.
+return 
input.getPipeline().apply(Create.empty(TypeDescriptor.of(ApiIOError.class)));
+  }
+}



[beam] branch master updated: Rename rrio package (#29040)

2023-10-17 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 89115954f67 Rename rrio package (#29040)
89115954f67 is described below

commit 89115954f67d5e819df46d0428a8b274a9a54e8c
Author: Damon 
AuthorDate: Tue Oct 17 18:54:34 2023 -0700

Rename rrio package (#29040)
---
 sdks/java/io/rrio/build.gradle| 2 +-
 .../beam/io/{requestresponseio => requestresponse}/ApiIOError.java| 2 +-
 .../beam/io/{requestresponseio => requestresponse}/CacheRead.java | 4 ++--
 .../beam/io/{requestresponseio => requestresponse}/CacheWrite.java| 4 ++--
 .../io/{requestresponseio => requestresponse}/CallShouldBackoff.java  | 2 +-
 .../CallShouldBackoffBasedOnRejectionProbability.java | 2 +-
 .../apache/beam/io/{requestresponseio => requestresponse}/Caller.java | 2 +-
 .../io/{requestresponseio => requestresponse}/RequestResponseIO.java  | 4 ++--
 .../beam/io/{requestresponseio => requestresponse}/SetupTeardown.java | 2 +-
 .../UserCodeExecutionException.java   | 2 +-
 .../UserCodeQuotaException.java   | 2 +-
 .../UserCodeTimeoutException.java | 2 +-
 .../beam/io/{requestresponseio => requestresponse}/package-info.java  | 2 +-
 .../CallShouldBackoffBasedOnRejectionProbabilityTest.java | 2 +-
 .../beam/io/{requestresponseio => requestresponse}/CallerTest.java| 2 +-
 .../io/{requestresponseio => requestresponse}/SetupTeardownTest.java  | 2 +-
 16 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/sdks/java/io/rrio/build.gradle b/sdks/java/io/rrio/build.gradle
index d65df370e0c..d7d5c8817d0 100644
--- a/sdks/java/io/rrio/build.gradle
+++ b/sdks/java/io/rrio/build.gradle
@@ -18,7 +18,7 @@
 
 plugins { id 'org.apache.beam.module' }
 applyJavaNature(
-automaticModuleName: 'org.apache.beam.sdk.io.rrio'
+automaticModuleName: 'org.apache.beam.sdk.io.requestresponse'
 )
 
 description = "Apache Beam :: SDKS :: Java :: IO :: RequestResponseIO (RRIO)"
diff --git 
a/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponseio/ApiIOError.java
 
b/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/ApiIOError.java
similarity index 97%
rename from 
sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponseio/ApiIOError.java
rename to 
sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/ApiIOError.java
index b7c5524e823..5936c5dd84b 100644
--- 
a/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponseio/ApiIOError.java
+++ 
b/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/ApiIOError.java
@@ -15,7 +15,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.beam.io.requestresponseio;
+package org.apache.beam.io.requestresponse;
 
 import com.google.auto.value.AutoValue;
 import org.apache.beam.sdk.schemas.AutoValueSchema;
diff --git 
a/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponseio/CacheRead.java
 
b/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/CacheRead.java
similarity index 97%
rename from 
sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponseio/CacheRead.java
rename to 
sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/CacheRead.java
index 6154873e506..3765d25370a 100644
--- 
a/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponseio/CacheRead.java
+++ 
b/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/CacheRead.java
@@ -15,11 +15,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.beam.io.requestresponseio;
+package org.apache.beam.io.requestresponse;
 
 import com.google.auto.value.AutoValue;
 import java.util.Map;
-import org.apache.beam.io.requestresponseio.CacheRead.Result;
+import org.apache.beam.io.requestresponse.CacheRead.Result;
 import org.apache.beam.sdk.Pipeline;
 import org.apache.beam.sdk.transforms.PTransform;
 import org.apache.beam.sdk.values.KV;
diff --git 
a/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponseio/CacheWrite.java
 
b/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/CacheWrite.java
similarity index 97%
rename from 
sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponseio/CacheWrite.java
rename to 
sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/CacheWrite.java
index 0fb14af67c3..25249c3e41b 100644
--- 
a/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponseio/CacheWrite.java
+++ 
b/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponse/CacheWrite.java

[beam] branch master updated: [RRIO] [Cache] Stub CacheRead transform (#29011)

2023-10-17 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new eb7997145e4 [RRIO] [Cache] Stub CacheRead transform (#29011)
eb7997145e4 is described below

commit eb7997145e4645aaa36ce33fc21924081c1f3735
Author: Damon 
AuthorDate: Tue Oct 17 14:14:37 2023 -0700

[RRIO] [Cache] Stub CacheRead transform (#29011)

* Stub CacheRead

* Run spotlessApply
---
 .../beam/io/requestresponseio/CacheRead.java   | 121 +
 1 file changed, 121 insertions(+)

diff --git 
a/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponseio/CacheRead.java
 
b/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponseio/CacheRead.java
new file mode 100644
index 000..6154873e506
--- /dev/null
+++ 
b/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponseio/CacheRead.java
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.io.requestresponseio;
+
+import com.google.auto.value.AutoValue;
+import java.util.Map;
+import org.apache.beam.io.requestresponseio.CacheRead.Result;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PCollectionTuple;
+import org.apache.beam.sdk.values.PInput;
+import org.apache.beam.sdk.values.POutput;
+import org.apache.beam.sdk.values.PValue;
+import org.apache.beam.sdk.values.TupleTag;
+import 
org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap;
+
+/**
+ * {@link CacheRead} reads associated {@link ResponseT} types from {@link 
RequestT} types, if any
+ * exist.
+ */
+class CacheRead
+extends PTransform, Result> {
+
+  private static final TupleTag FAILURE_TAG = new 
TupleTag() {};
+
+  // TODO(damondouglas): remove suppress warnings after instance utilized.
+  @SuppressWarnings({"unused"})
+  private final Configuration configuration;
+
+  private CacheRead(Configuration configuration) {
+this.configuration = configuration;
+  }
+
+  /** Configuration details for {@link CacheRead}. */
+  @AutoValue
+  abstract static class Configuration {
+
+static  Builder builder() {
+  return new AutoValue_CacheRead_Configuration.Builder<>();
+}
+
+abstract Builder toBuilder();
+
+@AutoValue.Builder
+abstract static class Builder {
+
+  abstract Configuration build();
+}
+  }
+
+  @Override
+  public Result expand(PCollection input) {
+return Result.of(
+new TupleTag>() {}, 
PCollectionTuple.empty(input.getPipeline()));
+  }
+
+  /**
+   * The {@link Result} of reading RequestT {@link PCollection} elements 
yielding ResponseT {@link
+   * PCollection} elements.
+   */
+  static class Result implements POutput {
+
+static  Result of(
+TupleTag> responseTag, PCollectionTuple pct) {
+  return new Result<>(responseTag, pct);
+}
+
+private final Pipeline pipeline;
+private final TupleTag> responseTag;
+private final PCollection> responses;
+private final PCollection failures;
+
+private Result(TupleTag> responseTag, 
PCollectionTuple pct) {
+  this.pipeline = pct.getPipeline();
+  this.responseTag = responseTag;
+  this.responses = pct.get(responseTag);
+  this.failures = pct.get(FAILURE_TAG);
+}
+
+PCollection> getResponses() {
+  return responses;
+}
+
+PCollection getFailures() {
+  return failures;
+}
+
+@Override
+public Pipeline getPipeline() {
+  return this.pipeline;
+}
+
+@Override
+public Map, PValue> expand() {
+  return ImmutableMap.of(
+  responseTag, responses,
+  FAILURE_TAG, failures);
+}
+
+@Override
+public void finishSpecifyingOutput(
+String transformName, PInput input, PTransform transform) {}
+  }
+}



[beam] branch master updated (1b50ebbe157 -> 7df9b24c6c6)

2023-10-16 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from 1b50ebbe157 Fix concurrency groups in Readme (#29021)
 add 7df9b24c6c6 [RRIO] [Cache] Stub CacheWrite (#29012)

No new revisions were added by this update.

Summary of changes:
 .../beam/io/requestresponseio/CacheWrite.java  | 119 +
 1 file changed, 119 insertions(+)
 create mode 100644 
sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponseio/CacheWrite.java



[beam] branch rrio/main/stub deleted (was 104c10b3ee5)

2023-10-11 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch rrio/main/stub
in repository https://gitbox.apache.org/repos/asf/beam.git


 was 104c10b3ee5 [RRIO] Create Caller and SetupTeardown interfaces (#28905)

The revisions that were on this branch are still contained in
other references; therefore, this change does not discard any commits
from the repository.



[beam] branch rrio/main/stub created (now 104c10b3ee5)

2023-10-11 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch rrio/main/stub
in repository https://gitbox.apache.org/repos/asf/beam.git


  at 104c10b3ee5 [RRIO] Create Caller and SetupTeardown interfaces (#28905)

No new revisions were added by this update.



[beam] branch master updated: [RRIO] Create Caller and SetupTeardown interfaces (#28905)

2023-10-11 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 104c10b3ee5 [RRIO] Create Caller and SetupTeardown interfaces (#28905)
104c10b3ee5 is described below

commit 104c10b3ee536a9a3ea52b4dbf62d86b669da5d9
Author: Damon 
AuthorDate: Wed Oct 11 12:39:38 2023 -0700

[RRIO] Create Caller and SetupTeardown interfaces (#28905)

* Create test Caller and SetupTeardown interfaces

* Update Javadoc

* Defer Call transform to future PR

* Rename package to requestresponseio

* Add username to TODO
---
 sdks/java/io/rrio/build.gradle |   7 +-
 .../apache/beam/io/requestresponseio/Caller.java   |  27 +
 .../beam/io/requestresponseio/SetupTeardown.java   |  34 ++
 .../UserCodeExecutionException.java|  38 ++
 .../requestresponseio/UserCodeQuotaException.java  |  42 +++
 .../UserCodeTimeoutException.java  |  39 ++
 .../beam/io/requestresponseio/package-info.java|  20 
 .../java/org/apache/beam/io/rrio/CallerTest.java   | 126 
 .../org/apache/beam/io/rrio/SetupTeardownTest.java | 132 +
 9 files changed, 462 insertions(+), 3 deletions(-)

diff --git a/sdks/java/io/rrio/build.gradle b/sdks/java/io/rrio/build.gradle
index d65df370e0c..32fbd9d22e3 100644
--- a/sdks/java/io/rrio/build.gradle
+++ b/sdks/java/io/rrio/build.gradle
@@ -25,9 +25,10 @@ description = "Apache Beam :: SDKS :: Java :: IO :: 
RequestResponseIO (RRIO)"
 ext.summary = "Support to read from and write to Web APIs"
 
 dependencies {
-implementation project(path: ":sdks:java:core", configuration: "shadow")
-implementation library.java.joda_time
-implementation library.java.vendored_guava_32_1_2_jre
+// TODO(damondouglas): revert to implementation after project is more 
fully developed
+permitUnusedDeclared project(path: ":sdks:java:core", configuration: 
"shadow")
+permitUnusedDeclared library.java.joda_time
+permitUnusedDeclared library.java.vendored_guava_32_1_2_jre
 
 testImplementation project(path: ":sdks:java:core", configuration: 
"shadowTest")
 testImplementation library.java.junit
diff --git 
a/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponseio/Caller.java
 
b/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponseio/Caller.java
new file mode 100644
index 000..32b514c43a1
--- /dev/null
+++ 
b/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponseio/Caller.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.io.requestresponseio;
+
+import java.io.Serializable;
+
+/** {@link Caller} interfaces user custom code intended for API calls. */
+public interface Caller extends Serializable {
+
+  /** Calls a Web API with the {@link RequestT} and returns a {@link 
ResponseT}. */
+  ResponseT call(RequestT request) throws UserCodeExecutionException;
+}
diff --git 
a/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponseio/SetupTeardown.java
 
b/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponseio/SetupTeardown.java
new file mode 100644
index 000..2bdc8113d98
--- /dev/null
+++ 
b/sdks/java/io/rrio/src/main/java/org/apache/beam/io/requestresponseio/SetupTeardown.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License i

[beam] branch master updated: Remove warning from catch in table exists validation (#28288)

2023-09-29 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 80311628c27 Remove warning from catch in table exists validation 
(#28288)
80311628c27 is described below

commit 80311628c27ea68704f46d9f5e6edea6466dfcb3
Author: Damon 
AuthorDate: Fri Sep 29 13:04:18 2023 -0700

Remove warning from catch in table exists validation (#28288)

* Remove warning from catch in table exists validation

* Remove warning from catch in read table exists validation

* Throw RuntimeException instead
---
 .../beam/sdk/io/gcp/bigtable/BigtableIO.java   | 24 ++
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git 
a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableIO.java
 
b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableIO.java
index 9f3c627a89e..92a0af20548 100644
--- 
a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableIO.java
+++ 
b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableIO.java
@@ -19,8 +19,8 @@ package org.apache.beam.sdk.io.gcp.bigtable;
 
 import static 
org.apache.beam.sdk.io.gcp.bigtable.BigtableServiceFactory.BigtableServiceEntry;
 import static org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
+import static org.apache.beam.sdk.util.Preconditions.checkArgumentNotNull;
 import static 
org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument;
-import static 
org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkNotNull;
 import static 
org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState;
 
 import com.google.auto.value.AutoValue;
@@ -689,14 +689,13 @@ public class BigtableIO {
 private void validateTableExists(
 BigtableConfig config, BigtableReadOptions readOptions, 
PipelineOptions options) {
   if (config.getValidate() && config.isDataAccessible() && 
readOptions.isDataAccessible()) {
-String tableId = checkNotNull(readOptions.getTableId().get());
+ValueProvider tableIdProvider = 
checkArgumentNotNull(readOptions.getTableId());
+String tableId = checkArgumentNotNull(tableIdProvider.get());
 try {
-  checkArgument(
-  getServiceFactory().checkTableExists(config, options, tableId),
-  "Table %s does not exist",
-  tableId);
+  boolean exists = getServiceFactory().checkTableExists(config, 
options, tableId);
+  checkArgument(exists, "Table %s does not exist", tableId);
 } catch (IOException e) {
-  LOG.warn("Error checking whether table {} exists; proceeding.", 
tableId, e);
+  throw new RuntimeException(e);
 }
   }
 }
@@ -1122,14 +1121,13 @@ public class BigtableIO {
 private void validateTableExists(
 BigtableConfig config, BigtableWriteOptions writeOptions, 
PipelineOptions options) {
   if (config.getValidate() && config.isDataAccessible() && 
writeOptions.isDataAccessible()) {
-String tableId = checkNotNull(writeOptions.getTableId().get());
+ValueProvider tableIdProvider = 
checkArgumentNotNull(writeOptions.getTableId());
+String tableId = checkArgumentNotNull(tableIdProvider.get());
 try {
-  checkArgument(
-  factory.checkTableExists(config, options, 
writeOptions.getTableId().get()),
-  "Table %s does not exist",
-  tableId);
+  boolean exists = factory.checkTableExists(config, options, tableId);
+  checkArgument(exists, "Table %s does not exist", tableId);
 } catch (IOException e) {
-  LOG.warn("Error checking whether table {} exists; proceeding.", 
tableId, e);
+  throw new RuntimeException(e);
 }
   }
 }



[beam] branch master updated: Create RequestResponseIO gradle project (#28706)

2023-09-28 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new dda0eb9d642 Create RequestResponseIO gradle project (#28706)
dda0eb9d642 is described below

commit dda0eb9d642a8f3b23174a6c39a587c10726f30f
Author: Damon 
AuthorDate: Thu Sep 28 09:08:37 2023 -0700

Create RequestResponseIO gradle project (#28706)
---
 sdks/java/io/rrio/build.gradle | 36 
 settings.gradle.kts|  1 +
 2 files changed, 37 insertions(+)

diff --git a/sdks/java/io/rrio/build.gradle b/sdks/java/io/rrio/build.gradle
new file mode 100644
index 000..d65df370e0c
--- /dev/null
+++ b/sdks/java/io/rrio/build.gradle
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+plugins { id 'org.apache.beam.module' }
+applyJavaNature(
+automaticModuleName: 'org.apache.beam.sdk.io.rrio'
+)
+
+description = "Apache Beam :: SDKS :: Java :: IO :: RequestResponseIO (RRIO)"
+ext.summary = "Support to read from and write to Web APIs"
+
+dependencies {
+implementation project(path: ":sdks:java:core", configuration: "shadow")
+implementation library.java.joda_time
+implementation library.java.vendored_guava_32_1_2_jre
+
+testImplementation project(path: ":sdks:java:core", configuration: 
"shadowTest")
+testImplementation library.java.junit
+testRuntimeOnly project(path: ":runners:direct-java", configuration: 
"shadow")
+testRuntimeOnly library.java.slf4j_jdk14
+}
\ No newline at end of file
diff --git a/settings.gradle.kts b/settings.gradle.kts
index 45b8c25101b..f4901d7df92 100644
--- a/settings.gradle.kts
+++ b/settings.gradle.kts
@@ -255,6 +255,7 @@ include(":sdks:java:io:parquet")
 include(":sdks:java:io:pulsar")
 include(":sdks:java:io:rabbitmq")
 include(":sdks:java:io:redis")
+include(":sdks:java:io:rrio")
 include(":sdks:java:io:solr")
 include(":sdks:java:io:sparkreceiver:2")
 include(":sdks:java:io:snowflake")



[beam] branch master updated (de10fbd5e5b -> 7e830593e61)

2023-09-18 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from de10fbd5e5b Add GitHub Workflow Replacement for Jenkins 
job_PostCommit_Java_PVR_* (#28412)
 add 7e830593e61 Remove TableSchema to JSON conversion. (#28274)

No new revisions were added by this update.

Summary of changes:
 .../beam/sdk/io/gcp/bigquery/BigQueryIO.java   | 40 +-
 .../sdk/io/gcp/bigquery/BigQueryIOReadTest.java| 18 --
 2 files changed, 15 insertions(+), 43 deletions(-)



[beam] branch master updated: Implement terraform code to provision Vertex AI resources (#27979)

2023-08-25 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 21b7e3abae6 Implement terraform code to provision Vertex AI resources 
(#27979)
21b7e3abae6 is described below

commit 21b7e3abae6dd10bfbed010f1a8611fe6a5f3ddc
Author: Damon 
AuthorDate: Fri Aug 25 16:18:55 2023 +

Implement terraform code to provision Vertex AI resources (#27979)
---
 .../vertex-ai-featurestore/README.md   |  74 +++
 .../vertex-ai-featurestore/featurestore.tf |  89 +++
 .../vertex-ai-featurestore/synthea.tfvars  | 621 +
 .../vertex-ai-featurestore/variables.tf|  51 ++
 4 files changed, 835 insertions(+)

diff --git 
a/.test-infra/terraform/google-cloud-platform/vertex-ai-featurestore/README.md 
b/.test-infra/terraform/google-cloud-platform/vertex-ai-featurestore/README.md
new file mode 100644
index 000..a84e5f62383
--- /dev/null
+++ 
b/.test-infra/terraform/google-cloud-platform/vertex-ai-featurestore/README.md
@@ -0,0 +1,74 @@
+
+
+# Overview
+
+This module provisions a
+[Vertex AI Featurestore](https://cloud.google.com/vertex-ai/docs/featurestore).
+
+# Requirements and Usage
+
+See [Google Cloud Platform 
requirements](../../../google-cloud-platform/README.md)
+for details on requirements
+and usage.
+
+## 1. Initialize the terraform module
+
+```
+cd .test-infra/terraform/google-cloud-platform/vertex-ai-featurestore
+terraform init
+```
+
+## 2. Create a *.tfvars file
+
+Create a `*.tfvars` file in the same directory as this module.
+
+```
+cd .test-infra/terraform/google-cloud-platform/vertex-ai-featurestore
+touch vars.tfvars
+```
+
+See [Examples](#examples) below for some example `*.tfvars` files.
+
+## 3. Apply the terraform module.
+
+```
+cd .test-infra/terraform/google-cloud-platform/vertex-ai-featurestore
+terraform apply -var-file=vars.tfvars
+```
+
+# Examples
+
+## synthea.tfvars
+
+This directory holds a [synthea.tfvars](synthea.tfvars) to generate an
+example Vertex AI Featurestore based on data generated from
+https://github.com/synthetichealth/synthea
+and stored in Google Cloud FHIR Store with BigQuery streaming.
+See: 
https://cloud.google.com/healthcare-api/docs/how-tos/fhir-bigquery-streaming
+for more details.
+
+To apply using this `*.tfvars` file:
+
+```
+cd .test-infra/terraform/google-cloud-platform/vertex-ai-featurestore
+terraform apply -var-file=synthea.tfvars
+```
+
+You will be prompted for any remaining unset variables.
diff --git 
a/.test-infra/terraform/google-cloud-platform/vertex-ai-featurestore/featurestore.tf
 
b/.test-infra/terraform/google-cloud-platform/vertex-ai-featurestore/featurestore.tf
new file mode 100644
index 000..e7f0d9d563c
--- /dev/null
+++ 
b/.test-infra/terraform/google-cloud-platform/vertex-ai-featurestore/featurestore.tf
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+provider "google" {
+  project = var.project
+}
+
+resource "google_project_service" "required" {
+  service= "aiplatform.googleapis.com"
+  disable_on_destroy = false
+}
+
+resource "random_string" "postfix" {
+  length  = 6
+  upper   = false
+  special = false
+}
+
+resource "google_vertex_ai_featurestore" "default" {
+  depends_on = [google_project_service.required]
+  name   = 
"${var.featurestore.name_prefix}_${random_string.postfix.result}"
+  region = var.region
+  online_serving_config {
+fixed_node_count = var.featurestore.fixed_node_count
+  }
+}
+
+resource "google_vertex_ai_featurestore_entitytype" "entities" {
+  depends_on   = [google_project_service.required]
+  for_each = var.featurestore.entity_types
+  name = each.key
+  featurestore = google_vertex_ai_featurestore.default.id
+  description  = each.value.description
+  monitoring_config {
+
+categorical_threshold_config {
+  value = 0.3
+}
+
+numerical_threshold_config {
+  value = 0.3
+}
+
+sn

[beam] branch master updated: Fix referenced README url (#27977)

2023-08-12 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 25e1a64641b Fix referenced README url (#27977)
25e1a64641b is described below

commit 25e1a64641b1c8a3c0a6c75c6e86031b87307f22
Author: Damon 
AuthorDate: Sat Aug 12 12:59:28 2023 -0700

Fix referenced README url (#27977)
---
 .../terraform/google-cloud-platform/google-kubernetes-engine/README.md  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/.test-infra/terraform/google-cloud-platform/google-kubernetes-engine/README.md
 
b/.test-infra/terraform/google-cloud-platform/google-kubernetes-engine/README.md
index 2affc2e66b8..cd9558e7739 100644
--- 
a/.test-infra/terraform/google-cloud-platform/google-kubernetes-engine/README.md
+++ 
b/.test-infra/terraform/google-cloud-platform/google-kubernetes-engine/README.md
@@ -31,7 +31,7 @@ for details on requirements and usage.
 
 This module assumes the following pre-existing resources:
 
-- [Cloud Resource Manager API 
Enabled](https://pantheon.corp.google.com/apis/library/cloudresourcemanager.googleapis.com)
+- [Cloud Resource Manager API 
Enabled](https://console.cloud.google.com/apis/library/cloudresourcemanager.googleapis.com)
 - [Virtual Private Cloud (VPC) network and 
subnetwork](https://cloud.google.com/vpc/docs/create-modify-vpc-networks)
 - [GCP Service 
Account](https://cloud.google.com/iam/docs/service-accounts-create)
 



[beam] branch master updated (347f84c666e -> 23d44a45d68)

2023-08-07 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from 347f84c666e Fix action run on base branch (#27860)
 add 23d44a45d68 Fix routing header issue in Firestore Beam connector 
(#27858)

No new revisions were added by this update.

Summary of changes:
 sdks/java/io/google-cloud-platform/build.gradle|  4 
 .../FirestoreStatefulComponentFactory.java | 26 ++
 2 files changed, 17 insertions(+), 13 deletions(-)



[beam] branch master updated: Remove SuppressWarnings from NoopLock.java (#27416)

2023-07-20 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new b6108465589 Remove SuppressWarnings from NoopLock.java (#27416)
b6108465589 is described below

commit b6108465589a84a1fbce001a87d3760d1a3f82f5
Author: Damon 
AuthorDate: Thu Jul 20 15:04:32 2023 -0700

Remove SuppressWarnings from NoopLock.java (#27416)

* Remove SuppressWarnings from NoopLock.java

* Remove Nonnull annotations from original
---
 .../src/main/java/org/apache/beam/sdk/util/NoopLock.java| 13 +
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/NoopLock.java 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/NoopLock.java
index 0fc822987a6..36454a125d6 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/NoopLock.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/NoopLock.java
@@ -21,21 +21,19 @@ import java.io.Serializable;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.locks.Condition;
 import java.util.concurrent.locks.Lock;
-import javax.annotation.Nonnull;
+import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
+import org.checkerframework.checker.nullness.qual.NonNull;
 
 /**
  * A lock which can always be acquired. It should not be used when a proper 
lock is required, but it
  * is useful as a performance optimization when locking is not necessary but 
the code paths have to
  * be shared between the locking and the non-locking variant.
  */
-@SuppressWarnings({
-  "nullness" // TODO(https://github.com/apache/beam/issues/20497)
-})
 public class NoopLock implements Lock, Serializable {
 
-  private static NoopLock instance;
+  private static @MonotonicNonNull NoopLock instance;
 
-  public static NoopLock get() {
+  public static @NonNull NoopLock get() {
 if (instance == null) {
   instance = new NoopLock();
 }
@@ -56,14 +54,13 @@ public class NoopLock implements Lock, Serializable {
   }
 
   @Override
-  public boolean tryLock(long time, @Nonnull TimeUnit unit) {
+  public boolean tryLock(long time, TimeUnit unit) {
 return true;
   }
 
   @Override
   public void unlock() {}
 
-  @Nonnull
   @Override
   public Condition newCondition() {
 throw new UnsupportedOperationException("Not implemented");



[beam] branch master updated: Embed public Beam IO Performance Metrics (#27540)

2023-07-20 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 549bbfc3a99 Embed public Beam IO Performance Metrics (#27540)
549bbfc3a99 is described below

commit 549bbfc3a99a320da64bc14ad4563e956610f5ba
Author: Damon 
AuthorDate: Thu Jul 20 14:40:16 2023 -0700

Embed public Beam IO Performance Metrics (#27540)

* Use shortcodes to render looks

* Add performance looks

* Fix performance/index

* Use pngs stored in GCS

* Fix textio wording

* Remove navigation header

* Add links to metrics

* Trim metrics list
---
 .../site/content/en/documentation/io/connectors.md |   8 +-
 website/www/site/content/en/performance/_index.md  |  40 
 .../site/content/en/performance/bigquery/_index.md |  50 ++
 .../site/content/en/performance/bigtable/_index.md |  50 ++
 .../site/content/en/performance/glossary/_index.md |  47 +
 .../site/content/en/performance/textio/_index.md   |  50 ++
 website/www/site/data/performance.yaml | 108 +
 website/www/site/i18n/navbar/en.yaml   |  10 ++
 website/www/site/layouts/performance/baseof.html   |  40 
 .../site/layouts/shortcodes/performance_looks.html |  30 ++
 10 files changed, 429 insertions(+), 4 deletions(-)

diff --git a/website/www/site/content/en/documentation/io/connectors.md 
b/website/www/site/content/en/documentation/io/connectors.md
index a1f458377af..4524dbc0c97 100644
--- a/website/www/site/content/en/documentation/io/connectors.md
+++ b/website/www/site/content/en/documentation/io/connectors.md
@@ -79,7 +79,7 @@ This table provides a consolidated, at-a-glance overview of 
the available built-
 ✔
   
   
-TextIO
+TextIO (metrics)
 ✔
 ✔
 
@@ -200,7 +200,7 @@ This table provides a consolidated, at-a-glance overview of 
the available built-
 ✘
   
   
-GcsFileSystem
+GcsFileSystem (metrics)
 ✔
 ✔
 
@@ -522,7 +522,7 @@ This table provides a consolidated, at-a-glance overview of 
the available built-
 ✔
   
   
-BigQueryIO (guide)
+BigQueryIO (guide) (metrics)
 ✔
 ✔
 
@@ -545,7 +545,7 @@ This table provides a consolidated, at-a-glance overview of 
the available built-
 ✔
   
   
-BigTableIO
+BigTableIO (metrics)
 ✔
 ✔
 
diff --git a/website/www/site/content/en/performance/_index.md 
b/website/www/site/content/en/performance/_index.md
new file mode 100644
index 000..f821b0f2508
--- /dev/null
+++ b/website/www/site/content/en/performance/_index.md
@@ -0,0 +1,40 @@
+---
+title: "Beam IO Performance"
+---
+
+
+
+# Beam IO Performance
+
+Various Beam pipelines measure characteristics of reading from and writing to
+various IOs.
+
+# Available Metrics
+
+Various metrics were gathered using the Beam SDK
+[Metrics API](/documentation/programming-guide/#metrics)
+from a pipeline Job running on [Dataflow](/documentation/runners/dataflow/).
+
+See the [glossary](/performance/glossary) for a list of the metrics and their
+definition.
+
+# Measured Beam IOs
+
+See the following pages for performance measures recorded when reading from and
+writing to various Beam IOs.
+
+- [BigQuery](/performance/bigquery)
+- [BigTable](/performance/bigtable)
+- [TextIO](/performance/textio)
\ No newline at end of file
diff --git a/website/www/site/content/en/performance/bigquery/_index.md 
b/website/www/site/content/en/performance/bigquery/_index.md
new file mode 100644
index 000..5d46e118097
--- /dev/null
+++ b/website/www/site/content/en/performance/bigquery/_index.md
@@ -0,0 +1,50 @@
+---
+title: "BigQuery Performance"
+---
+
+
+
+# BigQuery Performance
+
+The following graphs show various metrics when reading from and writing to
+BigQuery. See the [glossary](/performance/glossary) for definitions.
+
+## Read
+
+### What is the estimated cost to read from BigQuery?
+
+{{< performance_looks io="bigquery" read_or_write="read" section="test_name" 
>}}
+
+### How has various metrics changed when reading from BigQuery for different 
Beam SDK versions?
+
+{{< performance_looks io="bigquery" read_or_write="read" section="version" >}}
+
+### How has various metrics changed over time when reading from BigQuery?
+
+{{< performance_looks io="bigquery" read_or_write="read" section="date" >}}
+
+## Write
+
+### What is the estimated cost to write to BigQuery?
+
+{{< performance_looks io="bigquery" read_or_write="write" section="test_name" 
>}}
+
+### How has various metrics changed when writing to BigQuery for different 
Beam SDK versions?
+
+{{< performance_looks io="bigquery&

[beam] branch master updated: Provision a redis cluster on Kubernetes (#27409)

2023-07-10 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new a0004b33e56 Provision a redis cluster on Kubernetes (#27409)
a0004b33e56 is described below

commit a0004b33e56308256f8f82304410462c03181d86
Author: Damon 
AuthorDate: Mon Jul 10 21:02:17 2023 -0700

Provision a redis cluster on Kubernetes (#27409)
---
 .../03.io/api-overuse-study/02.redis/README.md | 46 ++
 .../03.io/api-overuse-study/02.redis/common.tfvars | 19 +
 .../03.io/api-overuse-study/02.redis/data.tf   | 24 +++
 .../03.io/api-overuse-study/02.redis/provider.tf   | 27 +
 .../03.io/api-overuse-study/02.redis/redis.tf  | 35 
 .../03.io/api-overuse-study/02.redis/variables.tf  | 22 +++
 6 files changed, 173 insertions(+)

diff --git 
a/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/README.md
 
b/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/README.md
new file mode 100644
index 000..9ea3a98ee77
--- /dev/null
+++ 
b/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/README.md
@@ -0,0 +1,46 @@
+
+
+# Overview
+
+This directory provisions a redis cluster in Kubernetes.
+
+# Usage
+
+Follow terraform workflow convention to apply this module. It assumes the
+working directory is at
+[.test-infra/pipelines/infrastructure/03.io/api-overuse-study](..).
+
+## Terraform Init
+
+Initialize the terraform workspace.
+
+```
+DIR=02.redis
+terraform -chdir=$DIR init
+```
+
+## Terraform Apply
+
+Apply the terraform module.
+
+```
+DIR=02.redis
+terraform -chdir=$DIR apply -var-file=common.tfvars
+```
\ No newline at end of file
diff --git 
a/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/common.tfvars
 
b/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/common.tfvars
new file mode 100644
index 000..f71b496b5a2
--- /dev/null
+++ 
b/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/common.tfvars
@@ -0,0 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace = "api-overuse-study"
\ No newline at end of file
diff --git 
a/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/data.tf 
b/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/data.tf
new file mode 100644
index 000..32cb7434d30
--- /dev/null
+++ 
b/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/data.tf
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Query the Kubernetes namespace to verify that it exists.
+data "kubernetes_namespace" "default" {
+  metadata {
+name = var.namespace
+  }
+}
\ No newline at end of file
diff --git 
a/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/provider.tf
 
b/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/provider.tf
new file mode 100644
index 000..20bdbac74ea
--- /dev/null
+++ 
b/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/provider.tf
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the

[beam] branch master updated: Finish Java Exception Sampling (#27257)

2023-06-28 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 0554372b25b Finish Java Exception Sampling (#27257)
0554372b25b is described below

commit 0554372b25bc8921a451b7ef1068f4bd8fc1651e
Author: Sam Rohde 
AuthorDate: Wed Jun 28 09:22:32 2023 -0700

Finish Java Exception Sampling (#27257)

* Finish Java Exception Sampling

* wrong param name in comment

* run tests

* run tests

* run tests
---
 .../fn/harness/control/ExecutionStateSampler.java  | 16 +++-
 .../harness/data/PCollectionConsumerRegistry.java  | 27 +--
 .../beam/fn/harness/debug/ElementSample.java   | 15 +++-
 .../beam/fn/harness/debug/OutputSampler.java   | 93 +++---
 .../beam/fn/harness/debug/OutputSamplerTest.java   | 91 +++--
 .../fn/harness/status/BeamFnStatusClientTest.java  |  2 +-
 6 files changed, 198 insertions(+), 46 deletions(-)

diff --git 
a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ExecutionStateSampler.java
 
b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ExecutionStateSampler.java
index 2c2485dd842..c8cef8cf861 100644
--- 
a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ExecutionStateSampler.java
+++ 
b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ExecutionStateSampler.java
@@ -363,9 +363,14 @@ public class ExecutionStateSampler {
   ExecutionStateImpl current = currentStateLazy.get();
   if (current != null) {
 return ExecutionStateTrackerStatus.create(
-current.ptransformId, current.ptransformUniqueName, thread, 
lastTransitionTimeMs);
+current.ptransformId,
+current.ptransformUniqueName,
+thread,
+lastTransitionTimeMs,
+processBundleId.get());
   } else {
-return ExecutionStateTrackerStatus.create(null, null, thread, 
lastTransitionTimeMs);
+return ExecutionStateTrackerStatus.create(
+null, null, thread, lastTransitionTimeMs, processBundleId.get());
   }
 }
 
@@ -518,9 +523,10 @@ public class ExecutionStateSampler {
 @Nullable String ptransformId,
 @Nullable String ptransformUniqueName,
 Thread trackedThread,
-long lastTransitionTimeMs) {
+long lastTransitionTimeMs,
+@Nullable String processBundleId) {
   return new AutoValue_ExecutionStateSampler_ExecutionStateTrackerStatus(
-  ptransformId, ptransformUniqueName, trackedThread, 
lastTransitionTimeMs);
+  ptransformId, ptransformUniqueName, trackedThread, 
lastTransitionTimeMs, processBundleId);
 }
 
 public abstract @Nullable String getPTransformId();
@@ -530,5 +536,7 @@ public class ExecutionStateSampler {
 public abstract Thread getTrackedThread();
 
 public abstract long getLastTransitionTimeMillis();
+
+public abstract @Nullable String getProcessBundleId();
   }
 }
diff --git 
a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistry.java
 
b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistry.java
index 150580c7f64..a7a8766ffc7 100644
--- 
a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistry.java
+++ 
b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistry.java
@@ -27,6 +27,7 @@ import java.util.Random;
 import javax.annotation.Nullable;
 import org.apache.beam.fn.harness.HandlesSplits;
 import org.apache.beam.fn.harness.control.BundleProgressReporter;
+import org.apache.beam.fn.harness.control.ExecutionStateSampler;
 import org.apache.beam.fn.harness.control.ExecutionStateSampler.ExecutionState;
 import 
org.apache.beam.fn.harness.control.ExecutionStateSampler.ExecutionStateTracker;
 import org.apache.beam.fn.harness.control.Metrics;
@@ -71,9 +72,12 @@ public class PCollectionConsumerRegistry {
   @SuppressWarnings({"rawtypes"})
   abstract static class ConsumerAndMetadata {
 public static ConsumerAndMetadata forConsumer(
-FnDataReceiver consumer, String pTransformId, ExecutionState state) {
+FnDataReceiver consumer,
+String pTransformId,
+ExecutionState state,
+ExecutionStateTracker stateTracker) {
   return new AutoValue_PCollectionConsumerRegistry_ConsumerAndMetadata(
-  consumer, pTransformId, state);
+  consumer, pTransformId, state, stateTracker);
 }
 
 public abstract FnDataReceiver getConsumer();
@@ -81,6 +85,8 @@ public class PCollectionConsumerRegistry {
 public abstract String getPTransformId();
 
 public abstract ExecutionState getExecutionState();
+
+public abstract ExecutionStateTracker getExecutionStateTracker();
   }
 

[beam] branch master updated: Python data sampling optimization (#27157)

2023-06-23 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 92d82dbf7e2 Python data sampling optimization  (#27157)
92d82dbf7e2 is described below

commit 92d82dbf7e27e856cfc3b389b5abad7b67657550
Author: Sam Rohde 
AuthorDate: Fri Jun 23 14:23:57 2023 -0700

Python data sampling optimization  (#27157)

* Python optimization work

* Exception Sampling perf tests

* add better element sampling microbenchmark

* slowly move towards plumbing the samplers to the bundle processors

* cleaned up

* starting clean up and more testing

* finish tests

* fix unused data_sampler args and comments

* yapf, comments, and simplifications

* linter

* lint and mypy

* linter

* run tests

* address review comments

* run tests

-

Co-authored-by: Sam Rohde 
---
 .../apache_beam/runners/worker/bundle_processor.py |  90 +
 .../runners/worker/bundle_processor_test.py|  89 +++--
 .../apache_beam/runners/worker/data_sampler.py | 193 ++---
 .../runners/worker/data_sampler_test.py| 435 -
 .../apache_beam/runners/worker/operations.pxd  |   1 +
 .../apache_beam/runners/worker/operations.py   |  70 +++-
 .../apache_beam/runners/worker/sdk_worker.py   |   4 +-
 .../apache_beam/runners/worker/sdk_worker_test.py  |  29 +-
 8 files changed, 600 insertions(+), 311 deletions(-)

diff --git a/sdks/python/apache_beam/runners/worker/bundle_processor.py 
b/sdks/python/apache_beam/runners/worker/bundle_processor.py
index 6d814331fd6..3adb26552d0 100644
--- a/sdks/python/apache_beam/runners/worker/bundle_processor.py
+++ b/sdks/python/apache_beam/runners/worker/bundle_processor.py
@@ -66,7 +66,6 @@ from apache_beam.runners.worker import data_sampler
 from apache_beam.runners.worker import operation_specs
 from apache_beam.runners.worker import operations
 from apache_beam.runners.worker import statesampler
-from apache_beam.runners.worker.data_sampler import OutputSampler
 from apache_beam.transforms import TimeDomain
 from apache_beam.transforms import core
 from apache_beam.transforms import environments
@@ -194,8 +193,8 @@ class DataInputOperation(RunnerIOOperation):
 self.stop = float('inf')
 self.started = False
 
-  def setup(self):
-super().setup()
+  def setup(self, data_sampler=None):
+super().setup(data_sampler)
 # We must do this manually as we don't have a spec or spec.output_coders.
 self.receivers = [
 operations.ConsumerSet.create(
@@ -897,39 +896,11 @@ class BundleProcessor(object):
 'fnapi-step-%s' % self.process_bundle_descriptor.id,
 self.counter_factory)
 
-if self.data_sampler:
-  self.add_data_sampling_operations(process_bundle_descriptor)
-
 self.ops = self.create_execution_tree(self.process_bundle_descriptor)
 for op in reversed(self.ops.values()):
-  op.setup()
+  op.setup(self.data_sampler)
 self.splitting_lock = threading.Lock()
 
-  def add_data_sampling_operations(self, pbd):
-# type: (beam_fn_api_pb2.ProcessBundleDescriptor) -> None
-
-"""Adds a DataSamplingOperation to every PCollection.
-
-Implementation note: the alternative to this, is to add modify each
-Operation and forward a DataSampler to manually sample when an element is
-processed. This gets messy very quickly and is not future-proof as new
-operation types will need to be updated. This is the cleanest way of adding
-new operations to the final execution tree.
-"""
-coder = coders.FastPrimitivesCoder()
-
-for pcoll_id in pbd.pcollections:
-  transform_id = 'synthetic-data-sampling-transform-{}'.format(pcoll_id)
-  transform_proto: beam_runner_api_pb2.PTransform = pbd.transforms[
-  transform_id]
-  transform_proto.unique_name = transform_id
-  transform_proto.spec.urn = SYNTHETIC_DATA_SAMPLING_URN
-
-  coder_id = pbd.pcollections[pcoll_id].coder_id
-  transform_proto.spec.payload = coder.encode((pcoll_id, coder_id))
-
-  transform_proto.inputs['None'] = pcoll_id
-
   def create_execution_tree(
   self,
   descriptor  # type: beam_fn_api_pb2.ProcessBundleDescriptor
@@ -966,6 +937,12 @@ class BundleProcessor(object):
   for tag,
   pcoll_id in descriptor.transforms[transform_id].outputs.items()
   }
+
+  # Initialize transform-specific state in the Data Sampler.
+  if self.data_sampler:
+self.data_sampler.initialize_samplers(
+transform_id, descriptor, transform_factory.get_coder)
+
   return transform_factory.create_operation(
   transform_id, transform_consumers)
 
@@ -1987,52 +1964,3 @@ def

[beam] branch master updated: Implement Java exception sampling (#27121)

2023-06-15 Thread damondouglas
This is an automated email from the ASF dual-hosted git repository.

damondouglas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 22c6e728e82 Implement Java exception sampling (#27121)
22c6e728e82 is described below

commit 22c6e728e82cf5415e3ecd2f965d3538d026a921
Author: Sam Rohde 
AuthorDate: Thu Jun 15 10:36:49 2023 -0700

Implement Java exception sampling (#27121)

* Implement Java exception sampling

* spotless

* s/Exception/IOException
---
 .../harness/data/PCollectionConsumerRegistry.java  | 17 -
 .../beam/fn/harness/debug/ElementSample.java   | 44 
 .../beam/fn/harness/debug/OutputSampler.java   | 68 ---
 .../beam/fn/harness/debug/OutputSamplerTest.java   | 79 +++---
 4 files changed, 190 insertions(+), 18 deletions(-)

diff --git 
a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistry.java
 
b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistry.java
index 84a82e83b88..150580c7f64 100644
--- 
a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistry.java
+++ 
b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistry.java
@@ -33,6 +33,7 @@ import org.apache.beam.fn.harness.control.Metrics;
 import org.apache.beam.fn.harness.control.Metrics.BundleCounter;
 import org.apache.beam.fn.harness.control.Metrics.BundleDistribution;
 import org.apache.beam.fn.harness.debug.DataSampler;
+import org.apache.beam.fn.harness.debug.ElementSample;
 import org.apache.beam.fn.harness.debug.OutputSampler;
 import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor;
 import org.apache.beam.model.pipeline.v1.MetricsApi.MonitoringInfo;
@@ -301,8 +302,9 @@ public class PCollectionConsumerRegistry {
   // we have window optimization.
   this.sampledByteSizeDistribution.tryUpdate(input.getValue(), this.coder);
 
+  ElementSample elementSample = null;
   if (outputSampler != null) {
-outputSampler.sample(input);
+elementSample = outputSampler.sample(input);
   }
 
   // Use the ExecutionStateTracker and enter an appropriate state to track 
the
@@ -311,6 +313,11 @@ public class PCollectionConsumerRegistry {
   executionState.activate();
   try {
 this.delegate.accept(input);
+  } catch (Exception e) {
+if (outputSampler != null) {
+  outputSampler.exception(elementSample, e);
+}
+throw e;
   } finally {
 executionState.deactivate();
   }
@@ -383,8 +390,9 @@ public class PCollectionConsumerRegistry {
   // when we have window optimization.
   this.sampledByteSizeDistribution.tryUpdate(input.getValue(), coder);
 
+  ElementSample elementSample = null;
   if (outputSampler != null) {
-outputSampler.sample(input);
+elementSample = outputSampler.sample(input);
   }
 
   // Use the ExecutionStateTracker and enter an appropriate state to track 
the
@@ -397,6 +405,11 @@ public class PCollectionConsumerRegistry {
 state.activate();
 try {
   consumerAndMetadata.getConsumer().accept(input);
+} catch (Exception e) {
+  if (outputSampler != null) {
+outputSampler.exception(elementSample, e);
+  }
+  throw e;
 } finally {
   state.deactivate();
 }
diff --git 
a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/ElementSample.java
 
b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/ElementSample.java
new file mode 100644
index 000..85abd02e1d9
--- /dev/null
+++ 
b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/ElementSample.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.fn.harness.debug;
+
+import javax.annotation.Nullable;
+import org.apache.beam.sdk.util.WindowedValue;
+
+/**
+ * A record class that wraps an element sample with additional met

  1   2   >