[beam] branch pr-bot-state updated: Updating config from bot

2023-02-13 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch pr-bot-state
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/pr-bot-state by this push:
 new b837b4fed69 Updating config from bot
b837b4fed69 is described below

commit b837b4fed692ac51171fbadd8dce5ef042dfa179
Author: github-actions 
AuthorDate: Tue Feb 14 03:26:04 2023 +0000

Updating config from bot
---
 scripts/ci/pr-bot/state/pr-state/pr-25420.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/ci/pr-bot/state/pr-state/pr-25420.json 
b/scripts/ci/pr-bot/state/pr-state/pr-25420.json
index a7c19975bb1..27014f1d445 100644
--- a/scripts/ci/pr-bot/state/pr-state/pr-25420.json
+++ b/scripts/ci/pr-bot/state/pr-state/pr-25420.json
@@ -7,5 +7,5 @@
   "nextAction": "Reviewers",
   "stopReviewerNotifications": false,
   "remindAfterTestsPass": [],
-  "committerAssigned": false
+  "committerAssigned": true
 }
\ No newline at end of file



[beam] branch master updated (299be58cf99 -> 1dfd89dffc9)

2023-02-13 Thread tvalentyn
This is an automated email from the ASF dual-hosted git repository.

tvalentyn pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from 299be58cf99 Adding support for DLQ for ZetaSQL (#25426)
 add 1dfd89dffc9 Remove python 3.6 references (#25445)

No new revisions were added by this update.

Summary of changes:
 .test-infra/tools/README.md| 4 ++--
 .test-infra/tools/python_installer.sh  | 2 +-
 sdks/python/apache_beam/runners/dataflow/internal/apiclient.py | 2 +-
 website/www/site/content/en/get-started/quickstart-py.md   | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)



[beam] branch bigtable-cdc-feature-branch updated: Implement generating initial list of partitions (#25411)

2023-02-13 Thread pabloem
This is an automated email from the ASF dual-hosted git repository.

pabloem pushed a commit to branch bigtable-cdc-feature-branch
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/bigtable-cdc-feature-branch by 
this push:
 new e743ebaf818 Implement generating initial list of partitions (#25411)
e743ebaf818 is described below

commit e743ebaf818039a01d810b96858c4270be05d638
Author: Tony Tang 
AuthorDate: Mon Feb 13 21:37:43 2023 -0500

Implement generating initial list of partitions (#25411)
---
 .../changestreams/ChangeStreamMetrics.java |  25 
 .../action/DetectNewPartitionsAction.java  |  10 +-
 .../action/GenerateInitialPartitionsAction.java|  42 ++-
 .../changestreams/dao/ChangeStreamDao.java |  11 ++
 .../action/DetectNewPartitionsActionTest.java  | 127 +
 .../GenerateInitialPartitionsActionTest.java   | 119 +++
 6 files changed, 326 insertions(+), 8 deletions(-)

diff --git 
a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/ChangeStreamMetrics.java
 
b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/ChangeStreamMetrics.java
index 2aaa6631ace..f8177adf873 100644
--- 
a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/ChangeStreamMetrics.java
+++ 
b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/ChangeStreamMetrics.java
@@ -18,8 +18,33 @@
 package org.apache.beam.sdk.io.gcp.bigtable.changestreams;
 
 import java.io.Serializable;
+import org.apache.beam.sdk.metrics.Counter;
+import org.apache.beam.sdk.metrics.Metrics;
 
 /** Class to aggregate metrics related functionality. */
 public class ChangeStreamMetrics implements Serializable {
   private static final long serialVersionUID = 7298901109362981596L;
+  // 
+  // Partition record metrics
+
+  /**
+   * Counter for the total number of partitions identified during the 
execution of the Connector.
+   */
+  public static final Counter LIST_PARTITIONS_COUNT =
+  Metrics.counter(
+  
org.apache.beam.sdk.io.gcp.bigtable.changestreams.ChangeStreamMetrics.class,
+  "list_partitions_count");
+
+  /**
+   * Increments the {@link
+   * 
org.apache.beam.sdk.io.gcp.bigtable.changestreams.ChangeStreamMetrics#LIST_PARTITIONS_COUNT}
 by
+   * 1 if the metric is enabled.
+   */
+  public void incListPartitionsCount() {
+inc(LIST_PARTITIONS_COUNT);
+  }
+
+  private void inc(Counter counter) {
+counter.inc();
+  }
 }
diff --git 
a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/action/DetectNewPartitionsAction.java
 
b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/action/DetectNewPartitionsAction.java
index 990d9b2fd01..932796d4ba8 100644
--- 
a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/action/DetectNewPartitionsAction.java
+++ 
b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/action/DetectNewPartitionsAction.java
@@ -25,7 +25,8 @@ import 
org.apache.beam.sdk.io.gcp.bigtable.changestreams.TimestampConverter;
 import org.apache.beam.sdk.io.gcp.bigtable.changestreams.dao.MetadataTableDao;
 import org.apache.beam.sdk.io.gcp.bigtable.changestreams.model.PartitionRecord;
 import org.apache.beam.sdk.io.range.OffsetRange;
-import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.DoFn.BundleFinalizer;
+import org.apache.beam.sdk.transforms.DoFn.OutputReceiver;
 import org.apache.beam.sdk.transforms.DoFn.ProcessContinuation;
 import org.apache.beam.sdk.transforms.splittabledofn.ManualWatermarkEstimator;
 import org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker;
@@ -93,11 +94,14 @@ public class DetectNewPartitionsAction {
   @VisibleForTesting
   public ProcessContinuation run(
   RestrictionTracker tracker,
-  DoFn.OutputReceiver receiver,
+  OutputReceiver receiver,
   ManualWatermarkEstimator watermarkEstimator,
-  DoFn.BundleFinalizer bundleFinalizer,
+  BundleFinalizer bundleFinalizer,
   Timestamp startTime)
   throws Exception {
+if (tracker.currentRestriction().getFrom() == 0L) {
+  return generateInitialPartitionsAction.run(receiver, tracker, 
watermarkEstimator, startTime);
+}
 
 // Terminate if endTime <= watermark that means all partitions have read 
up to or beyond
 // watermark. We no longer need to manage splits and merges, we can 
terminate.
diff --git 
a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/action/GenerateInitialPartitionsAction.java
 
b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam

[beam-starter-java] branch dependabot/maven/org.apache.maven.plugins-maven-surefire-plugin-3.0.0-M9 created (now aa02fc6)

2023-02-13 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a change to branch 
dependabot/maven/org.apache.maven.plugins-maven-surefire-plugin-3.0.0-M9
in repository https://gitbox.apache.org/repos/asf/beam-starter-java.git


  at aa02fc6  Bump maven-surefire-plugin from 3.0.0-M8 to 3.0.0-M9

No new revisions were added by this update.



[beam] branch bigtable-cdc-feature-branch updated (feef2e40881 -> c31bda44f83)

2023-02-13 Thread pabloem
This is an automated email from the ASF dual-hosted git repository.

pabloem pushed a change to branch bigtable-cdc-feature-branch
in repository https://gitbox.apache.org/repos/asf/beam.git


from feef2e40881 Create metadata table in initialize stage if table doesn't 
exist. (#25364)
 add c31bda44f83 Add metadata table admin tests (#25414)

No new revisions were added by this update.

Summary of changes:
 .../changestreams/dao/MetadataTableAdminDao.java   |   1 -
 .../dao/MetadataTableAdminDaoTest.java | 126 +
 2 files changed, 126 insertions(+), 1 deletion(-)
 create mode 100644 
sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/dao/MetadataTableAdminDaoTest.java



[beam] branch master updated: Adding support for DLQ for ZetaSQL (#25426)

2023-02-13 Thread pabloem
This is an automated email from the ASF dual-hosted git repository.

pabloem pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 299be58cf99 Adding support for DLQ for ZetaSQL (#25426)
299be58cf99 is described below

commit 299be58cf997d9e1561409034692ea4f2d4ff357
Author: Pablo Estrada 
AuthorDate: Mon Feb 13 14:28:01 2023 -0800

Adding support for DLQ for ZetaSQL (#25426)

* Adding support for DLQ for ZetaSQL

* fixed issue for not-all-fields are selected

* fixup

* fix spotless

* fix test
---
 .../apache/beam/sdk/coders/RowCoderGenerator.java  |  12 ++-
 .../extensions/sql/zetasql/BeamZetaSqlCalcRel.java | 112 -
 .../sql/zetasql/BeamZetaSqlCalcRelTest.java|  70 +
 .../sql/zetasql/ZetaSqlDialectSpecTest.java|  14 ++-
 4 files changed, 177 insertions(+), 31 deletions(-)

diff --git 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/RowCoderGenerator.java
 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/RowCoderGenerator.java
index 70ee6c1ad81..5ce7358d882 100644
--- 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/RowCoderGenerator.java
+++ 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/RowCoderGenerator.java
@@ -142,7 +142,10 @@ public abstract class RowCoderGenerator {
   }
   // There should never be duplicate encoding positions.
   Preconditions.checkState(
-  schema.getFieldCount() == 
Arrays.stream(encodingPosToRowIndex).distinct().count());
+  schema.getFieldCount() == 
Arrays.stream(encodingPosToRowIndex).distinct().count(),
+  "The input schema (%s) and map for position encoding (%s) do not 
match.",
+  schema.getFields(),
+  encodingPosToRowIndex);
 
   // Component coders are ordered by encoding position, but may encode a 
field with a different
   // row index.
@@ -311,7 +314,12 @@ public abstract class RowCoderGenerator {
 boolean hasNullableFields)
 throws IOException {
   checkState(value.getFieldCount() == value.getSchema().getFieldCount());
-  checkState(encodingPosToIndex.length == value.getFieldCount());
+  checkState(
+  encodingPosToIndex.length == value.getFieldCount(),
+  "Unable to encode row. Expected %s values, but row has %s%s",
+  encodingPosToIndex.length,
+  value.getFieldCount(),
+  value.getSchema().getFieldNames());
 
   // Encode the field count. This allows us to handle compatible schema 
changes.
   VAR_INT_CODER.encode(value.getFieldCount(), outputStream);
diff --git 
a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/BeamZetaSqlCalcRel.java
 
b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/BeamZetaSqlCalcRel.java
index 744fbd0bcd4..be1c4613ad0 100644
--- 
a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/BeamZetaSqlCalcRel.java
+++ 
b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/BeamZetaSqlCalcRel.java
@@ -37,18 +37,24 @@ import org.apache.beam.sdk.annotations.Internal;
 import org.apache.beam.sdk.extensions.sql.impl.BeamSqlPipelineOptions;
 import org.apache.beam.sdk.extensions.sql.impl.QueryPlanner.QueryParameters;
 import org.apache.beam.sdk.extensions.sql.impl.rel.AbstractBeamCalcRel;
+import org.apache.beam.sdk.extensions.sql.impl.rel.BeamSqlRelUtils;
 import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils;
 import 
org.apache.beam.sdk.extensions.sql.meta.provider.bigquery.BeamBigQuerySqlDialect;
 import 
org.apache.beam.sdk.extensions.sql.meta.provider.bigquery.BeamSqlUnparseContext;
 import org.apache.beam.sdk.schemas.FieldAccessDescriptor;
 import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.schemas.utils.SelectHelpers;
 import org.apache.beam.sdk.transforms.DoFn;
 import org.apache.beam.sdk.transforms.PTransform;
 import org.apache.beam.sdk.transforms.ParDo;
 import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
 import org.apache.beam.sdk.values.PCollection;
 import org.apache.beam.sdk.values.PCollectionList;
+import org.apache.beam.sdk.values.PCollectionTuple;
+import org.apache.beam.sdk.values.POutput;
 import org.apache.beam.sdk.values.Row;
+import org.apache.beam.sdk.values.TupleTag;
+import org.apache.beam.sdk.values.TupleTagList;
 import 
org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.plan.RelOptCluster;
 import 
org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.plan.RelTraitSet;
 import org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.RelNode;
@@ -64,7 +70,6 @@ import 
org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.sql.fun.SqlStdO
 import 
org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.sql.parser.SqlParserPo

[beam] 01/02: Fix pulling licenses

2023-02-13 Thread damccorm
This is an automated email from the ASF dual-hosted git repository.

damccorm pushed a commit to branch users/damccorm/license-cp
in repository https://gitbox.apache.org/repos/asf/beam.git

commit 0eedc6c5f8f9dd01744c428123a6ce0169105403
Author: Danny McCormick 
AuthorDate: Mon Feb 13 17:04:07 2023 -0500

Fix pulling licenses
---
 sdks/java/container/license_scripts/dep_urls_java.yaml | 4 
 1 file changed, 4 insertions(+)

diff --git a/sdks/java/container/license_scripts/dep_urls_java.yaml 
b/sdks/java/container/license_scripts/dep_urls_java.yaml
index 43f008f58ff..a0566e883bb 100644
--- a/sdks/java/container/license_scripts/dep_urls_java.yaml
+++ b/sdks/java/container/license_scripts/dep_urls_java.yaml
@@ -65,3 +65,7 @@ org.eclipse.jgit:
   '4.4.1.201607150455-r':
 license: "https://www.eclipse.org/org/documents/edl-v10.html"
 type: "Eclipse Distribution License - v1.0"
+zstd-jni:
+  '1.5.2-5':
+license: "https://github.com/luben/zstd-jni/blob/master/LICENSE"
+type: "2-Clause BSD"



[beam] 02/02: Update sdks/java/container/license_scripts/dep_urls_java.yaml

2023-02-13 Thread damccorm
This is an automated email from the ASF dual-hosted git repository.

damccorm pushed a commit to branch users/damccorm/license-cp
in repository https://gitbox.apache.org/repos/asf/beam.git

commit 352a4ed124931e07e96a976b9259558caa738971
Author: Danny McCormick 
AuthorDate: Mon Feb 13 17:09:16 2023 -0500

Update sdks/java/container/license_scripts/dep_urls_java.yaml

Co-authored-by: Bruno Volpato 
---
 sdks/java/container/license_scripts/dep_urls_java.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdks/java/container/license_scripts/dep_urls_java.yaml 
b/sdks/java/container/license_scripts/dep_urls_java.yaml
index a0566e883bb..b55f3092111 100644
--- a/sdks/java/container/license_scripts/dep_urls_java.yaml
+++ b/sdks/java/container/license_scripts/dep_urls_java.yaml
@@ -67,5 +67,5 @@ org.eclipse.jgit:
 type: "Eclipse Distribution License - v1.0"
 zstd-jni:
   '1.5.2-5':
-license: "https://github.com/luben/zstd-jni/blob/master/LICENSE"
+license: "https://raw.githubusercontent.com/luben/zstd-jni/master/LICENSE"
 type: "2-Clause BSD"



[beam] branch users/damccorm/license-cp created (now 352a4ed1249)

2023-02-13 Thread damccorm
This is an automated email from the ASF dual-hosted git repository.

damccorm pushed a change to branch users/damccorm/license-cp
in repository https://gitbox.apache.org/repos/asf/beam.git


  at 352a4ed1249 Update 
sdks/java/container/license_scripts/dep_urls_java.yaml

This branch includes the following new commits:

 new 0eedc6c5f8f Fix pulling licenses
 new 352a4ed1249 Update 
sdks/java/container/license_scripts/dep_urls_java.yaml

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.




[beam] branch damccorm-patch-1 updated (58aea5642d0 -> e1c931fb9ed)

2023-02-13 Thread damccorm
This is an automated email from the ASF dual-hosted git repository.

damccorm pushed a change to branch damccorm-patch-1
in repository https://gitbox.apache.org/repos/asf/beam.git


from 58aea5642d0 Fix pulling licenses
 add e1c931fb9ed Update 
sdks/java/container/license_scripts/dep_urls_java.yaml

No new revisions were added by this update.

Summary of changes:
 sdks/java/container/license_scripts/dep_urls_java.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)



[beam] branch master updated (15699670853 -> 849692ef7b6)

2023-02-13 Thread yhu
This is an automated email from the ASF dual-hosted git repository.

yhu pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from 15699670853 Raise the RuntimeError message when some InteractiveRunner 
functions are misused
 add 849692ef7b6 Fix tox error running hdfsIntegrationTest (#25446)

No new revisions were added by this update.

Summary of changes:
 sdks/python/tox.ini | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)



[beam] branch damccorm-patch-1 created (now 58aea5642d0)

2023-02-13 Thread damccorm
This is an automated email from the ASF dual-hosted git repository.

damccorm pushed a change to branch damccorm-patch-1
in repository https://gitbox.apache.org/repos/asf/beam.git


  at 58aea5642d0 Fix pulling licenses

No new revisions were added by this update.



[beam] branch damccorm-patch-1 created (now 36dae513ad0)

2023-02-13 Thread damccorm
This is an automated email from the ASF dual-hosted git repository.

damccorm pushed a change to branch damccorm-patch-1
in repository https://gitbox.apache.org/repos/asf/beam.git


  at 36dae513ad0 Fix pulling licenses

No new revisions were added by this update.



[beam] branch master updated (007713c8320 -> 15699670853)

2023-02-13 Thread ningk
This is an automated email from the ASF dual-hosted git repository.

ningk pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from 007713c8320 Validate that GBK coders are always set correctly. (#25394)
 add a1e07773e8b Ignore flags for beam_sql magic
 add 03982ea4583 Merge remote-tracking branch 'origin/master'
 add 2c3fd4ff4ba Raise the error when some functions are misused
 add 154ff573837 change how to import
 new 15699670853 Raise the RuntimeError message when some InteractiveRunner 
functions are misused

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 sdks/python/apache_beam/runners/interactive/pipeline_fragment.py  | 5 +
 .../apache_beam/runners/interactive/pipeline_fragment_test.py | 8 
 2 files changed, 13 insertions(+)



[beam] 01/01: Raise the RuntimeError message when some InteractiveRunner functions are misused

2023-02-13 Thread ningk
This is an automated email from the ASF dual-hosted git repository.

ningk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git

commit 15699670853b155d7871477fe03f199b3ff4939b
Merge: 007713c8320 154ff573837
Author: Ning Kang 
AuthorDate: Mon Feb 13 13:39:16 2023 -0800

Raise the RuntimeError message when some InteractiveRunner functions are 
misused

 sdks/python/apache_beam/runners/interactive/pipeline_fragment.py  | 5 +
 .../apache_beam/runners/interactive/pipeline_fragment_test.py | 8 
 2 files changed, 13 insertions(+)



[beam] branch master updated (0a0bff5a049 -> 007713c8320)

2023-02-13 Thread lcwik
This is an automated email from the ASF dual-hosted git repository.

lcwik pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from 0a0bff5a049 Support gauge metrics in portable mode (#25396)
 add 007713c8320 Validate that GBK coders are always set correctly. (#25394)

No new revisions were added by this update.

Summary of changes:
 sdks/python/apache_beam/runners/common.py  | 44 ++
 .../runners/dataflow/internal/apiclient.py |  2 +
 .../runners/dataflow/internal/apiclient_test.py| 16 
 .../python/apache_beam/runners/pipeline_context.py | 12 +-
 .../runners/portability/fn_api_runner/fn_runner.py |  2 +
 .../runners/portability/portable_runner.py |  7 
 sdks/python/apache_beam/typehints/schemas.py   | 22 +++
 7 files changed, 87 insertions(+), 18 deletions(-)



[beam] branch users/damccorm/tzinfo updated (52b6e2d1100 -> 3e08cdecfc5)

2023-02-13 Thread damccorm
This is an automated email from the ASF dual-hosted git repository.

damccorm pushed a change to branch users/damccorm/tzinfo
in repository https://gitbox.apache.org/repos/asf/beam.git


from 52b6e2d1100 Fix test
 add 3e08cdecfc5 fmt

No new revisions were added by this update.

Summary of changes:
 sdks/python/apache_beam/utils/timestamp_test.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)



[beam] branch master updated: Support gauge metrics in portable mode (#25396)

2023-02-13 Thread xinyu
This is an automated email from the ASF dual-hosted git repository.

xinyu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 0a0bff5a049 Support gauge metrics in portable mode (#25396)
0a0bff5a049 is described below

commit 0a0bff5a049a17864e66bc34f36e40836238a653
Author: Katie Liu 
AuthorDate: Mon Feb 13 09:49:01 2023 -0800

Support gauge metrics in portable mode (#25396)
---
 .../runners/core/metrics/MetricsContainerImpl.java | 47 +++---
 .../core/metrics/MonitoringInfoConstants.java  |  2 +
 .../core/metrics/MetricsContainerImplTest.java | 35 
 3 files changed, 78 insertions(+), 6 deletions(-)

diff --git 
a/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/MetricsContainerImpl.java
 
b/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/MetricsContainerImpl.java
index 9c0c2a46c27..c23c2bbfa08 100644
--- 
a/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/MetricsContainerImpl.java
+++ 
b/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/MetricsContainerImpl.java
@@ -25,10 +25,12 @@ import static 
org.apache.beam.runners.core.metrics.MonitoringInfoEncodings.decod
 import static 
org.apache.beam.runners.core.metrics.MonitoringInfoEncodings.decodeInt64Gauge;
 import static 
org.apache.beam.runners.core.metrics.MonitoringInfoEncodings.encodeInt64Counter;
 import static 
org.apache.beam.runners.core.metrics.MonitoringInfoEncodings.encodeInt64Distribution;
+import static 
org.apache.beam.runners.core.metrics.MonitoringInfoEncodings.encodeInt64Gauge;
 import static 
org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull;
 
 import java.io.Serializable;
 import java.util.ArrayList;
+import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Objects;
@@ -281,7 +283,7 @@ public class MetricsContainerImpl implements Serializable, 
MetricsContainer {
* @return The MonitoringInfo generated from the distribution metricUpdate.
*/
   private @Nullable MonitoringInfo distributionUpdateToMonitoringInfo(
-  MetricUpdate 
metricUpdate) {
+  MetricUpdate metricUpdate) {
 SimpleMonitoringInfoBuilder builder = 
distributionToMonitoringMetadata(metricUpdate.getKey());
 if (builder == null) {
   return null;
@@ -290,11 +292,33 @@ public class MetricsContainerImpl implements 
Serializable, MetricsContainer {
 return builder.build();
   }
 
+  /** @return The MonitoringInfo metadata from the gauge metric. */
+  private @Nullable SimpleMonitoringInfoBuilder 
gaugeToMonitoringMetadata(MetricKey metricKey) {
+return metricToMonitoringMetadata(
+metricKey,
+MonitoringInfoConstants.TypeUrns.LATEST_INT64_TYPE,
+MonitoringInfoConstants.Urns.USER_LATEST_INT64);
+  }
+
+  /**
+   * @param metricUpdate
+   * @return The MonitoringInfo generated from the distribution metricUpdate.
+   */
+  private @Nullable MonitoringInfo gaugeUpdateToMonitoringInfo(
+  MetricUpdate metricUpdate) {
+SimpleMonitoringInfoBuilder builder = 
gaugeToMonitoringMetadata(metricUpdate.getKey());
+if (builder == null) {
+  return null;
+}
+builder.setInt64LatestValue(metricUpdate.getUpdate());
+return builder.build();
+  }
+
   /** Return the cumulative values for any metrics in this container as 
MonitoringInfos. */
   @Override
   public Iterable getMonitoringInfos() {
 // Extract user metrics and store as MonitoringInfos.
-ArrayList monitoringInfos = new 
ArrayList();
+List monitoringInfos = new ArrayList<>();
 MetricUpdates metricUpdates = this.getUpdates();
 
 for (MetricUpdate metricUpdate : metricUpdates.counterUpdates()) {
@@ -304,13 +328,19 @@ public class MetricsContainerImpl implements 
Serializable, MetricsContainer {
   }
 }
 
-for (MetricUpdate 
metricUpdate :
-metricUpdates.distributionUpdates()) {
+for (MetricUpdate metricUpdate : 
metricUpdates.distributionUpdates()) {
   MonitoringInfo mi = distributionUpdateToMonitoringInfo(metricUpdate);
   if (mi != null) {
 monitoringInfos.add(mi);
   }
 }
+
+for (MetricUpdate metricUpdate : metricUpdates.gaugeUpdates()) {
+  MonitoringInfo mi = gaugeUpdateToMonitoringInfo(metricUpdate);
+  if (mi != null) {
+monitoringInfos.add(mi);
+  }
+}
 return monitoringInfos;
   }
 
@@ -324,14 +354,19 @@ public class MetricsContainerImpl implements 
Serializable, MetricsContainer {
 builder.put(shortId, encodeInt64Counter(metricUpdate.getUpdate()));
   }
 }
-for (MetricUpdate 
metricUpdate :
-metricUpdates.distributionUpdates()) {
+for (MetricUpdate metricUpdate : 
metricUpdates.distributionUpdates()) {
   String shortId =
   getShortId(metricUpdate.getKey(), 
this::distributionToMonitorin

[beam] branch pr-bot-state updated: Updating config from bot

2023-02-13 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch pr-bot-state
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/pr-bot-state by this push:
 new 31aaa023ed5 Updating config from bot
31aaa023ed5 is described below

commit 31aaa023ed5c40773e711634f3d4a6daf5549edf
Author: github-actions 
AuthorDate: Mon Feb 13 17:05:06 2023 +0000

Updating config from bot
---
 scripts/ci/pr-bot/state/reviewers-for-label-go.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/ci/pr-bot/state/reviewers-for-label-go.json 
b/scripts/ci/pr-bot/state/reviewers-for-label-go.json
index 11f04fd14f5..6bd48454e9c 100644
--- a/scripts/ci/pr-bot/state/reviewers-for-label-go.json
+++ b/scripts/ci/pr-bot/state/reviewers-for-label-go.json
@@ -4,7 +4,7 @@
 "lostluck": 1676230432748,
 "jrmccluskey": 1675957745510,
 "youngoli": 1657688896155,
-"damccorm": 1675877745199,
+"damccorm": 1676307901354,
 "riteshghorse": 1676059470817
   }
 }
\ No newline at end of file



[beam] branch pr-bot-state updated: Updating config from bot

2023-02-13 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch pr-bot-state
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/pr-bot-state by this push:
 new 2b093d4b078 Updating config from bot
2b093d4b078 is described below

commit 2b093d4b078b449bce270d0a814c9921ee9d25d6
Author: github-actions 
AuthorDate: Mon Feb 13 17:05:03 2023 +0000

Updating config from bot
---
 scripts/ci/pr-bot/state/pr-state/pr-22057.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/ci/pr-bot/state/pr-state/pr-22057.json 
b/scripts/ci/pr-bot/state/pr-state/pr-22057.json
index 1c2c2e31975..2deda2c6f85 100644
--- a/scripts/ci/pr-bot/state/pr-state/pr-22057.json
+++ b/scripts/ci/pr-bot/state/pr-state/pr-22057.json
@@ -1,7 +1,7 @@
 {
   "commentedAboutFailingChecks": false,
   "reviewersAssignedForLabels": {
-"go": "jrmccluskey"
+"go": "damccorm"
   },
   "nextAction": "Reviewers",
   "stopReviewerNotifications": false,



[beam] branch pr-bot-state updated: Updating config from bot

2023-02-13 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch pr-bot-state
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/pr-bot-state by this push:
 new f53afc7530c Updating config from bot
f53afc7530c is described below

commit f53afc7530cc1efbe7a75907b8e0425bceea993b
Author: github-actions 
AuthorDate: Mon Feb 13 17:04:57 2023 +0000

Updating config from bot
---
 scripts/ci/pr-bot/state/reviewers-for-label-java.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/ci/pr-bot/state/reviewers-for-label-java.json 
b/scripts/ci/pr-bot/state/reviewers-for-label-java.json
index 6e794fe45d9..039453d3d85 100644
--- a/scripts/ci/pr-bot/state/reviewers-for-label-java.json
+++ b/scripts/ci/pr-bot/state/reviewers-for-label-java.json
@@ -3,7 +3,7 @@
   "dateOfLastReviewAssignment": {
 "lukecwik": 1676018206481,
 "kennknowles": 1676059470817,
-"robertwb": 1676015579623,
+"robertwb": 1676307892789,
 "kileys": 1674428678843,
 "apilloud": 1676047340321
   }



[beam] branch pr-bot-state updated: Updating config from bot

2023-02-13 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch pr-bot-state
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/pr-bot-state by this push:
 new cf5f2bd18d8 Updating config from bot
cf5f2bd18d8 is described below

commit cf5f2bd18d8154288c711656c751aeaf54df8218
Author: github-actions 
AuthorDate: Mon Feb 13 17:04:54 2023 +0000

Updating config from bot
---
 scripts/ci/pr-bot/state/pr-state/pr-24799.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/ci/pr-bot/state/pr-state/pr-24799.json 
b/scripts/ci/pr-bot/state/pr-state/pr-24799.json
index fa3809f24dd..d26f23226c7 100644
--- a/scripts/ci/pr-bot/state/pr-state/pr-24799.json
+++ b/scripts/ci/pr-bot/state/pr-state/pr-24799.json
@@ -1,7 +1,7 @@
 {
   "commentedAboutFailingChecks": false,
   "reviewersAssignedForLabels": {
-"java": "kennknowles"
+"java": "robertwb"
   },
   "nextAction": "Reviewers",
   "stopReviewerNotifications": false,



[beam] branch pr-bot-state updated: Updating config from bot

2023-02-13 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch pr-bot-state
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/pr-bot-state by this push:
 new 3679a46ac58 Updating config from bot
3679a46ac58 is described below

commit 3679a46ac58deea606b7b27dc5e2953da74ce43d
Author: github-actions 
AuthorDate: Mon Feb 13 17:04:48 2023 +0000

Updating config from bot
---
 scripts/ci/pr-bot/state/reviewers-for-label-python.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/ci/pr-bot/state/reviewers-for-label-python.json 
b/scripts/ci/pr-bot/state/reviewers-for-label-python.json
index 25e51ab6dec..d0534d62cc8 100644
--- a/scripts/ci/pr-bot/state/reviewers-for-label-python.json
+++ b/scripts/ci/pr-bot/state/reviewers-for-label-python.json
@@ -8,7 +8,7 @@
 "tvalentyn": 1676270231633,
 "pabloem": 1675968974120,
 "y1chi": 1667002607045,
-"damccorm": 1675911700427,
+"damccorm": 1676307882950,
 "jrmccluskey": 1675969005164
   }
 }
\ No newline at end of file



[beam] branch pr-bot-state updated: Updating config from bot

2023-02-13 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch pr-bot-state
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/pr-bot-state by this push:
 new b3511bea32d Updating config from bot
b3511bea32d is described below

commit b3511bea32d27e31b8cfa3e6b02a93d23734264a
Author: github-actions 
AuthorDate: Mon Feb 13 17:04:45 2023 +0000

Updating config from bot
---
 scripts/ci/pr-bot/state/pr-state/pr-25272.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/ci/pr-bot/state/pr-state/pr-25272.json 
b/scripts/ci/pr-bot/state/pr-state/pr-25272.json
index 6272b4e797d..88d1920a8bb 100644
--- a/scripts/ci/pr-bot/state/pr-state/pr-25272.json
+++ b/scripts/ci/pr-bot/state/pr-state/pr-25272.json
@@ -1,7 +1,7 @@
 {
   "commentedAboutFailingChecks": false,
   "reviewersAssignedForLabels": {
-"python": "AnandInguva"
+"python": "damccorm"
   },
   "nextAction": "Reviewers",
   "stopReviewerNotifications": false,



[beam] branch users/damccorm/tzinfo updated (30f374dbc13 -> 52b6e2d1100)

2023-02-13 Thread damccorm
This is an automated email from the ASF dual-hosted git repository.

damccorm pushed a change to branch users/damccorm/tzinfo
in repository https://gitbox.apache.org/repos/asf/beam.git


from 30f374dbc13 fmt
 add 52b6e2d1100 Fix test

No new revisions were added by this update.

Summary of changes:
 sdks/python/apache_beam/utils/timestamp_test.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)



[beam] branch users/damccorm/tzinfo updated (4709bbb7d19 -> 30f374dbc13)

2023-02-13 Thread damccorm
This is an automated email from the ASF dual-hosted git repository.

damccorm pushed a change to branch users/damccorm/tzinfo
in repository https://gitbox.apache.org/repos/asf/beam.git


from 4709bbb7d19 Format
 add 30f374dbc13 fmt

No new revisions were added by this update.

Summary of changes:
 sdks/python/apache_beam/utils/timestamp.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)



[beam] branch users/damccorm/tzinfo updated (12293153ab6 -> 4709bbb7d19)

2023-02-13 Thread damccorm
This is an automated email from the ASF dual-hosted git repository.

damccorm pushed a change to branch users/damccorm/tzinfo
in repository https://gitbox.apache.org/repos/asf/beam.git


from 12293153ab6 Better error for missing timezone info
 add 4709bbb7d19 Format

No new revisions were added by this update.

Summary of changes:
 sdks/python/apache_beam/utils/timestamp.py | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)



[beam] branch master updated: Add WatchFilePattern (#25393)

2023-02-13 Thread damccorm
This is an automated email from the ASF dual-hosted git repository.

damccorm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new bb5e200df70 Add WatchFilePattern  (#25393)
bb5e200df70 is described below

commit bb5e200df70a875379ff5a6dfe72325172c3eb77
Author: Anand Inguva <34158215+ananding...@users.noreply.github.com>
AuthorDate: Mon Feb 13 10:23:16 2023 -0500

Add WatchFilePattern  (#25393)

* Add WatchFilePattern transform

* Remove defaults and update instructions

* Add batch args

* Refactor example based on comments

* Changes based on  PR comments

* Fix typo

* Fix up lint

* Fix doc precommit

* Fix up pydocs

* Fixup lint

* Fix up test

* Update docstring as per comments

* Add unittest.main

* Update 
sdks/python/apache_beam/examples/inference/pytorch_image_classification_with_side_inputs.py

Co-authored-by: Danny McCormick 

-

Co-authored-by: Danny McCormick 
---
 CHANGES.md |   1 +
 ...ytorch_image_classification_with_side_inputs.py | 218 +
 sdks/python/apache_beam/io/fileio.py   |   6 +-
 sdks/python/apache_beam/ml/inference/base_test.py  |  22 ++-
 sdks/python/apache_beam/ml/inference/utils.py  | 116 +++
 sdks/python/apache_beam/ml/inference/utils_test.py | 103 ++
 6 files changed, 455 insertions(+), 11 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index 10310c6cbef..55a106f3513 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -68,6 +68,7 @@
 * Add UDF metrics support for Samza portable mode.
 * Option for SparkRunner to avoid the need of SDF output to fit in memory 
([#23852](https://github.com/apache/beam/issues/23852)).
   This helps e.g. with ParquetIO reads. Turn the feature on by adding 
experiment `use_bounded_concurrent_output_for_sdf`.
+* Add `WatchFilePattern` transform, which can be used as a side input to the 
RunInference PTransform to watch for model updates using a file pattern. 
([#24042](https://github.com/apache/beam/issues/24042))
 * Add support for loading TorchScript models with `PytorchModelHandler`. The 
TorchScript model path can be
   passed to PytorchModelHandler using 
`torch_script_model_path=`. 
([#25321](https://github.com/apache/beam/pull/25321))
 
diff --git 
a/sdks/python/apache_beam/examples/inference/pytorch_image_classification_with_side_inputs.py
 
b/sdks/python/apache_beam/examples/inference/pytorch_image_classification_with_side_inputs.py
new file mode 100644
index 000..2a4e6e9a9bc
--- /dev/null
+++ 
b/sdks/python/apache_beam/examples/inference/pytorch_image_classification_with_side_inputs.py
@@ -0,0 +1,218 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+A pipeline that uses RunInference PTransform to perform image classification
+and uses WatchFilePattern as side input to the RunInference PTransform.
+WatchFilePattern is used to watch for file updates matching the file_pattern
+based on timestamps and emits latest model metadata, which is used in
+RunInference API for the dynamic model updates without the need for stopping
+the beam pipeline.
+
+This pipeline follows the pattern from
+https://beam.apache.org/documentation/patterns/side-inputs/
+
+To use the PubSub reading from a topic in the pipeline as source, you can
+publish a path to the model(resnet152 used in the pipeline from
+torchvision.models.resnet152) to the PubSub topic. Then pass that
+topic via command line arg --topic.  The published path(str) should be
+UTF-8 encoded.
+
+To run the example on DataflowRunner,
+
+python 
apache_beam/examples/inference/pytorch_image_classification_with_side_inputs.py 
# pylint: disable=line-too-long
+  --project=
+  --re=
+  --temp_location=
+  --staging_location=
+  --runner=DataflowRunner
+  --streaming
+  --interval=10
+  --num_workers=5
+  --requirements_file=apache_beam/ml/inference/torch_tests_requirements.txt
+  --topic=
+  --file_pattern=
+
+file_pattern is path(can contain glob characters), which will be passed to
+WatchContinuous

[beam] branch users/damccorm/tzinfo created (now 12293153ab6)

2023-02-13 Thread damccorm
This is an automated email from the ASF dual-hosted git repository.

damccorm pushed a change to branch users/damccorm/tzinfo
in repository https://gitbox.apache.org/repos/asf/beam.git


  at 12293153ab6 Better error for missing timezone info

This branch includes the following new commits:

 new 12293153ab6 Better error for missing timezone info

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.




[beam] 01/01: Better error for missing timezone info

2023-02-13 Thread damccorm
This is an automated email from the ASF dual-hosted git repository.

damccorm pushed a commit to branch users/damccorm/tzinfo
in repository https://gitbox.apache.org/repos/asf/beam.git

commit 12293153ab67c3bee645188a0275a9a3cf68b317
Author: Danny McCormick 
AuthorDate: Mon Feb 13 10:22:22 2023 -0500

Better error for missing timezone info
---
 sdks/python/apache_beam/runners/direct/transform_evaluator.py | 5 -
 sdks/python/apache_beam/utils/timestamp.py| 2 ++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/sdks/python/apache_beam/runners/direct/transform_evaluator.py 
b/sdks/python/apache_beam/runners/direct/transform_evaluator.py
index a0600255028..bfb27c4adc0 100644
--- a/sdks/python/apache_beam/runners/direct/transform_evaluator.py
+++ b/sdks/python/apache_beam/runners/direct/transform_evaluator.py
@@ -678,7 +678,10 @@ class _PubSubReadEvaluator(_TransformEvaluator):
   else:
 if message.publish_time is None:
   raise ValueError('No publish time present in message: %s' % message)
-timestamp = Timestamp.from_utc_datetime(message.publish_time)
+try:
+  timestamp = Timestamp.from_utc_datetime(message.publish_time)
+except ValueError as e:
+  raise ValueError('Bad timestamp value for message %s: %s', message, 
e)
 
   return timestamp, parsed_message
 
diff --git a/sdks/python/apache_beam/utils/timestamp.py 
b/sdks/python/apache_beam/utils/timestamp.py
index 502d1f78fa7..0e4f402a695 100644
--- a/sdks/python/apache_beam/utils/timestamp.py
+++ b/sdks/python/apache_beam/utils/timestamp.py
@@ -103,6 +103,8 @@ class Timestamp(object):
 Args:
   dt: A ``datetime.datetime`` object in UTC (offset-aware).
 """
+if dt.tzinfo is None:
+  raise ValueError('dt has no timezone info 
(https://docs.python.org/3/library/datetime.html#datetime.tzinfo:~:text=For%20applications%20requiring,is%20in%20effect.):
 %s' % dt)
 if dt.tzinfo != pytz.utc and dt.tzinfo != datetime.timezone.utc:
   raise ValueError('dt not in UTC: %s' % dt)
 duration = dt - cls._epoch_datetime_utc()



[beam] branch master updated: (#25316) Enable LZMA compression in Python SDK I/O (#25317)

2023-02-13 Thread yhu
This is an automated email from the ASF dual-hosted git repository.

yhu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 6f7d2fbaeb4 (#25316) Enable LZMA compression in Python SDK I/O (#25317)
6f7d2fbaeb4 is described below

commit 6f7d2fbaeb4a71173df0d9553538ae79c23e6314
Author: William Ross Morrow 
AuthorDate: Mon Feb 13 06:31:01 2023 -0800

(#25316) Enable LZMA compression in Python SDK I/O (#25317)

* (#25316) Added naive first shot at enabling LZMA compression

* (#25316) Added a draft line to CHANGES.md

* (#25316) fix linter issues

* (#25316) update tests (draft)

* (#25316) import order in test file
---
 CHANGES.md|  1 +
 sdks/python/apache_beam/io/filesystem.py  | 21 +++--
 sdks/python/apache_beam/io/filesystem_test.py | 25 +++--
 3 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index d01c764b7ea..10310c6cbef 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -61,6 +61,7 @@
 
 * Support for X source added (Java/Python) 
([#X](https://github.com/apache/beam/issues/X)).
 * Added in JmsIO a retry policy for failed publications (Java) 
([#24971](https://github.com/apache/beam/issues/24971)).
+* Support for `LZMA` compression/decompression of text files added to the 
Python SDK ([#25316](https://github.com/apache/beam/issues/25316))
 
 ## New Features / Improvements
 
diff --git a/sdks/python/apache_beam/io/filesystem.py 
b/sdks/python/apache_beam/io/filesystem.py
index fa1f67ac03f..142e04bc295 100644
--- a/sdks/python/apache_beam/io/filesystem.py
+++ b/sdks/python/apache_beam/io/filesystem.py
@@ -28,6 +28,7 @@ import abc
 import bz2
 import io
 import logging
+import lzma
 import os
 import posixpath
 import re
@@ -65,6 +66,10 @@ class CompressionTypes(object):
   #   .bz2 (implies BZIP2 as described below).
   #   .gz  (implies GZIP as described below)
   #   .deflate (implies DEFLATE as described below)
+  #   .zst (implies ZSTD as described below)
+  #   .zstd (implies ZSTD as described below)
+  #   .xz (implies LZMA as described below)
+  #   .lzma (implies LZMA as described below)
   # Any non-recognized extension implies UNCOMPRESSED as described below.
   AUTO = 'auto'
 
@@ -80,6 +85,9 @@ class CompressionTypes(object):
   # GZIP compression (deflate with GZIP headers).
   GZIP = 'gzip'
 
+  # LZMA compression
+  LZMA = 'lzma'
+
   # Uncompressed (i.e., may be split).
   UNCOMPRESSED = 'uncompressed'
 
@@ -92,6 +100,7 @@ class CompressionTypes(object):
 CompressionTypes.DEFLATE,
 CompressionTypes.GZIP,
 CompressionTypes.ZSTD,
+CompressionTypes.LZMA,
 CompressionTypes.UNCOMPRESSED
 ])
 return compression_type in types
@@ -103,6 +112,7 @@ class CompressionTypes(object):
 cls.DEFLATE: 'application/x-deflate',
 cls.GZIP: 'application/x-gzip',
 cls.ZSTD: 'application/zstd',
+cls.LZMA: 'application/lzma'
 }
 return mime_types_by_compression_type.get(compression_type, default)
 
@@ -114,7 +124,9 @@ class CompressionTypes(object):
 '.deflate': cls.DEFLATE,
 '.gz': cls.GZIP,
 '.zst': cls.ZSTD,
-'.zstd': cls.ZSTD
+'.zstd': cls.ZSTD,
+'.xz': cls.LZMA,
+'.lzma': cls.LZMA
 }
 lowercased_path = file_path.lower()
 for suffix, compression_type in compression_types_by_suffix.items():
@@ -184,6 +196,8 @@ class CompressedFile(object):
   # https://github.com/indygreg/python-zstandard/issues/157
   self._decompressor = zstandard.ZstdDecompressor(
   max_window_size=2147483648).decompressobj()
+elif self._compression_type == CompressionTypes.LZMA:
+  self._decompressor = lzma.LZMADecompressor()
 else:
   assert self._compression_type == CompressionTypes.GZIP
   self._decompressor = zlib.decompressobj(self._gzip_mask)
@@ -196,6 +210,8 @@ class CompressedFile(object):
   zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED)
 elif self._compression_type == CompressionTypes.ZSTD:
   self._compressor = zstandard.ZstdCompressor().compressobj()
+elif self._compression_type == CompressionTypes.LZMA:
+  self._compressor = lzma.LZMACompressor()
 else:
   assert self._compression_type == CompressionTypes.GZIP
   self._compressor = zlib.compressobj(
@@ -257,7 +273,8 @@ class CompressedFile(object):
 if (self._compression_type == CompressionTypes.BZIP2 or
 self._compression_type == CompressionTypes.DEFLATE or
 self._compression_type == CompressionTypes.ZSTD or
-self._compression_type == CompressionTypes.GZIP):
+self._compression_type == CompressionTypes.GZIP or
+self._compression_type == CompressionTypes.LZMA):
   pass
 else:
   # Deflate, Gzip and bzi

[beam] branch pr-bot-state updated: Updating config from bot

2023-02-13 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch pr-bot-state
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/pr-bot-state by this push:
 new 82f3d69e993 Updating config from bot
82f3d69e993 is described below

commit 82f3d69e993788213ad8d0ffb2356df7dfd497f5
Author: github-actions 
AuthorDate: Mon Feb 13 14:09:04 2023 +

Updating config from bot
---
 scripts/ci/pr-bot/state/pr-state/pr-25357.json | 8 
 1 file changed, 8 insertions(+)

diff --git a/scripts/ci/pr-bot/state/pr-state/pr-25357.json 
b/scripts/ci/pr-bot/state/pr-state/pr-25357.json
new file mode 100644
index 000..242a48d7d3b
--- /dev/null
+++ b/scripts/ci/pr-bot/state/pr-state/pr-25357.json
@@ -0,0 +1,8 @@
+{
+  "commentedAboutFailingChecks": true,
+  "reviewersAssignedForLabels": {},
+  "nextAction": "Author",
+  "stopReviewerNotifications": false,
+  "remindAfterTestsPass": [],
+  "committerAssigned": false
+}
\ No newline at end of file