This is an automated email from the ASF dual-hosted git repository.

yhu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new bfa26a4d907 Add an automatic GCP-BOM dependency upgrader (#30262)
bfa26a4d907 is described below

commit bfa26a4d907d844aed4b938f88142ed0fc82c90f
Author: Yi Hu <ya...@google.com>
AuthorDate: Wed Feb 14 14:00:56 2024 -0500

    Add an automatic GCP-BOM dependency upgrader (#30262)
    
    * Add an automatic GCP-BOM dependency upgrader
    
    * Standardize comment tag
---
 .../org/apache/beam/gradle/BeamModulePlugin.groovy |  22 +-
 scripts/tools/bomupgrader.py                       | 233 +++++++++++++++++++++
 2 files changed, 245 insertions(+), 10 deletions(-)

diff --git 
a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy 
b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
index 9705557a162..abaaa7fb1df 100644
--- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
+++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
@@ -590,6 +590,9 @@ class BeamModulePlugin implements Plugin<Project> {
     // These versions are defined here because they represent
     // a dependency version which should match across multiple
     // Maven artifacts.
+    //
+    // A few versions, marked as [bomupgrader], are determined by the BOMs by 
running
+    // scripts/tools/bomupgrader.py. See the documentation of that script for 
details.
     def activemq_version = "5.14.5"
     def autovalue_version = "1.9"
     def autoservice_version = "1.0.1"
@@ -601,17 +604,17 @@ class BeamModulePlugin implements Plugin<Project> {
     def classgraph_version = "4.8.162"
     def dbcp2_version = "2.9.0"
     def errorprone_version = "2.10.0"
-    // Try to keep gax_version consistent with gax-grpc version in 
google_cloud_platform_libraries_bom
+    // [bomupgrader] determined by: com.google.api:gax, consistent with: 
google_cloud_platform_libraries_bom
     def gax_version = "2.41.0"
     def google_ads_version = "26.0.0"
     def google_clients_version = "2.0.0"
     def google_cloud_bigdataoss_version = "2.2.16"
-    // Try to keep google_cloud_spanner_version consistent with 
google_cloud_spanner_bom in google_cloud_platform_libraries_bom
+    // [bomupgrader] determined by: com.google.cloud:google-cloud-spanner, 
consistent with: google_cloud_platform_libraries_bom
     def google_cloud_spanner_version = "6.57.0"
     def google_code_gson_version = "2.10.1"
     def google_oauth_clients_version = "1.34.1"
-    // Try to keep grpc_version consistent with gRPC version in 
google_cloud_platform_libraries_bom
-    def grpc_version = "1.60.0"
+    // [bomupgrader] determined by: io.grpc:grpc-netty, consistent with: 
google_cloud_platform_libraries_bom
+    def grpc_version = "1.61.0"
     def guava_version = "32.1.2-jre"
     def hadoop_version = "2.10.2"
     def hamcrest_version = "2.1"
@@ -625,11 +628,11 @@ class BeamModulePlugin implements Plugin<Project> {
     def kafka_version = "2.4.1"
     def log4j2_version = "2.20.0"
     def nemo_version = "0.1"
-    // Try to keep netty_version consistent with the netty version in grpc_bom 
(includes grpc_netty) in google_cloud_platform_libraries_bom
+    // [bomupgrader] determined by: io.grpc:grpc-netty, consistent with: 
google_cloud_platform_libraries_bom
     def netty_version = "4.1.100.Final"
     def postgres_version = "42.2.16"
     def powermock_version = "2.0.9"
-    // Try to keep protobuf_version consistent with the protobuf version in 
google_cloud_platform_libraries_bom
+    // [bomupgrader] determined by: com.google.protobuf:protobuf-java, 
consistent with: google_cloud_platform_libraries_bom
     def protobuf_version = "3.25.2"
     def qpid_jms_client_version = "0.61.0"
     def quickcheck_version = "1.0"
@@ -640,7 +643,7 @@ class BeamModulePlugin implements Plugin<Project> {
     def spark3_version = "3.2.2"
     def spotbugs_version = "4.0.6"
     def testcontainers_version = "1.17.3"
-    // Try to keep arrow_version consistent with the arrow version in 
google_cloud_bigquery, managed by google_cloud_platform_libraries_bom
+    // [bomupgrader] determined by: org.apache.arrow:arrow-memory-core, 
consistent with: google_cloud_platform_libraries_bom
     def arrow_version = "15.0.0"
     def jmh_version = "1.34"
     def jupiter_version = "5.7.0"
@@ -756,9 +759,8 @@ class BeamModulePlugin implements Plugin<Project> {
         google_cloud_firestore                      : 
"com.google.cloud:google-cloud-firestore", // 
google_cloud_platform_libraries_bom sets version
         google_cloud_pubsub                         : 
"com.google.cloud:google-cloud-pubsub", // google_cloud_platform_libraries_bom 
sets version
         google_cloud_pubsublite                     : 
"com.google.cloud:google-cloud-pubsublite",  // 
google_cloud_platform_libraries_bom sets version
-        // The release notes shows the versions set by the BOM:
-        // https://github.com/googleapis/java-cloud-bom/releases/tag/v26.31.0
-        // Update libraries-bom version on 
sdks/java/container/license_scripts/dep_urls_java.yaml
+        // [bomupgrader] the BOM version is set by 
scripts/tools/bomupgrader.py. If updating manually, also update
+        // the libraries-bom version in 
sdks/java/container/license_scripts/dep_urls_java.yaml
         google_cloud_platform_libraries_bom         : 
"com.google.cloud:libraries-bom:26.31.0",
         google_cloud_spanner                        : 
"com.google.cloud:google-cloud-spanner", // google_cloud_platform_libraries_bom 
sets version
         google_cloud_spanner_test                   : 
"com.google.cloud:google-cloud-spanner:$google_cloud_spanner_version:tests",
diff --git a/scripts/tools/bomupgrader.py b/scripts/tools/bomupgrader.py
new file mode 100644
index 00000000000..425d57cfe95
--- /dev/null
+++ b/scripts/tools/bomupgrader.py
@@ -0,0 +1,233 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import errno
+import logging
+import os
+import re
+import subprocess
+import sys
+"""
+This Python script is used for upgrading the GCP-BOM in BeamModulePlugin.
+Specifically, it
+
+1. preprocesses BeamModulePlugin.groovy to decide which dependencies need to 
be synced
+2. generates an empty Gradle project to fetch the exact target versions
+3. writes the new versions back to BeamModulePlugin.groovy
+4. updates the libraries-bom version in 
sdks/java/container/license_scripts/dep_urls_java.yaml
+
+There are a few reasons we need to declare the version numbers:
+1. To sync the dependencies that are not included in GCP-BOM with those that 
are.
+  For example, "com.google.cloud:google-cloud-spanner" is included while 
"com.google.cloud:google-cloud-spanner:():test" isn't
+2. There are Beam artifacts that do not depend on GCP-BOM but use dependencies 
managed
+  by GCP-BOM.
+
+Refer to https://github.com/googleapis/java-cloud-bom/tags for the dependency
+versions managed by gcp-cloud-bom
+"""
+
+# To format: yapf --style sdks/python/setup.cfg --in-place 
scripts/tools/bomupgrader.py
+
+
+class BeamModulePluginProcessor:
+  # Known dependencies managed by GCP BOM and also used by Beam.
+  # We only need to have one dependency for each project to figure out the 
target version
+  KNOWN_DEPS = {
+      "arrow": "org.apache.arrow:arrow-memory-core",
+      "gax": "com.google.api:gax",
+      "google_cloud_spanner": "com.google.cloud:google-cloud-spanner",
+      "grpc":
+          "io.grpc:grpc-netty",  # use "grpc-netty" to pick up proper netty 
version
+      "netty": "io.netty:netty-transport",
+      "protobuf": "com.google.protobuf:protobuf-java"
+  }
+  # Dependencies managed by GCP-BOM that use the dependencies in KNOWN_DEPS,
+  # so we need to add them to the example project to get the versions to sync.
+  OTHER_CONSTRANTS = [
+      "com.google.cloud:google-cloud-bigquery"  # uses arrow
+  ]
+
+  # TODO: the logic can be generalized to support multiple BOMs
+  ANCHOR = re.compile(
+      r'\s*//\s*\[bomupgrader\] determined by: (\S+), consistent with: 
google_cloud_platform_libraries_bom'
+  )
+  # e.g.  def grpc_version = "1.61.0"
+  VERSION_STRING = re.compile(r'^\s*def (\w+)_version\s*=\s*[\'"](\S+)[\'"]')
+  BOM_VERSION_STRING = re.compile(
+      
r'\s*google_cloud_platform_libraries_bom\s*:\s*[\'"]com\.google\.cloud:libraries-bom:([0-9\.]+)[\'"],?'
+  )
+  BUILD_DIR = 'build/dependencyResolver'
+  BEAMMPLG_PATH = 
'buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy'
+  LICENSE_SC_PATH = 'sdks/java/container/license_scripts/dep_urls_java.yaml'
+  GRADLE_TEMPLATE = """
+plugins { id 'java' }
+repositories { mavenCentral() }
+dependencies {
+implementation platform('com.google.cloud:libraries-bom:%s')
+%s
+}
+configurations.implementation.canBeResolved = true
+"""
+
+  def __init__(self, bom_version, project_root='.', runnable=None):
+    self.bom_version = bom_version
+    self.project_root = project_root
+    self.runnable = runnable or os.path.abspath('gradlew')
+    logging.info('-----Read BeamModulePlugin-----')
+    with open(os.path.join(project_root, self.BEAMMPLG_PATH), 'r') as fin:
+      self.original_lines = fin.readlines()
+    # e.g. {"io.grpc:grpc-netty": "1.61.0"}
+    self.dep_versions = {}
+    self.dep_versions_current = {}
+    self.known_deps = {}
+
+  def check_dependencies(self):
+    """Check dependencies in KNOWN_DEPS are found in BeamModulePlugin, and 
vice versa."""
+    logging.info("-----check dependency defs in BeamModulePlugin-----")
+    found_deps = {}
+    for idx, line in enumerate(self.original_lines):
+      m = self.ANCHOR.match(line)
+      if m:
+        n = self.VERSION_STRING.search(self.original_lines[idx + 1])
+        if not n:
+          raise RuntimeError(
+              "Version definition not found after anchor comment. Try 
standardize it."
+          )
+        found_deps[n.group(1)] = n.group(2)
+    assert sorted(self.KNOWN_DEPS.keys()) == sorted(found_deps.keys()), 
f"expect {self.KNOWN_DEPS.keys()} == {found_deps.keys()}"
+    self.dep_versions_current = {
+        self.KNOWN_DEPS[k]: v
+        for k, v in found_deps.items()
+    }
+
+  def prepare_gradle(self, bom_version):
+    logging.info("-----prepare build.gradle for example project-----")
+    try:
+      os.makedirs(self.BUILD_DIR)
+    except OSError as e:
+      if e.errno != errno.EEXIST:
+        raise
+
+    deps = []
+    for dep in list(self.KNOWN_DEPS.values()) + self.OTHER_CONSTRANTS:
+      deps.append(f"implementation '{dep}'")
+    gradle_file = self.GRADLE_TEMPLATE % (bom_version, "\n".join(deps))
+    with open(os.path.join(self.BUILD_DIR, 'build.gradle'), 'w') as fout:
+      fout.write(gradle_file)
+    # we need a settings.gradle
+    with open(os.path.join(self.BUILD_DIR, 'settings.gradle'), 'w') as fout:
+      fout.write('\n')
+
+  def resolve(self):
+    logging.info("-----resolve dependency-----")
+    subp = subprocess.run([
+        self.runnable,
+        *(
+            '-q dependencies --configuration implementation --console=plain'.
+            split())
+    ],
+                          cwd=self.BUILD_DIR,
+                          stdout=subprocess.PIPE)
+
+    result = subp.stdout.decode('utf-8')
+    # example line: |    +--- com.google.guava:guava:32.1.3-android -> 
32.1.3-jre (*)
+    logging.debug(result)
+    for line in result.splitlines():
+      for id in self.KNOWN_DEPS.values():
+        idx = line.find(id + ':')
+        if idx == -1:
+          continue
+        dep_and_other = line[idx + len(id) + 1:].split()
+        try:
+          jdx = dep_and_other.index('->')
+          ver = dep_and_other[jdx + 1]
+        except ValueError:
+          # there might be multiple ':', e.g. 
com.group.id:some-package:test:1.2.3
+          ver = dep_and_other[0].split(':')[-1]
+        self.dep_versions[id] = ver
+        break
+
+    if len(self.dep_versions) < len(self.KNOWN_DEPS):
+      logging.warning(
+          "Warning: not all dependencies are resolved: %s", self.dep_versions)
+      logging.info(result)
+
+  def write_back(self):
+    logging.info("-----Update BeamModulePlugin-----")
+    # make a shallow copy
+    self.target_lines = list(self.original_lines)
+    found_bom = False
+
+    for idx, line in enumerate(self.original_lines):
+      m = self.ANCHOR.match(line)
+      if m:
+        n = self.VERSION_STRING.search(self.original_lines[idx + 1])
+        if not n:
+          raise RuntimeError(
+              "Version definition not found after anchor comment. Try 
standardize it."
+          )
+        id = self.KNOWN_DEPS[n.group(1)]
+        new_v = self.dep_versions[id]
+        old_v = self.dep_versions_current[id]
+        if new_v != old_v:
+          self.target_lines[idx + 1] = self.original_lines[idx + 1].replace(
+              old_v, new_v)
+          logging.info('Changed %s: %s -> %s', id, old_v, new_v)
+        else:
+          logging.info('Unchanged: %s:%s', id, new_v)
+      else:
+        # replace GCP BOM version
+        n = self.BOM_VERSION_STRING.match(line)
+        if n:
+          self.target_lines[idx] = self.original_lines[idx].replace(
+              n.group(1), self.bom_version)
+          found_bom = True
+
+    if not found_bom:
+      logging.warning(
+          'GCP_BOM version declaration not found in BeamModulePlugin')
+
+    with open(os.path.join(self.project_root, self.BEAMMPLG_PATH), 'w') as 
fout:
+      for line in self.target_lines:
+        fout.write(line)
+
+  def write_license_script(self):
+    logging.info("-----Update dep_urls_java.yaml-----")
+    with open(os.path.join(self.project_root, self.LICENSE_SC_PATH),
+              'r') as fin:
+      lines = fin.readlines()
+    with open(os.path.join(self.project_root, self.LICENSE_SC_PATH),
+              'w') as fout:
+      for idx, line in enumerate(lines):
+        if line.strip() == 'libraries-bom:':
+          lines[idx + 1] = re.sub(
+              r'[\'"]\d[\.\d]+[\'"]', f"'{self.bom_version}'", lines[idx + 1])
+        fout.write(line)
+
+  def run(self):
+    self.check_dependencies()
+    self.prepare_gradle(self.bom_version)
+    self.resolve()
+    self.write_back()
+    self.write_license_script()
+
+if __name__ == '__main__':
+  logging.getLogger().setLevel(logging.INFO)
+  if len(sys.argv) < 2:
+    print("Usage: python scripts/tools/gcpbomupgrader.py target_version")
+    exit(1)
+  processor = BeamModulePluginProcessor(sys.argv[1])
+  processor.run()

Reply via email to