This is an automated email from the ASF dual-hosted git repository.

damccorm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new 3a1ae18a8d4 Split some dependencies out of main beam package into existing extras (#36697)
3a1ae18a8d4 is described below

commit 3a1ae18a8d4fdf5c7ce74065b8893d0edc5e0579
Author: Danny McCormick <[email protected]>
AuthorDate: Mon Nov 3 13:04:40 2025 -0500

    Split some dependencies out of main beam package into existing extras (#36697)
    
    * Split some dependencies out of main beam package
    
    * httplib used more broadly
    
    * A few more split outs
    
    * Try to fix linting
    
    * Guard imports
    
    * yaml test exclusions
    
    * yapf
    
    * correctly skip
    
    * Fix annotations
---
 sdks/python/apache_beam/io/gcp/bigquery_tools.py               |  2 +-
 .../apache_beam/runners/interactive/display/pipeline_graph.py  |  7 +++++--
 sdks/python/apache_beam/yaml/json_utils.py                     |  7 +++++--
 sdks/python/apache_beam/yaml/main_test.py                      |  6 ++++++
 sdks/python/apache_beam/yaml/yaml_io_test.py                   |  6 ++++++
 sdks/python/apache_beam/yaml/yaml_mapping_test.py              |  6 ++++++
 sdks/python/apache_beam/yaml/yaml_transform_test.py            |  6 ++++++
 sdks/python/apache_beam/yaml/yaml_transform_unit_test.py       |  1 +
 sdks/python/setup.py                                           | 10 +++++-----
 sdks/python/tox.ini                                            |  2 +-
 10 files changed, 42 insertions(+), 11 deletions(-)

diff --git a/sdks/python/apache_beam/io/gcp/bigquery_tools.py b/sdks/python/apache_beam/io/gcp/bigquery_tools.py
index b2fedb1746d..ddab941f927 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_tools.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_tools.py
@@ -46,7 +46,6 @@ from typing import Union
 
 import fastavro
 import numpy as np
-import regex
 
 import apache_beam
 from apache_beam import coders
@@ -70,6 +69,7 @@ from apache_beam.utils.histogram import LinearBucket
 
 # Protect against environments where bigquery library is not available.
 try:
+  import regex
   from apitools.base.py.exceptions import HttpError
   from apitools.base.py.exceptions import HttpForbiddenError
   from apitools.base.py.transfer import Upload
diff --git a/sdks/python/apache_beam/runners/interactive/display/pipeline_graph.py b/sdks/python/apache_beam/runners/interactive/display/pipeline_graph.py
index 1f1e315fea0..10058351938 100644
--- a/sdks/python/apache_beam/runners/interactive/display/pipeline_graph.py
+++ b/sdks/python/apache_beam/runners/interactive/display/pipeline_graph.py
@@ -32,14 +32,17 @@ from typing import List
 from typing import Tuple
 from typing import Union
 
-import pydot
-
 import apache_beam as beam
 from apache_beam.portability.api import beam_runner_api_pb2
 from apache_beam.runners.interactive import interactive_environment as ie
 from apache_beam.runners.interactive import pipeline_instrument as inst
 from apache_beam.runners.interactive.display import pipeline_graph_renderer
 
+try:
+  import pydot
+except ImportError:
+  pass
+
 # pylint does not understand context
 # pylint:disable=dangerous-default-value
 
diff --git a/sdks/python/apache_beam/yaml/json_utils.py b/sdks/python/apache_beam/yaml/json_utils.py
index 2d8f3205197..832651a477d 100644
--- a/sdks/python/apache_beam/yaml/json_utils.py
+++ b/sdks/python/apache_beam/yaml/json_utils.py
@@ -25,12 +25,15 @@ from collections.abc import Callable
 from typing import Any
 from typing import Optional
 
-import jsonschema
-
 import apache_beam as beam
 from apache_beam.portability.api import schema_pb2
 from apache_beam.typehints import schemas
 
+try:
+  import jsonschema
+except ImportError:
+  pass
+
 JSON_ATOMIC_TYPES_TO_BEAM = {
     'boolean': schema_pb2.BOOLEAN,
     'integer': schema_pb2.INT64,
diff --git a/sdks/python/apache_beam/yaml/main_test.py b/sdks/python/apache_beam/yaml/main_test.py
index d233e0e2d73..43b8caa1853 100644
--- a/sdks/python/apache_beam/yaml/main_test.py
+++ b/sdks/python/apache_beam/yaml/main_test.py
@@ -24,6 +24,11 @@ import unittest
 
 from apache_beam.yaml import main
 
+try:
+  import jsonschema
+except ImportError:
+  jsonschema = None
+
 TEST_PIPELINE = '''
 pipeline:
   type: chain
@@ -79,6 +84,7 @@ tests:
 '''
 
 
+@unittest.skipIf(jsonschema is None, "Yaml dependencies not installed")
 class MainTest(unittest.TestCase):
   def test_pipeline_spec_from_file(self):
     with tempfile.TemporaryDirectory() as tmpdir:
diff --git a/sdks/python/apache_beam/yaml/yaml_io_test.py b/sdks/python/apache_beam/yaml/yaml_io_test.py
index a19dfd694a8..1e13038512c 100644
--- a/sdks/python/apache_beam/yaml/yaml_io_test.py
+++ b/sdks/python/apache_beam/yaml/yaml_io_test.py
@@ -32,6 +32,11 @@ from apache_beam.testing.util import equal_to
 from apache_beam.typehints import schemas as schema_utils
 from apache_beam.yaml.yaml_transform import YamlTransform
 
+try:
+  import jsonschema
+except ImportError:
+  jsonschema = None
+
 
 class FakeReadFromPubSub:
   def __init__(
@@ -82,6 +87,7 @@ class FakeWriteToPubSub:
     return AssertThat(equal_to(self._messages))
 
 
+@unittest.skipIf(jsonschema is None, "Yaml dependencies not installed")
 class YamlPubSubTest(unittest.TestCase):
   def test_simple_read(self):
     with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions(
diff --git a/sdks/python/apache_beam/yaml/yaml_mapping_test.py b/sdks/python/apache_beam/yaml/yaml_mapping_test.py
index cc2fe4639ab..169c86d7b87 100644
--- a/sdks/python/apache_beam/yaml/yaml_mapping_test.py
+++ b/sdks/python/apache_beam/yaml/yaml_mapping_test.py
@@ -30,6 +30,11 @@ from apache_beam.utils.timestamp import Timestamp
 from apache_beam.yaml import yaml_mapping
 from apache_beam.yaml.yaml_transform import YamlTransform
 
+try:
+  import jsonschema
+except ImportError:
+  jsonschema = None
+
 DATA = [
     beam.Row(label='11a', conductor=11, rank=0),
     beam.Row(label='37a', conductor=37, rank=1),
@@ -37,6 +42,7 @@ DATA = [
 ]
 
 
+@unittest.skipIf(jsonschema is None, "Yaml dependencies not installed")
 class YamlMappingTest(unittest.TestCase):
   def test_basic(self):
     with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions(
diff --git a/sdks/python/apache_beam/yaml/yaml_transform_test.py b/sdks/python/apache_beam/yaml/yaml_transform_test.py
index 2ba49a1fab8..d5950fb9efa 100644
--- a/sdks/python/apache_beam/yaml/yaml_transform_test.py
+++ b/sdks/python/apache_beam/yaml/yaml_transform_test.py
@@ -29,6 +29,11 @@ from apache_beam.utils import python_callable
 from apache_beam.yaml import yaml_provider
 from apache_beam.yaml.yaml_transform import YamlTransform
 
+try:
+  import jsonschema
+except ImportError:
+  jsonschema = None
+
 
 class CreateTimestamped(beam.PTransform):
   _yaml_requires_inputs = False
@@ -83,6 +88,7 @@ TEST_PROVIDERS = {
 }
 
 
+@unittest.skipIf(jsonschema is None, "Yaml dependencies not installed")
 class YamlTransformE2ETest(unittest.TestCase):
   def test_composite(self):
     with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions(
diff --git a/sdks/python/apache_beam/yaml/yaml_transform_unit_test.py b/sdks/python/apache_beam/yaml/yaml_transform_unit_test.py
index 14bd758ebae..59b1619b651 100644
--- a/sdks/python/apache_beam/yaml/yaml_transform_unit_test.py
+++ b/sdks/python/apache_beam/yaml/yaml_transform_unit_test.py
@@ -55,6 +55,7 @@ def new_pipeline():
           pickle_library='cloudpickle'))
 
 
+@unittest.skipIf(jsonschema is None, "Yaml dependencies not installed")
 class MainTest(unittest.TestCase):
   def assertYaml(self, expected, result):
     result = SafeLineLoader.strip_metadata(result)
diff --git a/sdks/python/setup.py b/sdks/python/setup.py
index 9ed2a124e94..534324b83c1 100644
--- a/sdks/python/setup.py
+++ b/sdks/python/setup.py
@@ -379,7 +379,6 @@ if __name__ == '__main__':
       install_requires=[
           'crcmod>=1.7,<2.0',
           'cryptography>=39.0.0,<48.0.0',
-          'orjson>=3.9.7,<4',
           'fastavro>=0.23.6,<2',
           'fasteners>=0.3,<1.0',
           # TODO(https://github.com/grpc/grpc/issues/37710): Unpin grpc
@@ -387,7 +386,6 @@ if __name__ == '__main__':
           'grpcio>=1.67.0; python_version >= "3.13"',
           'hdfs>=2.1.0,<3.0.0',
           'httplib2>=0.8,<0.23.0',
-          'jsonschema>=4.0.0,<5.0.0',
           'jsonpickle>=3.0.0,<4.0.0',
           # numpy can have breaking changes in minor versions.
           # Use a strict upper bound.
@@ -407,11 +405,9 @@ if __name__ == '__main__':
           # 3. Exclude protobuf 4 versions that leak memory, see:
           # https://github.com/apache/beam/issues/28246
           'protobuf>=3.20.3,<7.0.0.dev0,!=4.0.*,!=4.21.*,!=4.22.0,!=4.23.*,!=4.24.*',  # pylint: disable=line-too-long
-          'pydot>=1.2.0,<2',
           'python-dateutil>=2.8.0,<3',
           'pytz>=2018.3',
           'redis>=5.0.0,<6',
-          'regex>=2020.6.8',
           'requests>=2.32.4,<3.0.0',
           'sortedcontainers>=2.4.0',
           'typing-extensions>=3.7.0',
@@ -509,7 +505,9 @@ if __name__ == '__main__':
               # --extra-index-url or --index-url in requirements.txt in
               # Dataflow, which allows installing python packages from private
               # Python repositories in GAR.
-              'keyrings.google-artifactregistry-auth'
+              'keyrings.google-artifactregistry-auth',
+              'orjson>=3.9.7,<4',
+              'regex>=2020.6.8',
           ],
           'interactive': [
               'facets-overview>=1.1.0,<2',
@@ -520,6 +518,7 @@ if __name__ == '__main__':
               # Skip version 6.1.13 due to
               # https://github.com/jupyter/jupyter_client/issues/637
               'jupyter-client>=6.1.11,!=6.1.13,<8.2.1',
+              'pydot>=1.2.0,<2',
               'timeloop>=1.0.2,<2',
               'nbformat>=5.0.5,<6',
               'nbconvert>=6.2.0,<8',
@@ -577,6 +576,7 @@ if __name__ == '__main__':
               'virtualenv-clone>=0.5,<1.0',
               # https://github.com/PiotrDabkowski/Js2Py/issues/317
               'js2py>=0.74,<1; python_version<"3.12"',
+              'jsonschema>=4.0.0,<5.0.0',
           ] + dataframe_dependency,
           # Keep the following dependencies in line with what we test against
           # in https://github.com/apache/beam/blob/master/sdks/python/tox.ini
diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini
index 8ea95ad8fc8..921833d9f4b 100644
--- a/sdks/python/tox.ini
+++ b/sdks/python/tox.ini
@@ -33,7 +33,7 @@ pip_pre = True
 # allow apps that support color to use it.
 passenv=TERM,CLOUDSDK_CONFIG,DOCKER_*,TESTCONTAINERS_*,TC_*,ALLOYDB_PASSWORD
 # Set [] options for pip installation of apache-beam tarball.
-extras = test,dataframe
+extras = test,dataframe,yaml
 # Don't warn that these commands aren't installed.
 allowlist_externals =
   false

Reply via email to