This is an automated email from the ASF dual-hosted git repository.
damccorm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new 3a1ae18a8d4 Split some dependencies out of main beam package into
existing extras (#36697)
3a1ae18a8d4 is described below
commit 3a1ae18a8d4fdf5c7ce74065b8893d0edc5e0579
Author: Danny McCormick <[email protected]>
AuthorDate: Mon Nov 3 13:04:40 2025 -0500
Split some dependencies out of main beam package into existing extras
(#36697)
* Split some dependencies out of main beam package
* httplib used more broadly
* A few more split outs
* Try to fix linting
* Guard imports
* yaml test exclusions
* yapf
* correctly skip
* Fix annotations
---
sdks/python/apache_beam/io/gcp/bigquery_tools.py | 2 +-
.../apache_beam/runners/interactive/display/pipeline_graph.py | 7 +++++--
sdks/python/apache_beam/yaml/json_utils.py | 7 +++++--
sdks/python/apache_beam/yaml/main_test.py | 6 ++++++
sdks/python/apache_beam/yaml/yaml_io_test.py | 6 ++++++
sdks/python/apache_beam/yaml/yaml_mapping_test.py | 6 ++++++
sdks/python/apache_beam/yaml/yaml_transform_test.py | 6 ++++++
sdks/python/apache_beam/yaml/yaml_transform_unit_test.py | 1 +
sdks/python/setup.py | 10 +++++-----
sdks/python/tox.ini | 2 +-
10 files changed, 42 insertions(+), 11 deletions(-)
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_tools.py b/sdks/python/apache_beam/io/gcp/bigquery_tools.py
index b2fedb1746d..ddab941f927 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_tools.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_tools.py
@@ -46,7 +46,6 @@ from typing import Union
import fastavro
import numpy as np
-import regex
import apache_beam
from apache_beam import coders
@@ -70,6 +69,7 @@ from apache_beam.utils.histogram import LinearBucket
# Protect against environments where bigquery library is not available.
try:
+ import regex
from apitools.base.py.exceptions import HttpError
from apitools.base.py.exceptions import HttpForbiddenError
from apitools.base.py.transfer import Upload
diff --git a/sdks/python/apache_beam/runners/interactive/display/pipeline_graph.py b/sdks/python/apache_beam/runners/interactive/display/pipeline_graph.py
index 1f1e315fea0..10058351938 100644
--- a/sdks/python/apache_beam/runners/interactive/display/pipeline_graph.py
+++ b/sdks/python/apache_beam/runners/interactive/display/pipeline_graph.py
@@ -32,14 +32,17 @@ from typing import List
from typing import Tuple
from typing import Union
-import pydot
-
import apache_beam as beam
from apache_beam.portability.api import beam_runner_api_pb2
from apache_beam.runners.interactive import interactive_environment as ie
from apache_beam.runners.interactive import pipeline_instrument as inst
from apache_beam.runners.interactive.display import pipeline_graph_renderer
+try:
+ import pydot
+except ImportError:
+ pass
+
# pylint does not understand context
# pylint:disable=dangerous-default-value
diff --git a/sdks/python/apache_beam/yaml/json_utils.py b/sdks/python/apache_beam/yaml/json_utils.py
index 2d8f3205197..832651a477d 100644
--- a/sdks/python/apache_beam/yaml/json_utils.py
+++ b/sdks/python/apache_beam/yaml/json_utils.py
@@ -25,12 +25,15 @@ from collections.abc import Callable
from typing import Any
from typing import Optional
-import jsonschema
-
import apache_beam as beam
from apache_beam.portability.api import schema_pb2
from apache_beam.typehints import schemas
+try:
+ import jsonschema
+except ImportError:
+ pass
+
JSON_ATOMIC_TYPES_TO_BEAM = {
'boolean': schema_pb2.BOOLEAN,
'integer': schema_pb2.INT64,
diff --git a/sdks/python/apache_beam/yaml/main_test.py b/sdks/python/apache_beam/yaml/main_test.py
index d233e0e2d73..43b8caa1853 100644
--- a/sdks/python/apache_beam/yaml/main_test.py
+++ b/sdks/python/apache_beam/yaml/main_test.py
@@ -24,6 +24,11 @@ import unittest
from apache_beam.yaml import main
+try:
+ import jsonschema
+except ImportError:
+ jsonschema = None
+
TEST_PIPELINE = '''
pipeline:
type: chain
@@ -79,6 +84,7 @@ tests:
'''
+@unittest.skipIf(jsonschema is None, "Yaml dependencies not installed")
class MainTest(unittest.TestCase):
def test_pipeline_spec_from_file(self):
with tempfile.TemporaryDirectory() as tmpdir:
diff --git a/sdks/python/apache_beam/yaml/yaml_io_test.py b/sdks/python/apache_beam/yaml/yaml_io_test.py
index a19dfd694a8..1e13038512c 100644
--- a/sdks/python/apache_beam/yaml/yaml_io_test.py
+++ b/sdks/python/apache_beam/yaml/yaml_io_test.py
@@ -32,6 +32,11 @@ from apache_beam.testing.util import equal_to
from apache_beam.typehints import schemas as schema_utils
from apache_beam.yaml.yaml_transform import YamlTransform
+try:
+ import jsonschema
+except ImportError:
+ jsonschema = None
+
class FakeReadFromPubSub:
def __init__(
@@ -82,6 +87,7 @@ class FakeWriteToPubSub:
return AssertThat(equal_to(self._messages))
+@unittest.skipIf(jsonschema is None, "Yaml dependencies not installed")
class YamlPubSubTest(unittest.TestCase):
def test_simple_read(self):
with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions(
diff --git a/sdks/python/apache_beam/yaml/yaml_mapping_test.py b/sdks/python/apache_beam/yaml/yaml_mapping_test.py
index cc2fe4639ab..169c86d7b87 100644
--- a/sdks/python/apache_beam/yaml/yaml_mapping_test.py
+++ b/sdks/python/apache_beam/yaml/yaml_mapping_test.py
@@ -30,6 +30,11 @@ from apache_beam.utils.timestamp import Timestamp
from apache_beam.yaml import yaml_mapping
from apache_beam.yaml.yaml_transform import YamlTransform
+try:
+ import jsonschema
+except ImportError:
+ jsonschema = None
+
DATA = [
beam.Row(label='11a', conductor=11, rank=0),
beam.Row(label='37a', conductor=37, rank=1),
@@ -37,6 +42,7 @@ DATA = [
]
+@unittest.skipIf(jsonschema is None, "Yaml dependencies not installed")
class YamlMappingTest(unittest.TestCase):
def test_basic(self):
with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions(
diff --git a/sdks/python/apache_beam/yaml/yaml_transform_test.py b/sdks/python/apache_beam/yaml/yaml_transform_test.py
index 2ba49a1fab8..d5950fb9efa 100644
--- a/sdks/python/apache_beam/yaml/yaml_transform_test.py
+++ b/sdks/python/apache_beam/yaml/yaml_transform_test.py
@@ -29,6 +29,11 @@ from apache_beam.utils import python_callable
from apache_beam.yaml import yaml_provider
from apache_beam.yaml.yaml_transform import YamlTransform
+try:
+ import jsonschema
+except ImportError:
+ jsonschema = None
+
class CreateTimestamped(beam.PTransform):
_yaml_requires_inputs = False
@@ -83,6 +88,7 @@ TEST_PROVIDERS = {
}
+@unittest.skipIf(jsonschema is None, "Yaml dependencies not installed")
class YamlTransformE2ETest(unittest.TestCase):
def test_composite(self):
with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions(
diff --git a/sdks/python/apache_beam/yaml/yaml_transform_unit_test.py b/sdks/python/apache_beam/yaml/yaml_transform_unit_test.py
index 14bd758ebae..59b1619b651 100644
--- a/sdks/python/apache_beam/yaml/yaml_transform_unit_test.py
+++ b/sdks/python/apache_beam/yaml/yaml_transform_unit_test.py
@@ -55,6 +55,7 @@ def new_pipeline():
pickle_library='cloudpickle'))
+@unittest.skipIf(jsonschema is None, "Yaml dependencies not installed")
class MainTest(unittest.TestCase):
def assertYaml(self, expected, result):
result = SafeLineLoader.strip_metadata(result)
diff --git a/sdks/python/setup.py b/sdks/python/setup.py
index 9ed2a124e94..534324b83c1 100644
--- a/sdks/python/setup.py
+++ b/sdks/python/setup.py
@@ -379,7 +379,6 @@ if __name__ == '__main__':
install_requires=[
'crcmod>=1.7,<2.0',
'cryptography>=39.0.0,<48.0.0',
- 'orjson>=3.9.7,<4',
'fastavro>=0.23.6,<2',
'fasteners>=0.3,<1.0',
# TODO(https://github.com/grpc/grpc/issues/37710): Unpin grpc
@@ -387,7 +386,6 @@ if __name__ == '__main__':
'grpcio>=1.67.0; python_version >= "3.13"',
'hdfs>=2.1.0,<3.0.0',
'httplib2>=0.8,<0.23.0',
- 'jsonschema>=4.0.0,<5.0.0',
'jsonpickle>=3.0.0,<4.0.0',
# numpy can have breaking changes in minor versions.
# Use a strict upper bound.
@@ -407,11 +405,9 @@ if __name__ == '__main__':
# 3. Exclude protobuf 4 versions that leak memory, see:
# https://github.com/apache/beam/issues/28246
'protobuf>=3.20.3,<7.0.0.dev0,!=4.0.*,!=4.21.*,!=4.22.0,!=4.23.*,!=4.24.*', # pylint: disable=line-too-long
- 'pydot>=1.2.0,<2',
'python-dateutil>=2.8.0,<3',
'pytz>=2018.3',
'redis>=5.0.0,<6',
- 'regex>=2020.6.8',
'requests>=2.32.4,<3.0.0',
'sortedcontainers>=2.4.0',
'typing-extensions>=3.7.0',
@@ -509,7 +505,9 @@ if __name__ == '__main__':
# --extra-index-url or --index-url in requirements.txt in
# Dataflow, which allows installing python packages from private
# Python repositories in GAR.
- 'keyrings.google-artifactregistry-auth'
+ 'keyrings.google-artifactregistry-auth',
+ 'orjson>=3.9.7,<4',
+ 'regex>=2020.6.8',
],
'interactive': [
'facets-overview>=1.1.0,<2',
@@ -520,6 +518,7 @@ if __name__ == '__main__':
# Skip version 6.1.13 due to
# https://github.com/jupyter/jupyter_client/issues/637
'jupyter-client>=6.1.11,!=6.1.13,<8.2.1',
+ 'pydot>=1.2.0,<2',
'timeloop>=1.0.2,<2',
'nbformat>=5.0.5,<6',
'nbconvert>=6.2.0,<8',
@@ -577,6 +576,7 @@ if __name__ == '__main__':
'virtualenv-clone>=0.5,<1.0',
# https://github.com/PiotrDabkowski/Js2Py/issues/317
'js2py>=0.74,<1; python_version<"3.12"',
+ 'jsonschema>=4.0.0,<5.0.0',
] + dataframe_dependency,
# Keep the following dependencies in line with what we test against
# in https://github.com/apache/beam/blob/master/sdks/python/tox.ini
diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini
index 8ea95ad8fc8..921833d9f4b 100644
--- a/sdks/python/tox.ini
+++ b/sdks/python/tox.ini
@@ -33,7 +33,7 @@ pip_pre = True
# allow apps that support color to use it.
passenv=TERM,CLOUDSDK_CONFIG,DOCKER_*,TESTCONTAINERS_*,TC_*,ALLOYDB_PASSWORD
# Set [] options for pip installation of apache-beam tarball.
-extras = test,dataframe
+extras = test,dataframe,yaml
# Don't warn that these commands aren't installed.
allowlist_externals =
false