This is an automated email from the ASF dual-hosted git repository.
cvandermerwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new 32d6bce8c23 Enable pickling main by reference option within CloudpickleConfig (#37554)
32d6bce8c23 is described below
commit 32d6bce8c232beb260d0d21ba8ec8a01447fbb85
Author: Praneet Nadella <[email protected]>
AuthorDate: Thu Feb 26 07:00:45 2026 -0800
Enable pickling main by reference option within CloudpickleConfig (#37554)
* Enable pickling main by reference in cloudpickle vendor
* Reverted changes to paths not taken for the main by ref
* yapf
* undo unnecessary move
---
.../internal/cloudpickle/cloudpickle.py | 23 +++++++++++++---------
1 file changed, 14 insertions(+), 9 deletions(-)
diff --git a/sdks/python/apache_beam/internal/cloudpickle/cloudpickle.py b/sdks/python/apache_beam/internal/cloudpickle/cloudpickle.py
index 495e888a516..4f2c5321edf 100644
--- a/sdks/python/apache_beam/internal/cloudpickle/cloudpickle.py
+++ b/sdks/python/apache_beam/internal/cloudpickle/cloudpickle.py
@@ -161,11 +161,15 @@ class CloudPickleConfig:
code changes: when a particular lambda function is slightly
modified but the location of the function in the codebase has not
changed, the pickled representation might stay the same.
+
+ pickle_main_by_ref: An optional boolean. If provided, cloudpickle will
+ pickle main by reference instead of by value.
"""
id_generator: typing.Optional[callable] = uuid_generator
skip_reset_dynamic_type_state: bool = False
filepath_interceptor: typing.Optional[callable] = None
get_code_object_params: typing.Optional[GetCodeObjectParams] = None
+ pickle_main_by_ref: bool = False
DEFAULT_CONFIG = CloudPickleConfig()
@@ -316,7 +320,7 @@ def _whichmodule(obj, name):
return None
-def _should_pickle_by_reference(obj, name=None):
+def _should_pickle_by_reference(obj, name=None, config=DEFAULT_CONFIG):
"""Test whether an function or a class should be pickled by reference
Pickling by reference means by that the object (typically a function or a
@@ -331,7 +335,7 @@ def _should_pickle_by_reference(obj, name=None):
explicitly registered to be pickled by value.
"""
if isinstance(obj, types.FunctionType) or issubclass(type(obj), type):
- module_and_name = _lookup_module_and_qualname(obj, name=name)
+ module_and_name = _lookup_module_and_qualname(obj, name=name, config=config)
if module_and_name is None:
return False
module, name = module_and_name
@@ -351,7 +355,7 @@ def _should_pickle_by_reference(obj, name=None):
"cannot check importability of {} instances".format(type(obj).__name__))
-def _lookup_module_and_qualname(obj, name=None):
+def _lookup_module_and_qualname(obj, name=None, config=DEFAULT_CONFIG):
if name is None:
name = getattr(obj, "__qualname__", None)
if name is None: # pragma: no cover
@@ -367,7 +371,7 @@ def _lookup_module_and_qualname(obj, name=None):
# imported module. obj is thus treated as dynamic.
return None
- if module_name == "__main__":
+ if module_name == "__main__" and not config.pickle_main_by_ref:
return None
# Note: if module_name is in sys.modules, the corresponding module is
@@ -718,7 +722,8 @@ def _decompose_typevar(obj, config: CloudPickleConfig):
def _typevar_reduce(obj, config: CloudPickleConfig):
# TypeVar instances require the module information hence why we
# are not using the _should_pickle_by_reference directly
- module_and_name = _lookup_module_and_qualname(obj, name=obj.__name__)
+ module_and_name = _lookup_module_and_qualname(
+ obj, name=obj.__name__, config=config)
if module_and_name is None:
return (_make_typevar, _decompose_typevar(obj, config))
@@ -1185,7 +1190,7 @@ def _class_reduce(obj, config: CloudPickleConfig):
return type, (NotImplemented, )
elif obj in _BUILTIN_TYPE_NAMES:
return _builtin_type, (_BUILTIN_TYPE_NAMES[obj], )
- elif not _should_pickle_by_reference(obj):
+ elif not _should_pickle_by_reference(obj, config=config):
return _dynamic_class_reduce(obj, config)
return NotImplemented
@@ -1410,7 +1415,7 @@ class Pickler(pickle.Pickler):
obj using a custom cloudpickle reducer designed specifically to handle
dynamic functions.
"""
- if _should_pickle_by_reference(obj):
+ if _should_pickle_by_reference(obj, config=self.config):
return NotImplemented
elif self.config.get_code_object_params is not None:
return self._stable_identifier_function_reduce(obj)
@@ -1617,7 +1622,7 @@ class Pickler(pickle.Pickler):
if name is not None:
super().save_global(obj, name=name)
- elif not _should_pickle_by_reference(obj, name=name):
+ elif not _should_pickle_by_reference(obj, name=name, config=self.config):
self._save_reduce_pickle5(
*_dynamic_class_reduce(obj, self.config), obj=obj)
else:
@@ -1642,7 +1647,7 @@ class Pickler(pickle.Pickler):
Determines what kind of function obj is (e.g. lambda, defined at
interactive prompt, etc) and handles the pickling appropriately.
"""
- if _should_pickle_by_reference(obj, name=name):
+ if _should_pickle_by_reference(obj, name=name, config=self.config):
return super().save_global(obj, name=name)
elif PYPY and isinstance(obj.__code__, builtin_code_type):
return self.save_pypy_builtin_func(obj)