udim commented on code in PR #28418:
URL: https://github.com/apache/beam/pull/28418#discussion_r1323632600


##########
sdks/python/apache_beam/runners/worker/data_sampler.py:
##########
@@ -216,17 +218,40 @@ def __init__(
       self,
       max_samples: int = 10,
       sample_every_sec: float = 30,
+      sample_only_exceptions: bool = False,
       clock=None) -> None:
     # Key is PCollection id. Is guarded by the _samplers_lock.
     self._samplers: Dict[str, OutputSampler] = {}
     # Bundles are processed in parallel, so new samplers may be added when the
     # runner queries for samples.
     self._samplers_lock: threading.Lock = threading.Lock()
     self._max_samples = max_samples
-    self._sample_every_sec = sample_every_sec
+    self._sample_every_sec = 0.0 if sample_only_exceptions else sample_every_sec
     self._samplers_by_output: Dict[str, List[OutputSampler]] = {}
     self._clock = clock
 
+  _ENABLE_DATA_SAMPLING = 'enable_data_sampling'
+  _ENABLE_ALWAYS_ON_EXCEPTION_SAMPLING = 'enable_always_on_exception_sampling'
+  _DISABLE_ALWAYS_ON_EXCEPTION_SAMPLING = 'disable_always_on_exception_sampling'
+
+  @staticmethod
+  def create(sdk_pipeline_options: PipelineOptions, **kwargs):
+    experiments = sdk_pipeline_options.view_as(DebugOptions).experiments or []
+
+    # When true, enables only the sampling of exceptions.
+    always_on_exception_sampling = (
+        DataSampler._ENABLE_ALWAYS_ON_EXCEPTION_SAMPLING in experiments and
+        DataSampler._DISABLE_ALWAYS_ON_EXCEPTION_SAMPLING not in experiments)
+
+    # When true, enables the sampling of all PCollections and exceptions.
+    enable_data_sampling = DataSampler._ENABLE_DATA_SAMPLING in experiments
+
+    if enable_data_sampling or always_on_exception_sampling:

Review Comment:
   If both of these flags are true, only exception sampling is enabled.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to