This is an automated email from the ASF dual-hosted git repository.

striker pushed a commit to branch striker/speculative-actions
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit db40b9a4b71655a4377da88aca5c1ecace0ab253
Author: Sander Striker <[email protected]>
AuthorDate: Sat Mar 21 22:07:05 2026 +0100

    speculative-actions: Eliminate redundant work in generator
    
    Two optimizations to reduce generation overhead:
    
    1. Return input_digests from _generate_action_overlays() so the caller
       can reuse them for ACTION overlay matching, instead of re-fetching
       the Action and re-extracting digests (eliminated duplicate CAS reads
       per subaction).
    
    2. Collect artifact file entries during _build_digest_cache traversal
       (_own_artifact_entries) and reuse in _generate_artifact_overlays(),
       eliminating a redundant full traversal of the element's artifact tree.
    
    Cross-element ACTION overlays use eager dependency output seeding
    because they enable earlier resolution than ARTIFACT overlays (the
    dep's adapted actions are available via instantiated_actions before
    the dep's full build completes). The docstring on
    _seed_dependency_outputs explains this trade-off.
    
    Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>
---
 src/buildstream/_speculative_actions/generator.py | 160 ++++++++++------------
 1 file changed, 71 insertions(+), 89 deletions(-)

diff --git a/src/buildstream/_speculative_actions/generator.py 
b/src/buildstream/_speculative_actions/generator.py
index 9c6be7308..dfd6738d0 100644
--- a/src/buildstream/_speculative_actions/generator.py
+++ b/src/buildstream/_speculative_actions/generator.py
@@ -58,6 +58,11 @@ class SpeculativeActionsGenerator:
         # Multiple entries per digest enable fallback resolution:
         # SOURCE overlays are tried first, then ACTION, then ARTIFACT.
         self._digest_cache: Dict[str, list] = {}
+        # Artifact file entries for the element being processed,
+        # collected during _build_digest_cache to avoid re-traversal
+        # in _generate_artifact_overlays.
+        # List of (digest_hash, digest_size) tuples.
+        self._own_artifact_entries: list = []
 
     def generate_speculative_actions(self, element, subaction_digests, 
dependencies, mode=None):
         """
@@ -97,43 +102,45 @@ class SpeculativeActionsGenerator:
 
         # Seed prior_outputs with dependency subaction outputs for
         # cross-element ACTION overlays (full mode only).
+        # These enable earlier resolution than ARTIFACT overlays: an
+        # ACTION overlay resolves when the dep is primed (via
+        # instantiated_actions), while an ARTIFACT overlay only resolves
+        # when the dep is fully built.  This parallelism is the core
+        # benefit of speculative actions.
         if mode == _SpeculativeActionMode.FULL:
             if self._ac_service and self._artifactcache:
                 self._seed_dependency_outputs(dependencies, prior_outputs)
 
         # Generate overlays for each subaction
         for subaction_digest in subaction_digests:
-            spec_action = self._generate_action_overlays(element, 
subaction_digest)
+            spec_action, input_digests = 
self._generate_action_overlays(element, subaction_digest)
 
             # Generate ACTION overlays for digests that match prior subaction 
outputs
             # but weren't already resolved as SOURCE or ARTIFACT.
             # Requires intra-element or full mode.
             if mode in (_SpeculativeActionMode.INTRA_ELEMENT, 
_SpeculativeActionMode.FULL):
-                if self._ac_service and prior_outputs:
-                    action = self._cas.fetch_action(subaction_digest)
-                    if action:
-                        input_digests = 
self._extract_digests_from_action(action)
-                        # Collect hashes already covered by SOURCE/ARTIFACT 
overlays
-                        already_overlaid = set()
-                        if spec_action:
-                            for overlay in spec_action.overlays:
-                                
already_overlaid.add(overlay.target_digest.hash)
-
-                        for digest_hash, digest_size in input_digests:
-                            if digest_hash in prior_outputs and digest_hash 
not in already_overlaid:
-                                source_element, producing_action_digest, 
output_path = prior_outputs[digest_hash]
-                                # Create ACTION overlay
-                                if spec_action is None:
-                                    spec_action = 
speculative_actions_pb2.SpeculativeActions.SpeculativeAction()
-                                    
spec_action.base_action_digest.CopyFrom(subaction_digest)
-                                overlay = 
speculative_actions_pb2.SpeculativeActions.Overlay()
-                                overlay.type = 
speculative_actions_pb2.SpeculativeActions.Overlay.ACTION
-                                overlay.source_element = source_element
-                                
overlay.source_action_digest.CopyFrom(producing_action_digest)
-                                overlay.source_path = output_path
-                                overlay.target_digest.hash = digest_hash
-                                overlay.target_digest.size_bytes = digest_size
-                                spec_action.overlays.append(overlay)
+                if self._ac_service and prior_outputs and input_digests:
+                    # Collect hashes already covered by SOURCE/ARTIFACT 
overlays
+                    already_overlaid = set()
+                    if spec_action:
+                        for overlay in spec_action.overlays:
+                            already_overlaid.add(overlay.target_digest.hash)
+
+                    for digest_hash, digest_size in input_digests:
+                        if digest_hash in prior_outputs and digest_hash not in 
already_overlaid:
+                            source_element, producing_action_digest, 
output_path = prior_outputs[digest_hash]
+                            # Create ACTION overlay
+                            if spec_action is None:
+                                spec_action = 
speculative_actions_pb2.SpeculativeActions.SpeculativeAction()
+                                
spec_action.base_action_digest.CopyFrom(subaction_digest)
+                            overlay = 
speculative_actions_pb2.SpeculativeActions.Overlay()
+                            overlay.type = 
speculative_actions_pb2.SpeculativeActions.Overlay.ACTION
+                            overlay.source_element = source_element
+                            
overlay.source_action_digest.CopyFrom(producing_action_digest)
+                            overlay.source_path = output_path
+                            overlay.target_digest.hash = digest_hash
+                            overlay.target_digest.size_bytes = digest_size
+                            spec_action.overlays.append(overlay)
 
             # Sort overlays: SOURCE > ACTION > ARTIFACT
             # This ensures the instantiator tries SOURCE first, then
@@ -194,6 +201,11 @@ class SpeculativeActionsGenerator:
         subaction input tree contains a file that was produced by a
         dependency's subaction, the overlay will reference it.
 
+        Cross-element ACTION overlays enable earlier resolution than
+        ARTIFACT overlays: they resolve when the dep is primed (via
+        instantiated_actions), while ARTIFACT overlays only resolve
+        when the dep is fully built.
+
         Args:
             dependencies: List of dependency elements
             prior_outputs: Dict to seed with file_digest_hash ->
@@ -233,6 +245,7 @@ class SpeculativeActionsGenerator:
             dependencies: List of dependency elements
         """
         self._digest_cache.clear()
+        self._own_artifact_entries.clear()
 
         # Index element's own sources (highest priority)
         self._index_element_sources(element, element)
@@ -248,6 +261,10 @@ class SpeculativeActionsGenerator:
         for dep in dependencies:
             self._index_element_artifact(dep)
 
+        # Index element's own artifact and collect entries for
+        # artifact_overlays generation (avoids re-traversal later)
+        self._index_element_artifact(element, collect_entries=True)
+
     def _index_element_sources(self, element, source_element):
         """
         Index all file digests in an element's source tree.
@@ -279,12 +296,14 @@ class SpeculativeActionsGenerator:
             # Gracefully handle missing sources
             pass
 
-    def _index_element_artifact(self, element):
+    def _index_element_artifact(self, element, collect_entries=False):
         """
         Index all file digests in an element's artifact output.
 
         Args:
             element: The element whose artifact to index
+            collect_entries: If True, also collect (digest_hash, digest_size)
+                tuples in self._own_artifact_entries for artifact_overlays
         """
         try:
             # Check if element is cached
@@ -303,13 +322,15 @@ class SpeculativeActionsGenerator:
 
             # Traverse the artifact files directory with full paths
             self._traverse_directory_with_paths(
-                files_dir._get_digest(), element.name, "ARTIFACT", ""  # Start 
with empty path
+                files_dir._get_digest(), element.name, "ARTIFACT", "",
+                collect_entries=collect_entries,
             )
         except Exception as e:
             # Gracefully handle missing artifacts
             pass
 
-    def _traverse_directory_with_paths(self, directory_digest, element_name, 
overlay_type, current_path):
+    def _traverse_directory_with_paths(self, directory_digest, element_name, 
overlay_type, current_path,
+                                       collect_entries=False):
         """
         Recursively traverse a Directory tree and index all file digests with 
full paths.
 
@@ -318,6 +339,7 @@ class SpeculativeActionsGenerator:
             element_name: The element name to associate with found files
             overlay_type: Either "SOURCE" or "ARTIFACT"
             current_path: Current relative path from root (e.g., "src/foo")
+            collect_entries: If True, also append to self._own_artifact_entries
         """
         try:
             directory = self._cas.fetch_directory_proto(directory_digest)
@@ -338,11 +360,19 @@ class SpeculativeActionsGenerator:
                     if entry not in self._digest_cache[digest_hash]:
                         self._digest_cache[digest_hash].append(entry)
 
+                if collect_entries:
+                    self._own_artifact_entries.append(
+                        (digest_hash, file_node.digest.size_bytes)
+                    )
+
             # Recursively traverse subdirectories
             for dir_node in directory.directories:
                 # Build path for subdirectory
                 subdir_path = dir_node.name if not current_path else 
f"{current_path}/{dir_node.name}"
-                self._traverse_directory_with_paths(dir_node.digest, 
element_name, overlay_type, subdir_path)
+                self._traverse_directory_with_paths(
+                    dir_node.digest, element_name, overlay_type, subdir_path,
+                    collect_entries=collect_entries,
+                )
         except Exception as e:
             # Gracefully handle errors
             pass
@@ -356,14 +386,14 @@ class SpeculativeActionsGenerator:
             action_digest: The Action digest to generate overlays for
 
         Returns:
-            SpeculativeAction proto or None if action not found
+            Tuple of (SpeculativeAction proto or None, input_digests set)
         """
         from .._protos.buildstream.v2 import speculative_actions_pb2
 
         # Fetch the action from CAS
         action = self._cas.fetch_action(action_digest)
         if not action:
-            return None
+            return None, set()
 
         spec_action = 
speculative_actions_pb2.SpeculativeActions.SpeculativeAction()
         spec_action.base_action_digest.CopyFrom(action_digest)
@@ -377,7 +407,7 @@ class SpeculativeActionsGenerator:
             overlays = self._resolve_digest_to_overlays(digest, element)
             spec_action.overlays.extend(overlays)
 
-        return spec_action if spec_action.overlays else None
+        return (spec_action if spec_action.overlays else None), input_digests
 
     def _extract_digests_from_action(self, action):
         """
@@ -480,8 +510,8 @@ class SpeculativeActionsGenerator:
         """
         Generate artifact_overlays for the element's output files.
 
-        This creates a mapping from artifact file digests back to their
-        sources, enabling downstream elements to trace dependencies.
+        Uses _own_artifact_entries collected during _build_digest_cache
+        to avoid re-traversing the artifact tree.
 
         Args:
             element: The element with the artifact
@@ -490,59 +520,11 @@ class SpeculativeActionsGenerator:
             List of Overlay protos
         """
         overlays = []
-
-        try:
-            # Check if element is cached
-            if not element._cached():
-                return overlays
-
-            # Get the artifact object
-            artifact = element._get_artifact()
-            if not artifact or not artifact.cached():
-                return overlays
-
-            # Get the artifact files directory
-            files_dir = artifact.get_files()
-            if not files_dir:
-                return overlays
-
-            # Traverse artifact files and create overlays for each
-            self._generate_overlays_for_directory(
-                files_dir._get_digest(), element, overlays, ""  # Start with 
empty path
+        for digest_hash, digest_size in self._own_artifact_entries:
+            resolved = self._resolve_digest_to_overlays(
+                (digest_hash, digest_size), element
             )
-        except Exception as e:
-            pass
-
+            # For artifact_overlays, take the highest-priority overlay
+            if resolved:
+                overlays.append(resolved[0])
         return overlays
-
-    def _generate_overlays_for_directory(self, directory_digest, element, 
overlays, current_path):
-        """
-        Recursively generate overlays for files in a directory.
-
-        Args:
-            directory_digest: Directory to process
-            element: The element being processed
-            overlays: List to append overlays to
-            current_path: Current relative path from root
-        """
-        try:
-            directory = self._cas.fetch_directory_proto(directory_digest)
-            if not directory:
-                return
-
-            # Process each file with full path
-            for file_node in directory.files:
-                file_path = file_node.name if not current_path else 
f"{current_path}/{file_node.name}"
-                resolved = self._resolve_digest_to_overlays(
-                    (file_node.digest.hash, file_node.digest.size_bytes), 
element
-                )
-                # For artifact_overlays, take the highest-priority overlay
-                if resolved:
-                    overlays.append(resolved[0])
-
-            # Recursively process subdirectories
-            for dir_node in directory.directories:
-                subdir_path = dir_node.name if not current_path else 
f"{current_path}/{dir_node.name}"
-                self._generate_overlays_for_directory(dir_node.digest, 
element, overlays, subdir_path)
-        except Exception as e:
-            pass

Reply via email to