This is an automated email from the ASF dual-hosted git repository. striker pushed a commit to branch striker/speculative-actions in repository https://gitbox.apache.org/repos/asf/buildstream.git
commit e7d0a3f64064545075e9c289a0507eee8d91df30
Author: Sander Striker <[email protected]>
AuthorDate: Sat Mar 21 22:11:16 2026 +0100

    speculative-actions: Add directory proto cache in instantiator

    Add an in-memory cache for parsed Directory protos to avoid redundant
    CAS reads during overlay resolution and tree modification. Many overlays
    reference files in the same directory trees — the same intermediate
    Directory protos were being read from disk repeatedly.

    The cache is keyed by digest hash and shared across all subactions
    within a single instantiator instance (~1MB for ~10K directories).
    Used in both _find_file_by_path() (overlay resolution) and
    _replace_digests_in_tree() (action adaptation).

    Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>
---
 src/buildstream/_speculative_actions/instantiator.py | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/src/buildstream/_speculative_actions/instantiator.py b/src/buildstream/_speculative_actions/instantiator.py
index 4ec0412ed..085974db0 100644
--- a/src/buildstream/_speculative_actions/instantiator.py
+++ b/src/buildstream/_speculative_actions/instantiator.py
@@ -51,6 +51,10 @@ class SpeculativeActionInstantiator:
         self._cas = cas
         self._artifactcache = artifactcache
         self._ac_service = ac_service
+        # Cache parsed Directory protos to avoid redundant CAS reads.
+        # Many overlays reference files in the same directory trees,
+        # so intermediate Directory protos are fetched repeatedly.
+        self._dir_cache = {}  # type: dict[str, object]
 
     def instantiate_action(self, spec_action, element, element_lookup,
                            instantiated_actions=None, resolved_cache=None):
@@ -392,6 +396,16 @@ class SpeculativeActionInstantiator:
 
         return None
 
+    def _cached_fetch_directory(self, digest):
+        """Fetch a Directory proto, using the in-memory cache."""
+        cached = self._dir_cache.get(digest.hash)
+        if cached is not None:
+            return cached
+        directory = self._cas.fetch_directory_proto(digest)
+        if directory is not None:
+            self._dir_cache[digest.hash] = directory
+        return directory
+
     def _find_file_by_path(self, directory_digest, file_path):
         """
         Find a file in a directory tree by full relative path.
@@ -413,7 +427,7 @@ class SpeculativeActionInstantiator:
 
         # Navigate through directories
         for i, part in enumerate(parts[:-1]):  # All but the last (filename)
-            directory = self._cas.fetch_directory_proto(current_digest)
+            directory = self._cached_fetch_directory(current_digest)
             if not directory:
                 return None
 
@@ -430,7 +444,7 @@ class SpeculativeActionInstantiator:
 
         # Now find the file
         filename = parts[-1]
-        directory = self._cas.fetch_directory_proto(current_digest)
+        directory = self._cached_fetch_directory(current_digest)
         if not directory:
             return None
 
@@ -455,7 +469,7 @@ class SpeculativeActionInstantiator:
             New directory digest or None
         """
         try:
-            directory = self._cas.fetch_directory_proto(directory_digest)
+            directory = self._cached_fetch_directory(directory_digest)
             if not directory:
                 return None
 
