gtristan commented on code in PR #2035:
URL: https://github.com/apache/buildstream/pull/2035#discussion_r2212439426


##########
src/buildstream/_frontend/inspect.py:
##########
@@ -0,0 +1,397 @@
+import json
+import sys
+from dataclasses import dataclass, fields, is_dataclass
+from enum import StrEnum
+
+from ..types import _PipelineSelection, _Scope
+
+
+# Inspectable Elements as serialized to the terminal
+@dataclass
+class _Element:
+    name: str
+    description: str
+    workspace: any
+    key: str
+    key_full: str
+    state: str
+    environment: dict[str, str]
+    variables: dict[str, str]
+    artifact: any
+    dependencies: list[str]
+    build_dependencies: list[str]
+    runtime_dependencies: list[str]
+    sources: list[dict[str, str]]
+
+
+# Representation of a cache server
+#
+# The explicit __init__() below is used in place of a dataclass generated
+# one, so an instance is built from a single spec object instead of from
+# one argument per field.
+#
+# Args:
+#       spec: An object providing `url` and `instance_name` attributes
+@dataclass
+class _CacheServer:
+    url: str
+    instance: str
+
+    def __init__(self, spec):
+        self.url = spec.url
+        # Exposed as `instance` rather than `instance_name`
+        self.instance = spec.instance_name
+
+
+# User configuration
+@dataclass
+class _UserConfig:
+    configuration: str
+    cache_directory: str
+    log_directory: str
+    source_directory: str
+    build_directory: str
+    source_mirrors: str
+    build_area: str
+    strict_build_plan: bool
+    cache_directory: str
+    maximum_fetch_tasks: int
+    maximum_build_tasks: int
+    maximum_push_tasks: int
+    maximum_network_retries: int
+    cache_storage_service: _CacheServer | None
+    # remote specs
+    remote_execution_service: _CacheServer | None
+    remote_storage_service: _CacheServer | None
+    remote_action_cache_service: _CacheServer | None
+
+
+# String representation of loaded plugins
+@dataclass
+class _Plugin:
+    name: str
+    full: str  # class str (presumably str() of the plugin class; confirm against the caller)
+
+
+# Configuration of a given project
+@dataclass
+class _ProjectConfig:
+    name: str
+    directory: str | None
+    junction: str | None
+    variables: [(str, str)]
+    element_plugins: [_Plugin]
+    source_plugins: [_Plugin]
+
+
+# A single project loaded from the current configuration
+@dataclass
+class _Project:
+    provenance: str
+    duplicates: [str]
+    declarations: [str]
+    config: _ProjectConfig
+
+
+# Wrapper object ecapsulating the entire output of `bst inspect`
+@dataclass
+class _InspectOutput:
+    project: [_Project]
+    # user configuration
+    user_config: _UserConfig
+    elements: list[_Element]
+
+
+# Used to indicate the state of a given element
+#
+# Members are strings (StrEnum), so their values can be serialized as is.
+class _ElementState(StrEnum):
+    # Cannot determine the element state (not all sources are resolved)
+    NO_REFERENCE = "no-reference"
+
+    # The element has failed
+    FAILED = "failed"
+
+    # The element is a junction
+    JUNCTION = "junction"
+
+    # The element is waiting
+    WAITING = "waiting"
+
+    # The element is cached
+    CACHED = "cached"
+
+    # The element needs to be loaded from a remote source
+    FETCH_NEEDED = "fetch-needed"
+
+    # The element may be built
+    BUILDABLE = "buildable"
+
+
+# _make_dataclass()
+#
+# This is a helper class for extracting values from different objects used
+# across Buildstream into JSON serializable output.
+#
+# If keys is a list of str then each attribute is copied directly to the
+# dataclass.
+# If keys is a tuple of str then the first value is extracted from the object
+# and renamed to the second value.
+#
+# The field of kwarg is mapped directly onto the dataclass. If the value is
+# callable then that function is called passing the object to it.
+#
+# Args:
+#       obj: Whichever object you are serializing
+#       _cls: The dataclass you are constructing
+#       keys: attributes to include directly from the obj
+#       kwargs: key values passed into the dataclass
+def _make_dataclass(obj, _cls, keys: list[(str, str)] | list[str], **kwargs):
+    params = dict()
+    for key in keys:
+        name = None
+        rename = None
+        if isinstance(key, tuple):
+            name = key[0]
+            rename = key[1]
+        elif isinstance(key, str):
+            name = key
+            rename = None
+        else:
+            raise Exception("BUG: Keys may only be (str, str) or str")
+        value = None
+        if isinstance(obj, dict):
+            value = obj.get(name)
+        elif isinstance(obj, object):
+            try:
+                value = getattr(obj, name)
+            except AttributeError:
+                pass
+        else:
+            raise Exception("BUG: obj must be a dict or object")
+        if rename:
+            params[rename] = value
+        else:
+            params[name] = value
+    for key, helper in kwargs.items():
+        if callable(helper):
+            params[key] = helper(obj)
+        else:
+            params[key] = helper
+    return _cls(**params)
+
+
+# Recursively dump the dataclass into a serializable dictionary. Null values
+# are dropped from the output.
+def _dump_dataclass(_cls):
+    d = dict()
+    if not is_dataclass(_cls):
+        raise Exception("BUG: obj must be a dataclass")
+    for field in fields(_cls):
+        value = getattr(_cls, field.name)
+        if value is None:  # hide null values
+            continue
+        if is_dataclass(value):
+            d[field.name] = _dump_dataclass(value)
+        elif isinstance(value, list):
+            items = []
+            for item in value:
+                if is_dataclass(item):
+                    # check if it's a list of dataclasses
+                    items.append(_dump_dataclass(item))
+                else:
+                    items.append(item)
+            d[field.name] = items
+        else:
+            d[field.name] = value
+    return d
+
+
+# Inspect elements from a given Buildstream project
+class Inspector:
+    def __init__(self, stream, project, context):
+        # Keep references to the objects handed in by the caller; nothing
+        # is computed at construction time.
+        self.stream = stream
+        self.project = project
+        self.context = context
+
+    def _read_state(self, element):
+        try:
+            if not element._has_all_sources_resolved():
+                return _ElementState.NO_REFERENCE
+            else:
+                if element.get_kind() == "junction":
+                    return _ElementState.JUNCTION
+                elif not element._can_query_cache():
+                    return _ElementState.WAITING
+                elif element._cached_failure():
+                    return _ElementState.FAILED
+                elif element._cached_success():
+                    return _ElementState.CACHED
+                elif not element._can_query_source_cache():
+                    return _ElementState.WAITING
+                elif element._fetch_needed():
+                    return _ElementState.FETCH_NEEDED
+                elif element._buildable():
+                    return _ElementState.BUILDABLE
+                else:
+                    return _ElementState.WAITING
+        except BstError as e:
+            # Provide context to plugin error
+            e.args = ("Failed to determine state for {}: 
{}".format(element._get_full_name(), str(e)),)
+            raise e
+
+    def _elements(self, dependencies, with_state=False):
+        for element in dependencies:
+
+            # These operations require state and are only shown if requested
+            key = None
+            key_full = None
+            state = None
+            artifact = None
+
+            if with_state:
+                key = element._get_display_key().brief
+
+                key_full = element._get_display_key().full
+
+                state = self._read_state(element).value
+
+                # BUG: Due to the assersion within .get_artifact this will

Review Comment:
   I think what you are misunderstanding is that there is a difference between 
the local and remote caches.
   
   Loading artifacts from the local CAS is expensive, and so is checking the 
cached state of artifacts. That is why we avoid loading cached state where 
possible.
   
   In this comment, we’re talking about loading the artifact to check its files 
and CAS digest; we can do that if the artifact is in the local cache.
   
   Some bst commands also allow pulling from remotes, which can be useful in 
case the artifacts were built in CI or pushed by another user, but I don’t 
think having `bst inspect` support downloading artifacts from remotes is 
important for an initial implementation.
   
   For this case, you should follow the code paths for `bst artifact 
list-contents` in order to see how artifact loading is done… and follow the 
`bst show` code path for displaying `%{artifact-cas-digest}` to observe how to 
get the digest.
   
   It is acceptable to just omit the data for these in the case that the local 
artifact is not cached and is thus unavailable.
   
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to