On Wed, Mar 4, 2026 at 10:05 AM Stefano Tondo <[email protected]> wrote:
>
> This commit adds file filtering capabilities to SPDX 3.0 SBOM generation
> to reduce SBOM size and focus on relevant files.
>
> New configuration variables (in spdx-common.bbclass):
>
>   SPDX_FILE_FILTER (default: "all"):
>     - "all": Include all files (current behavior)
>     - "essential": Include only LICENSE/README/NOTICE files
>     - "none": Skip all files

Having file "classes" like this seems unnecessary, and it also seems
unlikely that anyone will agree on what goes in each class. A variable
with a list of regexes that is used to filter the files is fine, but
leave it up to the end users to decide what should be included/excluded.
IOW, drop all these variables and just have
SPDX_FILE_PATTERNS/SPDX_FILE_EXCLUDE_PATTERNS variable(s), which
default to empty and have no effect when left empty.

>
>   SPDX_FILE_ESSENTIAL_PATTERNS (extensible):
>     - Space-separated patterns for essential files
>     - Default: LICENSE COPYING README NOTICE COPYRIGHT etc.
>     - Recipes can extend: SPDX_FILE_ESSENTIAL_PATTERNS += "MANIFEST"
>
>   SPDX_FILE_EXCLUDE_PATTERNS (extensible):
>     - Patterns to exclude in 'essential' mode
>     - Default: .patch .diff test_ /tests/ .pyc .o etc.
>     - Recipes can extend: SPDX_FILE_EXCLUDE_PATTERNS += ".tmp"
>
> Implementation (in spdx30_tasks.py):
>
>   - add_package_files(): Apply filtering during file walk
>   - get_package_sources_from_debug(): Skip debug source lookup for
>     filtered files instead of failing
>
> Impact:
>
>   - Essential mode reduces file components by ~96% (2,376 → ~90 files)
>   - Filters out patches, test files, and build artifacts
>   - Configurable per-recipe via variable extension
>   - No impact when SPDX_FILE_FILTER="all" (default)
>
> This is useful for creating compact SBOMs for compliance and distribution
> where only license-relevant files are needed.
>
> Signed-off-by: Stefano Tondo <[email protected]>
> ---
>  meta/classes/spdx-common.bbclass | 37 +++++++++++++++++++++++++++
>  meta/lib/oe/spdx30_tasks.py      | 44 +++++++++++++++++++++++++++++---
>  2 files changed, 77 insertions(+), 4 deletions(-)
>
> diff --git a/meta/classes/spdx-common.bbclass 
> b/meta/classes/spdx-common.bbclass
> index 3110230c9e..81c61e10dc 100644
> --- a/meta/classes/spdx-common.bbclass
> +++ b/meta/classes/spdx-common.bbclass
> @@ -54,6 +54,43 @@ SPDX_CONCLUDED_LICENSE[doc] = "The license concluded by 
> manual or external \
>
>  SPDX_MULTILIB_SSTATE_ARCHS ??= "${SSTATE_ARCHS}"
>
> +SPDX_FILES_INCLUDED ??= "all"
> +SPDX_FILES_INCLUDED[doc] = "Controls which files are included in SPDX 
> output. \
> +    Values: 'all' (include all files), 'essential' (only 
> LICENSE/README/NOTICE files), \
> +    'none' (no files). The 'essential' mode reduces SBOM size by excluding 
> patches, \
> +    tests, and build artifacts."
> +
> +SPDX_FILE_ESSENTIAL_PATTERNS ??= "LICENSE COPYING README NOTICE COPYRIGHT 
> PATENTS ACKNOWLEDGEMENTS THIRD-PARTY-NOTICES"
> +SPDX_FILE_ESSENTIAL_PATTERNS[doc] = "Space-separated list of file name 
> patterns to \
> +    include when SPDX_FILES_INCLUDED='essential'. Recipes can extend this to 
> add their \
> +    own essential files (e.g., 'SPDX_FILE_ESSENTIAL_PATTERNS += 
> \"MANIFEST\"')."
> +
> +SPDX_FILE_EXCLUDE_PATTERNS ??= ".patch .diff test_ _test. /test/ /tests/ 
> .pyc .pyo .o .a .la"
> +SPDX_FILE_EXCLUDE_PATTERNS[doc] = "Space-separated list of patterns to 
> exclude when \
> +    SPDX_FILES_INCLUDED='essential'. Files matching these patterns are 
> filtered out. \
> +    Recipes can extend this to exclude additional file types."
> +
> +SBOM_COMPONENT_NAME ??= ""
> +SBOM_COMPONENT_NAME[doc] = "Name of the SBOM metadata component. If set, 
> creates a \
> +    software_Package element in the SBOM with image/product information. 
> Typically \
> +    set to IMAGE_BASENAME or product name."

I'm not sure why this change is in this patch. The same goes for the
other variables that follow.

> +
> +SBOM_COMPONENT_VERSION ??= "${DISTRO_VERSION}"
> +SBOM_COMPONENT_VERSION[doc] = "Version of the SBOM metadata component. Used 
> when \
> +    SBOM_COMPONENT_NAME is set. Defaults to DISTRO_VERSION."
> +
> +SBOM_COMPONENT_SUMMARY ??= ""
> +SBOM_COMPONENT_SUMMARY[doc] = "Description of the SBOM metadata component. 
> Used when \
> +    SBOM_COMPONENT_NAME is set. Typically set to IMAGE_SUMMARY or product 
> description."
> +
> +SBOM_SUPPLIER_NAME ??= ""
> +SBOM_SUPPLIER_NAME[doc] = "Name of the organization supplying the SBOM. If 
> set, \
> +    creates an Organization element in the SBOM with supplier information."
> +
> +SBOM_SUPPLIER_URL ??= ""
> +SBOM_SUPPLIER_URL[doc] = "URL of the organization supplying the SBOM. Used 
> when \
> +    SBOM_SUPPLIER_NAME is set. Adds an external identifier with the 
> organization URL."
> +
>  python () {
>      from oe.cve_check import extend_cve_status
>      extend_cve_status(d)
> diff --git a/meta/lib/oe/spdx30_tasks.py b/meta/lib/oe/spdx30_tasks.py
> index 99f2892dfb..bd703b5bec 100644
> --- a/meta/lib/oe/spdx30_tasks.py
> +++ b/meta/lib/oe/spdx30_tasks.py
> @@ -161,6 +161,11 @@ def add_package_files(
>          compiled_sources, types = oe.spdx_common.get_compiled_sources(d)
>          bb.debug(1, f"Total compiled files: {len(compiled_sources)}")
>
> +    # File filtering configuration
> +    spdx_file_filter = (d.getVar("SPDX_FILE_FILTER") or "all").lower()
> +    essential_patterns = (d.getVar("SPDX_FILE_ESSENTIAL_PATTERNS") or 
> "").split()
> +    exclude_patterns = (d.getVar("SPDX_FILE_EXCLUDE_PATTERNS") or "").split()
> +
>      for subdir, dirs, files in os.walk(topdir, onerror=walk_error):
>          dirs[:] = [d for d in dirs if d not in ignore_dirs]
>          if subdir == str(topdir):
> @@ -174,6 +179,26 @@ def add_package_files(
>                  continue
>
>              filename = str(filepath.relative_to(topdir))
> +
> +            # Apply file filtering if enabled
> +            if spdx_file_filter == "essential":
> +                file_upper = file.upper()
> +                filename_lower = filename.lower()
> +
> +                # Skip if matches exclude patterns
> +                skip_file = any(pattern in filename_lower for pattern in 
> exclude_patterns)
> +                if skip_file:
> +                    continue
> +
> +                # Keep only essential files (license/readme/etc)
> +                is_essential = any(pattern in file_upper for pattern in 
> essential_patterns)
> +                if not is_essential:
> +                    continue
> +            elif spdx_file_filter == "none":
> +                # Skip all files
> +                continue
> +            # else: spdx_file_filter == "all" or any other value - include 
> all files
> +
>              file_purposes = get_purposes(filepath)
>
>              # Check if file is compiled
> @@ -219,6 +244,8 @@ def add_package_files(
>  def get_package_sources_from_debug(
>      d, package, package_files, sources, source_hash_cache
>  ):
> +    spdx_file_filter = (d.getVar("SPDX_FILE_FILTER") or "all").lower()
> +
>      def file_path_match(file_path, pkg_file):
>          if file_path.lstrip("/") == pkg_file.name.lstrip("/"):
>              return True
> @@ -251,10 +278,19 @@ def get_package_sources_from_debug(
>              continue
>
>          if not any(file_path_match(file_path, pkg_file) for pkg_file in 
> package_files):
> -            bb.fatal(
> -                "No package file found for %s in %s; SPDX found: %s"
> -                % (str(file_path), package, " ".join(p.name for p in 
> package_files))
> -            )
> +            # When file filtering is active, some files may be filtered out
> +            # Skip debug source lookup instead of failing
> +            if spdx_file_filter in ("none", "essential"):
> +                bb.debug(
> +                    1,
> +                    f"Skipping debug source lookup for {file_path} in 
> {package} (filtered by SPDX_FILE_FILTER={spdx_file_filter})",
> +                )
> +                continue
> +            else:
> +                bb.fatal(
> +                    "No package file found for %s in %s; SPDX found: %s"
> +                    % (str(file_path), package, " ".join(p.name for p in 
> package_files))
> +                )
>              continue
>
>          for debugsrc in file_data["debugsrc"]:
> --
> 2.53.0
>
-=-=-=-=-=-=-=-=-=-=-=-
Links: You receive all messages sent to this group.
View/Reply Online (#232616): 
https://lists.openembedded.org/g/openembedded-core/message/232616
Mute This Topic: https://lists.openembedded.org/mt/118136151/21656
Group Owner: [email protected]
Unsubscribe: https://lists.openembedded.org/g/openembedded-core/unsub 
[[email protected]]
-=-=-=-=-=-=-=-=-=-=-=-

Reply via email to