On Wed, Mar 4, 2026 at 10:05 AM Stefano Tondo <[email protected]> wrote: > > This commit adds file filtering capabilities to SPDX 3.0 SBOM generation > to reduce SBOM size and focus on relevant files. > > New configuration variables (in spdx-common.bbclass): > > SPDX_FILE_FILTER (default: "all"): > - "all": Include all files (current behavior) > - "essential": Include only LICENSE/README/NOTICE files > - "none": Skip all files
Having file "classes" like this seems unnecessary, and it also seems unlikely that anyone will agree on what goes in each class. A variable with a list of regexes that is used to filter the files is fine, but leave it up to the end users to decide what should be included/excluded. In other words, drop all these variables and just have SPDX_FILE_PATTERNS/SPDX_FILE_EXCLUDE_PATTERNS variable(s), which default to empty and have no effect when empty. > > SPDX_FILE_ESSENTIAL_PATTERNS (extensible): > - Space-separated patterns for essential files > - Default: LICENSE COPYING README NOTICE COPYRIGHT etc. > - Recipes can extend: SPDX_FILE_ESSENTIAL_PATTERNS += "MANIFEST" > > SPDX_FILE_EXCLUDE_PATTERNS (extensible): > - Patterns to exclude in 'essential' mode > - Default: .patch .diff test_ /tests/ .pyc .o etc. > - Recipes can extend: SPDX_FILE_EXCLUDE_PATTERNS += ".tmp" > > Implementation (in spdx30_tasks.py): > > - add_package_files(): Apply filtering during file walk > - get_package_sources_from_debug(): Skip debug source lookup for > filtered files instead of failing > > Impact: > > - Essential mode reduces file components by ~96% (2,376 → ~90 files) > - Filters out patches, test files, and build artifacts > - Configurable per-recipe via variable extension > - No impact when SPDX_FILE_FILTER="all" (default) > > This is useful for creating compact SBOMs for compliance and distribution > where only license-relevant files are needed. 
> > Signed-off-by: Stefano Tondo <[email protected]> > --- > meta/classes/spdx-common.bbclass | 37 +++++++++++++++++++++++++++ > meta/lib/oe/spdx30_tasks.py | 44 +++++++++++++++++++++++++++++--- > 2 files changed, 77 insertions(+), 4 deletions(-) > > diff --git a/meta/classes/spdx-common.bbclass > b/meta/classes/spdx-common.bbclass > index 3110230c9e..81c61e10dc 100644 > --- a/meta/classes/spdx-common.bbclass > +++ b/meta/classes/spdx-common.bbclass > @@ -54,6 +54,43 @@ SPDX_CONCLUDED_LICENSE[doc] = "The license concluded by > manual or external \ > > SPDX_MULTILIB_SSTATE_ARCHS ??= "${SSTATE_ARCHS}" > > +SPDX_FILES_INCLUDED ??= "all" > +SPDX_FILES_INCLUDED[doc] = "Controls which files are included in SPDX > output. \ > + Values: 'all' (include all files), 'essential' (only > LICENSE/README/NOTICE files), \ > + 'none' (no files). The 'essential' mode reduces SBOM size by excluding > patches, \ > + tests, and build artifacts." > + > +SPDX_FILE_ESSENTIAL_PATTERNS ??= "LICENSE COPYING README NOTICE COPYRIGHT > PATENTS ACKNOWLEDGEMENTS THIRD-PARTY-NOTICES" > +SPDX_FILE_ESSENTIAL_PATTERNS[doc] = "Space-separated list of file name > patterns to \ > + include when SPDX_FILES_INCLUDED='essential'. Recipes can extend this to > add their \ > + own essential files (e.g., 'SPDX_FILE_ESSENTIAL_PATTERNS += > \"MANIFEST\"')." > + > +SPDX_FILE_EXCLUDE_PATTERNS ??= ".patch .diff test_ _test. /test/ /tests/ > .pyc .pyo .o .a .la" > +SPDX_FILE_EXCLUDE_PATTERNS[doc] = "Space-separated list of patterns to > exclude when \ > + SPDX_FILES_INCLUDED='essential'. Files matching these patterns are > filtered out. \ > + Recipes can extend this to exclude additional file types." > + > +SBOM_COMPONENT_NAME ??= "" > +SBOM_COMPONENT_NAME[doc] = "Name of the SBOM metadata component. If set, > creates a \ > + software_Package element in the SBOM with image/product information. > Typically \ > + set to IMAGE_BASENAME or product name." I'm not sure why this change is in this patch? 
Same for the other following variables. > + > +SBOM_COMPONENT_VERSION ??= "${DISTRO_VERSION}" > +SBOM_COMPONENT_VERSION[doc] = "Version of the SBOM metadata component. Used > when \ > + SBOM_COMPONENT_NAME is set. Defaults to DISTRO_VERSION." > + > +SBOM_COMPONENT_SUMMARY ??= "" > +SBOM_COMPONENT_SUMMARY[doc] = "Description of the SBOM metadata component. > Used when \ > + SBOM_COMPONENT_NAME is set. Typically set to IMAGE_SUMMARY or product > description." > + > +SBOM_SUPPLIER_NAME ??= "" > +SBOM_SUPPLIER_NAME[doc] = "Name of the organization supplying the SBOM. If > set, \ > + creates an Organization element in the SBOM with supplier information." > + > +SBOM_SUPPLIER_URL ??= "" > +SBOM_SUPPLIER_URL[doc] = "URL of the organization supplying the SBOM. Used > when \ > + SBOM_SUPPLIER_NAME is set. Adds an external identifier with the > organization URL." > + > python () { > from oe.cve_check import extend_cve_status > extend_cve_status(d) > diff --git a/meta/lib/oe/spdx30_tasks.py b/meta/lib/oe/spdx30_tasks.py > index 99f2892dfb..bd703b5bec 100644 > --- a/meta/lib/oe/spdx30_tasks.py > +++ b/meta/lib/oe/spdx30_tasks.py > @@ -161,6 +161,11 @@ def add_package_files( > compiled_sources, types = oe.spdx_common.get_compiled_sources(d) > bb.debug(1, f"Total compiled files: {len(compiled_sources)}") > > + # File filtering configuration > + spdx_file_filter = (d.getVar("SPDX_FILE_FILTER") or "all").lower() > + essential_patterns = (d.getVar("SPDX_FILE_ESSENTIAL_PATTERNS") or > "").split() > + exclude_patterns = (d.getVar("SPDX_FILE_EXCLUDE_PATTERNS") or "").split() > + > for subdir, dirs, files in os.walk(topdir, onerror=walk_error): > dirs[:] = [d for d in dirs if d not in ignore_dirs] > if subdir == str(topdir): > @@ -174,6 +179,26 @@ def add_package_files( > continue > > filename = str(filepath.relative_to(topdir)) > + > + # Apply file filtering if enabled > + if spdx_file_filter == "essential": > + file_upper = file.upper() > + filename_lower = filename.lower() > + > + # 
Skip if matches exclude patterns > + skip_file = any(pattern in filename_lower for pattern in > exclude_patterns) > + if skip_file: > + continue > + > + # Keep only essential files (license/readme/etc) > + is_essential = any(pattern in file_upper for pattern in > essential_patterns) > + if not is_essential: > + continue > + elif spdx_file_filter == "none": > + # Skip all files > + continue > + # else: spdx_file_filter == "all" or any other value - include > all files > + > file_purposes = get_purposes(filepath) > > # Check if file is compiled > @@ -219,6 +244,8 @@ def add_package_files( > def get_package_sources_from_debug( > d, package, package_files, sources, source_hash_cache > ): > + spdx_file_filter = (d.getVar("SPDX_FILE_FILTER") or "all").lower() > + > def file_path_match(file_path, pkg_file): > if file_path.lstrip("/") == pkg_file.name.lstrip("/"): > return True > @@ -251,10 +278,19 @@ def get_package_sources_from_debug( > continue > > if not any(file_path_match(file_path, pkg_file) for pkg_file in > package_files): > - bb.fatal( > - "No package file found for %s in %s; SPDX found: %s" > - % (str(file_path), package, " ".join(p.name for p in > package_files)) > - ) > + # When file filtering is active, some files may be filtered out > + # Skip debug source lookup instead of failing > + if spdx_file_filter in ("none", "essential"): > + bb.debug( > + 1, > + f"Skipping debug source lookup for {file_path} in > {package} (filtered by SPDX_FILE_FILTER={spdx_file_filter})", > + ) > + continue > + else: > + bb.fatal( > + "No package file found for %s in %s; SPDX found: %s" > + % (str(file_path), package, " ".join(p.name for p in > package_files)) > + ) > continue > > for debugsrc in file_data["debugsrc"]: > -- > 2.53.0 >
-=-=-=-=-=-=-=-=-=-=-=- Links: You receive all messages sent to this group. View/Reply Online (#232616): https://lists.openembedded.org/g/openembedded-core/message/232616 Mute This Topic: https://lists.openembedded.org/mt/118136151/21656 Group Owner: [email protected] Unsubscribe: https://lists.openembedded.org/g/openembedded-core/unsub [[email protected]] -=-=-=-=-=-=-=-=-=-=-=-
