On Mon, Mar 9, 2026 at 7:29 AM <[email protected]> wrote:
>
> From: Stefano Tondo <[email protected]>
>
> Add version extraction, PURL generation, and external references
> to source download packages in SPDX 3.0 SBOMs:
>
> - Extract version from SRCREV for Git sources (full SHA-1)
> - Generate PURLs for Git sources on github.com by default
> - Support custom mappings via SPDX_GIT_PURL_MAPPINGS variable
> (format: "domain:purl_type", split(':', 1) for parsing)
> - Use ecosystem PURLs from SPDX_PACKAGE_URLS for non-Git
> - Add VCS external references for Git downloads
> - Add distribution external references for tarball downloads
> - Parse Git URLs using urllib.parse
> - Extract logic into _generate_git_purl() and
> _enrich_source_package() helpers
>
> The SPDX_GIT_PURL_MAPPINGS variable allows configuring PURL
> generation for self-hosted Git services (e.g., GitLab).
> github.com is always mapped to pkg:github by default.
>
> Signed-off-by: Stefano Tondo <[email protected]>
> ---
> meta/classes/create-spdx-3.0.bbclass | 7 ++
> meta/lib/oe/spdx30_tasks.py | 122 +++++++++++++++++++++++++++
> 2 files changed, 129 insertions(+)
>
> diff --git a/meta/classes/create-spdx-3.0.bbclass
> b/meta/classes/create-spdx-3.0.bbclass
> index def2dacbc3..9e912b34e1 100644
> --- a/meta/classes/create-spdx-3.0.bbclass
> +++ b/meta/classes/create-spdx-3.0.bbclass
> @@ -152,6 +152,13 @@ SPDX_PACKAGE_URLS[doc] = "A space separated list of
> Package URLs (purls) for \
> Override this variable to replace the default, otherwise append or
> prepend \
> to add additional purls."
>
> +SPDX_GIT_PURL_MAPPINGS ??= ""
> +SPDX_GIT_PURL_MAPPINGS[doc] = "A space separated list of domain:purl_type \
> + mappings to configure PURL generation for Git source downloads. \
> + For example, "gitlab.example.com:pkg:gitlab" maps repositories hosted \
> + on gitlab.example.com to the pkg:gitlab PURL type. \
> + github.com is always mapped to pkg:github by default."
> +
> IMAGE_CLASSES:append = " create-spdx-image-3.0"
> SDK_CLASSES += "create-spdx-sdk-3.0"
>
> diff --git a/meta/lib/oe/spdx30_tasks.py b/meta/lib/oe/spdx30_tasks.py
> index c3a23d7889..1f6c84628d 100644
> --- a/meta/lib/oe/spdx30_tasks.py
> +++ b/meta/lib/oe/spdx30_tasks.py
> @@ -13,6 +13,7 @@ import oe.spdx30
> import oe.spdx_common
> import oe.sdk
> import os
> +import urllib.parse
>
> from contextlib import contextmanager
> from datetime import datetime, timezone
> @@ -377,6 +378,125 @@ def collect_dep_sources(dep_objsets, dest):
> index_sources_by_hash(e.to, dest)
>
>
> +def _generate_git_purl(d, download_location, srcrev):
> + """Generate a Package URL for a Git source from its download location.
> +
> + Parses the Git URL to identify the hosting service and generates the
> + appropriate PURL type. Supports github.com by default and custom
> + mappings via SPDX_GIT_PURL_MAPPINGS.
> +
> + Returns the PURL string or None if no mapping matches.
> + """
> + if not download_location or not download_location.startswith('git+'):
> + return None
> +
> + git_url = download_location[4:] # Remove 'git+' prefix
> +
> + # Default handler: github.com
> + git_purl_handlers = {
> + 'github.com': 'pkg:github',
> + }
> +
> + # Custom PURL mappings from SPDX_GIT_PURL_MAPPINGS
> + # Format: "domain1:purl_type1 domain2:purl_type2"
> + custom_mappings = d.getVar('SPDX_GIT_PURL_MAPPINGS')
> + if custom_mappings:
> + for mapping in custom_mappings.split():
> + parts = mapping.split(':', 1)
> + if len(parts) == 2:
> + git_purl_handlers[parts[0]] = parts[1]
> + bb.debug(2, f"Added custom Git PURL mapping: {parts[0]} ->
> {parts[1]}")
> + else:
> + bb.warn(f"Invalid SPDX_GIT_PURL_MAPPINGS entry: {mapping}
> (expected format: domain:purl_type)")
> +
> + try:
> + parsed = urllib.parse.urlparse(git_url)
> + except Exception:
> + return None
> +
> + hostname = parsed.hostname
> + if not hostname:
> + return None
> +
> + for domain, purl_type in git_purl_handlers.items():
> + if hostname == domain:
> + path = parsed.path.strip('/')
> + path_parts = path.split('/')
> + if len(path_parts) >= 2:
> + owner = path_parts[0]
> + repo = path_parts[1].replace('.git', '')
> + return f"{purl_type}/{owner}/{repo}@{srcrev}"
> + break
> +
> + return None
> +
> +
> +def _enrich_source_package(d, dl, fd, file_name, primary_purpose):
> + """Enrich a source download package with version, PURL, and external
> refs.
> +
> + Extracts version from SRCREV for Git sources, generates PURLs for
> + known hosting services, and adds external references for VCS,
> + distribution URLs, and homepage.
> + """
> + version = None
> + purl = None
> +
> + if fd.type == "git":
> + # Use full SHA-1 from fd.revision
> + srcrev = getattr(fd, 'revision', None)
> + if srcrev and srcrev not in {'${AUTOREV}', 'AUTOINC', 'INVALID'}:
> + version = srcrev
> +
> + # Generate PURL for Git hosting services
> + download_location = getattr(dl, 'software_downloadLocation', None)
> + if version and download_location:
> + purl = _generate_git_purl(d, download_location, version)
> + else:
Everything else looks OK except for this else block. I'm not sure we
can reasonably say that the recipe PURL applies to every download
source just because that source is part of the recipe. _Most_ of the
time this is probably true, but I'm not sure it holds in every case,
which makes it feel a little dangerous (for example, crates, which I
know you had handled before).
> + # For non-Git sources, use recipe PV as version
> + pv = d.getVar('PV')
> + if pv and pv not in {'git', 'AUTOINC', 'INVALID', '${PV}'}:
> + version = pv
> +
> + # Use ecosystem PURL from SPDX_PACKAGE_URLS if available
> + package_urls = (d.getVar('SPDX_PACKAGE_URLS') or '').split()
> + for url in package_urls:
> + if not url.startswith('pkg:yocto'):
> + purl = url
> + break
> +
> + if version:
> + dl.software_packageVersion = version
> +
> + if purl:
> + dl.software_packageUrl = purl
> +
> + # Add external references
> + download_location = getattr(dl, 'software_downloadLocation', None)
> + if download_location and isinstance(download_location, str):
> + dl.externalRef = dl.externalRef or []
> +
> + if download_location.startswith('git+'):
> + # VCS reference for Git repositories
> + git_url = download_location[4:]
> + if '@' in git_url:
> + git_url = git_url.split('@')[0]
> +
> + dl.externalRef.append(
> + oe.spdx30.ExternalRef(
> + externalRefType=oe.spdx30.ExternalRefType.vcs,
> + locator=[git_url],
> + )
> + )
> + elif download_location.startswith(('http://', 'https://', 'ftp://')):
> + # Distribution reference for tarball/archive downloads
> + dl.externalRef.append(
> + oe.spdx30.ExternalRef(
> +
> externalRefType=oe.spdx30.ExternalRefType.altDownloadLocation,
> + locator=[download_location],
> + )
> + )
> +
> +
> def add_download_files(d, objset):
> inputs = set()
>
> @@ -440,6 +560,8 @@ def add_download_files(d, objset):
> )
> )
>
> + _enrich_source_package(d, dl, fd, file_name, primary_purpose)
> +
> if fd.method.supports_checksum(fd):
> # TODO Need something better than hard coding this
> for checksum_id in ["sha256", "sha1"]:
> --
> 2.53.0
>
-=-=-=-=-=-=-=-=-=-=-=-
Links: You receive all messages sent to this group.
View/Reply Online (#232916):
https://lists.openembedded.org/g/openembedded-core/message/232916
Mute This Topic: https://lists.openembedded.org/mt/118221139/21656
Group Owner: [email protected]
Unsubscribe: https://lists.openembedded.org/g/openembedded-core/unsub
[[email protected]]
-=-=-=-=-=-=-=-=-=-=-=-