On Mon, Mar 9, 2026 at 7:29 AM <[email protected]> wrote:
>
> From: Stefano Tondo <[email protected]>
>
> Add version extraction, PURL generation, and external references
> to source download packages in SPDX 3.0 SBOMs:
>
> - Extract version from SRCREV for Git sources (full SHA-1)
> - Generate PURLs for Git sources on github.com by default
> - Support custom mappings via SPDX_GIT_PURL_MAPPINGS variable
>   (format: "domain:purl_type", split(':', 1) for parsing)
> - Use ecosystem PURLs from SPDX_PACKAGE_URLS for non-Git
> - Add VCS external references for Git downloads
> - Add distribution external references for tarball downloads
> - Parse Git URLs using urllib.parse
> - Extract logic into _generate_git_purl() and
>   _enrich_source_package() helpers
>
> The SPDX_GIT_PURL_MAPPINGS variable allows configuring PURL
> generation for self-hosted Git services (e.g., GitLab).
> github.com is always mapped to pkg:github by default.
>
> Signed-off-by: Stefano Tondo <[email protected]>
> ---
>  meta/classes/create-spdx-3.0.bbclass |   7 ++
>  meta/lib/oe/spdx30_tasks.py          | 122 +++++++++++++++++++++++++++
>  2 files changed, 129 insertions(+)
>
> diff --git a/meta/classes/create-spdx-3.0.bbclass b/meta/classes/create-spdx-3.0.bbclass
> index def2dacbc3..9e912b34e1 100644
> --- a/meta/classes/create-spdx-3.0.bbclass
> +++ b/meta/classes/create-spdx-3.0.bbclass
> @@ -152,6 +152,13 @@ SPDX_PACKAGE_URLS[doc] = "A space separated list of Package URLs (purls) for \
>      Override this variable to replace the default, otherwise append or prepend \
>      to add additional purls."
>
> +SPDX_GIT_PURL_MAPPINGS ??= ""
> +SPDX_GIT_PURL_MAPPINGS[doc] = "A space separated list of domain:purl_type \
> +    mappings to configure PURL generation for Git source downloads. \
> +    For example, "gitlab.example.com:pkg:gitlab" maps repositories hosted \
> +    on gitlab.example.com to the pkg:gitlab PURL type. \
> +    github.com is always mapped to pkg:github by default."
> +
>  IMAGE_CLASSES:append = " create-spdx-image-3.0"
>  SDK_CLASSES += "create-spdx-sdk-3.0"
>
> diff --git a/meta/lib/oe/spdx30_tasks.py b/meta/lib/oe/spdx30_tasks.py
> index c3a23d7889..1f6c84628d 100644
> --- a/meta/lib/oe/spdx30_tasks.py
> +++ b/meta/lib/oe/spdx30_tasks.py
> @@ -13,6 +13,7 @@ import oe.spdx30
>  import oe.spdx_common
>  import oe.sdk
>  import os
> +import urllib.parse
>
>  from contextlib import contextmanager
>  from datetime import datetime, timezone
> @@ -377,6 +378,125 @@ def collect_dep_sources(dep_objsets, dest):
>              index_sources_by_hash(e.to, dest)
>
>
> +def _generate_git_purl(d, download_location, srcrev):
> +    """Generate a Package URL for a Git source from its download location.
> +
> +    Parses the Git URL to identify the hosting service and generates the
> +    appropriate PURL type. Supports github.com by default and custom
> +    mappings via SPDX_GIT_PURL_MAPPINGS.
> +
> +    Returns the PURL string or None if no mapping matches.
> +    """
> +    if not download_location or not download_location.startswith('git+'):
> +        return None
> +
> +    git_url = download_location[4:]  # Remove 'git+' prefix
> +
> +    # Default handler: github.com
> +    git_purl_handlers = {
> +        'github.com': 'pkg:github',
> +    }
> +
> +    # Custom PURL mappings from SPDX_GIT_PURL_MAPPINGS
> +    # Format: "domain1:purl_type1 domain2:purl_type2"
> +    custom_mappings = d.getVar('SPDX_GIT_PURL_MAPPINGS')
> +    if custom_mappings:
> +        for mapping in custom_mappings.split():
> +            parts = mapping.split(':', 1)
> +            if len(parts) == 2:
> +                git_purl_handlers[parts[0]] = parts[1]
> +                bb.debug(2, f"Added custom Git PURL mapping: {parts[0]} -> {parts[1]}")
> +            else:
> +                bb.warn(f"Invalid SPDX_GIT_PURL_MAPPINGS entry: {mapping} (expected format: domain:purl_type)")
> +
> +    try:
> +        parsed = urllib.parse.urlparse(git_url)
> +    except Exception:
> +        return None
> +
> +    hostname = parsed.hostname
> +    if not hostname:
> +        return None
> +
> +    for domain, purl_type in git_purl_handlers.items():
> +        if hostname == domain:
> +            path = parsed.path.strip('/')
> +            path_parts = path.split('/')
> +            if len(path_parts) >= 2:
> +                owner = path_parts[0]
> +                repo = path_parts[1].replace('.git', '')
> +                return f"{purl_type}/{owner}/{repo}@{srcrev}"
> +            break
> +
> +    return None
> +
> +
> +def _enrich_source_package(d, dl, fd, file_name, primary_purpose):
> +    """Enrich a source download package with version, PURL, and external refs.
> +
> +    Extracts version from SRCREV for Git sources, generates PURLs for
> +    known hosting services, and adds external references for VCS,
> +    distribution URLs, and homepage.
> +    """
> +    version = None
> +    purl = None
> +
> +    if fd.type == "git":
> +        # Use full SHA-1 from fd.revision
> +        srcrev = getattr(fd, 'revision', None)
> +        if srcrev and srcrev not in {'${AUTOREV}', 'AUTOINC', 'INVALID'}:
> +            version = srcrev
> +
> +        # Generate PURL for Git hosting services
> +        download_location = getattr(dl, 'software_downloadLocation', None)
> +        if version and download_location:
> +            purl = _generate_git_purl(d, download_location, version)
> +    else:
> +        # For non-Git sources, use recipe PV as version
> +        pv = d.getVar('PV')
> +        if pv and pv not in {'git', 'AUTOINC', 'INVALID', '${PV}'}:
> +            version = pv
> +
> +        # Use ecosystem PURL from SPDX_PACKAGE_URLS if available
> +        package_urls = (d.getVar('SPDX_PACKAGE_URLS') or '').split()
> +        for url in package_urls:
> +            if not url.startswith('pkg:yocto'):
> +                purl = url
> +                break
> +
> +    if version:
> +        dl.software_packageVersion = version

Oh, and about this version: I'm not sure you can say that the version of
the recipe is the version of all the downloaded files.

> +
> +    if purl:
> +        dl.software_packageUrl = purl
> +
> +    # Add external references
> +    download_location = getattr(dl, 'software_downloadLocation', None)
> +    if download_location and isinstance(download_location, str):
> +        dl.externalRef = dl.externalRef or []
> +
> +        if download_location.startswith('git+'):
> +            # VCS reference for Git repositories
> +            git_url = download_location[4:]
> +            if '@' in git_url:
> +                git_url = git_url.split('@')[0]
> +
> +            dl.externalRef.append(
> +                oe.spdx30.ExternalRef(
> +                    externalRefType=oe.spdx30.ExternalRefType.vcs,
> +                    locator=[git_url],
> +                )
> +            )
> +        elif download_location.startswith(('http://', 'https://', 'ftp://')):
> +            # Distribution reference for tarball/archive downloads
> +            dl.externalRef.append(
> +                oe.spdx30.ExternalRef(
> +                    externalRefType=oe.spdx30.ExternalRefType.altDownloadLocation,
> +                    locator=[download_location],
> +                )
> +            )
> +
> +
>  def add_download_files(d, objset):
>      inputs = set()
>
> @@ -440,6 +560,8 @@ def add_download_files(d, objset):
>                  )
>              )
>
> +            _enrich_source_package(d, dl, fd, file_name, primary_purpose)
> +
>              if fd.method.supports_checksum(fd):
>                  # TODO Need something better than hard coding this
>                  for checksum_id in ["sha256", "sha1"]:
> --
> 2.53.0
>
-=-=-=-=-=-=-=-=-=-=-=-
Links: You receive all messages sent to this group.
View/Reply Online (#232917): 
https://lists.openembedded.org/g/openembedded-core/message/232917
Mute This Topic: https://lists.openembedded.org/mt/118221139/21656
Group Owner: [email protected]
Unsubscribe: https://lists.openembedded.org/g/openembedded-core/unsub 
[[email protected]]
-=-=-=-=-=-=-=-=-=-=-=-

Reply via email to