This patch reads the beginning of each source file and tries to find
the SPDX-License-Identifier in order to populate the licenseInfoInFiles
field for that source file. It does not populate licenseConcluded
at this time, nor does it roll the result up to the package level.

Signed-off-by: Saul Wold <saul.w...@windriver.com>
---
 classes/create-spdx.bbclass | 25 +++++++++++++++++++++++++
 lib/oe/spdx.py              |  2 +-
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/classes/create-spdx.bbclass b/classes/create-spdx.bbclass
index 180d667..9c11945 100644
--- a/classes/create-spdx.bbclass
+++ b/classes/create-spdx.bbclass
@@ -30,6 +30,21 @@ SPDX_LICENSES ??= "${COREBASE}/meta/files/spdx-licenses.json"
 
 do_image_complete[depends] = "virtual/kernel:do_create_spdx"
 
+def extract_licenses(filename):
+    import re
+    lic_regex = re.compile('SPDX-License-Identifier:\s+([-A-Za-z\d. ]+)[ |\n|\r\n]*?')
+
+    try:
+        with open(filename, 'r') as f:
+            size = min(15000, os.stat(filename).st_size)
+            txt = f.read(size)
+            licenses = re.findall(lic_regex, txt)
+            if licenses:
+                return licenses
+    except Exception as e:
+        bb.warn(f"Exception on {filename}: {e}")
+        return None
+
 def get_doc_namespace(d, doc):
     import uuid
     namespace_uuid = uuid.uuid5(uuid.NAMESPACE_DNS, d.getVar("SPDX_UUID_NAMESPACE"))
@@ -232,6 +247,16 @@ def add_package_files(d, doc, spdx_pkg, topdir, get_spdxid, get_types, *, archiv
                         checksumValue=bb.utils.sha256_file(filepath),
                     ))
 
+                if "SOURCES" in spdx_file.fileTypes:
+                    licenses = extract_licenses(filepath)
+                    if licenses is not None:
+                        for lic in licenses:
+                            spdx_file.licenseInfoInFiles.append(lic.strip())
+                    else:
+                        spdx_file.licenseInfoInFiles.append("NOASSERTATION")
+                else:
+                    spdx_file.licenseInfoInFiles.append("NOASSERTATION")
+
                 doc.files.append(spdx_file)
                 doc.add_relationship(spdx_pkg, "CONTAINS", spdx_file)
                 spdx_pkg.hasFiles.append(spdx_file.SPDXID)
diff --git a/lib/oe/spdx.py b/lib/oe/spdx.py
index 9e7ced5..71e7c1c 100644
--- a/lib/oe/spdx.py
+++ b/lib/oe/spdx.py
@@ -236,7 +236,7 @@ class SPDXFile(SPDXObject):
     fileName = _String()
     licenseConcluded = _String(default="NOASSERTION")
     copyrightText = _String(default="NOASSERTION")
-    licenseInfoInFiles = _StringList(default=["NOASSERTION"])
+    licenseInfoInFiles = _StringList()
     checksums = _ObjectList(SPDXChecksum)
     fileTypes = _StringList()
 
-- 
2.31.1

-=-=-=-=-=-=-=-=-=-=-=-
Links: You receive all messages sent to this group.
View/Reply Online (#161084): 
https://lists.openembedded.org/g/openembedded-core/message/161084
Mute This Topic: https://lists.openembedded.org/mt/88756042/21656
Group Owner: openembedded-core+ow...@lists.openembedded.org
Unsubscribe: https://lists.openembedded.org/g/openembedded-core/unsub 
[arch...@mail-archive.com]
-=-=-=-=-=-=-=-=-=-=-=-

Reply via email to