On Tue, 2026-02-17 at 14:01 -0600, Randolph Sapp via
lists.openembedded.org wrote:
> From: Randolph Sapp <[email protected]>
> 
> Anything that defines multiple git sources should have the largest value
> taken when calculating the SOURCE_DATE_EPOCH for a package.
> 
> The previous iteration actually introduced some degree of randomness, as
> it would stop on the first git repository reported by os.walk, which
> does not assure any specific ordering by default.
> 
> Signed-off-by: Randolph Sapp <[email protected]>
> ---
> 
> v2: Use os.walk method as opposed to glob to avoid infinite recursion when
> navigating symbolic links
> 
>  meta/lib/oe/reproducible.py | 63 ++++++++++++++++---------------------
>  1 file changed, 27 insertions(+), 36 deletions(-)
> 
> diff --git a/meta/lib/oe/reproducible.py b/meta/lib/oe/reproducible.py
> index 0270024a83..c58db48fb1 100644
> --- a/meta/lib/oe/reproducible.py
> +++ b/meta/lib/oe/reproducible.py
> @@ -74,52 +74,43 @@ def get_source_date_epoch_from_known_files(d, sourcedir):
>          bb.debug(1, "SOURCE_DATE_EPOCH taken from: %s" % newest_file)
>      return source_date_epoch
> 
> -def find_git_folder(d, sourcedir):
> -    # First guess: UNPACKDIR/BB_GIT_DEFAULT_DESTSUFFIX
> -    # This is the default git fetcher unpack path
> +def find_git_folders(d, sourcedir):
>      unpackdir = d.getVar('UNPACKDIR')
> -    default_destsuffix = d.getVar('BB_GIT_DEFAULT_DESTSUFFIX')
> -    gitpath = os.path.join(unpackdir, default_destsuffix, ".git")
> -    if os.path.isdir(gitpath):
> -        return gitpath
> -
> -    # Second guess: ${S}
> -    gitpath = os.path.join(sourcedir, ".git")
> -    if os.path.isdir(gitpath):
> -        return gitpath
> -
> -    # Perhaps there was a subpath or destsuffix specified.
> -    # Go looking in the UNPACKDIR
> -    for root, dirs, files in os.walk(unpackdir, topdown=True):
> -        if '.git' in dirs:
> -            return os.path.join(root, ".git")
> +    git_folders = []
> 
> -    for root, dirs, files in os.walk(sourcedir, topdown=True):
> -        if '.git' in dirs:
> -            return os.path.join(root, ".git")
> +    for mainpath in (sourcedir, unpackdir):
> +        for root, dirs, _ in os.walk(mainpath, topdown=True):
> +            if ".git" in dirs:

Do we need to add handling for git submodules? In submodules, '.git' is
a file instead of a directory.

> +                git_folders.append(os.path.join(root, ".git"))

We should change this to `git_folders.append(root)` (see below).

> 
> -    bb.warn("Failed to find a git repository in UNPACKDIR: %s" % unpackdir)
> -    return None
> +    if not git_folders:
> +        bb.warn("Failed to find any git repository in UNPACKDIR or S")
> +
> +    return git_folders
> 
>  def get_source_date_epoch_from_git(d, sourcedir):
>      if not "git://" in d.getVar('SRC_URI') and not "gitsm://" in d.getVar('SRC_URI'):
>          return None
> 
> -    gitpath = find_git_folder(d, sourcedir)
> -    if not gitpath:
> -        return None
> +    # Get an epoch from all valid git repositories
> +    sources_dates = []
> +    for gitpath in find_git_folders(d, sourcedir):
> +        # Check that the repository has a valid HEAD; it may not if subdir is used
> +        # in SRC_URI
> +        p = subprocess.run(['git', '--git-dir', gitpath, 'rev-parse', 'HEAD'], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

Using '--git-dir' does not set the path to the worktree correctly. This
may work, but it's fragile. While we're modifying things here, can we
change find_git_folders() to return the paths of the repository roots
instead of the .git directories? Then we can use 'git -C path ...' here,
which is much less likely to have issues in the future.

> +        if p.returncode != 0:
> +            bb.debug(1, "%s does not have a valid HEAD: %s" % (gitpath, p.stdout.decode('utf-8')))
> +            continue
> 
> -    # Check that the repository has a valid HEAD; it may not if subdir is used
> -    # in SRC_URI
> -    p = subprocess.run(['git', '--git-dir', gitpath, 'rev-parse', 'HEAD'], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
> -    if p.returncode != 0:
> -        bb.debug(1, "%s does not have a valid HEAD: %s" % (gitpath, p.stdout.decode('utf-8')))
> -        return None
> +        bb.debug(1, "git repository: %s" % gitpath)
> +        p = subprocess.run(['git', '-c', 'log.showSignature=false', '--git-dir', gitpath, 'log', '-1', '--pretty=%ct'],
> +                           check=True, stdout=subprocess.PIPE)
> +        sources_dates.append(int(p.stdout.decode('utf-8')))
> +
> +    if sources_dates:
> +        return sorted(sources_dates, reverse=True)[0]

Can we use `max(sources_dates)` here?

Best regards,

-- 
Paul Barker

Attachment: signature.asc
Description: This is a digitally signed message part

-=-=-=-=-=-=-=-=-=-=-=-
Links: You receive all messages sent to this group.
View/Reply Online (#231428): 
https://lists.openembedded.org/g/openembedded-core/message/231428
Mute This Topic: https://lists.openembedded.org/mt/117863504/21656
Group Owner: [email protected]
Unsubscribe: https://lists.openembedded.org/g/openembedded-core/unsub 
[[email protected]]
-=-=-=-=-=-=-=-=-=-=-=-

Reply via email to