Re: [arch-projects] [dbscripts] [PATCH 2/4] Add reproducible archive of packages.

2018-12-04 Thread Eli Schwartz via arch-projects
On 12/4/18 1:09 PM, Eli Schwartz wrote:
> Whenever adding new package files to the pool of distributed packages,
> hardlink a copy of every package it was built with, into a
> "reproducible" pool, and log which file required it.

The question becomes: where can I store these? As-is, this will burden
the mirror network as well, and I am unsure how to handle that. Could this
be configurable by the mirror, as ISOs are now? Should we exclusively
self-host this, and if so, where?

archive.archlinux.org is managed by a separate service, which has its own
location that only that service may write to.

> Signed-off-by: Eli Schwartz 
> ---
>  config |  1 +
>  config.local.svn-community |  1 +
>  config.local.svn-packages  |  1 +
>  db-functions   | 49 +++---
>  db-update  |  4 
>  5 files changed, 48 insertions(+), 8 deletions(-)
> 
> diff --git a/config b/config
> index 1cfe11f4..5144fca7 100644
> --- a/config
> +++ b/config
> @@ -3,6 +3,7 @@
>  FTP_BASE="/srv/ftp"
>  PKGREPOS=()
>  PKGPOOL=''
> +EXTRA_PKGPOOLS=()
>  SRCPOOL=''
>  TESTING_REPO=''
>  STABLE_REPOS=()
> diff --git a/config.local.svn-community b/config.local.svn-community
> index 5d61b5ea..15bcc17f 100644
> --- a/config.local.svn-community
> +++ b/config.local.svn-community
> @@ -2,6 +2,7 @@
>  
>  PKGREPOS=('community' 'community-testing' 'community-staging' 'multilib' 
> 'multilib-testing' 'multilib-staging')
>  PKGPOOL='pool/community'
> +EXTRA_PKGPOOLS=('pool/packages')
>  SRCPOOL='sources/community'
>  SVNREPO='file:///srv/repos/svn-community/svn'
>  SVNUSER='svn-community'
> diff --git a/config.local.svn-packages b/config.local.svn-packages
> index 34aab35c..75986b65 100644
> --- a/config.local.svn-packages
> +++ b/config.local.svn-packages
> @@ -2,6 +2,7 @@
>  
>  PKGREPOS=('core' 'extra' 'testing' 'staging' 'kde-unstable' 'gnome-unstable')
>  PKGPOOL='pool/packages'
> +EXTRA_PKGPOOLS=('pool/community')
>  SRCPOOL='sources/packages'
>  SVNREPO='file:///srv/repos/svn-packages/svn'
>  SVNUSER='svn-packages'
> diff --git a/db-functions b/db-functions
> index 7aeedced..2b1ae87a 100644
> --- a/db-functions
> +++ b/db-functions
> @@ -165,20 +165,23 @@ repo_unlock () { #repo_unlock  
>   fi
>  }
>  
> +# usage: _grep_all_info pkgfile infofile key
> +_grep_all_info() {
> + local _ret=()
> +
> + mapfile -t _ret < <(/usr/bin/bsdtar -xOqf "$1" "${2}" | grep "^${3} = ")
> +
> + printf '%s\n' "${_ret[@]#${3} = }"
> +}
> +
>  # usage: _grep_pkginfo pkgfile pattern
>  _grep_pkginfo() {
> - local _ret
> -
> - _ret="$(/usr/bin/bsdtar -xOqf "$1" .PKGINFO | grep "^${2} = " | tail 
> -1)"
> - echo "${_ret#${2} = }"
> + _grep_all_info "${1}" .PKGINFO "${2}" | tail -1
>  }
>  
>  # usage: _grep_buildinfo pkgfile pattern
>  _grep_buildinfo() {
> - local _ret
> -
> - _ret="$(/usr/bin/bsdtar -xOqf "$1" .BUILDINFO | grep "^${2} = " | tail 
> -1)"
> - echo "${_ret#${2} = }"
> + _grep_all_info "${1}" .BUILDINFO "${2}" | tail -1
>  }
>  
>  # Get the package base or name as fallback
> @@ -444,4 +447,34 @@ arch_repo_modify() {
>   REPO_MODIFIED=1
>  }
>  
> +# Build an index of dependent packages needed by a given pkgfile
> +# usage: make_reproducible pkgfile [check]
> +make_reproducible() {
> + local pkg dir pkgs=() pkgfile pkgfiles=()
> +
> + mapfile -t pkgs < <(_grep_all_info "${1}" .BUILDINFO installed)
> +
> + for pkg in "${pkgs[@]}"; do
> + for dir in "${FTP_BASE}/${PKGPOOL}" 
> "${EXTRA_PKGPOOLS[@]/#/${FTP_BASE}/}" "${STAGING}"/**/; do
> + if pkgfile="$(getpkgfile "${dir}/${pkg}"${PKGEXTS} 
> 2>/dev/null)"; then
> + pkgfiles+=("${pkgfile}")
> + continue 2
> + fi
> + done
> + error "could not find existing package for %s" "${pkg}"
> + return 1
> + done
> +
> + if [[ ${2} = check ]]; then
> + return 0
> + fi
> +
> + for pkg in "${pkgfiles[@]}"; do
> + if [[ ! -f ${FTP_BASE}/${PKGPOOL}-reproducible/${pkg##*/} ]]; 
> then
> + ln -L "${pkg}" 
> "${FTP_BASE}/${PKGPOOL}-reproducible/${pkg##*/}"
> + fi
> + echo "${1}" >> 
> "${FTP_BASE}/${PKGPOOL}-reproducible/${pkg##*/}.buildlinks"
> + done
> +}
> +
>  . "$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")/db-functions-${VCS}"
> diff --git a/db-update b/db-update
> index 313fb999..11ec185f 100755
> --- a/db-update
> +++ b/db-update
> @@ -61,6 +61,9 @@ for repo in "${repos[@]}"; do
>   if ! check_builddir "${pkg}"; then
>   die "Package %s was not built in a chroot" 
> "$repo/${pkg##*/}"
>   fi
> + if ! make_reproducible "${pkg}" "check"; then
> + die "Package %s is not reproducible" "${pkg}"
> + fi
>   done
>   if ! 

[arch-projects] [dbscripts] [PATCH 3/4] ftpdir-cleanup: remove useless loop and make clean_pkg take a targetdir

2018-12-04 Thread Eli Schwartz via arch-projects
This is never called for multiple packages at once, and if it were,
it would be named clean_pkgs. Meanwhile, the code implied that it could
take a variable target, but that code was never used. Make it useful,
because we might (will) use it.

Signed-off-by: Eli Schwartz 
---
 cron-jobs/ftpdir-cleanup | 22 ++
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/cron-jobs/ftpdir-cleanup b/cron-jobs/ftpdir-cleanup
index 9df5f99a..74b771cd 100755
--- a/cron-jobs/ftpdir-cleanup
+++ b/cron-jobs/ftpdir-cleanup
@@ -4,21 +4,19 @@
 . "$(dirname "$(readlink -e "$0")")/../db-functions"
 
 clean_pkg() {
-   local pkg
-   local target
+   local pkg=${1}
+   local targetdir=${2:-${CLEANUP_DESTDIR}}
 
if [[ $CLEANUP_DRYRUN != true ]]; then
-   for pkg in "$@"; do
-   if [[ -h $pkg ]]; then
-   rm -f "$pkg" "$pkg.sig"
-   else
-   mv_acl "$pkg" "$CLEANUP_DESTDIR/${pkg##*/}"
-   if [[ -e $pkg.sig ]]; then
-   mv_acl "$pkg.sig" 
"$CLEANUP_DESTDIR/${pkg##*/}.sig"
-   fi
-   touch "${CLEANUP_DESTDIR}/${pkg##*/}"
+   if [[ -h ${pkg} ]]; then
+   rm -f "${pkg}" "${pkg}.sig"
+   else
+   mv_acl "${pkg}" "${targetdir}/${pkg##*/}"
+   if [[ -e ${pkg}.sig ]]; then
+   mv_acl "${pkg}.sig" 
"${targetdir}/${pkg##*/}.sig"
fi
-   done
+   touch "${targetdir}/${pkg##*/}"
+   fi
fi
 }
 
-- 
2.19.2


[arch-projects] [dbscripts] [PATCH 2/4] Add reproducible archive of packages.

2018-12-04 Thread Eli Schwartz via arch-projects
Whenever adding new package files to the pool of distributed packages,
hardlink a copy of every package it was built with, into a
"reproducible" pool, and log which file required it.

Signed-off-by: Eli Schwartz 
---
 config |  1 +
 config.local.svn-community |  1 +
 config.local.svn-packages  |  1 +
 db-functions   | 49 +++---
 db-update  |  4 
 5 files changed, 48 insertions(+), 8 deletions(-)

diff --git a/config b/config
index 1cfe11f4..5144fca7 100644
--- a/config
+++ b/config
@@ -3,6 +3,7 @@
 FTP_BASE="/srv/ftp"
 PKGREPOS=()
 PKGPOOL=''
+EXTRA_PKGPOOLS=()
 SRCPOOL=''
 TESTING_REPO=''
 STABLE_REPOS=()
diff --git a/config.local.svn-community b/config.local.svn-community
index 5d61b5ea..15bcc17f 100644
--- a/config.local.svn-community
+++ b/config.local.svn-community
@@ -2,6 +2,7 @@
 
 PKGREPOS=('community' 'community-testing' 'community-staging' 'multilib' 
'multilib-testing' 'multilib-staging')
 PKGPOOL='pool/community'
+EXTRA_PKGPOOLS=('pool/packages')
 SRCPOOL='sources/community'
 SVNREPO='file:///srv/repos/svn-community/svn'
 SVNUSER='svn-community'
diff --git a/config.local.svn-packages b/config.local.svn-packages
index 34aab35c..75986b65 100644
--- a/config.local.svn-packages
+++ b/config.local.svn-packages
@@ -2,6 +2,7 @@
 
 PKGREPOS=('core' 'extra' 'testing' 'staging' 'kde-unstable' 'gnome-unstable')
 PKGPOOL='pool/packages'
+EXTRA_PKGPOOLS=('pool/community')
 SRCPOOL='sources/packages'
 SVNREPO='file:///srv/repos/svn-packages/svn'
 SVNUSER='svn-packages'
diff --git a/db-functions b/db-functions
index 7aeedced..2b1ae87a 100644
--- a/db-functions
+++ b/db-functions
@@ -165,20 +165,23 @@ repo_unlock () { #repo_unlock  
fi
 }
 
+# usage: _grep_all_info pkgfile infofile key
+_grep_all_info() {
+   local _ret=()
+
+   mapfile -t _ret < <(/usr/bin/bsdtar -xOqf "$1" "${2}" | grep "^${3} = ")
+
+   printf '%s\n' "${_ret[@]#${3} = }"
+}
+
 # usage: _grep_pkginfo pkgfile pattern
 _grep_pkginfo() {
-   local _ret
-
-   _ret="$(/usr/bin/bsdtar -xOqf "$1" .PKGINFO | grep "^${2} = " | tail 
-1)"
-   echo "${_ret#${2} = }"
+   _grep_all_info "${1}" .PKGINFO "${2}" | tail -1
 }
 
 # usage: _grep_buildinfo pkgfile pattern
 _grep_buildinfo() {
-   local _ret
-
-   _ret="$(/usr/bin/bsdtar -xOqf "$1" .BUILDINFO | grep "^${2} = " | tail 
-1)"
-   echo "${_ret#${2} = }"
+   _grep_all_info "${1}" .BUILDINFO "${2}" | tail -1
 }
 
 # Get the package base or name as fallback
@@ -444,4 +447,34 @@ arch_repo_modify() {
REPO_MODIFIED=1
 }
 
+# Build an index of dependent packages needed by a given pkgfile
+# usage: make_reproducible pkgfile [check]
+make_reproducible() {
+   local pkg dir pkgs=() pkgfile pkgfiles=()
+
+   mapfile -t pkgs < <(_grep_all_info "${1}" .BUILDINFO installed)
+
+   for pkg in "${pkgs[@]}"; do
+   for dir in "${FTP_BASE}/${PKGPOOL}" 
"${EXTRA_PKGPOOLS[@]/#/${FTP_BASE}/}" "${STAGING}"/**/; do
+   if pkgfile="$(getpkgfile "${dir}/${pkg}"${PKGEXTS} 
2>/dev/null)"; then
+   pkgfiles+=("${pkgfile}")
+   continue 2
+   fi
+   done
+   error "could not find existing package for %s" "${pkg}"
+   return 1
+   done
+
+   if [[ ${2} = check ]]; then
+   return 0
+   fi
+
+   for pkg in "${pkgfiles[@]}"; do
+   if [[ ! -f ${FTP_BASE}/${PKGPOOL}-reproducible/${pkg##*/} ]]; 
then
+   ln -L "${pkg}" 
"${FTP_BASE}/${PKGPOOL}-reproducible/${pkg##*/}"
+   fi
+   echo "${1}" >> 
"${FTP_BASE}/${PKGPOOL}-reproducible/${pkg##*/}.buildlinks"
+   done
+}
+
 . "$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")/db-functions-${VCS}"
diff --git a/db-update b/db-update
index 313fb999..11ec185f 100755
--- a/db-update
+++ b/db-update
@@ -61,6 +61,9 @@ for repo in "${repos[@]}"; do
if ! check_builddir "${pkg}"; then
die "Package %s was not built in a chroot" 
"$repo/${pkg##*/}"
fi
+   if ! make_reproducible "${pkg}" "check"; then
+   die "Package %s is not reproducible" "${pkg}"
+   fi
done
if ! check_splitpkgs "${repo}" "${pkgs[@]}"; then
die "Missing split packages for %s" "$repo"
@@ -82,6 +85,7 @@ for repo in "${repos[@]}"; do
# any packages might have been moved by the previous run
if [[ -f ${pkg} ]]; then
mv "${pkg}" "$FTP_BASE/${PKGPOOL}"
+   make_reproducible 
"${FTP_BASE}/${PKGPOOL}${pkg##*/}"
fi
ln -s "../../../${PKGPOOL}/${pkgfile}" 
"$FTP_BASE/$repo/os/${pkgarch}"

[arch-projects] [dbscripts] [PATCH 4/4] ftpdir-cleanup: handle removal of reproducible archives as well

2018-12-04 Thread Eli Schwartz via arch-projects
This reuses the logic already used for normal package deletion, but
cleanup of the reproducible archive happens in a subdirectory of
${CLEANUP_DESTDIR} while still being subject to the same timeouts.

Signed-off-by: Eli Schwartz 
---
 cron-jobs/ftpdir-cleanup | 18 ++
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/cron-jobs/ftpdir-cleanup b/cron-jobs/ftpdir-cleanup
index 74b771cd..87af2486 100755
--- a/cron-jobs/ftpdir-cleanup
+++ b/cron-jobs/ftpdir-cleanup
@@ -58,6 +58,7 @@ for repo in "${PKGREPOS[@]}"; do
for old_pkg in "${old_pkgs[@]}"; do
msg2 '%s' "${old_pkg}"
clean_pkg 
"${FTP_BASE}/${repo}/os/${arch}/${old_pkg}"
+   find "${FTP_BASE}/${PKGPOOL}-reproducible/" 
-name "*.buildlinks" -exec sed -i "/${old_pkg%${PKGEXTS}}/d" {} +
done
fi
done
@@ -79,20 +80,29 @@ if (( ${#old_pkgs[@]} >= 1 )); then
done
 fi
 
+mapfile -td '' old_reproducible_pkgs < <(find 
"${FTP_BASE}/${PKGPOOL}-reproducible/" -name '*.buildlinks' -empty -printf 
'%f\0')
+if (( ${#old_reproducible_pkgs[@]} >= 1 )); then
+   msg "Removing old packages from reproducible pool..."
+   for old_pkg in "${old_reproducible_pkgs[@]}"; do
+   msg2 '%s' "${old_pkg}"
+   clean_pkg "${FTP_BASE}/${PKGPOOL}-reproducible/${old_pkg}" 
"${CLEANUP_DESTDIR}/reproducible"
+   done
+fi
+
 unset old_pkgs
 touch -d "${CLEANUP_KEEP} days ago"  "${WORKDIR}/cleanup_timestamp"
 for f in "${CLEANUP_DESTDIR}"/**/*${PKGEXTS}; do
if [[ ${WORKDIR}/cleanup_timestamp -nt $f ]]; then
-   old_pkgs+=("${f##*/}")
+   old_pkgs+=("${f}")
fi
 done
 if (( ${#old_pkgs[@]} >= 1 )); then
msg "Removing old packages from the cleanup directory..."
for old_pkg in "${old_pkgs[@]}"; do
-   msg2 '%s' "${old_pkg}"
+   msg2 '%s' "${old_pkg#${CLEANUP_DESTDIR}/}"
if [[ $CLEANUP_DRYRUN != true ]]; then
-   rm -f "${CLEANUP_DESTDIR}/${old_pkg}"
-   rm -f "${CLEANUP_DESTDIR}/${old_pkg}.sig"
+   rm -f "${old_pkg}"
+   rm -f "${old_pkg}.sig"
fi
done
 fi
-- 
2.19.2


[arch-projects] [dbscripts] [PATCH 1/4] tests: make dummy copies of all pkgpool packages in the test environment

2018-12-04 Thread Eli Schwartz via arch-projects
Prerequisite for reproducible archives of packages.

Signed-off-by: Eli Schwartz 
---
 test/lib/common.bash | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/test/lib/common.bash b/test/lib/common.bash
index bc2b4e6d..3dda5f62 100644
--- a/test/lib/common.bash
+++ b/test/lib/common.bash
@@ -102,6 +102,7 @@ setup() {
SVNREPO="file://${TMP}/svn-packages-repo"
PKGREPOS=('core' 'extra' 'testing')
PKGPOOL='pool/packages'
+   EXTRA_PKGPOOLS=('pool/community')
SRCPOOL='sources/packages'
TESTING_REPO='testing'
STABLE_REPOS=('core' 'extra')
@@ -123,9 +124,18 @@ eot
mkdir -p "${TMP}/ftp/${r}/os/${a}"
done
done
-   mkdir -p "${TMP}/ftp/${PKGPOOL}"
+   mkdir -p "${TMP}/ftp/${PKGPOOL}"{,-reproducible}
+   mkdir -p "${TMP}/ftp/${EXTRA_PKGPOOLS[0]}"
mkdir -p "${TMP}/ftp/${SRCPOOL}"
 
+   # make dummy packages for "reproducibility"
+   comm -12 <(pacman -Sql core extra | sort -u) <(pacman -Qq | sort -u) | 
pacman -Sddp - | while read -r line; do
+   touch "${FTP_BASE}/${PKGPOOL}/${line##*/}"{,.sig}
+   done
+   comm -12 <(pacman -Sql community | sort -u) <(pacman -Qq | sort -u) | 
pacman -Sddp - | while read -r line; do
+   touch "${FTP_BASE}/${EXTRA_PKGPOOLS[0]}/${line##*/}"{,.sig}
+   done
+
svnadmin create "${TMP}/svn-packages-repo"
svn checkout -q "file://${TMP}/svn-packages-repo" 
"${TMP}/svn-packages-copy"
 }
-- 
2.19.2