The global 'exclude' has '--exclude "*/repodata/*"' and you are using
"${excludes[@]}" everywhere, in all three syncs. It looks like
'repodata/*' will never be synced.

Besides that, I think it is a very good change.

                Adrian

On Fri, Mar 27, 2020 at 09:33:03AM +0100, Pavel Raiskup wrote:
> 1. sync everything except for repomd.xml
> 2. then sync repomd.xml files only, and invalidate caches
> 3. gently wait a bit to give current downloads a chance
> 4. delete outdated RPMs and metadata, shouldn't be needed
> 
> Also make the sleep/cache configurable.
> ---
>  roles/s3-mirror/files/s3-sync-path.sh | 99 ++++++++++++++-------------
>  roles/s3-mirror/files/s3.sh           | 19 +++--
>  2 files changed, 65 insertions(+), 53 deletions(-)
> 
> diff --git a/roles/s3-mirror/files/s3-sync-path.sh 
> b/roles/s3-mirror/files/s3-sync-path.sh
> index 79b4d63eb..5a414e3ad 100644
> --- a/roles/s3-mirror/files/s3-sync-path.sh
> +++ b/roles/s3-mirror/files/s3-sync-path.sh
> @@ -9,58 +9,65 @@ if [[ "$1" == "" ]] || [[ $1 != /pub* ]] || [[ $1 != */ ]]; 
> then
>    exit 1
>  fi
>  
> +aws_sync=( aws s3 sync --no-follow-symlinks )
> +
>  # first run do not delete anything or copy the repodata.
> -CMD1="aws s3 sync                   \
> -  --exclude */repodata/*         \
> -  --exclude *.snapshot/*          \
> -  --exclude *source/*             \
> -  --exclude *SRPMS/*              \
> -  --exclude *debug/*              \
> -  --exclude *beta/*               \
> -  --exclude *ppc/*                \
> -  --exclude *ppc64/*              \
> -  --exclude *repoview/*           \
> -  --exclude *Fedora/*             \
> -  --exclude *EFI/*                \
> -  --exclude *core/*               \
> -  --exclude *extras/*             \
> -  --exclude *LiveOS/*             \
> -  --exclude *development/rawhide/* \
> -  --no-follow-symlinks            \
> -  --only-show-errors              \
> -  "
> -  #--dryrun                         \
> +exclude=(
> +  --exclude "*/repodata/*"
> +  --exclude "*.snapshot/*"
> +  --exclude "*source/*"
> +  --exclude "*SRPMS/*"
> +  --exclude "*debug/*"
> +  --exclude "*beta/*"
> +  --exclude "*ppc/*"
> +  --exclude "*ppc64/*"
> +  --exclude "*repoview/*"
> +  --exclude "*Fedora/*"
> +  --exclude "*EFI/*"
> +  --exclude "*core/*"
> +  --exclude "*extras/*"
> +  --exclude "*LiveOS/*"
> +  --exclude "*development/rawhide/*"
> +  --only-show-errors
> +)
>  
> -# second we delete old content and also copy the repodata
> -CMD2="aws s3 sync                   \
> -  --delete                         \
> -  --exclude *.snapshot/*          \
> -  --exclude *source/*             \
> -  --exclude *SRPMS/*              \
> -  --exclude *debug/*              \
> -  --exclude *beta/*               \
> -  --exclude *ppc/*                \
> -  --exclude *ppc64/*              \
> -  --exclude *repoview/*           \
> -  --exclude *Fedora/*             \
> -  --exclude *EFI/*                \
> -  --exclude *core/*               \
> -  --exclude *extras/*             \
> -  --exclude *LiveOS/*             \
> -  --exclude *development/rawhide/* \
> -  --no-follow-symlinks            \
> -  --only-show-errors              \
> -  "
> -  #--dryrun                         \
> +S3_MIRROR=s3-mirror-us-west-1-02.fedoraproject.org
> +DIST_ID=E2KJMDC0QAJDMU
> +MAX_CACHE_SEC=60
> +DNF_GENTLY_TIMEOUT=120
> +
> +# First run this command that syncs, but does not delete.
> +# It also excludes repomd.xml.
> +CMD1=( "${aws_sync[@]}" "${excludes[@]}" --exclude "*/repomd.xml" )
> +
> +# Next we run this command which syncs repomd.xml files.  Include must 
> precede
> +# the large set of excludes.  Make sure that the 'max-age' isn't too large so
> +# we know that we can start removing old data ASAP.
> +CMD2=( "${aws_sync[@]}" --exclude "*" --include "*/repomd.xml" 
> "${excludes[@]}"
> +                        --cache-control "max-age=$MAX_CACHE_SEC" )
> +
> +# Then we delete old RPMs and old metadata (but after invalidating caches).
> +CMD3=( "${aws_sync[@]}" "${excludes[@]}" --delete )
>  
>  #echo "$CMD /srv$1 s3://s3-mirror-us-west-1-02.fedoraproject.org$1"
>  echo "Starting $1 sync at $(date)" >> /var/log/s3-mirror/timestamps
> -$CMD1 /srv$1 s3://s3-mirror-us-west-1-02.fedoraproject.org$1
> -$CMD1 /srv$1/repodata/ 
> s3://s3-mirror-us-west-1-02.fedoraproject.org$1/repodata/
> +"${CMD1[@]}" "/srv$1" "s3://$S3_MIRROR$1"
> +"${CMD2[@]}" "/srv$1" "s3://$S3_MIRROR$1"
> +
>  # Always do the invalidations because they are quick and prevent issues
>  # depending on which path is synced.
> -for file in $(echo $1/repodata/* ); do
> -  aws cloudfront create-invalidation --distribution-id E2KJMDC0QAJDMU 
> --paths "$file" > /dev/null
> +for file in $(echo $1/repodata/repomd.xml ); do
> +  aws cloudfront create-invalidation --distribution-id $DIST_ID --paths 
> "$file" > /dev/null
>  done
> -$CMD2 /srv$1 s3://s3-mirror-us-west-1-02.fedoraproject.org$1
> +
> +SLEEP=$(( MAX_CACHE_SEC + DNF_GENTLY_TIMEOUT ))
> +echo "Ready $1 sync, giving dnf downloads ${SLEEP}s before delete, at 
> $(date)" >> /var/log/s3-mirror/timestamps
> +
> +# Consider some DNF processes started downloading metadata before we 
> invalidated
> +# caches, and started with outdated repomd.xml file.  Give it few more 
> seconds
> +# so they have chance to download the rest of metadata and RPMs.
> +sleep $SLEEP
> +
> +"${CMD3[@]}" "/srv$1" "s3://$S3_MIRROR$1"
> +
>  echo "Ending $1 sync at $(date)" >> /var/log/s3-mirror/timestamps
> diff --git a/roles/s3-mirror/files/s3.sh b/roles/s3-mirror/files/s3.sh
> index c157b0cdb..df58ac153 100644
> --- a/roles/s3-mirror/files/s3.sh
> +++ b/roles/s3-mirror/files/s3.sh
> @@ -88,6 +88,11 @@ excludes=(
>    --exclude "*/updates/testing/29/*"
>  )
>  
> +S3_MIRROR=s3-mirror-us-west-1-02.fedoraproject.org
> +DIST_ID=E2KJMDC0QAJDMU
> +MAX_CACHE_SEC=60
> +DNF_GENTLY_TIMEOUT=120
> +
>  # First run this command that syncs, but does not delete.
>  # It also excludes repomd.xml.
>  CMD1=( "${aws_sync[@]}" "${excludes[@]}" --exclude "*/repomd.xml" )
> @@ -95,14 +100,12 @@ CMD1=( "${aws_sync[@]}" "${excludes[@]}" --exclude 
> "*/repomd.xml" )
>  # Next we run this command which syncs repomd.xml files.  Include must 
> precede
>  # the large set of excludes.  Make sure that the 'max-age' isn't too large so
>  # we know that we can start removing old data ASAP.
> -CMD2=( "${aws_sync[@]}" --exclude "*" --include "*/repomd.xml" 
> "${excludes[@]}" --cache-control max-age=300 )
> +CMD2=( "${aws_sync[@]}" --exclude "*" --include "*/repomd.xml" 
> "${excludes[@]}"
> +                        --cache-control "max-age=$MAX_CACHE_SEC" )
>  
>  # Then we delete old RPMs and old metadata (but after invalidating caches).
>  CMD3=( "${aws_sync[@]}" "${excludes[@]}" --delete )
>  
> -S3_MIRROR=s3-mirror-us-west-1-02.fedoraproject.org
> -DIST_ID=E2KJMDC0QAJDMU
> -
>  # Sync EPEL
>  #echo $CMD /srv/pub/epel/ s3://$S3_MIRROR/pub/epel/
>  echo "Starting EPEL sync at $(date)" >> /var/log/s3-mirror/timestamps
> @@ -132,10 +135,12 @@ for file in $(echo 
> /srv/pub/fedora/linux/updates/*/*/*/repodata/repomd.xml | sed
>    aws cloudfront create-invalidation --distribution-id "$DIST_ID" --paths 
> "$file"
>  done
>  
> +SLEEP=$(( MAX_CACHE_SEC + DNF_GENTLY_TIMEOUT ))
> +
>  # Consider some DNF processes started downloading metadata before we 
> invalidated
> -# caches, and started with outdated repomd.xml file.  Give it 10 minutes so 
> they
> -# have chance to download the rest of metadata and RPMs.
> -sleep 600
> +# caches, and started with outdated repomd.xml file.  Give it few more 
> seconds
> +# so they have chance to download the rest of metadata and RPMs.
> +sleep $SLEEP
>  
>  "${CMD3[@]}" /srv/pub/epel/ "s3://$S3_MIRROR/pub/epel/"
>  "${CMD3[@]}" /srv/pub/fedora/ s3://$S3_MIRROR/pub/fedora/
> -- 
> 2.25.1
> _______________________________________________
> infrastructure mailing list -- infrastructure@lists.fedoraproject.org
> To unsubscribe send an email to infrastructure-le...@lists.fedoraproject.org
> Fedora Code of Conduct: 
> https://docs.fedoraproject.org/en-US/project/code-of-conduct/
> List Guidelines: https://fedoraproject.org/wiki/Mailing_list_guidelines
> List Archives: 
> https://lists.fedoraproject.org/archives/list/infrastructure@lists.fedoraproject.org

                Adrian

-- 
Adrian Reber <adr...@lisas.de>            http://lisas.de/~adrian/
QOTD:
        "My life is a soap opera, but who gets the movie rights?"
_______________________________________________
infrastructure mailing list -- infrastructure@lists.fedoraproject.org
To unsubscribe send an email to infrastructure-le...@lists.fedoraproject.org
Fedora Code of Conduct: 
https://docs.fedoraproject.org/en-US/project/code-of-conduct/
List Guidelines: https://fedoraproject.org/wiki/Mailing_list_guidelines
List Archives: 
https://lists.fedoraproject.org/archives/list/infrastructure@lists.fedoraproject.org

Reply via email to