commit:     6a539b7c5163899db1d58cf152aeab1b2b4f9be4
Author:     Michał Górny <mgorny <AT> gentoo <DOT> org>
AuthorDate: Thu Oct  3 14:19:54 2019 +0000
Commit:     Michał Górny <mgorny <AT> gentoo <DOT> org>
CommitDate: Fri Oct  4 21:25:00 2019 +0000
URL:        https://gitweb.gentoo.org/proj/portage.git/commit/?id=6a539b7c

fetch: Support GLEP 75 mirror structure

Add support for the subset of GLEP 75 needed by Gentoo Infra.  This
includes fetching and parsing layout.conf, and support for the flat
layout and the filename-hash layout with cutoffs being multiples of 4.

Bug: https://bugs.gentoo.org/646898
Closes: https://github.com/gentoo/portage/pull/462
Reviewed-by: Zac Medico <zmedico <AT> gentoo.org>
Signed-off-by: Michał Górny <mgorny <AT> gentoo.org>

 lib/portage/package/ebuild/fetch.py    | 160 ++++++++++++++++++++++++++++++++-
 lib/portage/tests/ebuild/test_fetch.py |  94 ++++++++++++++++++-
 2 files changed, 250 insertions(+), 4 deletions(-)

diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
index 227bf45ae..4458796fc 100644
--- a/lib/portage/package/ebuild/fetch.py
+++ b/lib/portage/package/ebuild/fetch.py
@@ -6,13 +6,17 @@ from __future__ import print_function
 __all__ = ['fetch']
 
 import errno
+import functools
 import io
+import itertools
+import json
 import logging
 import random
 import re
 import stat
 import sys
 import tempfile
+import time
 
 from collections import OrderedDict
 
@@ -27,12 +31,17 @@ portage.proxy.lazyimport.lazyimport(globals(),
        'portage.package.ebuild.doebuild:doebuild_environment,' + \
                '_doebuild_spawn',
        'portage.package.ebuild.prepare_build_dirs:prepare_build_dirs',
+       'portage.util:atomic_ofstream',
+       'portage.util.configparser:SafeConfigParser,read_configs,' +
+               'NoOptionError,ConfigParserError',
+       'portage.util._urlopen:urlopen',
 )
 
 from portage import os, selinux, shutil, _encodings, \
        _movefile, _shell_quote, _unicode_encode
 from portage.checksum import (get_valid_checksum_keys, perform_md5, verify_all,
-       _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter)
+       _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter,
+       checksum_str)
 from portage.const import BASH_BINARY, CUSTOM_MIRRORS_FILE, \
        GLOBAL_CONFIG_PATH
 from portage.data import portage_gid, portage_uid, secpass, userpriv_groups
@@ -253,6 +262,146 @@ _size_suffix_map = {
        'Y' : 80,
 }
 
+
+class FlatLayout(object):
+       def get_path(self, filename):
+               return filename
+
+       @staticmethod
+       def verify_args(args):
+               return len(args) == 1
+
+
+class FilenameHashLayout(object):
+       def __init__(self, algo, cutoffs):
+               self.algo = algo
+               self.cutoffs = [int(x) for x in cutoffs.split(':')]
+
+       def get_path(self, filename):
+               fnhash = checksum_str(filename.encode('utf8'), self.algo)
+               ret = ''
+               for c in self.cutoffs:
+                       assert c % 4 == 0
+                       c = c // 4
+                       ret += fnhash[:c] + '/'
+                       fnhash = fnhash[c:]
+               return ret + filename
+
+       @staticmethod
+       def verify_args(args):
+               if len(args) != 3:
+                       return False
+               if args[1] not in get_valid_checksum_keys():
+                       return False
+               # validate cutoffs
+               for c in args[2].split(':'):
+                       try:
+                               c = int(c)
+                       except ValueError:
+                               break
+                       else:
+                               if c % 4 != 0:
+                                       break
+               else:
+                       return True
+               return False
+
+
+class MirrorLayoutConfig(object):
+       """
+       Class to read layout.conf from a mirror.
+       """
+
+       def __init__(self):
+               self.structure = ()
+
+       def read_from_file(self, f):
+               cp = SafeConfigParser()
+               read_configs(cp, [f])
+               vals = []
+               for i in itertools.count():
+                       try:
+                               vals.append(tuple(cp.get('structure', '%d' % i).split()))
+                       except NoOptionError:
+                               break
+               self.structure = tuple(vals)
+
+       def serialize(self):
+               return self.structure
+
+       def deserialize(self, data):
+               self.structure = data
+
+       @staticmethod
+       def validate_structure(val):
+               if val[0] == 'flat':
+                       return FlatLayout.verify_args(val)
+               if val[0] == 'filename-hash':
+                       return FilenameHashLayout.verify_args(val)
+               return False
+
+       def get_best_supported_layout(self):
+               for val in self.structure:
+                       if self.validate_structure(val):
+                               if val[0] == 'flat':
+                                       return FlatLayout(*val[1:])
+                               elif val[0] == 'filename-hash':
+                                       return FilenameHashLayout(*val[1:])
+               else:
+                       # fallback
+                       return FlatLayout()
+
+
+def get_mirror_url(mirror_url, filename, cache_path=None):
+       """
+       Get correct fetch URL for a given file, accounting for mirror
+       layout configuration.
+
+       @param mirror_url: Base URL to the mirror (without '/distfiles')
+       @param filename: Filename to fetch
+       @param cache_path: Path for mirror metadata cache
+       @return: Full URL to fetch
+       """
+
+       mirror_conf = MirrorLayoutConfig()
+
+       cache = {}
+       if cache_path is not None:
+               try:
+                       with open(cache_path, 'r') as f:
+                               cache = json.load(f)
+               except (IOError, ValueError):
+                       pass
+
+       ts, data = cache.get(mirror_url, (0, None))
+       # refresh at least daily
+       if ts >= time.time() - 86400:
+               mirror_conf.deserialize(data)
+       else:
+               try:
+                       f = urlopen(mirror_url + '/distfiles/layout.conf')
+                       try:
+                               data = io.StringIO(f.read().decode('utf8'))
+                       finally:
+                               f.close()
+
+                       try:
+                               mirror_conf.read_from_file(data)
+                       except ConfigParserError:
+                               pass
+               except IOError:
+                       pass
+
+               cache[mirror_url] = (time.time(), mirror_conf.serialize())
+               if cache_path is not None:
+                       f = atomic_ofstream(cache_path, 'w')
+                       json.dump(cache, f)
+                       f.close()
+
+       return (mirror_url + "/distfiles/" +
+                       mirror_conf.get_best_supported_layout().get_path(filename))
+
+
 def fetch(myuris, mysettings, listonly=0, fetchonly=0,
        locks_in_subdir=".locks", use_locks=1, try_mirrors=1, digests=None,
        allow_missing_digests=True):
@@ -434,8 +583,11 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
        for myfile, myuri in file_uri_tuples:
                if myfile not in filedict:
                        filedict[myfile]=[]
-                       for y in range(0,len(locations)):
-                               filedict[myfile].append(locations[y]+"/distfiles/"+myfile)
+                       mirror_cache = os.path.join(mysettings["DISTDIR"],
+                                       ".mirror-cache.json")
+                       for l in locations:
+                               filedict[myfile].append(functools.partial(
+                                       get_mirror_url, l, myfile, mirror_cache))
                if myuri is None:
                        continue
                if myuri[:9]=="mirror://":
@@ -895,6 +1047,8 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
                        tried_locations = set()
                        while uri_list:
                                loc = uri_list.pop()
+                               if isinstance(loc, functools.partial):
+                                       loc = loc()
                                # Eliminate duplicates here in case we've switched to
                                # "primaryuri" mode on the fly due to a checksum failure.
                                if loc in tried_locations:

diff --git a/lib/portage/tests/ebuild/test_fetch.py b/lib/portage/tests/ebuild/test_fetch.py
index 83321fed7..f2254c468 100644
--- a/lib/portage/tests/ebuild/test_fetch.py
+++ b/lib/portage/tests/ebuild/test_fetch.py
@@ -4,6 +4,7 @@
 from __future__ import unicode_literals
 
 import functools
+import io
 import tempfile
 
 import portage
@@ -11,12 +12,14 @@ from portage import shutil, os
 from portage.tests import TestCase
 from portage.tests.resolver.ResolverPlayground import ResolverPlayground
 from portage.tests.util.test_socks5 import AsyncHTTPServer
+from portage.util.configparser import ConfigParserError
 from portage.util.futures.executor.fork import ForkExecutor
 from portage.util._async.SchedulerInterface import SchedulerInterface
 from portage.util._eventloop.global_event_loop import global_event_loop
 from portage.package.ebuild.config import config
 from portage.package.ebuild.digestgen import digestgen
-from portage.package.ebuild.fetch import _download_suffix
+from portage.package.ebuild.fetch import (_download_suffix, FlatLayout,
+               FilenameHashLayout, MirrorLayoutConfig)
 from _emerge.EbuildFetcher import EbuildFetcher
 from _emerge.Package import Package
 
@@ -228,3 +231,92 @@ class EbuildFetchTestCase(TestCase):
                        finally:
                                shutil.rmtree(ro_distdir)
                                playground.cleanup()
+
+       def test_flat_layout(self):
+               self.assertTrue(FlatLayout.verify_args(('flat',)))
+               self.assertFalse(FlatLayout.verify_args(('flat', 'extraneous-arg')))
+               self.assertEqual(FlatLayout().get_path('foo-1.tar.gz'), 'foo-1.tar.gz')
+
+       def test_filename_hash_layout(self):
+               self.assertFalse(FilenameHashLayout.verify_args(('filename-hash',)))
+               self.assertTrue(FilenameHashLayout.verify_args(('filename-hash', 'SHA1', '8')))
+               self.assertFalse(FilenameHashLayout.verify_args(('filename-hash', 'INVALID-HASH', '8')))
+               self.assertTrue(FilenameHashLayout.verify_args(('filename-hash', 'SHA1', '4:8:12')))
+               self.assertFalse(FilenameHashLayout.verify_args(('filename-hash', 'SHA1', '3')))
+               self.assertFalse(FilenameHashLayout.verify_args(('filename-hash', 'SHA1', 'junk')))
+               self.assertFalse(FilenameHashLayout.verify_args(('filename-hash', 'SHA1', '4:8:junk')))
+
+               self.assertEqual(FilenameHashLayout('SHA1', '4').get_path('foo-1.tar.gz'),
+                               '1/foo-1.tar.gz')
+               self.assertEqual(FilenameHashLayout('SHA1', '8').get_path('foo-1.tar.gz'),
+                               '19/foo-1.tar.gz')
+               self.assertEqual(FilenameHashLayout('SHA1', '8:16').get_path('foo-1.tar.gz'),
+                               '19/c3b6/foo-1.tar.gz')
+               self.assertEqual(FilenameHashLayout('SHA1', '8:16:24').get_path('foo-1.tar.gz'),
+                               '19/c3b6/37a94b/foo-1.tar.gz')
+
+       def test_mirror_layout_config(self):
+               mlc = MirrorLayoutConfig()
+               self.assertEqual(mlc.serialize(), ())
+               self.assertIsInstance(mlc.get_best_supported_layout(), FlatLayout)
+
+               conf = '''
+[structure]
+0=flat
+'''
+               mlc.read_from_file(io.StringIO(conf))
+               self.assertEqual(mlc.serialize(), (('flat',),))
+               self.assertIsInstance(mlc.get_best_supported_layout(), FlatLayout)
+               self.assertEqual(mlc.get_best_supported_layout().get_path('foo-1.tar.gz'),
+                               'foo-1.tar.gz')
+
+               conf = '''
+[structure]
+0=filename-hash SHA1 8:16
+1=flat
+'''
+               mlc.read_from_file(io.StringIO(conf))
+               self.assertEqual(mlc.serialize(), (
+                       ('filename-hash', 'SHA1', '8:16'),
+                       ('flat',)
+               ))
+               self.assertIsInstance(mlc.get_best_supported_layout(), FilenameHashLayout)
+               self.assertEqual(mlc.get_best_supported_layout().get_path('foo-1.tar.gz'),
+                               '19/c3b6/foo-1.tar.gz')
+               serialized = mlc.serialize()
+
+               # test fallback
+               conf = '''
+[structure]
+0=filename-hash INVALID-HASH 8:16
+1=filename-hash SHA1 32
+2=flat
+'''
+               mlc.read_from_file(io.StringIO(conf))
+               self.assertEqual(mlc.serialize(), (
+                       ('filename-hash', 'INVALID-HASH', '8:16'),
+                       ('filename-hash', 'SHA1', '32'),
+                       ('flat',)
+               ))
+               self.assertIsInstance(mlc.get_best_supported_layout(), FilenameHashLayout)
+               self.assertEqual(mlc.get_best_supported_layout().get_path('foo-1.tar.gz'),
+                               '19c3b637/foo-1.tar.gz')
+
+               # test deserialization
+               mlc.deserialize(serialized)
+               self.assertEqual(mlc.serialize(), (
+                       ('filename-hash', 'SHA1', '8:16'),
+                       ('flat',)
+               ))
+               self.assertIsInstance(mlc.get_best_supported_layout(), FilenameHashLayout)
+               self.assertEqual(mlc.get_best_supported_layout().get_path('foo-1.tar.gz'),
+                               '19/c3b6/foo-1.tar.gz')
+
+               # test erroneous input
+               conf = '''
+[#(*DA*&*F
+[structure]
+0=filename-hash SHA1 32
+'''
+               self.assertRaises(ConfigParserError, mlc.read_from_file,
+                               io.StringIO(conf))

Reply via email to