Diff
Modified: trunk/Tools/ChangeLog (267578 => 267579)
--- trunk/Tools/ChangeLog 2020-09-25 17:47:30 UTC (rev 267578)
+++ trunk/Tools/ChangeLog 2020-09-25 17:55:03 UTC (rev 267579)
@@ -1,3 +1,31 @@
+2020-09-25 Jonathan Bedard <jbed...@apple.com>
+
+ results.webkit.org: Use s3 for cold storage
+ https://bugs.webkit.org/show_bug.cgi?id=216662
+ <rdar://problem/69092010>
+
+ Rubber-stamped by Aakash Jain.
+
+ * Scripts/libraries/resultsdbpy/resultsdbpy/__init__.py:
+ * Scripts/libraries/resultsdbpy/resultsdbpy/model/archive_context.py:
+ (ArchiveContext.__init__): Differentiate between cache storage and long-term storage of archives.
+ (ArchiveContext.__enter__): Connect to the cold storage archive, if one is available.
+ (ArchiveContext.__exit__): Disconnect from the cold storage archive, if applicable.
+ (ArchiveContext.register): Save to cold storage instead of cache storage by default.
+ (ArchiveContext.find_archive):
+ * Scripts/libraries/resultsdbpy/resultsdbpy/model/model.py:
+ (Model.__init__): Pass S3 credentials to ArchiveContext.
+ * Scripts/libraries/resultsdbpy/resultsdbpy/model/s3_archiver.py: Added.
+ (S3Archiver):
+ (S3Archiver.Credentials):
+ (S3Archiver.__init__): Connect to S3 and configure our bucket.
+ (S3Archiver._cipher): Construct new AES cipher, if a key was provided.
+ (S3Archiver.__enter__): Create S3 resource, if one is not available.
+ (S3Archiver.__exit__): Teardown S3 resource.
+ (S3Archiver.save): Save an archive to S3 by its hash.
+ (S3Archiver.retrieve): Retrieve an archive from S3 by its hash.
+ * Scripts/libraries/resultsdbpy/setup.py: Add boto3.
+
2020-09-25 Youenn Fablet <you...@apple.com>
Implement default behavior for getUserMedia requests in case where there is no delegate set
Modified: trunk/Tools/Scripts/libraries/resultsdbpy/resultsdbpy/__init__.py (267578 => 267579)
--- trunk/Tools/Scripts/libraries/resultsdbpy/resultsdbpy/__init__.py 2020-09-25 17:47:30 UTC (rev 267578)
+++ trunk/Tools/Scripts/libraries/resultsdbpy/resultsdbpy/__init__.py 2020-09-25 17:55:03 UTC (rev 267579)
@@ -43,6 +43,6 @@
"Please install webkitcorepy with `pip install webkitcorepy --extra-index-url <package index URL>`"
)
-version = Version(1, 0, 2)
+version = Version(1, 1, 0)
name = 'resultsdbpy'
Modified: trunk/Tools/Scripts/libraries/resultsdbpy/resultsdbpy/model/archive_context.py (267578 => 267579)
--- trunk/Tools/Scripts/libraries/resultsdbpy/resultsdbpy/model/archive_context.py 2020-09-25 17:47:30 UTC (rev 267578)
+++ trunk/Tools/Scripts/libraries/resultsdbpy/resultsdbpy/model/archive_context.py 2020-09-25 17:55:03 UTC (rev 267579)
@@ -22,20 +22,17 @@
import calendar
import io
-import json
import time
import zipfile
from cassandra.cqlengine import columns
-from cassandra.cqlengine.models import Model
-from collections import OrderedDict
from datetime import datetime
from resultsdbpy.controller.commit import Commit
-from resultsdbpy.controller.configuration import Configuration
from resultsdbpy.model.archiver import Archiver
from resultsdbpy.model.cassandra_archiver import CassandraArchiver
from resultsdbpy.model.commit_context import CommitContext
from resultsdbpy.model.configuration_context import ClusteredByConfiguration
+from resultsdbpy.model.s3_archiver import S3Archiver
from resultsdbpy.model.upload_context import UploadContext
@@ -81,11 +78,16 @@
cls.assert_zipfile(archive)
return zipfile.ZipFile(archive, mode='r')
- def __init__(self, configuration_context, commit_context, ttl_seconds=None):
+ def __init__(self, configuration_context, commit_context, ttl_seconds=None, s3_credentials=None):
self.configuration_context = configuration_context
self.commit_context = commit_context
self.cassandra = self.configuration_context.cassandra
- self.archiver = CassandraArchiver(self.cassandra)
+ self.cached_archiver = CassandraArchiver(self.cassandra)
+ self.cold_archiver = S3Archiver(
+ bucket='{}-results-archive'.format(self.cassandra.keyspace.replace('_', '-')),
+ credentials=s3_credentials,
+ ttl_seconds=ttl_seconds,
+ ) if s3_credentials else None
self.ttl_seconds = ttl_seconds
with self:
@@ -95,12 +97,16 @@
def __enter__(self):
self.configuration_context.__enter__()
self.commit_context.__enter__()
- self.archiver.__enter__()
+ self.cached_archiver.__enter__()
+ if self.cold_archiver:
+ self.cold_archiver.__enter__()
def __exit__(self, *args, **kwargs):
self.commit_context.__exit__(*args, **kwargs)
self.configuration_context.__exit__(*args, **kwargs)
- self.archiver.__exit__(*args, **kwargs)
+ self.cached_archiver.__exit__(*args, **kwargs)
+ if self.cold_archiver:
+ self.cold_archiver.__exit__()
def register(self, archive, configuration, commits, suite, timestamp=None):
self.assert_zipfile(archive)
@@ -116,14 +122,17 @@
self.configuration_context.insert_row_with_configuration(
UploadContext.SuitesByConfiguration.__table_name__, configuration, suite=suite, branch=branch, ttl=ttl,
)
- digest = self.archiver.save(archive, retain_for=ttl)
+ archiver_to_use = self.cold_archiver or self.cached_archiver
+ size = Archiver.archive_size(archive)
+ digest = archiver_to_use.save(archive, retain_for=ttl)
+
self.configuration_context.insert_row_with_configuration(
self.ArchiveMetaDataByCommit.__table_name__, configuration=configuration, suite=suite,
branch=branch, uuid=uuid, ttl=ttl,
sdk=configuration.sdk or '?', start_time=timestamp,
digest=digest,
- size=Archiver.archive_size(archive),
+ size=size,
)
def find_archive(
@@ -167,9 +176,19 @@
continue
if not archive_by_digest.get(value.get('digest')):
- archive = self.archiver.retrieve(value.get('digest'), value.get('size', None))
+ archive = self.cached_archiver.retrieve(value.get('digest'), value.get('size', None))
if not archive:
- continue
+ if not self.cold_archiver:
+ continue
+
+ archive = self.cold_archiver.retrieve(value.get('digest'), value.get('size', None))
+ if not archive:
+ continue
+
+ # If we retrieved an archive from the cold_archiver, it's pretty likely that
+ # the same archive will be retrieved in the near future. Cache the archive for 6 hours
+ self.cached_archiver.save(archive, retain_for=60 * 60 * 6)
+
archive_by_digest[value.get('digest')] = archive
archive_by_digest.get(value.get('digest')).seek(0)
Modified: trunk/Tools/Scripts/libraries/resultsdbpy/resultsdbpy/model/model.py (267578 => 267579)
--- trunk/Tools/Scripts/libraries/resultsdbpy/resultsdbpy/model/model.py 2020-09-25 17:47:30 UTC (rev 267578)
+++ trunk/Tools/Scripts/libraries/resultsdbpy/resultsdbpy/model/model.py 2020-09-25 17:55:03 UTC (rev 267579)
@@ -45,7 +45,7 @@
key = columns.Text(partition_key=True, required=True)
value = columns.Text(required=True)
- def __init__(self, redis, cassandra, repositories=[], default_ttl_seconds=TTL_YEAR * 5, archive_ttl_seconds=TTL_WEEK * 8, async_processing=False):
+ def __init__(self, redis, cassandra, repositories=[], default_ttl_seconds=TTL_YEAR * 5, archive_ttl_seconds=TTL_WEEK * 8, async_processing=False, s3_credentials=None):
if default_ttl_seconds is not None and default_ttl_seconds < 4 * self.TTL_WEEK:
raise ValueError('TTL must be at least 4 weeks')
if archive_ttl_seconds is not None and archive_ttl_seconds < 2 * self.TTL_WEEK:
@@ -99,6 +99,7 @@
configuration_context=self.configuration_context,
commit_context=self.commit_context,
ttl_seconds=self.archive_ttl_seconds,
+ s3_credentials=s3_credentials,
)
def healthy(self, writable=True):
Added: trunk/Tools/Scripts/libraries/resultsdbpy/resultsdbpy/model/s3_archiver.py (0 => 267579)
--- trunk/Tools/Scripts/libraries/resultsdbpy/resultsdbpy/model/s3_archiver.py (rev 0)
+++ trunk/Tools/Scripts/libraries/resultsdbpy/resultsdbpy/model/s3_archiver.py 2020-09-25 17:55:03 UTC (rev 267579)
@@ -0,0 +1,136 @@
+# Copyright (C) 2020 Apple Inc. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import boto3
+import botocore
+import io
+import math
+import os
+import hashlib
+
+from Crypto.Cipher import AES
+from Crypto.Util.Padding import pad, unpad
+from resultsdbpy.model.archiver import Archiver
+
+
+class S3Archiver(Archiver):
+ class Credentials(object):
+ def __init__(
+ self,
+ region_name=None,
+ aws_access_key_id=None, aws_secret_access_key=None, aws_session_token=None,
+ key=None,
+ ):
+ self.region_name = region_name or 'us-west-2'
+ self.aws_access_key_id = aws_access_key_id
+ self.aws_secret_access_key = aws_secret_access_key
+ self.aws_session_token = aws_session_token
+ self.key = key
+
+ def __repr__(self):
+ return self.aws_access_key_id
+
+ def __init__(self, credentials, bucket='results-archive', ttl_seconds=None):
+ self.credentials = credentials
+ self.resources = None
+ self.bucket = bucket
+ self._count = 0
+
+ # Only modify the lifecycle of the bucket if we're editing the schema
+ if os.environ['CQLENG_ALLOW_SCHEMA_MANAGEMENT'] == '1':
+ client = boto3.client(
+ service_name='s3',
+ region_name=self.credentials.region_name,
+ aws_access_key_id=self.credentials.aws_access_key_id,
+ aws_secret_access_key=self.credentials.aws_secret_access_key,
+ aws_session_token=self.credentials.aws_session_token,
+ )
+
+ ttl_seconds = ttl_seconds or 60 * 60 * 24 * 365
+ ttl_days = int(math.ceil(ttl_seconds / (60 * 60 * 24)))
+
+ client.put_bucket_lifecycle_configuration(
+ Bucket=self.bucket,
+ LifecycleConfiguration=dict(
+ Rules=[dict(
+ Expiration=dict(Days=ttl_days),
+ ID='ttl',
+ Filter=dict(Prefix='archives/'),
+ Status='Enabled',
+ )],
+ ),
+ )
+
+ @property
+ def _cipher(self):
+ if not self.credentials.key:
+ return None
+ return AES.new(hashlib.sha256(self.credentials.key.encode()).digest(), AES.MODE_ECB)
+
+ def __enter__(self):
+ self._count += 1
+ if not self.resources:
+ self.resources = boto3.resource(
+ service_name='s3',
+ region_name=self.credentials.region_name,
+ aws_access_key_id=self.credentials.aws_access_key_id,
+ aws_secret_access_key=self.credentials.aws_secret_access_key,
+ aws_session_token=self.credentials.aws_session_token,
+ )
+
+ def __exit__(self, *args, **kwargs):
+ self._count -= 1
+ if self._count <= 0:
+ self.resources = None
+ self._count = 0
+
+ def save(self, archive, retain_for=None):
+ with self:
+ # S3 doesn't let us set custom expiration dates. This is a bit annoying because we really want
+ # resources to stay alive based on their commit time, rather than their report time, but the
+ # consequence will be resources kept around for a bit longer than we need access to them.
+ digest = self.archive_digest(archive)
+ cipher = self._cipher
+ if cipher:
+ archive = io.BytesIO(cipher.encrypt(pad(archive.read(), AES.block_size)))
+
+ self.resources.Bucket(self.bucket).upload_fileobj(archive, 'archives/{}'.format(digest))
+ return digest
+
+ def retrieve(self, digest, size=None):
+ with self:
+ try:
+ archive = io.BytesIO()
+ self.resources.Bucket(self.bucket).download_fileobj('archives/{}'.format(digest), archive)
+ archive.seek(0)
+
+ cipher = self._cipher
+ if cipher:
+ archive = io.BytesIO(unpad(cipher.decrypt(archive.read()), AES.block_size))
+
+ if (size and self.archive_size(archive) != size) or digest != self.archive_digest(archive):
+ raise RuntimeError('Retrieved archive does not match provided digest')
+ return archive
+ except botocore.exceptions.ClientError as e:
+ if e.response['Error']['Code'] == "404":
+ return None
+ raise
Modified: trunk/Tools/Scripts/libraries/resultsdbpy/setup.py (267578 => 267579)
--- trunk/Tools/Scripts/libraries/resultsdbpy/setup.py 2020-09-25 17:47:30 UTC (rev 267578)
+++ trunk/Tools/Scripts/libraries/resultsdbpy/setup.py 2020-09-25 17:55:03 UTC (rev 267579)
@@ -59,6 +59,7 @@
'resultsdbpy.view',
],
install_requires=[
+ 'boto3',
'cassandra-driver',
'fakeredis',
'Flask',
@@ -65,6 +66,7 @@
'Flask-Cors',
'gunicorn',
'lupa',
+ 'pycryptodome',
'redis',
'xmltodict',
'selenium',