Script 'mail_helper' called by obssrc

Hello community,

here is the log from the commit of package python-s3fs for openSUSE:Factory
checked in at 2022-02-24 18:20:46

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-s3fs (Old)
 and      /work/SRC/openSUSE:Factory/.python-s3fs.new.1958 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-s3fs"

Thu Feb 24 18:20:46 2022 rev:12 rq:957044 version:2022.2.0

Changes:
--------
--- /work/SRC/openSUSE:Factory/python-s3fs/python-s3fs.changes  2022-02-01 14:03:12.383970176 +0100
+++ /work/SRC/openSUSE:Factory/.python-s3fs.new.1958/python-s3fs.changes  2022-02-24 18:24:05.870648407 +0100
@@ -1,0 +2,8 @@
+Tue Feb 22 23:12:22 UTC 2022 - Matej Cepl <mc...@suse.com>
+
+- Update to 2022.02.0:
+  - callbacks fixes
+  - drop py36
+  - metadata fixes
+
+-------------------------------------------------------------------

Old:
----
  s3fs-2022.1.0.tar.gz

New:
----
  s3fs-2022.2.0.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ python-s3fs.spec ++++++
--- /var/tmp/diff_new_pack.ZNteBx/_old  2022-02-24 18:24:06.314648292 +0100
+++ /var/tmp/diff_new_pack.ZNteBx/_new  2022-02-24 18:24:06.322648290 +0100
@@ -19,7 +19,7 @@
 %{?!python_module:%define python_module() python3-%{**}}
 %define skip_python2 1
 Name:           python-s3fs
-Version:        2022.1.0
+Version:        2022.2.0
 Release:        0
 Summary:        Python filesystem interface for S3
 License:        BSD-3-Clause

++++++ s3fs-2022.1.0.tar.gz -> s3fs-2022.2.0.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-2022.1.0/.gitignore new/s3fs-2022.2.0/.gitignore
--- old/s3fs-2022.1.0/.gitignore        2020-05-01 18:06:15.000000000 +0200
+++ new/s3fs-2022.2.0/.gitignore        2022-02-08 22:42:09.000000000 +0100
@@ -7,3 +7,4 @@
 dist/
 *.egg-info
 build/
+venv/
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-2022.1.0/PKG-INFO new/s3fs-2022.2.0/PKG-INFO
--- old/s3fs-2022.1.0/PKG-INFO  2022-01-11 20:52:39.153018700 +0100
+++ new/s3fs-2022.2.0/PKG-INFO  2022-02-22 18:52:20.695571000 +0100
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: s3fs
-Version: 2022.1.0
+Version: 2022.2.0
 Summary: Convenient Filesystem interface over S3
 Home-page: http://github.com/fsspec/s3fs/
 Maintainer: Martin Durant
@@ -31,10 +31,9 @@
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: BSD License
 Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Python :: 3.6
 Classifier: Programming Language :: Python :: 3.7
 Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
-Requires-Python: >= 3.6
+Requires-Python: >= 3.7
 Provides-Extra: awscli
 Provides-Extra: boto3
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-2022.1.0/docs/source/changelog.rst new/s3fs-2022.2.0/docs/source/changelog.rst
--- old/s3fs-2022.1.0/docs/source/changelog.rst 2022-01-11 20:51:51.000000000 +0100
+++ new/s3fs-2022.2.0/docs/source/changelog.rst 2022-02-22 18:50:58.000000000 +0100
@@ -1,6 +1,13 @@
 Changelog
 =========
 
+2022.02.0
+---------
+
+- callbacks fixes (#594, 590)
+- drop py36 (#582)
+- metadata fixes (#575, 579)
+
 2022.01.0
 ---------
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-2022.1.0/docs/source/development.rst new/s3fs-2022.2.0/docs/source/development.rst
--- old/s3fs-2022.1.0/docs/source/development.rst       1970-01-01 01:00:00.000000000 +0100
+++ new/s3fs-2022.2.0/docs/source/development.rst       2022-02-08 22:42:09.000000000 +0100
@@ -0,0 +1,10 @@
+Development
+===========
+
+Create a development environment::
+
+   $ pip install -r requirements.txt -r test_requirements.txt
+
+Run tests::
+
+   $ pytest
\ No newline at end of file
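The "callbacks fixes" entry in the changelog corresponds to the new callback parameters on _get_file/_put_file in s3fs/core.py further down in this diff. A minimal usage sketch, relying only on fsspec's public Callback API; the bucket and paths here are hypothetical:

    # Progress reporting for a single download, mirroring the
    # test_get_file_with_callback test added in this release.
    from fsspec.callbacks import Callback
    import s3fs

    fs = s3fs.S3FileSystem()
    cb = Callback()
    fs.get_file("my-bucket/data.bin", "/tmp/data.bin", callback=cb)
    # cb.size is set from the response's ContentLength; cb.value is the
    # running byte count, updated once per downloaded chunk.
    print(cb.size, cb.value)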
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-2022.1.0/requirements.txt new/s3fs-2022.2.0/requirements.txt
--- old/s3fs-2022.1.0/requirements.txt  2022-01-11 20:51:51.000000000 +0100
+++ new/s3fs-2022.2.0/requirements.txt  2022-02-22 18:51:22.000000000 +0100
@@ -1,3 +1,3 @@
 aiobotocore~=2.1.0
-fsspec==2022.01.0
+fsspec==2022.02.0
 aiohttp<=4
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-2022.1.0/s3fs/_version.py new/s3fs-2022.2.0/s3fs/_version.py
--- old/s3fs-2022.1.0/s3fs/_version.py  2022-01-11 20:52:39.154271000 +0100
+++ new/s3fs-2022.2.0/s3fs/_version.py  2022-02-22 18:52:20.696371300 +0100
@@ -8,11 +8,11 @@
 
 version_json = '''
 {
- "date": "2022-01-11T14:51:45-0500",
+ "date": "2022-02-22T12:51:44-0500",
  "dirty": false,
  "error": null,
- "full-revisionid": "dfa1cd28467541cfafb25056a0402a49bacd219a",
- "version": "2022.01.0"
+ "full-revisionid": "50bafe4d8766c3b2a4e1fc09669cf02fb2d71454",
+ "version": "2022.02.0"
 }
 '''  # END VERSION_JSON
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-2022.1.0/s3fs/core.py new/s3fs-2022.2.0/s3fs/core.py
--- old/s3fs-2022.1.0/s3fs/core.py      2022-01-05 19:07:23.000000000 +0100
+++ new/s3fs-2022.2.0/s3fs/core.py      2022-02-08 22:42:09.000000000 +0100
@@ -2,6 +2,7 @@
 import asyncio
 import errno
 import logging
+import mimetypes
 import os
 import socket
 from typing import Tuple, Optional
@@ -12,6 +13,7 @@
 from fsspec.spec import AbstractBufferedFile
 from fsspec.utils import infer_storage_options, tokenize, setup_logging as setup_logger
 from fsspec.asyn import AsyncFileSystem, sync, sync_wrapper, FSTimeoutError
+from fsspec.callbacks import _DEFAULT_CALLBACK
 
 import aiobotocore
 import botocore
@@ -43,7 +45,7 @@
 
 setup_logging()
 
-MANAGED_COPY_THRESHOLD = 5 * 2 ** 30
+MANAGED_COPY_THRESHOLD = 5 * 2**30
 
 S3_RETRYABLE_ERRORS = (socket.timeout, IncompleteRead)
 if ClientPayloadError is not None:
@@ -51,6 +53,25 @@
 
 _VALID_FILE_MODES = {"r", "w", "a", "rb", "wb", "ab"}
 
+_PRESERVE_KWARGS = [
+    "CacheControl",
+    "ContentDisposition",
+    "ContentEncoding",
+    "ContentLanguage",
+    "ContentLength",
+    "ContentType",
+    "Expires",
+    "WebsiteRedirectLocation",
+    "ServerSideEncryption",
+    "SSECustomerAlgorithm",
+    "SSEKMSKeyId",
+    "BucketKeyEnabled",
+    "StorageClass",
+    "ObjectLockMode",
+    "ObjectLockRetainUntilDate",
+    "ObjectLockLegalHoldStatus",
+    "Metadata",
+]
 
 key_acls = {
     "private",
@@ -170,7 +191,7 @@
     connect_timeout = 5
     retries = 5
     read_timeout = 15
-    default_block_size = 5 * 2 ** 20
+    default_block_size = 5 * 2**20
     protocol = ["s3", "s3a"]
     _extra_tokenize_attributes = ("default_block_size",)
 
@@ -263,7 +284,7 @@
             except S3_RETRYABLE_ERRORS as e:
                 logger.debug("Retryable error: %s", e)
                 err = e
-                await asyncio.sleep(min(1.7 ** i * 0.1, 15))
+                await asyncio.sleep(min(1.7**i * 0.1, 15))
             except Exception as e:
                 logger.debug("Nonretryable error: %s", e)
                 err = e
@@ -896,11 +917,11 @@
             resp["Body"].close()
         return data
 
-    async def _pipe_file(self, path, data, chunksize=50 * 2 ** 20, **kwargs):
+    async def _pipe_file(self, path, data, chunksize=50 * 2**20, **kwargs):
         bucket, key, _ = self.split_path(path)
         size = len(data)
         # 5 GB is the limit for an S3 PUT
-        if size < min(5 * 2 ** 30, 2 * chunksize):
+        if size < min(5 * 2**30, 2 * chunksize):
             return await self._call_s3(
                 "put_object", Bucket=bucket, Key=key, Body=data, **kwargs
             )
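The _pipe_file hunk above (and _put_file in the next hunk) keeps the same size cutoff between a single put_object call and a multipart upload. The decision in isolation, with the helper name invented here for illustration:

    # A single S3 PUT is capped at 5 GiB; anything at least twice the
    # chunk size is sent as a multipart upload instead.
    DEFAULT_CHUNKSIZE = 50 * 2**20  # 50 MiB, the default in the diff

    def use_single_put(size: int, chunksize: int = DEFAULT_CHUNKSIZE) -> bool:
        return size < min(5 * 2**30, 2 * chunksize)

    assert use_single_put(10 * 2**20)       # 10 MiB  -> one put_object
    assert not use_single_put(200 * 2**20)  # 200 MiB -> multipart upload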
@@ -934,7 +955,9 @@
         )
         self.invalidate_cache(path)
 
-    async def _put_file(self, lpath, rpath, chunksize=50 * 2 ** 20, **kwargs):
+    async def _put_file(
+        self, lpath, rpath, callback=_DEFAULT_CALLBACK, chunksize=50 * 2**20, **kwargs
+    ):
         bucket, key, _ = self.split_path(rpath)
         if os.path.isdir(lpath):
             if key:
@@ -943,11 +966,19 @@
         else:
             await self._mkdir(lpath)
         size = os.path.getsize(lpath)
+        callback.set_size(size)
+
+        if "ContentType" not in kwargs:
+            content_type, _ = mimetypes.guess_type(lpath)
+            if content_type is not None:
+                kwargs["ContentType"] = content_type
+
         with open(lpath, "rb") as f0:
-            if size < min(5 * 2 ** 30, 2 * chunksize):
+            if size < min(5 * 2**30, 2 * chunksize):
                 await self._call_s3(
                     "put_object", Bucket=bucket, Key=key, Body=f0, **kwargs
                 )
+                callback.relative_update(size)
             else:
 
                 mpu = await self._call_s3(
@@ -969,6 +1000,7 @@
                                 Key=key,
                             )
                         )
+                        callback.relative_update(len(chunk))
                 parts = [
                     {"PartNumber": i + 1, "ETag": o["ETag"]}
                     for i, o in enumerate(out)
@@ -984,7 +1016,9 @@
             self.invalidate_cache(rpath)
             rpath = self._parent(rpath)
 
-    async def _get_file(self, rpath, lpath, version_id=None):
+    async def _get_file(
+        self, rpath, lpath, callback=_DEFAULT_CALLBACK, version_id=None
+    ):
         bucket, key, vers = self.split_path(rpath)
         if os.path.isdir(lpath):
             return
@@ -996,13 +1030,15 @@
             **self.req_kw,
         )
         body = resp["Body"]
+        callback.set_size(resp.get("ContentLength", None))
         try:
             with open(lpath, "wb") as f0:
                 while True:
-                    chunk = await body.read(2 ** 16)
+                    chunk = await body.read(2**16)
                     if not chunk:
                         break
-                    f0.write(chunk)
+                    segment_len = f0.write(chunk)
+                    callback.relative_update(segment_len)
         finally:
             body.close()
 
@@ -1036,13 +1072,11 @@
         )
         return {
             "ETag": out["ETag"],
-            "Key": "/".join([bucket, key]),
             "LastModified": out["LastModified"],
-            "Size": out["ContentLength"],
             "size": out["ContentLength"],
             "name": "/".join([bucket, key]),
             "type": "file",
-            "StorageClass": "STANDARD",
+            "StorageClass": out.get("StorageClass", "STANDARD"),
             "VersionId": out.get("VersionId"),
             "ContentType": out.get("ContentType"),
         }
@@ -1069,10 +1103,8 @@
             or out.get("CommonPrefixes", [])
         ):
             return {
-                "Key": "/".join([bucket, key]),
                 "name": "/".join([bucket, key]),
                 "type": "directory",
-                "Size": 0,
                 "size": 0,
                 "StorageClass": "DIRECTORY",
             }
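_put_file now also fills in ContentType from the local filename unless the caller supplied one; the new test_content_type_* tests later in this diff exercise exactly this. The guessing itself is plain stdlib:

    # mimetypes.guess_type returns a (type, encoding) pair; s3fs uses
    # only the first element, and only when ContentType was not given.
    import mimetypes

    kwargs = {}
    if "ContentType" not in kwargs:
        content_type, _ = mimetypes.guess_type("accounts.1.json")
        if content_type is not None:
            kwargs["ContentType"] = content_type
    print(kwargs)  # {'ContentType': 'application/json'}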
""" - if block < 5 * 2 ** 20 or block > 5 * 2 ** 30: + if block < 5 * 2**20 or block > 5 * 2**30: raise ValueError("Copy block size must be 5MB<=block<=5GB") bucket, key, version = self.split_path(path2) mpu = await self._call_s3( @@ -1752,15 +1784,15 @@ """ retries = 5 - part_min = 5 * 2 ** 20 - part_max = 5 * 2 ** 30 + part_min = 5 * 2**20 + part_max = 5 * 2**30 def __init__( self, s3, path, mode="rb", - block_size=5 * 2 ** 20, + block_size=5 * 2**20, acl="", version_id=None, fill_cache=True, @@ -1785,7 +1817,7 @@ self.s3_additional_kwargs = s3_additional_kwargs or {} self.req_kw = {"RequestPayer": "requester"} if requester_pays else {} if "r" not in mode: - if block_size < 5 * 2 ** 20: + if block_size < 5 * 2**20: raise ValueError("Block size must be >=5MB") else: if version_id and s3.version_aware: @@ -1814,14 +1846,38 @@ self.append_block = False if "a" in mode and s3.exists(path): - loc = s3.info(path)["size"] - if loc < 5 * 2 ** 20: + # See: + # put: https://boto3.amazonaws.com/v1/documentation/api/latest + # /reference/services/s3.html#S3.Client.put_object + # + # head: https://boto3.amazonaws.com/v1/documentation/api/latest + # /reference/services/s3.html#S3.Client.head_object + head = self._call_s3( + "head_object", + self.kwargs, + Bucket=bucket, + Key=key, + **version_id_kw(version_id), + **self.req_kw, + ) + + head = { + key: value + for key, value in head.items() + if key in _PRESERVE_KWARGS and key not in self.s3_additional_kwargs + } + + loc = head.pop("ContentLength") + if loc < 5 * 2**20: # existing file too small for multi-upload: download self.write(self.fs.cat(self.path)) else: self.append_block = True self.loc = loc + # Reflect head + self.s3_additional_kwargs.update(head) + if "r" in mode and "ETag" in self.details: self.req_kw["IfMatch"] = self.details["ETag"] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-2022.1.0/s3fs/tests/test_s3fs.py new/s3fs-2022.2.0/s3fs/tests/test_s3fs.py --- old/s3fs-2022.1.0/s3fs/tests/test_s3fs.py 2022-01-05 19:07:23.000000000 +0100 +++ new/s3fs-2022.2.0/s3fs/tests/test_s3fs.py 2022-02-22 18:51:29.000000000 +0100 @@ -15,11 +15,14 @@ import moto from itertools import chain import fsspec.core +from dateutil.tz import tzutc + import s3fs.core from s3fs.core import S3FileSystem from s3fs.utils import ignoring, SSEParams from botocore.exceptions import NoCredentialsError from fsspec.asyn import sync +from fsspec.callbacks import Callback from packaging import version test_bucket_name = "test" @@ -161,7 +164,7 @@ def test_simple(s3): - data = b"a" * (10 * 2 ** 20) + data = b"a" * (10 * 2**20) with s3.open(a, "wb") as f: f.write(data) @@ -174,7 +177,7 @@ @pytest.mark.parametrize("default_cache_type", ["none", "bytes", "mmap"]) def test_default_cache_type(s3, default_cache_type): - data = b"a" * (10 * 2 ** 20) + data = b"a" * (10 * 2**20) s3 = S3FileSystem( anon=False, default_cache_type=default_cache_type, @@ -257,6 +260,8 @@ assert abs(info.pop("LastModified") - linfo.pop("LastModified")).seconds < 1 info.pop("VersionId") info.pop("ContentType") + linfo.pop("Key") + linfo.pop("Size") assert info == linfo parent = a.rsplit("/", 1)[0] s3.invalidate_cache() # remove full path from the cache @@ -695,6 +700,22 @@ s3.info(fn + "another") +def test_content_type_is_set(s3, tmpdir): + test_file = str(tmpdir) + "/test.json" + destination = test_bucket_name + "/test.json" + open(test_file, "w").write("text") + s3.put(test_file, destination) + assert s3.info(destination)["ContentType"] == 
"application/json" + + +def test_content_type_is_not_overrided(s3, tmpdir): + test_file = os.path.join(str(tmpdir), "test.json") + destination = os.path.join(test_bucket_name, "test.json") + open(test_file, "w").write("text") + s3.put(test_file, destination, ContentType="text/css") + assert s3.info(destination)["ContentType"] == "text/css" + + def test_bucket_exists(s3): assert s3.exists(test_bucket_name) assert not s3.exists(test_bucket_name + "x") @@ -842,16 +863,16 @@ def test_copy_managed(s3): - data = b"abc" * 12 * 2 ** 20 + data = b"abc" * 12 * 2**20 fn = test_bucket_name + "/test/biggerfile" with s3.open(fn, "wb") as f: f.write(data) - sync(s3.loop, s3._copy_managed, fn, fn + "2", size=len(data), block=5 * 2 ** 20) + sync(s3.loop, s3._copy_managed, fn, fn + "2", size=len(data), block=5 * 2**20) assert s3.cat(fn) == s3.cat(fn + "2") with pytest.raises(ValueError): - sync(s3.loop, s3._copy_managed, fn, fn + "3", size=len(data), block=4 * 2 ** 20) + sync(s3.loop, s3._copy_managed, fn, fn + "3", size=len(data), block=4 * 2**20) with pytest.raises(ValueError): - sync(s3.loop, s3._copy_managed, fn, fn + "3", size=len(data), block=6 * 2 ** 30) + sync(s3.loop, s3._copy_managed, fn, fn + "3", size=len(data), block=6 * 2**30) @pytest.mark.parametrize("recursive", [True, False]) @@ -878,7 +899,7 @@ def test_get_put_big(s3, tmpdir): test_file = str(tmpdir.join("test")) - data = b"1234567890A" * 2 ** 20 + data = b"1234567890A" * 2**20 open(test_file, "wb").write(data) s3.put(test_file, test_bucket_name + "/bigfile") @@ -887,7 +908,46 @@ assert open(test_file, "rb").read() == data -@pytest.mark.parametrize("size", [2 ** 10, 2 ** 20, 10 * 2 ** 20]) +def test_get_put_with_callback(s3, tmpdir): + test_file = str(tmpdir.join("test.json")) + + class BranchingCallback(Callback): + def branch(self, path_1, path_2, kwargs): + kwargs["callback"] = BranchingCallback() + + cb = BranchingCallback() + s3.get(test_bucket_name + "/test/accounts.1.json", test_file, callback=cb) + assert cb.size == 1 + assert cb.value == 1 + + cb = BranchingCallback() + s3.put(test_file, test_bucket_name + "/temp", callback=cb) + assert cb.size == 1 + assert cb.value == 1 + + +def test_get_file_with_callback(s3, tmpdir): + test_file = str(tmpdir.join("test.json")) + + cb = Callback() + s3.get_file(test_bucket_name + "/test/accounts.1.json", test_file, callback=cb) + assert cb.size == os.stat(test_file).st_size + assert cb.value == cb.size + + +@pytest.mark.parametrize("size", [2**10, 10 * 2**20]) +def test_put_file_with_callback(s3, tmpdir, size): + test_file = str(tmpdir.join("test.json")) + with open(test_file, "wb") as f: + f.write(b"1234567890A" * size) + + cb = Callback() + s3.put_file(test_file, test_bucket_name + "/temp", callback=cb) + assert cb.size == os.stat(test_file).st_size + assert cb.value == cb.size + + +@pytest.mark.parametrize("size", [2**10, 2**20, 10 * 2**20]) def test_pipe_cat_big(s3, size): data = b"1234567890A" * size s3.pipe(test_bucket_name + "/bigfile", data) @@ -1031,7 +1091,7 @@ f.write(b"hello") assert s3.cat(test_bucket_name + "/test") == b"hello" s3.open(test_bucket_name + "/test", "wb").close() - assert s3.info(test_bucket_name + "/test")["Size"] == 0 + assert s3.info(test_bucket_name + "/test")["size"] == 0 def test_write_small_with_acl(s3): @@ -1058,7 +1118,7 @@ def test_write_large(s3): "flush() chunks buffer when processing large singular payload" - mb = 2 ** 20 + mb = 2**20 payload_size = int(2.5 * 5 * mb) payload = b"0" * payload_size @@ -1066,12 +1126,12 @@ fd.write(payload) assert 
@@ -1066,12 +1126,12 @@
         fd.write(payload)
 
     assert s3.cat(test_bucket_name + "/test") == payload
-    assert s3.info(test_bucket_name + "/test")["Size"] == payload_size
+    assert s3.info(test_bucket_name + "/test")["size"] == payload_size
 
 
 def test_write_limit(s3):
     "flush() respects part_max when processing large singular payload"
-    mb = 2 ** 20
+    mb = 2**20
     block_size = 15 * mb
     payload_size = 44 * mb
     payload = b"0" * payload_size
@@ -1081,7 +1141,7 @@
 
     assert s3.cat(test_bucket_name + "/test") == payload
 
-    assert s3.info(test_bucket_name + "/test")["Size"] == payload_size
+    assert s3.info(test_bucket_name + "/test")["size"] == payload_size
 
 
 def test_write_small_secure(s3):
@@ -1105,9 +1165,9 @@
     s3.mkdir("mybucket")
 
     with s3.open("mybucket/myfile", "wb") as f:
-        f.write(b"hello hello" * 10 ** 6)
+        f.write(b"hello hello" * 10**6)
 
-    assert s3.cat("mybucket/myfile") == b"hello hello" * 10 ** 6
+    assert s3.cat("mybucket/myfile") == b"hello hello" * 10**6
 
 
 def test_write_fails(s3):
@@ -1126,21 +1186,21 @@
 
 def test_write_blocks(s3):
     with s3.open(test_bucket_name + "/temp", "wb") as f:
-        f.write(b"a" * 2 * 2 ** 20)
-        assert f.buffer.tell() == 2 * 2 ** 20
+        f.write(b"a" * 2 * 2**20)
+        assert f.buffer.tell() == 2 * 2**20
         assert not (f.parts)
         f.flush()
-        assert f.buffer.tell() == 2 * 2 ** 20
+        assert f.buffer.tell() == 2 * 2**20
         assert not (f.parts)
-        f.write(b"a" * 2 * 2 ** 20)
-        f.write(b"a" * 2 * 2 ** 20)
+        f.write(b"a" * 2 * 2**20)
+        f.write(b"a" * 2 * 2**20)
         assert f.mpu
         assert f.parts
-    assert s3.info(test_bucket_name + "/temp")["Size"] == 6 * 2 ** 20
-    with s3.open(test_bucket_name + "/temp", "wb", block_size=10 * 2 ** 20) as f:
-        f.write(b"a" * 15 * 2 ** 20)
+    assert s3.info(test_bucket_name + "/temp")["size"] == 6 * 2**20
+    with s3.open(test_bucket_name + "/temp", "wb", block_size=10 * 2**20) as f:
+        f.write(b"a" * 15 * 2**20)
         assert f.buffer.tell() == 0
-    assert s3.info(test_bucket_name + "/temp")["Size"] == 15 * 2 ** 20
+    assert s3.info(test_bucket_name + "/temp")["size"] == 15 * 2**20
 
 
 def test_readline(s3):
@@ -1164,7 +1224,7 @@
 
 def test_readline_blocksize(s3):
-    data = b"ab\n" + b"a" * (10 * 2 ** 20) + b"\nab"
+    data = b"ab\n" + b"a" * (10 * 2**20) + b"\nab"
     with s3.open(a, "wb") as f:
         f.write(data)
     with s3.open(a, "rb") as f:
@@ -1173,7 +1233,7 @@
         assert result == expected
 
         result = f.readline()
-        expected = b"a" * (10 * 2 ** 20) + b"\n"
+        expected = b"a" * (10 * 2**20) + b"\n"
         assert result == expected
 
         result = f.readline()
@@ -1235,12 +1295,12 @@
 
 def test_merge(s3):
     with s3.open(a, "wb") as f:
-        f.write(b"a" * 10 * 2 ** 20)
+        f.write(b"a" * 10 * 2**20)
 
     with s3.open(b, "wb") as f:
-        f.write(b"a" * 10 * 2 ** 20)
+        f.write(b"a" * 10 * 2**20)
 
     s3.merge(test_bucket_name + "/joined", [a, b])
-    assert s3.info(test_bucket_name + "/joined")["Size"] == 2 * 10 * 2 ** 20
+    assert s3.info(test_bucket_name + "/joined")["size"] == 2 * 10 * 2**20
 
 
 def test_append(s3):
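The test_append changes in the next hunk exercise both append paths in S3File: S3 itself has no append, so a small existing object is re-downloaded and rewritten, while one at or above the 5 MiB part minimum becomes the first part of a new multipart upload. The decision in isolation; names here are invented for illustration:

    # PART_MIN mirrors S3File.part_min from the diff.
    PART_MIN = 5 * 2**20

    def append_strategy(existing_size: int) -> str:
        if existing_size < PART_MIN:
            # too small to serve as a multipart part: download + rewrite
            return "rewrite"
        # large enough: keep the existing bytes as part 1 of a new
        # multipart upload and append further parts after them
        return "multipart-append"

    print(append_strategy(5))           # rewrite
    print(append_strategy(10 * 2**20))  # multipart-append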
f.write(b"b" * 10 * 2**20) # append, big write, big file + assert f.tell() == 20 * 2**20 + 5 + assert s3.cat(a) == b"a" * 10 * 2**20 + b"extra" + b"b" * 10 * 2**20 + + # Keep Head Metadata + head = dict( + CacheControl="public", + ContentDisposition="string", + ContentEncoding="gzip", + ContentLanguage="ru-RU", + ContentType="text/csv", + Expires=datetime.datetime(2015, 1, 1, 0, 0, tzinfo=tzutc()), + Metadata={"string": "string"}, + ServerSideEncryption="AES256", + StorageClass="REDUCED_REDUNDANCY", + WebsiteRedirectLocation="https://www.example.com/", + BucketKeyEnabled=False, + ) + with s3.open(a, "wb", **head) as f: + f.write(b"data") with s3.open(a, "ab") as f: - assert f.tell() == 10 * 2 ** 20 + 5 - f.write(b"b" * 10 * 2 ** 20) # append, big write, big file - assert f.tell() == 20 * 2 ** 20 + 5 - assert s3.cat(a) == b"a" * 10 * 2 ** 20 + b"extra" + b"b" * 10 * 2 ** 20 + f.write(b"other") + + with s3.open(a) as f: + filehead = { + k: v + for k, v in f._call_s3( + "head_object", f.kwargs, Bucket=f.bucket, Key=f.key + ).items() + if k in head + } + assert filehead == head def test_bigger_than_block_read(s3): @@ -1308,7 +1398,6 @@ return id(s3.s3) -@pytest.mark.skipif(sys.version_info[:2] < (3, 7), reason="ctx method only >py37") @pytest.mark.parametrize( "method", [ @@ -1363,14 +1452,14 @@ path = "s3://test/prefix/key" with s3.open(path, "wb") as f: - f.write(b"a" * (10 * 2 ** 20)) + f.write(b"a" * (10 * 2**20)) with s3.open(path, "ab") as f: - f.write(b"b" * (10 * 2 ** 20)) + f.write(b"b" * (10 * 2**20)) def test_multipart_upload_blocksize(s3): - blocksize = 5 * (2 ** 20) + blocksize = 5 * (2**20) expected_parts = 3 s3f = s3.open(a, "wb", block_size=blocksize) @@ -1584,26 +1673,26 @@ S3FileSystem.cachable = False # don't reuse instances with same pars fs_default = S3FileSystem(client_kwargs={"endpoint_url": endpoint_uri}) - assert fs_default.default_block_size == 5 * (1024 ** 2) + assert fs_default.default_block_size == 5 * (1024**2) fs_overridden = S3FileSystem( - default_block_size=64 * (1024 ** 2), + default_block_size=64 * (1024**2), client_kwargs={"endpoint_url": endpoint_uri}, ) - assert fs_overridden.default_block_size == 64 * (1024 ** 2) + assert fs_overridden.default_block_size == 64 * (1024**2) # Suppose I want all subsequent file systems to have a block size of 1 GiB # instead of 5 MiB: - S3FileSystem.default_block_size = 1024 ** 3 + S3FileSystem.default_block_size = 1024**3 fs_big = S3FileSystem(client_kwargs={"endpoint_url": endpoint_uri}) - assert fs_big.default_block_size == 1024 ** 3 + assert fs_big.default_block_size == 1024**3 # Test the other file systems created to see if their block sizes changed - assert fs_overridden.default_block_size == 64 * (1024 ** 2) - assert fs_default.default_block_size == 5 * (1024 ** 2) + assert fs_overridden.default_block_size == 64 * (1024**2) + assert fs_default.default_block_size == 5 * (1024**2) finally: - S3FileSystem.default_block_size = 5 * (1024 ** 2) + S3FileSystem.default_block_size = 5 * (1024**2) S3FileSystem.cachable = True @@ -1841,7 +1930,6 @@ s3.modified(path=test_bucket_name) -@pytest.mark.skipif(sys.version_info < (3, 7), reason="no asyncio.run in py36") def test_async_s3(s3): async def _(): s3 = S3FileSystem( @@ -1888,7 +1976,6 @@ assert s3.cat_file(fn, start=-5) == data[-5:] -@pytest.mark.skipif(sys.version_info < (3, 7), reason="no asyncio.run in py36") def test_async_s3_old(s3): async def _(): s3 = S3FileSystem( @@ -1945,7 +2032,6 @@ nana.to_dataset().to_zarr(store=s3store, mode="w", consolidated=True, compute=True) 
 
 
-@pytest.mark.skipif(sys.version_info < (3, 7), reason="no asyncio.run in py36")
 def test_async_close():
     async def _():
         loop = asyncio.get_event_loop()
@@ -2096,12 +2182,12 @@
 def test_s3fs_etag_preserving_multipart_copy(monkeypatch, s3):
     # Set this to a lower value so that we can actually
     # test this without creating giant objects in memory
-    monkeypatch.setattr(s3fs.core, "MANAGED_COPY_THRESHOLD", 5 * 2 ** 20)
+    monkeypatch.setattr(s3fs.core, "MANAGED_COPY_THRESHOLD", 5 * 2**20)
 
     test_file1 = test_bucket_name + "/test/multipart-upload.txt"
     test_file2 = test_bucket_name + "/test/multipart-upload-copy.txt"
 
-    with s3.open(test_file1, "wb", block_size=5 * 2 ** 21) as stream:
+    with s3.open(test_file1, "wb", block_size=5 * 2**21) as stream:
         for _ in range(5):
             stream.write(b"b" * (stream.blocksize + random.randrange(200)))
 
@@ -2125,7 +2211,6 @@
     s3.rm(test_file1)
 
 
-@pytest.mark.skipif(sys.version_info < (3, 7), reason="no asyncio.run in py36")
 def test_sync_from_wihin_async(s3):
     # if treating as sync but within an even loop, e.g., calling from jupyter;
     # IO happens on dedicated thread.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-2022.1.0/s3fs/utils.py new/s3fs-2022.2.0/s3fs/utils.py
--- old/s3fs-2022.1.0/s3fs/utils.py     2021-06-23 14:49:30.000000000 +0200
+++ new/s3fs-2022.2.0/s3fs/utils.py     2022-02-08 22:42:09.000000000 +0100
@@ -1,6 +1,6 @@
 import errno
 import logging
-from contextlib import contextmanager
+from contextlib import contextmanager, AsyncExitStack
 
 from botocore.exceptions import ClientError
 
@@ -15,32 +15,6 @@
     pass
 
 
-try:
-    from contextlib import AsyncExitStack
-except ImportError:
-    # Since AsyncExitStack is not available for 3.6<=
-    # we'll create a simple implementation that imitates
-    # the basic functionality.
-    class AsyncExitStack:
-        def __init__(self):
-            self.contexts = []
-
-        async def enter_async_context(self, context):
-            self.contexts.append(context)
-            return await context.__aenter__()
-
-        async def aclose(self, *args):
-            args = args or (None, None, None)
-            for context in self.contexts:
-                await context.__aexit__(*args)
-
-        async def __aenter__(self):
-            return self
-
-        async def __aexit__(self, *args):
-            await self.aclose(*args)
-
-
 class S3BucketRegionCache:
     # See https://github.com/aio-libs/aiobotocore/issues/866
     # for details.
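The block deleted above was a py36 shim; contextlib.AsyncExitStack is in the stdlib from Python 3.7 on, so with py36 dropped the plain import suffices. A self-contained sketch of the stdlib class doing what the shim imitated:

    import asyncio
    from contextlib import AsyncExitStack, asynccontextmanager

    @asynccontextmanager
    async def session(name):
        print("open", name)
        try:
            yield name
        finally:
            print("close", name)

    async def main():
        async with AsyncExitStack() as stack:
            a = await stack.enter_async_context(session("a"))
            b = await stack.enter_async_context(session("b"))
            print("using", a, b)
        # contexts exit in reverse order: close b, then close a

    asyncio.run(main())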
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-2022.1.0/s3fs.egg-info/PKG-INFO new/s3fs-2022.2.0/s3fs.egg-info/PKG-INFO
--- old/s3fs-2022.1.0/s3fs.egg-info/PKG-INFO    2022-01-11 20:52:38.000000000 +0100
+++ new/s3fs-2022.2.0/s3fs.egg-info/PKG-INFO    2022-02-22 18:52:20.000000000 +0100
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: s3fs
-Version: 2022.1.0
+Version: 2022.2.0
 Summary: Convenient Filesystem interface over S3
 Home-page: http://github.com/fsspec/s3fs/
 Maintainer: Martin Durant
@@ -31,10 +31,9 @@
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: BSD License
 Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Python :: 3.6
 Classifier: Programming Language :: Python :: 3.7
 Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
-Requires-Python: >= 3.6
+Requires-Python: >= 3.7
 Provides-Extra: awscli
 Provides-Extra: boto3
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-2022.1.0/s3fs.egg-info/SOURCES.txt new/s3fs-2022.2.0/s3fs.egg-info/SOURCES.txt
--- old/s3fs-2022.1.0/s3fs.egg-info/SOURCES.txt 2022-01-11 20:52:38.000000000 +0100
+++ new/s3fs-2022.2.0/s3fs.egg-info/SOURCES.txt 2022-02-22 18:52:20.000000000 +0100
@@ -18,6 +18,7 @@
 docs/source/api.rst
 docs/source/changelog.rst
 docs/source/conf.py
+docs/source/development.rst
 docs/source/index.rst
 docs/source/install.rst
 s3fs/__init__.py
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-2022.1.0/s3fs.egg-info/requires.txt new/s3fs-2022.2.0/s3fs.egg-info/requires.txt
--- old/s3fs-2022.1.0/s3fs.egg-info/requires.txt        2022-01-11 20:52:38.000000000 +0100
+++ new/s3fs-2022.2.0/s3fs.egg-info/requires.txt        2022-02-22 18:52:20.000000000 +0100
@@ -1,5 +1,5 @@
 aiobotocore~=2.1.0
-fsspec==2022.01.0
+fsspec==2022.02.0
 aiohttp<=4
 
 [awscli]
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-2022.1.0/setup.py new/s3fs-2022.2.0/setup.py
--- old/s3fs-2022.1.0/setup.py  2021-11-05 20:51:13.000000000 +0100
+++ new/s3fs-2022.2.0/setup.py  2022-02-08 22:42:09.000000000 +0100
@@ -19,7 +19,6 @@
     'Intended Audience :: Developers',
     'License :: OSI Approved :: BSD License',
     'Operating System :: OS Independent',
-    'Programming Language :: Python :: 3.6',
     'Programming Language :: Python :: 3.7',
     'Programming Language :: Python :: 3.8',
     'Programming Language :: Python :: 3.9',
@@ -31,7 +30,7 @@
     license='BSD',
     keywords='s3, boto',
     packages=['s3fs'],
-    python_requires='>= 3.6',
+    python_requires='>= 3.7',
     install_requires=[open('requirements.txt').read().strip().split('\n')],
     extras_require={
         'awscli': [f"aiobotocore[awscli]{aiobotocore_version_suffix}"],
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-2022.1.0/test_requirements.txt new/s3fs-2022.2.0/test_requirements.txt
--- old/s3fs-2022.1.0/test_requirements.txt     2021-04-14 15:35:30.000000000 +0200
+++ new/s3fs-2022.2.0/test_requirements.txt     2022-02-22 18:51:29.000000000 +0100
@@ -1,5 +1,5 @@
 mock; python_version < '3.3'
-moto>=2.0.0
+moto>=2,<3
 flask
 pytest>=4.2.0
 pytest-env
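The tightened test pin (moto>=2,<3) can be sanity-checked with the packaging library, which the test suite already imports; the version numbers below are illustrative:

    from packaging.specifiers import SpecifierSet

    spec = SpecifierSet(">=2,<3")
    print("2.3.1" in spec)  # True  - moto 2.x still allowed
    print("3.0.0" in spec)  # False - moto 3.x now excluded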