Hello community,

here is the log from the commit of package python-s3fs for openSUSE:Factory checked in at 2019-10-30 14:47:51
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-s3fs (Old)
 and      /work/SRC/openSUSE:Factory/.python-s3fs.new.2990 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "python-s3fs"

Wed Oct 30 14:47:51 2019 rev:4 rq:743964 version:0.3.5

Changes:
--------
--- /work/SRC/openSUSE:Factory/python-s3fs/python-s3fs.changes  2019-08-19 23:02:08.311454091 +0200
+++ /work/SRC/openSUSE:Factory/.python-s3fs.new.2990/python-s3fs.changes       2019-10-30 14:47:58.202191945 +0100
@@ -1,0 +2,7 @@
+Tue Sep 24 11:00:09 UTC 2019 - Tomáš Chvátal <tchva...@suse.com>
+
+- Update to 0.3.5:
+  * Test expansion
+  * Minor bugfixes
+
+-------------------------------------------------------------------

Old:
----
  s3fs-0.3.3.tar.gz

New:
----
  s3fs-0.3.5.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ python-s3fs.spec ++++++
--- /var/tmp/diff_new_pack.JvEAYm/_old  2019-10-30 14:48:00.014193871 +0100
+++ /var/tmp/diff_new_pack.JvEAYm/_new  2019-10-30 14:48:00.022193880 +0100
@@ -19,17 +19,16 @@
 %{?!python_module:%define python_module() python-%{**} python3-%{**}}
 %define skip_python2 1
 Name:           python-s3fs
-Version:        0.3.3
+Version:        0.3.5
 Release:        0
 Summary:        Python filesystem interface over S3
 License:        BSD-3-Clause
-Group:          Development/Languages/Python
 URL:            https://github.com/dask/s3fs/
 Source:         https://files.pythonhosted.org/packages/source/s/s3fs/s3fs-%{version}.tar.gz
 BuildRequires:  %{python_module boto3 >= 1.9.91}
 BuildRequires:  %{python_module botocore >= 1.12.91}
 BuildRequires:  %{python_module fsspec >= 0.2.2}
-BuildRequires:  %{python_module moto >= 1.3.7}
+BuildRequires:  %{python_module moto >= 1.3.12}
 BuildRequires:  %{python_module pytest >= 4.2.0}
 BuildRequires:  %{python_module setuptools}
 BuildRequires:  fdupes

++++++ s3fs-0.3.3.tar.gz -> s3fs-0.3.5.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-0.3.3/PKG-INFO new/s3fs-0.3.5/PKG-INFO
--- old/s3fs-0.3.3/PKG-INFO     2019-08-08 15:06:16.000000000 +0200
+++ new/s3fs-0.3.5/PKG-INFO     2019-10-06 18:26:35.000000000 +0200
@@ -1,6 +1,6 @@
 Metadata-Version: 1.2
 Name: s3fs
-Version: 0.3.3
+Version: 0.3.5
 Summary: Convenient Filesystem interface over S3
 Home-page: http://github.com/dask/s3fs/
 Maintainer: Martin Durant
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-0.3.3/docs/source/index.rst new/s3fs-0.3.5/docs/source/index.rst
--- old/s3fs-0.3.3/docs/source/index.rst        2019-08-04 22:54:14.000000000 +0200
+++ new/s3fs-0.3.5/docs/source/index.rst        2019-09-09 15:14:23.000000000 +0200
@@ -78,6 +78,13 @@
 - no permissions/access-control (i.e., no chmod/chown methods)
 
 
+Logging
+-------
+
+The logger ``s3fs.core.logger`` provides information about the operations of the
+file system. To see messages, set its level to DEBUG. You can also achieve this via
+an environment variable ``S3FS_LOGGING_LEVEL=DEBUG``.
+
 Credentials
 -----------
 
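A quick note on the logging hook documented above: it can be exercised roughly as
follows. This is a minimal sketch, not part of the package; the bucket name is
invented, and the setup simply mirrors the logger configuration visible in core.py
further down in this diff.

    import logging
    import s3fs

    # Option 1: raise the level of the named s3fs logger directly.
    logging.getLogger('s3fs').setLevel(logging.DEBUG)

    # Option 2: export S3FS_LOGGING_LEVEL=DEBUG before s3fs is imported;
    # core.py reads the variable at import time.

    fs = s3fs.S3FileSystem(anon=True)
    fs.ls('some-public-bucket')   # hypothetical bucket; DEBUG lines show each S3 call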
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-0.3.3/s3fs/_version.py new/s3fs-0.3.5/s3fs/_version.py
--- old/s3fs-0.3.3/s3fs/_version.py     2019-08-08 15:06:16.000000000 +0200
+++ new/s3fs-0.3.5/s3fs/_version.py     2019-10-06 18:26:35.000000000 +0200
@@ -8,11 +8,11 @@
 
 version_json = '''
 {
- "date": "2019-08-08T09:02:10-0400",
+ "date": "2019-10-06T11:15:43-0400",
  "dirty": false,
  "error": null,
- "full-revisionid": "990ceebb5ba73030819ddd09d5696506f0f865d7",
- "version": "0.3.3"
+ "full-revisionid": "571a6463ac7aaaf1a6f80ee776e79e3b0d76a4f4",
+ "version": "0.3.5"
 }
 '''  # END VERSION_JSON
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-0.3.3/s3fs/core.py new/s3fs-0.3.5/s3fs/core.py
--- old/s3fs-0.3.3/s3fs/core.py 2019-08-08 14:57:46.000000000 +0200
+++ new/s3fs-0.3.5/s3fs/core.py 2019-10-06 17:15:47.000000000 +0200
@@ -1,7 +1,8 @@
 # -*- coding: utf-8 -*-
-import errno
 import logging
+import os
 import socket
+import time
 from hashlib import md5
 
 from fsspec import AbstractFileSystem
@@ -13,7 +14,14 @@
 from s3fs.errors import translate_boto_error
 from s3fs.utils import ParamKwargsHelper
 
-logger = logging.getLogger(__name__)
+logger = logging.getLogger('s3fs')
+handle = logging.StreamHandler()
+formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s '
+                              '- %(message)s')
+handle.setFormatter(formatter)
+logger.addHandler(handle)
+if "S3FS_LOGGING_LEVEL" in os.environ:
+    logger.setLevel(os.environ["S3FS_LOGGING_LEVEL"])
 
 logging.getLogger('boto3').setLevel(logging.WARNING)
 logging.getLogger('botocore').setLevel(logging.WARNING)
@@ -100,12 +108,16 @@
     client_kwargs : dict of parameters for the boto3 client
     requester_pays : bool (False)
         If RequesterPays buckets are supported.
-    default_block_size: None, int
+    default_block_size: int (None)
         If given, the default block size value used for ``open()``, if no
         specific value is given at all time. The built-in default is 5MB.
     default_fill_cache : Bool (True)
         Whether to use cache filling with open by default. Refer to
         ``S3File.open``.
+    default_cache_type : string ('bytes')
+        If given, the default cache_type value used for ``open()``. Set to "none"
+        if no caching is desired. See fsspec's documentation for other available
+        cache_type values. Default cache_type is 'bytes'.
     version_aware : bool (False)
         Whether to support bucket versioning.  If enable this will require the
         user to have the necessary IAM permissions for dealing with versioned
@@ -135,7 +147,7 @@
     def __init__(self, anon=False, key=None, secret=None, token=None,
                  use_ssl=True, client_kwargs=None, requester_pays=False,
                  default_block_size=None, default_fill_cache=True,
-                 version_aware=False, config_kwargs=None,
+                 default_cache_type='bytes', version_aware=False, config_kwargs=None,
                  s3_additional_kwargs=None, session=None, username=None,
                  password=None, **kwargs):
         if key and username:
@@ -164,6 +176,7 @@
             config_kwargs = {}
         self.default_block_size = default_block_size or self.default_block_size
         self.default_fill_cache = default_fill_cache
+        self.default_cache_type = default_cache_type
         self.version_aware = version_aware
         self.client_kwargs = client_kwargs
         self.config_kwargs = config_kwargs
@@ -177,6 +190,9 @@
         return self._kwargs_helper.filter_dict(s3_method.__name__, kwargs)
 
     def _call_s3(self, method, *akwarglist, **kwargs):
+        kw2 = kwargs.copy()
+        kw2.pop('Body', None)
+        logger.debug("CALL: %s - %s - %s" % (method.__name__, akwarglist, kw2))
         additional_kwargs = self._get_s3_method_kwargs(method, *akwarglist,
                                                        **kwargs)
         return method(**additional_kwargs)
@@ -223,6 +239,7 @@
                 self.session = boto3.Session(self.key, self.secret, self.token,
                                              **self.kwargs)
 
+        logger.debug("Setting up s3fs instance")
         self.s3 = self.session.client('s3', config=conf, use_ssl=ssl,
                                       **self.client_kwargs)
         return self.s3
@@ -253,7 +270,7 @@
                 'token': cred['SessionToken'], 'anon': False}
 
     def _open(self, path, mode='rb', block_size=None, acl='', version_id=None,
-              fill_cache=None, cache_type='bytes', autocommit=True, **kwargs):
+              fill_cache=None, cache_type=None, autocommit=True, **kwargs):
         """ Open a file for reading or writing
 
         Parameters
@@ -280,7 +297,8 @@
             The encoding to use if opening the file in text mode. The platform's
             default text encoding is used if not given.
         cache_type : str
-            "bytes", "mmap" or "none"
+            See fsspec's documentation for available cache_type values. Set to "none"
+            if no caching is desired. If None, defaults to ``self.default_cache_type``.
         kwargs: dict-like
             Additional parameters used for s3 methods.  Typically used for
             ServerSideEncryption.
@@ -297,6 +315,9 @@
             raise ValueError("version_id cannot be specified if the filesystem "
                              "is not version aware")
 
+        if cache_type is None:
+            cache_type = self.default_cache_type
+
         return S3File(self, path, mode, block_size=block_size, acl=acl,
                       version_id=version_id, fill_cache=fill_cache,
                       s3_additional_kwargs=kw, cache_type=cache_type,
@@ -310,6 +331,7 @@
         prefix = prefix + '/' if prefix else ""
         if path not in self.dircache or refresh:
             try:
+                logger.debug("Get directory listing page for %s" % path)
                 pag = self.s3.get_paginator('list_objects_v2')
                 config = {}
                 if max_items is not None:
@@ -443,6 +465,17 @@
             except FileNotFoundError:
                 return False
 
+    def touch(self, path, truncate=True, data=None, **kwargs):
+        """Create empty file or truncate"""
+        bucket, key = split_path(path)
+        if not truncate and self.exists(path):
+            raise ValueError("S3 does not support touching existent files")
+        try:
+            self._call_s3(self.s3.put_object, kwargs, Bucket=bucket, Key=key)
+        except ClientError as ex:
+            raise translate_boto_error(ex)
+        self.invalidate_cache(self._parent(path))
+
     def info(self, path, version_id=None):
         if path in ['/', '']:
             return {'name': path, 'size': 0, 'type': 'directory'}
@@ -912,6 +945,8 @@
         self.key = key
         self.version_id = version_id
         self.acl = acl
+        if self.acl and self.acl not in key_acls:
+            raise ValueError('ACL not in %s', key_acls)
         self.mpu = None
         self.parts = None
         self.fill_cache = fill_cache
@@ -929,20 +964,20 @@
                 self.size = self.details['size']
             elif self.fs.version_aware:
                 self.version_id = self.details.get('VersionId')
-                # In this case we have not managed to get the VersionId out of details and 
-                # we should invalidate the cache and perform a full head_object since it 
+                # In this case we have not managed to get the VersionId out of details and
+                # we should invalidate the cache and perform a full head_object since it
                 # has likely been partially populated by ls.
                 if self.version_id is None:
                     self.fs.invalidate_cache(self.path)
                     self.details = self.fs.info(self.path)
                     self.version_id = self.details.get('VersionId')
 
+        self.append_block = False
         if 'a' in mode and s3.exists(path):
             loc = s3.info(path)['size']
             if loc < 5 * 2 ** 20:
                 # existing file too small for multi-upload: download
                 self.write(self.fs.cat(self.path))
-                self.append_block = False
             else:
                 self.append_block = True
             self.loc = loc
@@ -952,12 +987,11 @@
                                 **kwargs)
 
     def _initiate_upload(self):
-        if self.acl and self.acl not in key_acls:
-            raise ValueError('ACL not in %s', key_acls)
+        if not self.append_block and self.tell() < self.blocksize:
+            # only happens when closing small file, use on-shot PUT
+            return
+        logger.debug("Initiate upload for %s" % self)
         self.parts = []
-        self.size = 0
-        if self.blocksize < 5 * 2 ** 20:
-            raise ValueError('Block size must be >=5MB')
         try:
             self.mpu = self._call_s3(
                 self.fs.s3.create_multipart_upload,
@@ -967,20 +1001,19 @@
         except ParamValidationError as e:
             raise ValueError('Initiating write to %r failed: %s' % (self.path, e))
 
-        if 'a' in self.mode and self.fs.exists(self.path):
-            if self.append_block:
-                # use existing data in key when appending,
-                # and block is big enough
-                out = self.fs._call_s3(
-                    self.fs.s3.upload_part_copy,
-                    self.s3_additional_kwargs,
-                    Bucket=self.bucket,
-                    Key=self.key,
-                    PartNumber=1,
-                    UploadId=self.mpu['UploadId'],
-                    CopySource=self.path)
-                self.parts.append({'PartNumber': 1,
-                                   'ETag': out['CopyPartResult']['ETag']})
+        if self.append_block:
+            # use existing data in key when appending,
+            # and block is big enough
+            out = self.fs._call_s3(
+                self.fs.s3.upload_part_copy,
+                self.s3_additional_kwargs,
+                Bucket=self.bucket,
+                Key=self.key,
+                PartNumber=1,
+                UploadId=self.mpu['UploadId'],
+                CopySource=self.path)
+            self.parts.append({'PartNumber': 1,
+                               'ETag': out['CopyPartResult']['ETag']})
 
     def metadata(self, refresh=False, **kwargs):
         """ Return metadata of file.
@@ -1024,8 +1057,16 @@
 
     def _upload_chunk(self, final=False):
         bucket, key = split_path(self.path)
-        self.buffer.seek(0)
-        (data0, data1) = (None, self.buffer.read(self.blocksize))
+        logger.debug("Upload for %s, final=%s, loc=%s, buffer loc=%s" % (
+            self, final, self.loc, self.buffer.tell()
+        ))
+        if not self.append_block and final and self.tell() < self.blocksize:
+            # only happens when closing small file, use on-shot PUT
+            data1 = False
+        else:
+            self.buffer.seek(0)
+            (data0, data1) = (None, self.buffer.read(self.blocksize))
+
         while data1:
             (data0, data1) = (data1, self.buffer.read(self.blocksize))
             data1_size = len(data1)
@@ -1041,6 +1082,7 @@
                     (data0, data1) = (remainder[:partition], remainder[partition:])
 
             part = len(self.parts) + 1
+            logger.debug("Upload chunk %s, %s" % (self, part))
 
             for attempt in range(self.retries + 1):
                 try:
@@ -1054,6 +1096,7 @@
                     if attempt < self.retries:
                         logger.debug('Exception %r on S3 write, retrying', exc,
                                      exc_info=True)
+                    time.sleep(1.7**attempt * 0.1)
                 except Exception as exc:
                     raise IOError('Write failed: %r' % exc)
             else:
@@ -1063,21 +1106,41 @@
 
         if self.autocommit and final:
             self.commit()
+        return not final
 
     def commit(self):
-        logger.debug("COMMIT")
-        part_info = {'Parts': self.parts}
-        write_result = self._call_s3(
-            self.fs.s3.complete_multipart_upload,
-            Bucket=self.bucket,
-            Key=self.key,
-            UploadId=self.mpu['UploadId'],
-            MultipartUpload=part_info)
-        if self.fs.version_aware:
-            self.version_id = write_result.get('VersionId')
+        logger.debug("Commit %s" % self)
+        if self.tell() == 0:
+            if self.buffer is not None:
+                logger.debug("Empty file committed %s" % self)
+                self._abort_mpu()
+                self.fs.touch(self.path)
+        elif not self.parts:
+            if self.buffer is not None:
+                logger.debug("One-shot upload of %s" % self)
+                self.buffer.seek(0)
+                data = self.buffer.read()
+                self._call_s3(
+                    self.fs.s3.put_object,
+                    Key=self.key, Bucket=self.bucket, Body=data, **self.kwargs
+                )
+            else:
+                raise RuntimeError
+        else:
+            logger.debug("Complete multi-part upload for %s " % self)
+            part_info = {'Parts': self.parts}
+            write_result = self._call_s3(
+                self.fs.s3.complete_multipart_upload,
+                Bucket=self.bucket,
+                Key=self.key,
+                UploadId=self.mpu['UploadId'],
+                MultipartUpload=part_info)
+            if self.fs.version_aware:
+                self.version_id = write_result.get('VersionId')
 
         # complex cache invalidation, since file's appearance can cause several
         # directories
+        self.buffer = None
         parts = self.path.split('/')
         path = parts[0]
         for p in parts[1:]:
@@ -1088,20 +1151,36 @@
             path = path + '/' + p
 
     def discard(self):
-        if self.autocommit:
-            raise ValueError("Cannot discard when autocommit is enabled")
-        self._call_s3(
-            self.fs.s3.abort_multipart_upload,
-            Bucket=self.bucket,
-            Key=self.key,
-            UploadId=self.mpu['UploadId'],
-        )
+        self._abort_mpu()
+        self.buffer = None  # file becomes unusable
+
+    def _abort_mpu(self):
+        if self.mpu:
+            self._call_s3(
+                self.fs.s3.abort_multipart_upload,
+                Bucket=self.bucket,
+                Key=self.key,
+                UploadId=self.mpu['UploadId'],
+            )
+            self.mpu = None
 
 
 def _fetch_range(client, bucket, key, version_id, start, end, max_attempts=10,
                  req_kw=None):
     if req_kw is None:
         req_kw = {}
+    if start == end:
+        # When these match, we would make a request with `range=start-end - 1`
+        # According to RFC2616, servers are supposed to ignore the Range
+        # field when it's invalid like this. S3 does ignore it, moto doesn't.
+        # To avoid differences in behavior under mocking, we just avoid
+        # making these requests. It's hoped that since we're being called
+        # from a caching object, this won't end up mattering.
+        logger.debug(
+            'skip fetch for negative range - bucket=%s,key=%s,start=%d,end=%d',
+            bucket, key, start, end
+        )
+        return b''
     logger.debug("Fetch: %s/%s, %s-%s", bucket, key, start, end)
     for i in range(max_attempts):
         try:
@@ -1116,10 +1195,12 @@
         except S3_RETRYABLE_ERRORS as e:
             logger.debug('Exception %r on S3 download, retrying', e,
                          exc_info=True)
+            time.sleep(1.7**i * 0.1)
             continue
         except ConnectionError as e:
             logger.debug('ConnectionError %r on S3 download, retrying', e,
                          exc_info=True)
+            time.sleep(1.7**i * 0.1)
             continue
         except ClientError as e:
             if e.response['Error'].get('Code', 'Unknown') in ['416',
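The default_cache_type plumbing added above can be checked with a snippet like the
following. This is only a sketch: the object path is invented, and the behaviour is
taken from the docstrings in this diff (a per-call cache_type overrides the default,
and None falls back to the filesystem-level value).

    import s3fs

    # "none", "bytes" and "mmap" are among the cache_type values fsspec accepts;
    # the filesystem-level default applies whenever open() is not given one.
    fs = s3fs.S3FileSystem(anon=False, default_cache_type='mmap')

    with fs.open('my-bucket/big-file.bin', 'rb') as f:   # hypothetical key
        header = f.read(1024)                            # read served through the mmap cache

    # An explicit cache_type still wins over the default:
    with fs.open('my-bucket/big-file.bin', 'rb', cache_type='none') as f:
        chunk = f.read(4096)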
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-0.3.3/s3fs/tests/test_s3fs.py new/s3fs-0.3.5/s3fs/tests/test_s3fs.py
--- old/s3fs-0.3.3/s3fs/tests/test_s3fs.py      2019-08-08 00:14:52.000000000 +0200
+++ new/s3fs-0.3.5/s3fs/tests/test_s3fs.py      2019-08-27 20:52:18.000000000 +0200
@@ -4,10 +4,10 @@
 import json
 from concurrent.futures import ProcessPoolExecutor
 import io
-import re
 import time
 import pytest
 from itertools import chain
+import fsspec.core
 from s3fs.core import S3FileSystem
 from s3fs.utils import seek_delimiter, ignoring, SSEParams
 import moto
@@ -134,6 +134,21 @@
         assert out == data
 
 
+@pytest.mark.parametrize('default_cache_type', ['none', 'bytes', 'mmap'])
+def test_default_cache_type(s3, default_cache_type):
+    data = b'a' * (10 * 2 ** 20)
+    s3 = S3FileSystem(anon=False, default_cache_type=default_cache_type)
+
+    with s3.open(a, 'wb') as f:
+        f.write(data)
+
+    with s3.open(a, 'rb') as f:
+        assert isinstance(f.cache, fsspec.core.caches[default_cache_type])
+        out = f.read(len(data))
+        assert len(data) == len(out)
+        assert out == data
+
+
 def test_ssl_off():
     s3 = S3FileSystem(use_ssl=False)
     assert s3.s3.meta.endpoint_url.startswith('http://')
@@ -1211,6 +1226,7 @@
         S3FileSystem.default_block_size = 5 * (1024 ** 2)
         S3FileSystem.cachable = True
 
+
 def test_passed_in_session_set_correctly(s3):
     session = boto3.session.Session()
     s3 = S3FileSystem(session=session)
@@ -1279,3 +1295,46 @@
     # Cannot commit a file that was discarded
     with pytest.raises(Exception):
         fo.commit()
+
+
+def test_touch(s3):
+    # create
+    fn = test_bucket_name + "/touched"
+    assert not s3.exists(fn)
+    s3.touch(fn)
+    assert s3.exists(fn)
+    assert s3.size(fn) == 0
+
+    # truncates
+    with s3.open(fn, 'wb') as f:
+        f.write(b'data')
+    assert s3.size(fn) == 4
+    s3.touch(fn, truncate=True)
+    assert s3.size(fn) == 0
+
+    # exists error
+    with s3.open(fn, 'wb') as f:
+        f.write(b'data')
+    assert s3.size(fn) == 4
+    with pytest.raises(ValueError):
+        s3.touch(fn, truncate=False)
+    assert s3.size(fn) == 4
+
+
+def test_seek_reads(s3):
+    fn = test_bucket_name + "/myfile"
+    with s3.open(fn, 'wb') as f:
+        f.write(b'a' * 175_627_146)
+    with s3.open(fn, 'rb', blocksize=100) as f:
+        f.seek(175561610)
+        d1 = f.read(65536)
+
+        f.seek(4)
+        size = 17562198
+        d2 = f.read(size)
+        assert len(d2) == size
+
+        f.seek(17562288)
+        size = 17562187
+        d3 = f.read(size)
+        assert len(d3) == size
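For reference, the touch() behaviour exercised by test_touch above boils down to the
following sketch (the key name is invented):

    fs.touch('my-bucket/marker')                   # creates an empty object
    fs.touch('my-bucket/marker')                   # truncate=True by default, size back to 0
    fs.touch('my-bucket/marker', truncate=False)   # ValueError: cannot touch an existing key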
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-0.3.3/s3fs.egg-info/PKG-INFO new/s3fs-0.3.5/s3fs.egg-info/PKG-INFO
--- old/s3fs-0.3.3/s3fs.egg-info/PKG-INFO       2019-08-08 15:06:16.000000000 +0200
+++ new/s3fs-0.3.5/s3fs.egg-info/PKG-INFO       2019-10-06 18:26:35.000000000 +0200
@@ -1,6 +1,6 @@
 Metadata-Version: 1.2
 Name: s3fs
-Version: 0.3.3
+Version: 0.3.5
 Summary: Convenient Filesystem interface over S3
 Home-page: http://github.com/dask/s3fs/
 Maintainer: Martin Durant
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/s3fs-0.3.3/setup.cfg new/s3fs-0.3.5/setup.cfg
--- old/s3fs-0.3.3/setup.cfg    2019-08-08 15:06:16.000000000 +0200
+++ new/s3fs-0.3.5/setup.cfg    2019-10-06 18:26:35.000000000 +0200
@@ -1,6 +1,3 @@
-[bdist_wheel]
-universal = 1
-
 [metadata]
 long_description = file: README.rst
 

