Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package python-smart-open for openSUSE:Factory checked in at 2023-10-12 23:44:26 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-smart-open (Old) and /work/SRC/openSUSE:Factory/.python-smart-open.new.1807 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-smart-open" Thu Oct 12 23:44:26 2023 rev:3 rq:1117462 version:6.4.0 Changes: -------- --- /work/SRC/openSUSE:Factory/python-smart-open/python-smart-open.changes 2023-01-14 20:32:25.545463751 +0100 +++ /work/SRC/openSUSE:Factory/.python-smart-open.new.1807/python-smart-open.changes 2023-10-12 23:47:39.312944884 +0200 @@ -1,0 +2,13 @@ +Thu Oct 12 13:26:22 UTC 2023 - Ondřej Súkup <mimi...@gmail.com> + +- Update to 6.4.0 +- force require urllib3 < 2 because of https://github.com/RaRe-Technologies/smart_open/issues/784 + * Ignore S3 seeks to the current position (PR #782, @beck3905 + * Set binary mode prior to FTP write ()PR #781, @beck3905) + * Improve S3 URI Parsing for URIs with "@", "/", and ":" (PR #776, @rileypeterson) + * Add python 3.11 to setup.py (PR #775, @tooptoop4) + * Fix retrieving empty but existing object from S3 (PR #771, @Darkheir) + * Avoid overfilling buffer when reading from Azure (PR #767, @ronreiter) + * Add required import for example to work (PR #756, @jensenbox) + +------------------------------------------------------------------- Old: ---- smart_open-6.3.0.tar.gz New: ---- smart_open-6.4.0.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-smart-open.spec ++++++ --- /var/tmp/diff_new_pack.oy5OYQ/_old 2023-10-12 23:47:40.040971245 +0200 +++ /var/tmp/diff_new_pack.oy5OYQ/_new 2023-10-12 23:47:40.040971245 +0200 @@ -16,13 +16,12 @@ # -%{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-smart-open -Version: 6.3.0 +Version: 6.4.0 Release: 0 Summary: Python utils for streaming large files License: MIT -Group: Development/Languages/Python +URL: https://github.com/RaRe-Technologies/smart_open Source: https://github.com/RaRe-Technologies/smart_open/archive/refs/tags/v%{version}.tar.gz#/smart_open-%{version}.tar.gz BuildRequires: %{python_module setuptools} BuildRequires: fdupes @@ -33,7 +32,11 @@ Requires: python-boto3 
Requires: python-google-cloud-storage Requires: python-requests +Suggests: python-paramiko BuildArch: noarch +# see https://github.com/RaRe-Technologies/smart_open/issues/784 +BuildRequires: %{python_module urllib3 < 2} +Requires: python-urllib3 < 2 # SECTION test requirements BuildRequires: %{python_module azure-common} BuildRequires: %{python_module azure-core} @@ -43,6 +46,7 @@ BuildRequires: %{python_module moto >= 1.3.4} BuildRequires: %{python_module moto-server} BuildRequires: %{python_module paramiko} +BuildRequires: %{python_module pytest-rerunfailures} BuildRequires: %{python_module pytest} BuildRequires: %{python_module requests} BuildRequires: %{python_module responses} ++++++ smart_open-6.3.0.tar.gz -> smart_open-6.4.0.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/smart_open-6.3.0/.github/workflows/python-package.yml new/smart_open-6.4.0/.github/workflows/python-package.yml --- old/smart_open-6.3.0/.github/workflows/python-package.yml 2022-12-12 08:20:26.000000000 +0100 +++ new/smart_open-6.4.0/.github/workflows/python-package.yml 2023-09-07 04:59:17.000000000 +0200 @@ -6,10 +6,10 @@ steps: - uses: actions/checkout@v2 - - name: Setup up Python 3.10 + - name: Setup up Python 3.11 uses: actions/setup-python@v2 with: - python-version: "3.10" + python-version: "3.11" - name: Update pip run: python -m pip install -U pip @@ -26,15 +26,15 @@ strategy: matrix: include: - - {python: '3.7', os: ubuntu-20.04} - {python: '3.8', os: ubuntu-20.04} - {python: '3.9', os: ubuntu-20.04} - {python: '3.10', os: ubuntu-20.04} + - {python: '3.11', os: ubuntu-20.04} - - {python: '3.7', os: windows-2019} - {python: '3.8', os: windows-2019} - {python: '3.9', os: windows-2019} - {python: '3.10', os: windows-2019} + - {python: '3.11', os: windows-2019} steps: - uses: actions/checkout@v2 @@ -63,10 +63,10 @@ strategy: matrix: include: - - {python: '3.7', os: ubuntu-20.04} - {python: '3.8', os: ubuntu-20.04} - {python: 
'3.9', os: ubuntu-20.04} - {python: '3.10', os: ubuntu-20.04} + - {python: '3.11', os: ubuntu-20.04} # # Some of the doctests don't pass on Windows because of Windows-specific @@ -105,10 +105,10 @@ strategy: matrix: include: - - {python: '3.7', os: ubuntu-20.04, moto_server: true} - {python: '3.8', os: ubuntu-20.04} - {python: '3.9', os: ubuntu-20.04} - {python: '3.10', os: ubuntu-20.04} + - {python: '3.11', os: ubuntu-20.04} # Not sure why we exclude these, perhaps for historical reasons? # @@ -159,10 +159,10 @@ strategy: matrix: include: - - {python: '3.7', os: ubuntu-20.04} - {python: '3.8', os: ubuntu-20.04} - {python: '3.9', os: ubuntu-20.04} - {python: '3.10', os: ubuntu-20.04} + - {python: '3.11', os: ubuntu-20.04} # - {python: '3.7', os: windows-2019} # - {python: '3.8', os: windows-2019} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/smart_open-6.3.0/CHANGELOG.md new/smart_open-6.4.0/CHANGELOG.md --- old/smart_open-6.3.0/CHANGELOG.md 2022-12-12 08:20:26.000000000 +0100 +++ new/smart_open-6.4.0/CHANGELOG.md 2023-09-07 04:59:17.000000000 +0200 @@ -1,5 +1,15 @@ # Unreleased +## 6.4.0, 2023-09-07 + +* Ignore S3 seeks to the current position (PR [#782](https://github.com/RaRe-Technologies/smart_open/pull/782), [@beck3905](https://github.com/beck3905)) +* Set binary mode prior to FTP write (PR [#781](https://github.com/RaRe-Technologies/smart_open/pull/781), [@beck3905](https://github.com/beck3905)) +* Improve S3 URI Parsing for URIs with "@", "/", and ":" (PR [#776](https://github.com/RaRe-Technologies/smart_open/pull/776), [@rileypeterson](https://github.com/rileypeterson)) +* Add python 3.11 to setup.py (PR [#775](https://github.com/RaRe-Technologies/smart_open/pull/775), [@tooptoop4](https://github.com/tooptoop4)) +* Fix retrieving empty but existing object from S3 (PR [#771](https://github.com/RaRe-Technologies/smart_open/pull/771), [@Darkheir](https://github.com/Darkheir)) +* Avoid overfilling buffer when 
reading from Azure (PR [#767](https://github.com/RaRe-Technologies/smart_open/pull/767), [@ronreiter](https://github.com/ronreiter)) +* Add required import for example to work (PR [#756](https://github.com/RaRe-Technologies/smart_open/pull/756), [@jensenbox](https://github.com/jensenbox)) + ## 6.3.0, 2022-12-12 * Refactor Google Cloud Storage to use blob.open (__[ddelange](https://github.com/ddelange)__, [#744](https://github.com/RaRe-Technologies/smart_open/pull/744)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/smart_open-6.3.0/README.rst new/smart_open-6.4.0/README.rst --- old/smart_open-6.3.0/README.rst 2022-12-12 08:20:26.000000000 +0100 +++ new/smart_open-6.4.0/README.rst 2023-09-07 04:59:17.000000000 +0200 @@ -151,6 +151,7 @@ .. code-block:: python >>> import os, boto3 + >>> from smart_open import open >>> >>> # stream content *into* S3 (write mode) using a custom session >>> session = boto3.Session( diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/smart_open-6.3.0/setup.py new/smart_open-6.4.0/setup.py --- old/smart_open-6.3.0/setup.py 2022-12-12 08:20:26.000000000 +0100 +++ new/smart_open-6.4.0/setup.py 2023-09-07 04:59:17.000000000 +0200 @@ -95,6 +95,7 @@ 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', 'Topic :: System :: Distributed Computing', 'Topic :: Database :: Front-Ends', ], diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/smart_open-6.3.0/smart_open/azure.py new/smart_open-6.4.0/smart_open/azure.py --- old/smart_open-6.3.0/smart_open/azure.py 2022-12-12 08:20:26.000000000 +0100 +++ new/smart_open-6.4.0/smart_open/azure.py 2023-09-07 04:59:17.000000000 +0200 @@ -306,7 +306,7 @@ if self._position == self._size: return self._read_from_buffer() - self._fill_buffer() + self._fill_buffer(size) return 
self._read_from_buffer(size) def read1(self, size=-1): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/smart_open-6.3.0/smart_open/ftp.py new/smart_open-6.4.0/smart_open/ftp.py --- old/smart_open-6.3.0/smart_open/ftp.py 2022-12-12 08:20:26.000000000 +0100 +++ new/smart_open-6.4.0/smart_open/ftp.py 2023-09-07 04:59:17.000000000 +0200 @@ -14,6 +14,7 @@ import smart_open.utils from ftplib import FTP, FTP_TLS, error_reply import types + logger = logging.getLogger(__name__) SCHEMES = ("ftp", "ftps") @@ -55,8 +56,13 @@ uri_path = parsed_uri.pop("uri_path") scheme = parsed_uri.pop("scheme") secure_conn = True if scheme == "ftps" else False - return open(uri_path, mode, secure_connection=secure_conn, - transport_params=transport_params, **parsed_uri) + return open( + uri_path, + mode, + secure_connection=secure_conn, + transport_params=transport_params, + **parsed_uri, + ) def convert_transport_params_to_args(transport_params): @@ -90,7 +96,9 @@ try: ftp.login(username, password) except error_reply as e: - logger.error("Unable to login to FTP server: try checking the username and password!") + logger.error( + "Unable to login to FTP server: try checking the username and password!" 
+ ) raise e if secure_connection: ftp.prot_p() @@ -99,7 +107,7 @@ def open( path, - mode="r", + mode="rb", host=None, user=None, password=None, @@ -146,6 +154,7 @@ except KeyError: raise ValueError(f"unsupported mode: {mode!r}") ftp_mode, file_obj_mode = mode_to_ftp_cmds[mode] + conn.voidcmd("TYPE I") socket = conn.transfercmd(f"{ftp_mode} {path}") fobj = socket.makefile(file_obj_mode) @@ -153,6 +162,7 @@ self.orig_close() self.socket.close() self.conn.close() + fobj.orig_close = fobj.close fobj.socket = socket fobj.conn = conn diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/smart_open-6.3.0/smart_open/s3.py new/smart_open-6.4.0/smart_open/s3.py --- old/smart_open-6.3.0/smart_open/s3.py 2022-12-12 08:20:26.000000000 +0100 +++ new/smart_open-6.4.0/smart_open/s3.py 2023-09-07 04:59:17.000000000 +0200 @@ -105,9 +105,20 @@ # uri = split_uri.netloc + split_uri.path - if '@' in uri and ':' in uri.split('@')[0]: - auth, uri = uri.split('@', 1) - access_id, access_secret = auth.split(':') + # + # Attempt to extract edge-case authentication details from the URL. + # + # See: + # 1. https://summitroute.com/blog/2018/06/20/aws_security_credential_formats/ + # 2. 
test_s3_uri_with_credentials* in test_smart_open.py for example edge cases + # + if '@' in uri: + maybe_auth, rest = uri.split('@', 1) + if ':' in maybe_auth: + maybe_id, maybe_secret = maybe_auth.split(':', 1) + if '/' not in maybe_id: + access_id, access_secret = maybe_id, maybe_secret + uri = rest head, key_id = uri.split('/', 1) if '@' in head and ':' in head: @@ -324,10 +335,13 @@ def _get(client, bucket, key, version, range_string): try: + params = dict(Bucket=bucket, Key=key) if version: - return client.get_object(Bucket=bucket, Key=key, VersionId=version, Range=range_string) - else: - return client.get_object(Bucket=bucket, Key=key, Range=range_string) + params["VersionId"] = version + if range_string: + params["Range"] = range_string + + return client.get_object(**params) except botocore.client.ClientError as error: wrapped_error = IOError( 'unable to access bucket: %r key: %r version: %r error: %s' % ( @@ -447,8 +461,19 @@ error_response = _unwrap_ioerror(ioe) if error_response is None or error_response.get('Code') != _OUT_OF_RANGE: raise - self._position = self._content_length = int(error_response['ActualObjectSize']) - self._body = io.BytesIO() + try: + self._position = self._content_length = int(error_response['ActualObjectSize']) + self._body = io.BytesIO() + except KeyError: + response = _get( + self._client, + self._bucket, + self._key, + self._version_id, + None, + ) + self._position = self._content_length = response["ContentLength"] + self._body = response["Body"] else: # # Keep track of how many times boto3's built-in retry mechanism @@ -461,7 +486,7 @@ self, response['ResponseMetadata']['RetryAttempts'], ) - units, start, stop, length = smart_open.utils.parse_content_range(response['ContentRange']) + _, start, stop, length = smart_open.utils.parse_content_range(response['ContentRange']) self._content_length = length self._position = start self._body = response['Body'] @@ -564,6 +589,7 @@ self._buffer = 
smart_open.bytebuffer.ByteBuffer(buffer_size) self._eof = False self._line_terminator = line_terminator + self._seek_initialized = False # # This member is part of the io.BufferedIOBase interface. @@ -663,10 +689,16 @@ whence = constants.WHENCE_START offset += self._current_pos - self._current_pos = self._raw_reader.seek(offset, whence) + if not self._seek_initialized or not ( + whence == constants.WHENCE_START and offset == self._current_pos + ): + self._current_pos = self._raw_reader.seek(offset, whence) + + self._buffer.empty() - self._buffer.empty() self._eof = self._current_pos == self._raw_reader._content_length + + self._seek_initialized = True return self._current_pos def tell(self): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/smart_open-6.3.0/smart_open/tests/test_s3.py new/smart_open-6.4.0/smart_open/tests/test_s3.py --- old/smart_open-6.3.0/smart_open/tests/test_s3.py 2022-12-12 08:20:26.000000000 +0100 +++ new/smart_open-6.4.0/smart_open/tests/test_s3.py 2023-09-07 04:59:17.000000000 +0200 @@ -73,6 +73,8 @@ error_response['ActualObjectSize'] = actual_size error_response['Code'] = 'InvalidRange' error_response['Message'] = 'The requested range is not satisfiable' + if actual_size is None: + error_response.pop('ActualObjectSize', None) raise with mock.patch('smart_open.s3._get', new=mock_get): @@ -397,6 +399,15 @@ with smart_open.s3.Reader(BUCKET_NAME, KEY_NAME) as fin: data = fin.read() + self.assertEqual(data, b'') + + def test_read_empty_file_no_actual_size(self): + _resource('s3').Object(BUCKET_NAME, KEY_NAME).put(Body=b'') + + with self.assertApiCalls(GetObject=2), patch_invalid_range_response(None): + with smart_open.s3.Reader(BUCKET_NAME, KEY_NAME) as fin: + data = fin.read() + self.assertEqual(data, b'') diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/smart_open-6.3.0/smart_open/tests/test_smart_open.py 
new/smart_open-6.4.0/smart_open/tests/test_smart_open.py --- old/smart_open-6.3.0/smart_open/tests/test_smart_open.py 2022-12-12 08:20:26.000000000 +0100 +++ new/smart_open-6.4.0/smart_open/tests/test_smart_open.py 2023-09-07 04:59:17.000000000 +0200 @@ -132,6 +132,17 @@ self.assertEqual(parsed_uri.access_id, "accessid") self.assertEqual(parsed_uri.access_secret, "access/secret") + # + # Nb. should never happen in theory, but if it does, we should avoid crashing + # + def test_s3_uri_has_colon_in_secret(self): + parsed_uri = smart_open_lib._parse_uri("s3://accessid:access/secret:totally@mybucket/my@ke@y") + self.assertEqual(parsed_uri.scheme, "s3") + self.assertEqual(parsed_uri.bucket_id, "mybucket") + self.assertEqual(parsed_uri.key_id, "my@ke@y") + self.assertEqual(parsed_uri.access_id, "accessid") + self.assertEqual(parsed_uri.access_secret, "access/secret:totally") + def test_s3_uri_has_atmark_in_key_name2(self): parsed_uri = smart_open_lib._parse_uri( "s3://accessid:access/secret@hostname:1234@mybucket/dir/my@ke@y" @@ -218,6 +229,24 @@ self.assertEqual(parsed_uri.access_id, None) self.assertEqual(parsed_uri.access_secret, None) + def test_s3_uri_with_at_symbol_in_key_name0(self): + """ Correctly parse the s3 url if there is an @ symbol (and colon) in the key or dir """ + parsed_uri = smart_open_lib._parse_uri("s3://mybucket/mydir:my@key") + self.assertEqual(parsed_uri.scheme, "s3") + self.assertEqual(parsed_uri.bucket_id, "mybucket") + self.assertEqual(parsed_uri.key_id, "mydir:my@key") + self.assertEqual(parsed_uri.access_id, None) + self.assertEqual(parsed_uri.access_secret, None) + + def test_s3_uri_with_at_symbol_in_key_name1(self): + """ Correctly parse the s3 url if there is an @ symbol (and colon) in the key or dir """ + parsed_uri = smart_open_lib._parse_uri("s3://mybucket/my:dir@my/key") + self.assertEqual(parsed_uri.scheme, "s3") + self.assertEqual(parsed_uri.bucket_id, "mybucket") + self.assertEqual(parsed_uri.key_id, "my:dir@my/key") + 
self.assertEqual(parsed_uri.access_id, None) + self.assertEqual(parsed_uri.access_secret, None) + def test_s3_uri_contains_question_mark(self): parsed_uri = smart_open_lib._parse_uri("s3://mybucket/mydir/mykey?param") self.assertEqual(parsed_uri.scheme, "s3") diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/smart_open-6.3.0/smart_open/version.py new/smart_open-6.4.0/smart_open/version.py --- old/smart_open-6.3.0/smart_open/version.py 2022-12-12 08:20:26.000000000 +0100 +++ new/smart_open-6.4.0/smart_open/version.py 2023-09-07 04:59:17.000000000 +0200 @@ -1,4 +1,4 @@ -__version__ = '6.3.0' +__version__ = '6.4.0' if __name__ == '__main__':