Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package python-fsspec for openSUSE:Factory checked in at 2024-04-07 22:13:13 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-fsspec (Old) and /work/SRC/openSUSE:Factory/.python-fsspec.new.1905 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-fsspec" Sun Apr 7 22:13:13 2024 rev:33 rq:1165946 version:2024.3.1 Changes: -------- --- /work/SRC/openSUSE:Factory/python-fsspec/python-fsspec.changes 2024-03-13 22:18:36.828349566 +0100 +++ /work/SRC/openSUSE:Factory/.python-fsspec.new.1905/python-fsspec.changes 2024-04-07 22:15:04.306526975 +0200 @@ -1,0 +2,14 @@ +Mon Mar 25 17:26:58 UTC 2024 - Dirk Müller <dmuel...@suse.com> + +- update to 2024.3.1: + * allow override of expand in open() + * root handling in local file paths, fix for windows + * coroutines throttle to stream pool rather than batches + * write transactions in simplecache + * allow deep nested refs in referenceFS/parquet + * Fixes bug (#1476) that made open_files ignore expand=False + * remove extra calling mapper contains + * connection retry for SMB + * zip64 should be on is allowZip64 is + +------------------------------------------------------------------- Old: ---- fsspec-2024.2.0.tar.gz New: ---- fsspec-2024.3.1.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-fsspec.spec ++++++ --- /var/tmp/diff_new_pack.bRelbo/_old 2024-04-07 22:15:04.702541487 +0200 +++ /var/tmp/diff_new_pack.bRelbo/_new 2024-04-07 22:15:04.702541487 +0200 @@ -29,7 +29,7 @@ %{?sle15_python_module_pythons} Name: python-fsspec%{psuffix} -Version: 2024.2.0 +Version: 2024.3.1 Release: 0 Summary: Filesystem specification package License: BSD-3-Clause ++++++ fsspec-2024.2.0.tar.gz -> fsspec-2024.3.1.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/.pre-commit-config.yaml new/filesystem_spec-2024.3.1/.pre-commit-config.yaml --- old/filesystem_spec-2024.2.0/.pre-commit-config.yaml 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/.pre-commit-config.yaml 2024-03-18 20:33:58.000000000 +0100 @@ -18,7 +18,7 @@ - id: black - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.1.13 + rev: v0.2.1 hooks: # Run the linter. - id: ruff diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/ci/environment-downstream.yml new/filesystem_spec-2024.3.1/ci/environment-downstream.yml --- old/filesystem_spec-2024.2.0/ci/environment-downstream.yml 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/ci/environment-downstream.yml 2024-03-18 20:33:58.000000000 +0100 @@ -17,3 +17,4 @@ - moto <5 - sqlalchemy<2 - flask + - dask-expr diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/ci/environment-friends.yml new/filesystem_spec-2024.3.1/ci/environment-friends.yml --- old/filesystem_spec-2024.2.0/ci/environment-friends.yml 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/ci/environment-friends.yml 2024-03-18 20:33:58.000000000 +0100 @@ -21,6 +21,7 @@ - flake8 - black - google-cloud-core + - google-cloud-storage - google-api-core - google-api-python-client - httpretty diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/ci/environment-py38.yml new/filesystem_spec-2024.3.1/ci/environment-py38.yml --- old/filesystem_spec-2024.2.0/ci/environment-py38.yml 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/ci/environment-py38.yml 2024-03-18 20:33:58.000000000 +0100 @@ -36,6 +36,7 @@ - nomkl - jinja2 - tqdm + - urllib3 <=1.26.18 - pip: - hadoop-test-cluster - smbprotocol diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/docs/source/changelog.rst new/filesystem_spec-2024.3.1/docs/source/changelog.rst --- old/filesystem_spec-2024.2.0/docs/source/changelog.rst 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/docs/source/changelog.rst 2024-03-18 20:33:58.000000000 +0100 @@ -1,6 +1,37 @@ Changelog ========= +2024.3.1 +-------- + +Fixes + +- allow override of expand in open() (#1549) +- root handling in local file paths, fix for windows (#1477) + +2024.3.0 +-------- + +Enhancements + +- coroutines throttle to stream pool rather than batches (#1544) +- write transactions in simplecache (#1531) +- allow deep nested refs in referenceFS/parquet (#1530) + +Fixes + +- Fixes bug (#1476) that made open_files ignore expand=False (#1536) +- remove extra calling mapper contains (#1546) +- connection retry for SMB (#1533) +- zip64 should be on is allowZip64 is (#1532) + +Other + +- HTTP logging (#1547) +- url_to_fs exposed in package root (#1540) +- sort known_implementations (#1549) +- code quality/stype (#1538, 1537, 1528, 1526) + 2024.2.0 -------- diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/fsspec/__init__.py new/filesystem_spec-2024.3.1/fsspec/__init__.py --- old/filesystem_spec-2024.2.0/fsspec/__init__.py 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/fsspec/__init__.py 2024-03-18 20:33:58.000000000 +0100 @@ -3,7 +3,7 @@ from . import _version, caching from .callbacks import Callback from .compression import available_compressions -from .core import get_fs_token_paths, open, open_files, open_local +from .core import get_fs_token_paths, open, open_files, open_local, url_to_fs from .exceptions import FSTimeoutError from .mapping import FSMap, get_mapper from .registry import ( @@ -34,6 +34,7 @@ "Callback", "available_protocols", "available_compressions", + "url_to_fs", ] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/fsspec/_version.py new/filesystem_spec-2024.3.1/fsspec/_version.py --- old/filesystem_spec-2024.2.0/fsspec/_version.py 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/fsspec/_version.py 2024-03-18 20:33:58.000000000 +0100 @@ -25,9 +25,9 @@ # setup.py/versioneer.py will grep for the variable names, so they must # each be defined on a line of their own. _version.py will just call # get_keywords(). - git_refnames = " (tag: 2024.2.0)" - git_full = "5dc364e13b63609717d77b7361e80cfa64e3b8fd" - git_date = "2024-02-04 20:21:42 -0500" + git_refnames = " (tag: 2024.3.1)" + git_full = "47b445ae4c284a82dd15e0287b1ffc410e8fc470" + git_date = "2024-03-18 15:33:58 -0400" keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} return keywords diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/fsspec/asyn.py new/filesystem_spec-2024.3.1/fsspec/asyn.py --- old/filesystem_spec-2024.2.0/fsspec/asyn.py 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/fsspec/asyn.py 2024-03-18 20:33:58.000000000 +0100 @@ -239,20 +239,35 @@ batch_size = len(coros) assert batch_size > 0 - results = [] - for start in range(0, len(coros), batch_size): - chunk = [ - asyncio.Task(asyncio.wait_for(c, timeout=timeout)) - for c in coros[start : start + batch_size] - ] - if callback is not DEFAULT_CALLBACK: - [ - t.add_done_callback(lambda *_, **__: callback.relative_update(1)) - for t in chunk - ] - results.extend( - await asyncio.gather(*chunk, return_exceptions=return_exceptions), - ) + + async def _run_coro(coro, i): + try: + return await asyncio.wait_for(coro, timeout=timeout), i + except Exception as e: + if not return_exceptions: + raise + return e, i + finally: + callback.relative_update(1) + + i = 0 + n = len(coros) + results = [None] * n + pending = set() + + while pending or i < n: + while len(pending) < batch_size and i < n: + pending.add(asyncio.ensure_future(_run_coro(coros[i], i))) + i += 1 + + if not pending: + break + + done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED) + while done: + result, k = await done.pop() + results[k] = result + return results diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/fsspec/caching.py new/filesystem_spec-2024.3.1/fsspec/caching.py --- old/filesystem_spec-2024.2.0/fsspec/caching.py 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/fsspec/caching.py 2024-03-18 20:33:58.000000000 +0100 @@ -345,14 +345,17 @@ else: # read from the initial - out = [] - out.append(self._fetch_block_cached(start_block_number)[start_pos:]) + out = [self._fetch_block_cached(start_block_number)[start_pos:]] # intermediate blocks # Note: it'd be nice to combine these into one big request. However # that doesn't play nicely with our LRU cache. - for block_number in range(start_block_number + 1, end_block_number): - out.append(self._fetch_block_cached(block_number)) + out.extend( + map( + self._fetch_block_cached, + range(start_block_number + 1, end_block_number), + ) + ) # final block out.append(self._fetch_block_cached(end_block_number)[:end_pos]) @@ -821,14 +824,17 @@ else: # read from the initial - out = [] - out.append(self._fetch_block_cached(start_block_number)[start_pos:]) + out = [self._fetch_block_cached(start_block_number)[start_pos:]] # intermediate blocks # Note: it'd be nice to combine these into one big request. However # that doesn't play nicely with our LRU cache. - for block_number in range(start_block_number + 1, end_block_number): - out.append(self._fetch_block_cached(block_number)) + out.extend( + map( + self._fetch_block_cached, + range(start_block_number + 1, end_block_number), + ) + ) # final block out.append(self._fetch_block_cached(end_block_number)[:end_pos]) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/fsspec/core.py new/filesystem_spec-2024.3.1/fsspec/core.py --- old/filesystem_spec-2024.2.0/fsspec/core.py 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/fsspec/core.py 2024-03-18 20:33:58.000000000 +0100 @@ -343,7 +343,7 @@ bit = previous_bit out.append((bit, protocol, kw)) previous_bit = bit - out = list(reversed(out)) + out.reverse() return out @@ -456,6 +456,8 @@ - For implementations in separate packages see https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations """ + kw = {"expand": False} + kw.update(kwargs) out = open_files( urlpath=[urlpath], mode=mode, @@ -464,8 +466,7 @@ errors=errors, protocol=protocol, newline=newline, - expand=False, - **kwargs, + **kw, ) if not out: raise FileNotFoundError(urlpath) @@ -643,7 +644,10 @@ else: paths = fs._strip_protocol(paths) if isinstance(paths, (list, tuple, set)): - paths = expand_paths_if_needed(paths, mode, num, fs, name_function) + if expand: + paths = expand_paths_if_needed(paths, mode, num, fs, name_function) + elif not isinstance(paths, list): + paths = list(paths) else: if "w" in mode and expand: paths = _expand_paths(paths, name_function, num) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/fsspec/generic.py new/filesystem_spec-2024.3.1/fsspec/generic.py --- old/filesystem_spec-2024.2.0/fsspec/generic.py 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/fsspec/generic.py 2024-03-18 20:33:58.000000000 +0100 @@ -87,6 +87,10 @@ fs: GenericFileSystem|None Instance to use if explicitly given. The instance defines how to to make downstream file system instances from paths. + + Returns + ------- + dict of the copy operations that were performed, {source: destination} """ fs = fs or GenericFileSystem(**(inst_kwargs or {})) source = fs._strip_protocol(source) @@ -137,6 +141,7 @@ logger.debug(f"{len(to_delete)} files to delete") if delete_missing: fs.rm(to_delete) + return allfiles class GenericFileSystem(AsyncFileSystem): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/fsspec/implementations/cached.py new/filesystem_spec-2024.3.1/fsspec/implementations/cached.py --- old/filesystem_spec-2024.2.0/fsspec/implementations/cached.py 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/fsspec/implementations/cached.py 2024-03-18 20:33:58.000000000 +0100 @@ -32,8 +32,10 @@ lpaths = [f.fn for f in self.files] if commit: self.fs.put(lpaths, rpaths) - # else remove? + self.files.clear() self.fs._intrans = False + self.fs._transaction = None + self.fs = None # break cycle class CachingFileSystem(AbstractFileSystem): @@ -391,8 +393,11 @@ close() f.closed = True + def ls(self, path, detail=True): + return self.fs.ls(path, detail) + def __getattribute__(self, item): - if item in [ + if item in { "load_cache", "_open", "save_cache", @@ -409,6 +414,11 @@ "read_block", "tail", "head", + "info", + "ls", + "exists", + "isfile", + "isdir", "_check_file", "_check_cache", "_mkcache", @@ -428,9 +438,12 @@ "cache_size", "pipe_file", "pipe", + "isdir", + "isfile", + "exists", "start_transaction", "end_transaction", - ]: + }: # all the methods defined in this class. Note `open` here, since # it calls `_open`, but is actually in superclass return lambda *args, **kw: getattr(type(self), item).__get__(self)( @@ -756,6 +769,49 @@ else: super().pipe_file(path, value) + def ls(self, path, detail=True, **kwargs): + path = self._strip_protocol(path) + details = [] + try: + details = self.fs.ls( + path, detail=True, **kwargs + ).copy() # don't edit original! + except FileNotFoundError as e: + ex = e + else: + ex = None + if self._intrans: + path1 = path.rstrip("/") + "/" + for f in self.transaction.files: + if f.path == path: + details.append( + {"name": path, "size": f.size or f.tell(), "type": "file"} + ) + elif f.path.startswith(path1): + if f.path.count("/") == path1.count("/"): + details.append( + {"name": f.path, "size": f.size or f.tell(), "type": "file"} + ) + else: + dname = "/".join(f.path.split("/")[: path1.count("/") + 1]) + details.append({"name": dname, "size": 0, "type": "directory"}) + if ex is not None and not details: + raise ex + if detail: + return details + return sorted(_["name"] for _ in details) + + def info(self, path, **kwargs): + path = self._strip_protocol(path) + if self._intrans: + f = [_ for _ in self.transaction.files if _.path == path] + if f: + return {"name": path, "size": f[0].size or f[0].tell(), "type": "file"} + f = any(_.path.startswith(path + "/") for _ in self.transaction.files) + if f: + return {"name": path, "size": 0, "type": "directory"} + return self.fs.info(path, **kwargs) + def pipe(self, path, value=None, **kwargs): if isinstance(path, str): self.pipe_file(self._strip_protocol(path), value, **kwargs) @@ -836,6 +892,7 @@ if seek: self.fh.seek(seek) self.path = path + self.size = None self.fs = fs self.closed = False self.autocommit = autocommit @@ -855,6 +912,7 @@ self.close() def close(self): + self.size = self.fh.tell() if self.closed: return self.fh.close() @@ -868,15 +926,14 @@ def commit(self): self.fs.put(self.fn, self.path, **self.kwargs) - try: - os.remove(self.fn) - except (PermissionError, FileNotFoundError): - # file path may be held by new version of the file on windows - pass + # we do not delete local copy - it's still in the cache @property def name(self): return self.fn + def __repr__(self) -> str: + return f"LocalTempFile: {self.path}" + def __getattr__(self, item): return getattr(self.fh, item) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/fsspec/implementations/data.py new/filesystem_spec-2024.3.1/fsspec/implementations/data.py --- old/filesystem_spec-2024.2.0/fsspec/implementations/data.py 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/fsspec/implementations/data.py 2024-03-18 20:33:58.000000000 +0100 @@ -1,5 +1,6 @@ import base64 import io +from typing import Optional from urllib.parse import unquote from fsspec import AbstractFileSystem @@ -14,6 +15,7 @@ ... print(f.read()) b"Hello, World!" + See https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs """ protocol = "data" @@ -46,3 +48,11 @@ if "r" not in mode: raise ValueError("Read only filesystem") return io.BytesIO(self.cat_file(path)) + + @staticmethod + def encode(data: bytes, mime: Optional[str] = None): + """Format the given data into data-URL syntax + + This version always base64 encodes, even when the data is ascii/url-safe. + """ + return f"data:{mime or ''};base64,{base64.b64encode(data).decode()}" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/fsspec/implementations/http.py new/filesystem_spec-2024.3.1/fsspec/implementations/http.py --- old/filesystem_spec-2024.2.0/fsspec/implementations/http.py 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/fsspec/implementations/http.py 2024-03-18 20:33:58.000000000 +0100 @@ -158,11 +158,14 @@ session = await self.set_session() async with session.get(self.encode_url(url), **self.kwargs) as r: self._raise_not_found_for_status(r, url) - text = await r.text() - if self.simple_links: - links = ex2.findall(text) + [u[2] for u in ex.findall(text)] - else: - links = [u[2] for u in ex.findall(text)] + try: + text = await r.text() + if self.simple_links: + links = ex2.findall(text) + [u[2] for u in ex.findall(text)] + else: + links = [u[2] for u in ex.findall(text)] + except UnicodeDecodeError: + links = [] # binary, not HTML out = set() parts = urlparse(url) for l in links: @@ -430,7 +433,7 @@ if policy == "get": # If get failed, then raise a FileNotFoundError raise FileNotFoundError(url) from exc - logger.debug(str(exc)) + logger.debug("", exc_info=exc) return {"name": url, "size": None, **info, "type": "file"} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/fsspec/implementations/local.py new/filesystem_spec-2024.3.1/fsspec/implementations/local.py --- old/filesystem_spec-2024.2.0/fsspec/implementations/local.py 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/fsspec/implementations/local.py 2024-03-18 20:33:58.000000000 +0100 @@ -3,7 +3,6 @@ import logging import os import os.path as osp -import re import shutil import stat import tempfile @@ -16,6 +15,12 @@ logger = logging.getLogger("fsspec.local") +def _remove_prefix(text: str, prefix: str): + if text.startswith(prefix): + return text[len(prefix) :] + return text + + class LocalFileSystem(AbstractFileSystem): """Interface to files on local storage @@ -116,8 +121,8 @@ return osp.lexists(path) def cp_file(self, path1, path2, **kwargs): - path1 = self._strip_protocol(path1).rstrip("/") - path2 = self._strip_protocol(path2).rstrip("/") + path1 = self._strip_protocol(path1, remove_trailing_slash=True) + path2 = self._strip_protocol(path2, remove_trailing_slash=True) if self.auto_mkdir: self.makedirs(self._parent(path2), exist_ok=True) if self.isfile(path1): @@ -127,6 +132,14 @@ else: raise FileNotFoundError(path1) + def isfile(self, path): + path = self._strip_protocol(path) + return os.path.isfile(path) + + def isdir(self, path): + path = self._strip_protocol(path) + return os.path.isdir(path) + def get_file(self, path1, path2, callback=None, **kwargs): if isfilelike(path2): with open(path1, "rb") as f: @@ -138,8 +151,8 @@ return self.cp_file(path1, path2, **kwargs) def mv_file(self, path1, path2, **kwargs): - path1 = self._strip_protocol(path1).rstrip("/") - path2 = self._strip_protocol(path2).rstrip("/") + path1 = self._strip_protocol(path1, remove_trailing_slash=True) + path2 = self._strip_protocol(path2, remove_trailing_slash=True) shutil.move(path1, path2) def link(self, src, dst, **kwargs): @@ -163,7 +176,7 @@ path = [path] for p in path: - p = self._strip_protocol(p).rstrip("/") + p = self._strip_protocol(p, remove_trailing_slash=True) if self.isdir(p): if not recursive: raise ValueError("Cannot delete directory, set recursive=True") @@ -206,24 +219,32 @@ @classmethod def _parent(cls, path): - path = cls._strip_protocol(path).rstrip("/") - if "/" in path: - return path.rsplit("/", 1)[0] + path = cls._strip_protocol(path, remove_trailing_slash=True) + if os.sep == "/": + # posix native + return path.rsplit("/", 1)[0] or "/" else: - return cls.root_marker + # NT + path_ = path.rsplit("/", 1)[0] + if len(path_) <= 3: + if path_[1:2] == ":": + # nt root (something like c:/) + return path_[0] + ":/" + # More cases may be required here + return path_ @classmethod - def _strip_protocol(cls, path): + def _strip_protocol(cls, path, remove_trailing_slash=False): path = stringify_path(path) - if path.startswith("file://"): - path = path[7:] - elif path.startswith("file:"): - path = path[5:] - elif path.startswith("local://"): - path = path[8:] + if path.startswith("file:"): + path = _remove_prefix(_remove_prefix(path, "file://"), "file:") + if os.sep == "\\": + path = path.lstrip("/") elif path.startswith("local:"): - path = path[6:] - return make_path_posix(path).rstrip("/") or cls.root_marker + path = _remove_prefix(_remove_prefix(path, "local://"), "local:") + if os.sep == "\\": + path = path.lstrip("/") + return make_path_posix(path, remove_trailing_slash) def _isfilestore(self): # Inheriting from DaskFileSystem makes this False (S3, etc. were) @@ -236,47 +257,42 @@ return os.chmod(path, mode) -def make_path_posix(path, sep=os.sep): - """Make path generic""" - if isinstance(path, (list, set, tuple)): - return type(path)(make_path_posix(p) for p in path) - if "~" in path: - path = osp.expanduser(path) - if sep == "/": - # most common fast case for posix +def make_path_posix(path, remove_trailing_slash=False): + """Make path generic for current OS""" + if not isinstance(path, str): + if isinstance(path, (list, set, tuple)): + return type(path)(make_path_posix(p, remove_trailing_slash) for p in path) + else: + path = str(stringify_path(path)) + if os.sep == "/": + # Native posix if path.startswith("/"): - return path - if path.startswith("./"): + # most common fast case for posix + return path.rstrip("/") or "/" if remove_trailing_slash else path + elif path.startswith("~"): + return make_path_posix(osp.expanduser(path), remove_trailing_slash) + elif path.startswith("./"): path = path[2:] + path = f"{os.getcwd()}/{path}" + return path.rstrip("/") or "/" if remove_trailing_slash else path return f"{os.getcwd()}/{path}" - if ( - (sep not in path and "/" not in path) - or (sep == "/" and not path.startswith("/")) - or (sep == "\\" and ":" not in path and not path.startswith("\\\\")) - ): - # relative path like "path" or "rel\\path" (win) or rel/path" - if os.sep == "\\": - # abspath made some more '\\' separators - return make_path_posix(osp.abspath(path)) - else: - return f"{os.getcwd()}/{path}" - if path.startswith("file://"): - path = path[7:] - if re.match("/[A-Za-z]:", path): - # for windows file URI like "file:///C:/folder/file" - # or "file:///C:\\dir\\file" - path = path[1:].replace("\\", "/").replace("//", "/") - if path.startswith("\\\\"): - # special case for windows UNC/DFS-style paths, do nothing, - # just flip the slashes around (case below does not work!) - return path.replace("\\", "/") - if re.match("[A-Za-z]:", path): - # windows full path like "C:\\local\\path" - return path.lstrip("\\").replace("\\", "/").replace("//", "/") - if path.startswith("\\"): - # windows network path like "\\server\\path" - return "/" + path.lstrip("\\").replace("\\", "/").replace("//", "/") - return path + else: + # NT handling + if len(path) > 1: + if path[1] == ":": + # windows full path like "C:\\local\\path" + if len(path) <= 3: + # nt root (something like c:/) + return path[0] + ":/" + path = path.replace("\\", "/").replace("//", "/") + return path.rstrip("/") if remove_trailing_slash else path + elif path[0] == "~": + return make_path_posix(osp.expanduser(path), remove_trailing_slash) + elif path.startswith(("\\\\", "//")): + # windows UNC/DFS-style paths + path = "//" + path[2:].replace("\\", "/").replace("//", "/") + return path.rstrip("/") if remove_trailing_slash else path + return make_path_posix(osp.abspath(path), remove_trailing_slash) def trailing_sep(path): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/fsspec/implementations/memory.py new/filesystem_spec-2024.3.1/fsspec/implementations/memory.py --- old/filesystem_spec-2024.2.0/fsspec/implementations/memory.py 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/fsspec/implementations/memory.py 2024-03-18 20:33:58.000000000 +0100 @@ -138,6 +138,7 @@ raise FileNotFoundError(path) def info(self, path, **kwargs): + logger.debug("info: %s", path) path = self._strip_protocol(path) if path in self.pseudo_dirs or any( p.startswith(path + "/") for p in list(self.store) + self.pseudo_dirs @@ -210,6 +211,7 @@ raise FileNotFoundError(path1) def cat_file(self, path, start=None, end=None, **kwargs): + logger.debug("cat: %s", path) path = self._strip_protocol(path) try: return bytes(self.store[path].getbuffer()[start:end]) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/fsspec/implementations/reference.py new/filesystem_spec-2024.3.1/fsspec/implementations/reference.py --- old/filesystem_spec-2024.2.0/fsspec/implementations/reference.py 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/fsspec/implementations/reference.py 2024-03-18 20:33:58.000000000 +0100 @@ -277,7 +277,7 @@ return json.dumps(self.zmetadata[key]).encode() elif "/" not in key or self._is_meta(key): raise KeyError(key) - field, sub_key = key.split("/") + field, _ = key.rsplit("/", 1) record, ri, chunk_size = self._key_to_record(key) maybe = self._items.get((field, record), {}).get(ri, False) if maybe is None: @@ -309,7 +309,7 @@ @lru_cache(4096) def _key_to_record(self, key): """Details needed to construct a reference for one key""" - field, chunk = key.split("/") + field, chunk = key.rsplit("/", 1) chunk_sizes = self._get_chunk_sizes(field) if len(chunk_sizes) == 0: return 0, 0, 0 @@ -366,7 +366,7 @@ def __setitem__(self, key, value): if "/" in key and not self._is_meta(key): - field, chunk = key.split("/") + field, chunk = key.rsplit("/", 1) record, i, _ = self._key_to_record(key) subdict = self._items.setdefault((field, record), {}) subdict[i] = value @@ -391,7 +391,7 @@ del self.zmetadata[key] else: if "/" in key and not self._is_meta(key): - field, chunk = key.split("/") + field, _ = key.rsplit("/", 1) record, i, _ = self._key_to_record(key) subdict = self._items.setdefault((field, record), {}) subdict[i] = None @@ -1035,7 +1035,7 @@ par0 = self._parent(par0) subdirs.append(par0) - subdirs = subdirs[::-1] + subdirs.reverse() for parent, child in zip(subdirs, subdirs[1:]): # register newly discovered directories assert child not in self.dircache diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/fsspec/implementations/smb.py new/filesystem_spec-2024.3.1/fsspec/implementations/smb.py --- old/filesystem_spec-2024.2.0/fsspec/implementations/smb.py 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/fsspec/implementations/smb.py 2024-03-18 20:33:58.000000000 +0100 @@ -118,14 +118,21 @@ return 445 if self.port is None else self.port def _connect(self): - smbclient.register_session( - self.host, - username=self.username, - password=self.password, - port=self._port, - encrypt=self.encrypt, - connection_timeout=self.timeout, - ) + import time + + for _ in range(5): + try: + smbclient.register_session( + self.host, + username=self.username, + password=self.password, + port=self._port, + encrypt=self.encrypt, + connection_timeout=self.timeout, + ) + break + except Exception: + time.sleep(0.1) @classmethod def _strip_protocol(cls, path): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/fsspec/implementations/tests/test_cached.py new/filesystem_spec-2024.3.1/fsspec/implementations/tests/test_cached.py --- old/filesystem_spec-2024.2.0/fsspec/implementations/tests/test_cached.py 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/fsspec/implementations/tests/test_cached.py 2024-03-18 20:33:58.000000000 +0100 @@ -1291,10 +1291,16 @@ with fs.transaction: fs.pipe("myfile", b"1") fs.pipe("otherfile", b"2") + fs.pipe("deep/dir/otherfile", b"3") with fs.open("blarh", "wb") as f: f.write(b"ff") assert not m.find("") + assert fs.info("otherfile")["size"] == 1 + assert fs.info("deep")["type"] == "directory" + assert fs.isdir("deep") + assert fs.ls("deep", detail=False) == ["/deep/dir"] + assert m.cat("myfile") == b"1" assert m.cat("otherfile") == b"2" assert called[0] == 1 # copy was done in one go diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/fsspec/implementations/tests/test_jupyter.py new/filesystem_spec-2024.3.1/fsspec/implementations/tests/test_jupyter.py --- old/filesystem_spec-2024.2.0/fsspec/implementations/tests/test_jupyter.py 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/fsspec/implementations/tests/test_jupyter.py 2024-03-18 20:33:58.000000000 +0100 @@ -13,11 +13,10 @@ @pytest.fixture() def jupyter(tmpdir): - tmpdir = str(tmpdir) os.environ["JUPYTER_TOKEN"] = "blah" try: - cmd = f"jupyter notebook --notebook-dir={tmpdir} --no-browser --port=5566" + cmd = f'jupyter notebook --notebook-dir="{tmpdir}" --no-browser --port=5566' P = subprocess.Popen(shlex.split(cmd)) except FileNotFoundError: pytest.skip("notebook not installed correctly") diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/fsspec/implementations/tests/test_local.py new/filesystem_spec-2024.3.1/fsspec/implementations/tests/test_local.py --- old/filesystem_spec-2024.2.0/fsspec/implementations/tests/test_local.py 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/fsspec/implementations/tests/test_local.py 2024-03-18 20:33:58.000000000 +0100 @@ -472,6 +472,9 @@ drive = cwd[0] assert make_path_posix("/a/posix/path") == f"{drive}:/a/posix/path" assert make_path_posix("/posix") == f"{drive}:/posix" + # Windows drive requires trailing slash + assert make_path_posix("C:\\") == "C:/" + assert make_path_posix("C:\\", remove_trailing_slash=True) == "C:/" else: assert make_path_posix("/a/posix/path") == "/a/posix/path" assert make_path_posix("/posix") == "/posix" @@ -479,26 +482,38 @@ assert make_path_posix("rel/path") == posixpath.join( make_path_posix(cwd), "rel/path" ) + # NT style if WIN: assert make_path_posix("C:\\path") == "C:/path" - assert make_path_posix("file://C:\\path\\file") == "C:/path/file" - if WIN: assert ( make_path_posix( - "\\\\windows-server\\someshare\\path\\more\\path\\dir\\foo.parquet" + "\\\\windows-server\\someshare\\path\\more\\path\\dir\\foo.parquet", ) == "//windows-server/someshare/path/more/path/dir/foo.parquet" ) assert ( make_path_posix( - r"\\SERVER\UserHomeFolder$\me\My Documents\project1\data\filen.csv" + "\\\\SERVER\\UserHomeFolder$\\me\\My Documents\\proj\\data\\fname.csv", ) - == "//SERVER/UserHomeFolder$/me/My Documents/project1/data/filen.csv" + == "//SERVER/UserHomeFolder$/me/My Documents/proj/data/fname.csv" ) assert "/" in make_path_posix("rel\\path") - + # Relative pp = make_path_posix("./path") - assert "./" not in pp and ".\\" not in pp + cd = make_path_posix(cwd) + assert pp == cd + "/path" + # Userpath + userpath = make_path_posix("~/path") + assert userpath.endswith("/path") + + +def test_parent(): + if WIN: + assert LocalFileSystem._parent("C:\\file or folder") == "C:/" + assert LocalFileSystem._parent("C:\\") == "C:/" + else: + assert LocalFileSystem._parent("/file or folder") == "/" + assert LocalFileSystem._parent("/") == "/" def test_linked_files(tmpdir): @@ -638,9 +653,11 @@ path = "file://~\\foo\\bar" if WIN else "file://~/foo/bar" stripped = LocalFileSystem._strip_protocol(path) assert path != stripped + assert "~" not in stripped assert "file://" not in stripped assert stripped.startswith(os.path.expanduser("~").replace("\\", "/")) - assert not LocalFileSystem._strip_protocol("./").endswith("/") + path = LocalFileSystem._strip_protocol("./", remove_trailing_slash=True) + assert not path.endswith("/") def test_strip_protocol_no_authority(): @@ -648,6 +665,10 @@ stripped = LocalFileSystem._strip_protocol(path) assert "file:" not in stripped assert stripped.endswith("/foo/bar") + if WIN: + assert ( + LocalFileSystem._strip_protocol("file://C:\\path\\file") == "C:/path/file" + ) def test_mkdir_twice_faile(tmpdir): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/fsspec/implementations/tests/test_memory.py new/filesystem_spec-2024.3.1/fsspec/implementations/tests/test_memory.py --- old/filesystem_spec-2024.2.0/fsspec/implementations/tests/test_memory.py 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/fsspec/implementations/tests/test_memory.py 2024-03-18 20:33:58.000000000 +0100 @@ -111,7 +111,7 @@ assert m.exists("src/file.txt") -def test_rm_no_psuedo_dir(m): +def test_rm_no_pseudo_dir(m): m.touch("/dir1/dir2/file") m.rm("/dir1", recursive=True) assert not m.exists("/dir1/dir2/file") diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/fsspec/implementations/tests/test_reference.py new/filesystem_spec-2024.3.1/fsspec/implementations/tests/test_reference.py --- old/filesystem_spec-2024.2.0/fsspec/implementations/tests/test_reference.py 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/fsspec/implementations/tests/test_reference.py 2024-03-18 20:33:58.000000000 +0100 @@ -690,6 +690,7 @@ def test_append_parquet(lazy_refs, m): + pytest.importorskip("kerchunk") with pytest.raises(KeyError): lazy_refs["data/0"] lazy_refs["data/0"] = b"data" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/fsspec/implementations/zip.py new/filesystem_spec-2024.3.1/fsspec/implementations/zip.py --- old/filesystem_spec-2024.2.0/fsspec/implementations/zip.py 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/fsspec/implementations/zip.py 2024-03-18 20:33:58.000000000 +0100 @@ -56,6 +56,7 @@ fo = fsspec.open( fo, mode=m, protocol=target_protocol, **(target_options or {}) ) + self.force_zip_64 = allowZip64 self.of = fo self.fo = fo.__enter__() # the whole instance is a context self.zip = zipfile.ZipFile( @@ -125,7 +126,7 @@ raise FileNotFoundError(path) if "r" in self.mode and "w" in mode: raise OSError("ZipFS can only be open for reading or writing, not both") - out = self.zip.open(path, mode.strip("b")) + out = self.zip.open(path, mode.strip("b"), force_zip64=self.force_zip_64) if "r" in mode: info = self.info(path) out.size = info["size"] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/fsspec/mapping.py new/filesystem_spec-2024.3.1/fsspec/mapping.py --- old/filesystem_spec-2024.2.0/fsspec/mapping.py 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/fsspec/mapping.py 2024-03-18 20:33:58.000000000 +0100 @@ -1,10 +1,13 @@ import array +import logging import posixpath import warnings from collections.abc import MutableMapping from functools import cached_property -from .core import url_to_fs +from fsspec.core import url_to_fs + +logger = logging.getLogger("fsspec.mapping") class FSMap(MutableMapping): @@ -37,7 +40,7 @@ def __init__(self, root, fs, check=False, create=False, missing_exceptions=None): self.fs = fs - self.root = fs._strip_protocol(root).rstrip("/") + self.root = fs._strip_protocol(root) self._root_key_to_str = fs._strip_protocol(posixpath.join(root, "x"))[:-1] if missing_exceptions is None: missing_exceptions = ( @@ -69,6 +72,7 @@ def clear(self): """Remove all keys below root - empties out mapping""" + logger.info("Clear mapping at %s", self.root) try: self.fs.rm(self.root, True) self.fs.mkdir(self.root) @@ -138,7 +142,7 @@ if isinstance(key, list): key = tuple(key) key = str(key) - return f"{self._root_key_to_str}{key}" + return f"{self._root_key_to_str}{key}".rstrip("/") def _str_to_key(self, s): """Strip path of to leave key name""" @@ -186,7 +190,7 @@ def __contains__(self, key): """Does key exist in mapping?""" path = self._key_to_str(key) - return self.fs.exists(path) and self.fs.isfile(path) + return self.fs.isfile(path) def __reduce__(self): return FSMap, (self.root, self.fs, False, False, self.missing_exceptions) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/fsspec/registry.py new/filesystem_spec-2024.3.1/fsspec/registry.py --- old/filesystem_spec-2024.2.0/fsspec/registry.py 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/fsspec/registry.py 2024-03-18 20:33:58.000000000 +0100 @@ -60,156 +60,158 @@ # protocols mapped to the class which implements them. This dict can be # updated with register_implementation known_implementations = { - "data": {"class": "fsspec.implementations.data.DataFileSystem"}, - "file": {"class": "fsspec.implementations.local.LocalFileSystem"}, - "local": {"class": "fsspec.implementations.local.LocalFileSystem"}, - "memory": {"class": "fsspec.implementations.memory.MemoryFileSystem"}, - "dropbox": { - "class": "dropboxdrivefs.DropboxDriveFileSystem", - "err": ( - 'DropboxFileSystem requires "dropboxdrivefs",' - '"requests" and "dropbox" to be installed' - ), - }, - "http": { - "class": "fsspec.implementations.http.HTTPFileSystem", - "err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed', - }, - "https": { - "class": "fsspec.implementations.http.HTTPFileSystem", - "err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed', - }, - "zip": {"class": "fsspec.implementations.zip.ZipFileSystem"}, - "tar": {"class": "fsspec.implementations.tar.TarFileSystem"}, - "gcs": { - "class": "gcsfs.GCSFileSystem", - "err": "Please install gcsfs to access Google Storage", - }, - "gs": { - "class": "gcsfs.GCSFileSystem", - "err": "Please install gcsfs to access Google Storage", - }, - "gdrive": { - "class": "gdrivefs.GoogleDriveFileSystem", - "err": "Please install gdrivefs for access to Google Drive", - }, - "sftp": { - "class": "fsspec.implementations.sftp.SFTPFileSystem", - "err": 'SFTPFileSystem requires "paramiko" to be installed', - }, - "ssh": { - "class": "fsspec.implementations.sftp.SFTPFileSystem", - "err": 'SFTPFileSystem requires "paramiko" to be installed', + "abfs": { + "class": "adlfs.AzureBlobFileSystem", + "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage", }, - "ftp": {"class": "fsspec.implementations.ftp.FTPFileSystem"}, - "hdfs": { - "class": "fsspec.implementations.arrow.HadoopFileSystem", - "err": "pyarrow and local java libraries required for HDFS", + "adl": { + "class": "adlfs.AzureDatalakeFileSystem", + "err": "Install adlfs to access Azure Datalake Gen1", }, "arrow_hdfs": { "class": "fsspec.implementations.arrow.HadoopFileSystem", "err": "pyarrow and local java libraries required for HDFS", }, - "webhdfs": { - "class": "fsspec.implementations.webhdfs.WebHDFS", - "err": 'webHDFS access requires "requests" to be installed', - }, - "s3": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"}, - "s3a": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"}, - "wandb": {"class": "wandbfs.WandbFS", "err": "Install wandbfs to access wandb"}, - "oci": { - "class": "ocifs.OCIFileSystem", - "err": "Install ocifs to access OCI Object Storage", - }, - "ocilake": { - "class": "ocifs.OCIFileSystem", - "err": "Install ocifs to access OCI Data Lake", - }, "asynclocal": { "class": "morefs.asyn_local.AsyncLocalFileSystem", "err": "Install 'morefs[asynclocalfs]' to use AsyncLocalFileSystem", }, - "adl": { - "class": "adlfs.AzureDatalakeFileSystem", - "err": "Install adlfs to access Azure Datalake Gen1", - }, - "abfs": { - "class": "adlfs.AzureBlobFileSystem", - "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage", - }, "az": { "class": "adlfs.AzureBlobFileSystem", "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage", }, - "cached": {"class": "fsspec.implementations.cached.CachingFileSystem"}, "blockcache": {"class": "fsspec.implementations.cached.CachingFileSystem"}, - "filecache": {"class": "fsspec.implementations.cached.WholeFileCacheFileSystem"}, - "simplecache": {"class": "fsspec.implementations.cached.SimpleCacheFileSystem"}, + "box": { + "class": "boxfs.BoxFileSystem", + "err": "Please install boxfs to access BoxFileSystem", + }, + "cached": {"class": "fsspec.implementations.cached.CachingFileSystem"}, "dask": { "class": "fsspec.implementations.dask.DaskWorkerFileSystem", "err": "Install dask distributed to access worker file system", }, + "data": {"class": "fsspec.implementations.data.DataFileSystem"}, "dbfs": { "class": "fsspec.implementations.dbfs.DatabricksFileSystem", "err": "Install the requests package to use the DatabricksFileSystem", }, - "github": { - "class": "fsspec.implementations.github.GithubFileSystem", - "err": "Install the requests package to use the github FS", + "dir": {"class": "fsspec.implementations.dirfs.DirFileSystem"}, + "dropbox": { + "class": "dropboxdrivefs.DropboxDriveFileSystem", + "err": ( + 'DropboxFileSystem requires "dropboxdrivefs","requests" and "' + '"dropbox" to be installed' + ), + }, + "dvc": { + "class": "dvc.api.DVCFileSystem", + "err": "Install dvc to access DVCFileSystem", + }, + "file": {"class": "fsspec.implementations.local.LocalFileSystem"}, + "filecache": {"class": "fsspec.implementations.cached.WholeFileCacheFileSystem"}, + "ftp": {"class": "fsspec.implementations.ftp.FTPFileSystem"}, + "gcs": { + "class": "gcsfs.GCSFileSystem", + "err": "Please install gcsfs to access Google Storage", }, + "gdrive": { + "class": "gdrivefs.GoogleDriveFileSystem", + "err": "Please install gdrivefs for access to Google Drive", + }, + "generic": {"class": "fsspec.generic.GenericFileSystem"}, "git": { "class": "fsspec.implementations.git.GitFileSystem", "err": "Install pygit2 to browse local git repos", }, - "smb": { - "class": "fsspec.implementations.smb.SMBFileSystem", - "err": 'SMB requires "smbprotocol" or "smbprotocol[kerberos]" installed', + "github": { + "class": "fsspec.implementations.github.GithubFileSystem", + "err": "Install the requests package to use the github FS", }, - "jupyter": { + "gs": { + "class": "gcsfs.GCSFileSystem", + "err": "Please install gcsfs to access Google Storage", + }, + "hdfs": { + "class": "fsspec.implementations.arrow.HadoopFileSystem", + "err": "pyarrow and local java libraries required for HDFS", + }, + "hf": { + "class": "huggingface_hub.HfFileSystem", + "err": "Install huggingface_hub to access HfFileSystem", + }, + "http": { + "class": "fsspec.implementations.http.HTTPFileSystem", + "err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed', + }, + "https": { + "class": "fsspec.implementations.http.HTTPFileSystem", + "err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed', + }, + "jlab": { "class": "fsspec.implementations.jupyter.JupyterFileSystem", "err": "Jupyter FS requires requests to be installed", }, - "jlab": { + "jupyter": { "class": "fsspec.implementations.jupyter.JupyterFileSystem", "err": "Jupyter FS requires requests to be installed", }, + "lakefs": { + "class": "lakefs_spec.LakeFSFileSystem", + "err": "Please install lakefs-spec to access LakeFSFileSystem", + }, "libarchive": { "class": "fsspec.implementations.libarchive.LibArchiveFileSystem", "err": "LibArchive requires to be installed", }, - "reference": {"class": "fsspec.implementations.reference.ReferenceFileSystem"}, - "generic": {"class": "fsspec.generic.GenericFileSystem"}, + "local": {"class": "fsspec.implementations.local.LocalFileSystem"}, + "memory": {"class": "fsspec.implementations.memory.MemoryFileSystem"}, + "oci": { + "class": "ocifs.OCIFileSystem", + "err": "Install ocifs to access OCI Object Storage", + }, + "ocilake": { + "class": "ocifs.OCIFileSystem", + "err": "Install ocifs to access OCI Data Lake", + }, "oss": { "class": "ossfs.OSSFileSystem", "err": "Install ossfs to access Alibaba Object Storage System", }, - "webdav": { - "class": "webdav4.fsspec.WebdavFileSystem", - "err": "Install webdav4 to access WebDAV", + "reference": {"class": "fsspec.implementations.reference.ReferenceFileSystem"}, + "root": { + "class": "fsspec_xrootd.XRootDFileSystem", + "err": ( + "Install fsspec-xrootd to access xrootd storage system. " + "Note: 'root' is the protocol name for xrootd storage systems, " + "not referring to root directories" + ), }, - "dvc": { - "class": "dvc.api.DVCFileSystem", - "err": "Install dvc to access DVCFileSystem", + "s3": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"}, + "s3a": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"}, + "sftp": { + "class": "fsspec.implementations.sftp.SFTPFileSystem", + "err": 'SFTPFileSystem requires "paramiko" to be installed', }, - "hf": { - "class": "huggingface_hub.HfFileSystem", - "err": "Install huggingface_hub to access HfFileSystem", + "simplecache": {"class": "fsspec.implementations.cached.SimpleCacheFileSystem"}, + "smb": { + "class": "fsspec.implementations.smb.SMBFileSystem", + "err": 'SMB requires "smbprotocol" or "smbprotocol[kerberos]" installed', }, - "root": { - "class": "fsspec_xrootd.XRootDFileSystem", - "err": "Install fsspec-xrootd to access xrootd storage system." - + " Note: 'root' is the protocol name for xrootd storage systems," - + " not referring to root directories", + "ssh": { + "class": "fsspec.implementations.sftp.SFTPFileSystem", + "err": 'SFTPFileSystem requires "paramiko" to be installed', }, - "dir": {"class": "fsspec.implementations.dirfs.DirFileSystem"}, - "box": { - "class": "boxfs.BoxFileSystem", - "err": "Please install boxfs to access BoxFileSystem", + "tar": {"class": "fsspec.implementations.tar.TarFileSystem"}, + "wandb": {"class": "wandbfs.WandbFS", "err": "Install wandbfs to access wandb"}, + "webdav": { + "class": "webdav4.fsspec.WebdavFileSystem", + "err": "Install webdav4 to access WebDAV", }, - "lakefs": { - "class": "lakefs_spec.LakeFSFileSystem", - "err": "Please install lakefs-spec to access LakeFSFileSystem", + "webhdfs": { + "class": "fsspec.implementations.webhdfs.WebHDFS", + "err": 'webHDFS access requires "requests" to be installed', }, + "zip": {"class": "fsspec.implementations.zip.ZipFileSystem"}, } @@ -254,7 +256,7 @@ def _import_class(cls, minv=None): """Take a string FQP and return the imported class or identifier - clas is of the form "package.module.klass" or "package.module:subobject.klass" + cls is of the form "package.module.klass" or "package.module:subobject.klass" """ if ":" in cls: mod, name = cls.rsplit(":", 1) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/fsspec/tests/test_core.py new/filesystem_spec-2024.3.1/fsspec/tests/test_core.py --- old/filesystem_spec-2024.2.0/fsspec/tests/test_core.py 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/fsspec/tests/test_core.py 2024-03-18 20:33:58.000000000 +0100 @@ -239,6 +239,101 @@ of2.close() +# Define a list of special glob characters. +# Note that we need to escape some characters and also consider file system limitations. +# '*' and '?' are excluded because they are not valid for many file systems. +# Similarly, we're careful with '{', '}', and '@' as their special meaning is +# context-specific and might not be considered special for filenames. +# Add tests for more file systems and for more glob magic later +glob_magic_characters = ["[", "]", "!"] +if os.name != "nt": + glob_magic_characters.extend(("*", "?")) # not valid on Windows + + +@pytest.mark.parametrize("char", glob_magic_characters) +def test_open_file_read_with_special_characters(tmp_path, char): + # Create a filename incorporating the special character + file_name = f"test{char}.txt" + file_path = tmp_path / file_name + expected_content = "Hello, world!" + + with open(file_path, "w") as f: + f.write(expected_content) + + with fsspec.open(file_path, "r") as f: + actual_content = f.read() + + assert actual_content == expected_content + + +@pytest.mark.parametrize("char", glob_magic_characters) +def test_open_files_read_with_special_characters(tmp_path, char): + # Create a filename incorporating the special character + file_name = f"test{char}.txt" + file_path = tmp_path / file_name + expected_content = "Hello, world!" + + with open(file_path, "w") as f: + f.write(expected_content) + + with fsspec.open_files(file_path, "r")[0] as f: + actual_content = f.read() + + assert actual_content == expected_content + + +@pytest.mark.parametrize("char", glob_magic_characters) +def test_open_file_write_with_special_characters(tmp_path, char): + # Create a filename incorporating the special character + file_name = f"test{char}.txt" + file_path = tmp_path / file_name + expected_content = "Hello, world!" + + with fsspec.open(file_path, "w") as f: + f.write(expected_content) + + with open(file_path, "r") as f: + actual_content = f.read() + + assert actual_content == expected_content + + +@pytest.mark.parametrize("char", glob_magic_characters) +def test_open_files_read_with_special_characters(tmp_path, char): + # Create a filename incorporating the special character + file_name = f"test{char}.txt" + file_path = tmp_path / file_name + expected_content = "Hello, world!" + + with open(file_path, "w") as f: + f.write(expected_content) + + with fsspec.open_files( + urlpath=[os.fspath(file_path)], mode="r", auto_mkdir=False, expand=False + )[0] as f: + actual_content = f.read() + + assert actual_content == expected_content + + +@pytest.mark.parametrize("char", glob_magic_characters) +def test_open_files_write_with_special_characters(tmp_path, char): + # Create a filename incorporating the special character + file_name = f"test{char}.txt" + file_path = tmp_path / file_name + expected_content = "Hello, world!" + + with fsspec.open_files( + urlpath=[os.fspath(file_path)], mode="w", auto_mkdir=False, expand=False + )[0] as f: + f.write(expected_content) + + with open(file_path, "r") as f: + actual_content = f.read() + + assert actual_content == expected_content + + def test_mismatch(): pytest.importorskip("s3fs") with pytest.raises(ValueError): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/fsspec/tests/test_mapping.py new/filesystem_spec-2024.3.1/fsspec/tests/test_mapping.py --- old/filesystem_spec-2024.2.0/fsspec/tests/test_mapping.py 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/fsspec/tests/test_mapping.py 2024-03-18 20:33:58.000000000 +0100 @@ -200,19 +200,23 @@ _ = m[f"memory://{root}/a"] -# on Windows opening a directory will raise PermissionError -# see: https://bugs.python.org/issue43095 -@pytest.mark.skipif( - platform.system() == "Windows", reason="raises PermissionError on windows" -) def test_fsmap_access_with_suffix(tmp_path): tmp_path.joinpath("b").mkdir() tmp_path.joinpath("b", "a").write_bytes(b"data") - m = fsspec.get_mapper(f"file://{tmp_path}") - + if platform.system() == "Windows": + # on Windows opening a directory will raise PermissionError + # see: https://bugs.python.org/issue43095 + missing_exceptions = ( + FileNotFoundError, + IsADirectoryError, + NotADirectoryError, + PermissionError, + ) + else: + missing_exceptions = None + m = fsspec.get_mapper(f"file://{tmp_path}", missing_exceptions=missing_exceptions) with pytest.raises(KeyError): _ = m["b/"] - assert m["b/a/"] == b"data" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/fsspec/tests/test_registry.py new/filesystem_spec-2024.3.1/fsspec/tests/test_registry.py --- old/filesystem_spec-2024.2.0/fsspec/tests/test_registry.py 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/fsspec/tests/test_registry.py 2024-03-18 20:33:58.000000000 +0100 @@ -36,6 +36,12 @@ sys.modules["fsspec"] = real_module +def test_sorted_known_implementations(): + expected = sorted(known_implementations.keys()) + actual = list(known_implementations.keys()) + assert actual == expected + + def test_registry_readonly(): get_filesystem_class("file") assert "file" in registry diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/fsspec/transaction.py new/filesystem_spec-2024.3.1/fsspec/transaction.py --- old/filesystem_spec-2024.2.0/fsspec/transaction.py 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/fsspec/transaction.py 2024-03-18 20:33:58.000000000 +0100 @@ -9,7 +9,7 @@ instance as the ``.transaction`` attribute of the given filesystem """ - def __init__(self, fs): + def __init__(self, fs, **kwargs): """ Parameters ---------- @@ -26,8 +26,10 @@ """End transaction and commit, if exit is not due to exception""" # only commit if there was no exception self.complete(commit=exc_type is None) - self.fs._intrans = False - self.fs._transaction = None + if self.fs: + self.fs._intrans = False + self.fs._transaction = None + self.fs = None def start(self): """Start a transaction on this FileSystem""" @@ -43,6 +45,8 @@ else: f.discard() self.fs._intrans = False + self.fs._transaction = None + self.fs = None class FileActor: @@ -83,3 +87,4 @@ else: self.files.discard().result() self.fs._intrans = False + self.fs = None diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/fsspec/utils.py new/filesystem_spec-2024.3.1/fsspec/utils.py --- old/filesystem_spec-2024.2.0/fsspec/utils.py 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/fsspec/utils.py 2024-03-18 20:33:58.000000000 +0100 @@ -284,7 +284,7 @@ found_end_delim = seek_delimiter(f, delimiter, 2**16) end = f.tell() - # Adjust split location to before delimiter iff seek found the + # Adjust split location to before delimiter if seek found the # delimiter sequence, not start or end of file. if found_start_delim and split_before: start -= len(delimiter) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/filesystem_spec-2024.2.0/pyproject.toml new/filesystem_spec-2024.3.1/pyproject.toml --- old/filesystem_spec-2024.2.0/pyproject.toml 2024-02-05 02:21:42.000000000 +0100 +++ new/filesystem_spec-2024.3.1/pyproject.toml 2024-03-18 20:33:58.000000000 +0100 @@ -30,6 +30,9 @@ "versioneer.py", "fsspec/_version", ] +line-length = 88 + +[tool.ruff.lint] select = [ # fix noqas in fsspec/implementations/http.py "ASYNC", @@ -40,6 +43,7 @@ "E7", "E9", "F", + "LOG", "PERF", "PLC", "PLE", @@ -56,7 +60,6 @@ "SLOT", "SIM101", ] -line-length = 88 ignore = [ # Assigning lambda expression "E731", @@ -72,7 +75,6 @@ # Fix these codes later "G004", "PERF203", - "PERF401", ] [tool.pytest.ini_options]