# HG changeset patch # User Gregory Szorc <gregory.sz...@gmail.com> # Date 1476649322 25200 # Sun Oct 16 13:22:02 2016 -0700 # Node ID 735adf139447190fe52c65ec6e9b4e5e0c81d6aa # Parent 757d25d2bbe63fc560e92b6bb25fbfbf09b09342 revlog: add a context manager to allow file handle reuse
Currently, read-only operations traversing revlogs must open a new file handle whenever they need uncached data from the underlying revlog. This can add overhead to operations such as changegroup generation. The revlog APIs have a mechanism for reusing a file descriptor for I/O. This was added by me a while ago as a means to speed up revlog.addgroup(). At that time, I didn't do any work around reusing file handles for read operations. This patch introduces a context manager to cache an open file handle on a revlog. When the context manager is active, revlog reads will be routed to the opened file handle instead of opening a single use file handle. There is definitely room to improve the API. We could probably even refactor the write file descriptor caching to use a context manager - possibly the same one! However, this is a bit of work since the write file handle can be swapped out if a revlog transitions from inline to non-inline in the course of adding revisions. diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -9,16 +9,17 @@ This provides efficient delta storage with O(1) retrieve and append and O(changes) merge between branches. """ from __future__ import absolute_import import collections +import contextlib import errno import hashlib import os import struct import zlib # import stuff from node for others to import from revlog from .node import ( @@ -309,16 +310,18 @@ class revlog(object): self.index, nodemap, self._chunkcache = d if nodemap is not None: self.nodemap = self._nodecache = nodemap if not self._chunkcache: self._chunkclear() # revnum -> (chain-length, sum-delta-length) self._chaininfocache = {} + self._readfh = None + def tip(self): return self.node(len(self.index) - 2) def __contains__(self, rev): return 0 <= rev < len(self) def __len__(self): return len(self.index) - 1 def __iter__(self): return iter(xrange(len(self))) @@ -1013,27 +1016,57 @@ class revlog(object): """ o, d = self._chunkcache # try to add to existing cache if o + len(d) == offset and len(d) + len(data) < _chunksize: self._chunkcache = o, d + data else: self._chunkcache = offset, data + @contextlib.contextmanager + def cachefilehandle(self): + """Maintain a persistent file handle during operations. + + When this context manager is active, a file descriptor will be reused + for all read operations, ensuring the underlying revlog file isn't + reopened multiple times. + """ + if self._readfh: + raise error.Abort('cachefilehandle already active') + + # Inline revlogs have data chunks cached at open time. If we opened + # a file handle it wouldn't be read. + if self._inline: + yield + return + + try: + self._readfh = self.opener(self.datafile) + yield + finally: + self._readfh.close() + self._readfh = None + def _loadchunk(self, offset, length, df=None): """Load a segment of raw data from the revlog. Accepts an absolute offset, length to read, and an optional existing file handle to read from. If an existing file handle is passed, it will be seeked and the original seek position will NOT be restored. Returns a str or buffer of raw byte data. """ + # Use cached file handle from context manager automatically. + # self._readfh is always opened in read-only mode. df may be opened + # in append mode. The context manager should only be active during + # read operations. So this mismatch is OK. + df = df or self._readfh + if df is not None: closehandle = False else: if self._inline: df = self.opener(self.indexfile) else: df = self.opener(self.datafile) closehandle = True _______________________________________________ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel