# HG changeset patch # User Mads Kiilerich <mad...@unity3d.com> # Date 1475882684 -7200 # Sat Oct 08 01:24:44 2016 +0200 # Node ID 36e7e42f266e5cc70535479e50445395402c7400 # Parent 2c25dc4a5a556abbebe09fe3c4eb0ff4c8fa0cd4 largefiles: PoC: replace largefile copies in working directory with symlinks
Updating between distant revisions can be expensive when working on multiple divergent branches. The basic working directory update might be efficient but copying the largefiles and hashing them will take a lot of bandwidth and thus take time. Usually, largefiles are copied from the store to the working directory to make sure that any in-place modification of largefiles in the working directory doesn't corrupt the store. In a build farm, there will often be a lot of switching between branches, and the risk of someone doing in-place modification of largefiles is quite small. It might thus make sense to make a different trade-off. The idea here is to make a symlink pointing at the storage file instead of copying it. In some cases, this can make working copy updates NaN times faster. To reduce the risk of corruption, storage files are made read-only. To make sure that time-stamp based build systems manage a symlink change correctly, storage files are touched when they are linked to. This should also work on Windows if running as admin, or if ordinary users have been authorized to make symlinks (which is probably not a good idea in general). This is a proof of concept and doesn't contain or pass any tests. It should perhaps turn into an extension. diff --git a/hgext/largefiles/lfutil.py b/hgext/largefiles/lfutil.py --- a/hgext/largefiles/lfutil.py +++ b/hgext/largefiles/lfutil.py @@ -10,6 +10,7 @@ from __future__ import absolute_import import copy +import errno import hashlib import os import platform @@ -32,6 +33,20 @@ shortnameslash = shortname + '/' longname = 'largefiles' filechunkitersize = 128 * 1024 +if os.name == 'nt': + import ctypes + FILESYMLINK = 0 + _kernel32 = ctypes.windll.kernel32 + def symlink(src, dst): + """Create a symbolic link pointing to src named dst. + Requires running as Admin ... 
or enabling in: + Computer configuration > Windows Settings > Security Settings > + Local Policies > User Rights Assignment > Create symbolic links + """ + _kernel32.CreateSymbolicLinkA(dst, os.path.normpath(src), FILESYMLINK) +else: + symlink = os.symlink + # -- Private worker functions ------------------------------------------ def getminsize(ui, assumelfiles, opt, default=10): @@ -227,18 +242,26 @@ def copyfromcache(repo, hash, filename): path = findfile(repo, hash) if path is None: return False + # make storage file read-only before pointing at it - reduce risk of + # corruption + try: + os.chmod(path, os.stat(path).st_mode & + ~(stat.S_IWUSR|stat.S_IWGRP|stat.S_IWOTH)) + except OSError as e: + if e.errno != errno.EPERM: + raise + # no chmod access probably means that it is safe (but touch will fail) + repo.ui.debug("can't chmod %s\n" % path) wvfs.makedirs(wvfs.dirname(wvfs.join(filename))) - # The write may fail before the file is fully written, but we - # don't use atomic writes in the working copy. 
- with open(path, 'rb') as srcfd: - with wvfs(filename, 'wb') as destfd: - gothash = copyandhash( - util.filechunkiter(srcfd, filechunkitersize), destfd) - if gothash != hash: - repo.ui.warn(_('%s: data corruption in %s with hash %s\n') - % (filename, path, gothash)) - wvfs.unlink(filename) - return False + symlink(path, wvfs.join(filename)) + # touch the storage file so build systems see the file as modified + try: + os.utime(path, None) # touch + except OSError as e: + if e.errno != errno.EACCES: + raise + repo.ui.warn(_("can't touch %s - that might confuse build systems\n") + % path) return True def copytostore(repo, rev, file, uploaded=False): diff --git a/hgext/largefiles/overrides.py b/hgext/largefiles/overrides.py --- a/hgext/largefiles/overrides.py +++ b/hgext/largefiles/overrides.py @@ -1392,6 +1392,8 @@ def mergeupdate(orig, repo, node, branch lfileabs = repo.wvfs.join(lfile) if not repo.wvfs.exists(lfileabs): continue + if repo.wvfs.islink(lfile): + continue # never update standins from symlinks lfhash = lfutil.hashrepofile(repo, lfile) standin = lfutil.standin(lfile) lfutil.writestandin(repo, standin, lfhash, diff --git a/hgext/largefiles/reposetup.py b/hgext/largefiles/reposetup.py --- a/hgext/largefiles/reposetup.py +++ b/hgext/largefiles/reposetup.py @@ -172,7 +172,8 @@ def reposetup(ui, repo): if standin not in ctx1: # from second parent modified.append(lfile) - elif ctx1[standin].data().strip() \ + elif not self.wvfs.islink(lfile) and \ + ctx1[standin].data().strip() \ != lfutil.hashfile(self.wjoin(lfile)): modified.append(lfile) else: _______________________________________________ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel