[PATCH] templates: add substring and string length operations
# HG changeset patch # User Rodrigo Damazio Bovendorp# Date 1500072378 25200 # Fri Jul 14 15:46:18 2017 -0700 # Node ID 0ccebbd04efbd672fc71df7f52ec243057cbed7d # Parent c0d8de2724ce6240d2a4241aff78ce2ee92359c2 templates: add substring and string length operations This will allow substr(text, start, end) and strlen(text) in templates, permitting various text formatting, such as making a (non-graphing) log line be limited to terminal width ("substr(desc, 0, termwidth)") diff --git a/mercurial/templatefilters.py b/mercurial/templatefilters.py --- a/mercurial/templatefilters.py +++ b/mercurial/templatefilters.py @@ -362,6 +362,11 @@ return "" return pycompat.bytestr(thing) +@templatefilter('strlen') +def stringlen(text): +"""Any text. Turns the value into its length.""" +return len(text) + @templatefilter('stripdir') def stripdir(text): """Treat the text as path and strip a directory level, if diff --git a/mercurial/templater.py b/mercurial/templater.py --- a/mercurial/templater.py +++ b/mercurial/templater.py @@ -1015,6 +1015,25 @@ # i18n: "sub" is a keyword raise error.ParseError(_("sub got an invalid replacement: %s") % rpl) +@templatefunc('substr(text, start[, end])') +def substring(context, mapping, args): +"""Returns a substring of the given text. Negative indices reference the end +of the string.""" +if len(args) < 2 or len(args) > 3: + raise error.ParseError(_("substring takes 2 or 3 arguments")) + +text = evalstring(context, mapping, args[0]) +textlen = len(text) +start = evalinteger(context, mapping, args[1], + _("start expects an integer index")) +end = -1 +if len(args) > 2: + end = evalinteger(context, mapping, args[2], +_("end expects an integer index")) + +# Python's [] already handles start and end boundary conditions. 
+return text[start:end] + @templatefunc('startswith(pattern, text)') def startswith(context, mapping, args): """Returns the value from the "text" argument diff --git a/tests/test-command-template.t b/tests/test-command-template.t --- a/tests/test-command-template.t +++ b/tests/test-command-template.t @@ -4011,6 +4011,35 @@ o line 1 line 2 +Test stringlen and substring +Full desc is "Modify, add, remove, rename". +String idxs: 012345678901 +Reverse string idxs: 10987654321 + + $ hg log -R a -r . --template '{desc|strlen}\n' + 27 + $ hg log -R a -r . --template '{substr(desc, 5, 10)}\n' + y, ad + $ hg log -R a -r . --template '{substr(desc, 5, -10)}\n' + y, add, remo + $ hg log -R a -r . --template '{substr(desc, 5, strlen(desc) - 10)}\n' + y, add, remo + $ hg log -R a -r . --template '{substr(desc, -10, -3)}\n' + ve, ren + +Test substr with invalid indices + + $ hg log -R a -r . --template '{substr(desc, 5, 200)}\n' + y, add, remove, rename + $ hg log -R a -r . --template '{substr(desc, 10, 5)}\n' + + $ hg log -R a -r . --template '{substr(desc, 100, 200)}\n' + + $ hg log -R a -r . --template '{substr(desc, -100, -50)}\n' + + $ hg log -R a -r . --template '{substr(desc, -50, -100)}\n' + + Test bad template with better error message $ hg log -Gv -R a --template '{desc|user()}' ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH v2] templates: add substring and string length operations
# HG changeset patch # User Rodrigo Damazio Bovendorp# Date 1500075683 25200 # Fri Jul 14 16:41:23 2017 -0700 # Node ID c4bac4ea7b1ea923d6ba4299cd9c974469b39cb0 # Parent c0d8de2724ce6240d2a4241aff78ce2ee92359c2 templates: add substring and string length operations This will allow substr(text, start, end) and strlen(text) in templates, permitting various text formatting, such as making a (non-graphing) log line be limited to terminal width ("substr(desc, 0, termwidth)") diff --git a/mercurial/templatefilters.py b/mercurial/templatefilters.py --- a/mercurial/templatefilters.py +++ b/mercurial/templatefilters.py @@ -362,6 +362,11 @@ return "" return pycompat.bytestr(thing) +@templatefilter('strlen') +def stringlen(text): +"""Any text. Turns the value into its length.""" +return len(text) + @templatefilter('stripdir') def stripdir(text): """Treat the text as path and strip a directory level, if diff --git a/mercurial/templater.py b/mercurial/templater.py --- a/mercurial/templater.py +++ b/mercurial/templater.py @@ -1015,6 +1015,25 @@ # i18n: "sub" is a keyword raise error.ParseError(_("sub got an invalid replacement: %s") % rpl) +@templatefunc('substr(text, start[, end])') +def substring(context, mapping, args): +"""Returns a substring of the given text. Negative indices reference the end +of the string.""" +if len(args) < 2 or len(args) > 3: +raise error.ParseError( +_("substring expects two or three arguments, got %d") % len(args)) + +text = evalstring(context, mapping, args[0]) +start = evalinteger(context, mapping, args[1], + _("start expects an integer index")) +end = len(text) +if len(args) > 2: +end = evalinteger(context, mapping, args[2], + _("end expects an integer index")) + +# Python's [] already handles start and end boundary conditions. 
+return text[start:end] + @templatefunc('startswith(pattern, text)') def startswith(context, mapping, args): """Returns the value from the "text" argument diff --git a/tests/test-command-template.t b/tests/test-command-template.t --- a/tests/test-command-template.t +++ b/tests/test-command-template.t @@ -4011,6 +4011,39 @@ o line 1 line 2 +Test stringlen and substring +Full desc is "Modify, add, remove, rename". +String idxs: 012345678901 +Reverse string idxs: 10987654321 + + $ hg log -R a -r . --template '{desc|strlen}\n' + 27 + $ hg log -R a -r . --template '{substr(desc, 5)}\n' + y, add, remove, rename + $ hg log -R a -r . --template '{substr(desc, -10)}\n' + ve, rename + $ hg log -R a -r . --template '{substr(desc, 5, 10)}\n' + y, ad + $ hg log -R a -r . --template '{substr(desc, 5, -10)}\n' + y, add, remo + $ hg log -R a -r . --template '{substr(desc, 5, strlen(desc) - 10)}\n' + y, add, remo + $ hg log -R a -r . --template '{substr(desc, -10, -3)}\n' + ve, ren + +Test substr with invalid indices + + $ hg log -R a -r . --template '{substr(desc, 5, 200)}\n' + y, add, remove, rename + $ hg log -R a -r . --template '{substr(desc, 10, 5)}\n' + + $ hg log -R a -r . --template '{substr(desc, 100, 200)}\n' + + $ hg log -R a -r . --template '{substr(desc, -100, -50)}\n' + + $ hg log -R a -r . --template '{substr(desc, -50, -100)}\n' + + Test bad template with better error message $ hg log -Gv -R a --template '{desc|user()}' ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH] debian: add less as a build dependency
# HG changeset patch # User Rodrigo Damazio Bovendorp# Date 1495852400 25200 # Fri May 26 19:33:20 2017 -0700 # Node ID eb70f0115b8c880218a45fc9e1f59eeb38b2f219 # Parent b647b923486f38d83b92089eafa9faafaa79785d debian: add less as a build dependency For builds that run in hermetic environments, it's possible that the "less" package is not installed by default, yet it's needed for tests to pass after revision bf5e13e38390 (which sets less as the fallback pager). diff --git a/contrib/debian/control b/contrib/debian/control --- a/contrib/debian/control +++ b/contrib/debian/control @@ -5,6 +5,7 @@ Build-Depends: debhelper (>= 9), dh-python, + less, netbase, python-all, python-all-dev, ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH evolve-ext] evolve: adding longer aliases for olog
# HG changeset patch # User Rodrigo Damazio Bovendorp# Date 1495172417 25200 # Thu May 18 22:40:17 2017 -0700 # Node ID 6d3120ae0f41701b90c7c9cec93a659ee5f68787 # Parent f7d67b6ee44f8bb98578fbef8bf7e2e86df638e8 evolve: adding longer aliases for olog Novice users will likely find the longer versions more obvious to understand what they're doing (olog only makes sense once you already know what it does). Especially on the help page, "obsoletelog" will be easier to find and make sense of. diff --git a/hgext3rd/evolve/obshistory.py b/hgext3rd/evolve/obshistory.py --- a/hgext3rd/evolve/obshistory.py +++ b/hgext3rd/evolve/obshistory.py @@ -25,7 +25,7 @@ eh = exthelper.exthelper() @eh.command( -'olog', +'^obsoletelog|obslog|olog', [('G', 'graph', True, _("show the revision DAG")), ('r', 'rev', [], _('show the specified revision or revset'), _('REV')) ] + commands.formatteropts, diff --git a/tests/test-evolve-obshistory.t b/tests/test-evolve-obshistory.t --- a/tests/test-evolve-obshistory.t +++ b/tests/test-evolve-obshistory.t @@ -59,7 +59,7 @@ x 471f378eab4c (1) A0 rewritten by test (*20*) as 4ae3a4151de9 (glob) - $ hg olog 4ae3a4151de9 --no-graph -Tjson | python -m json.tool + $ hg obslog 4ae3a4151de9 --no-graph -Tjson | python -m json.tool [ { "debugobshistory.markers": [], @@ -86,7 +86,7 @@ "debugobshistory.shortdescription": "A0" } ] - $ hg olog --hidden 471f378eab4c + $ hg obsoletelog --hidden 471f378eab4c x 471f378eab4c (1) A0 rewritten by test (*20*) as 4ae3a4151de9 (glob) ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH evolve-ext] evolve: fixing obscache invalidation
# HG changeset patch # User Rodrigo Damazio Bovendorp# Date 1495003030 25200 # Tue May 16 23:37:10 2017 -0700 # Node ID 8a19c6e8a1125be6bf6a5c97dc345c37c39a1189 # Parent 2241433a77e543ee068039fa2fb9ad514decca93 evolve: fixing obscache invalidation This was missing a call to the parent's destroyed(), such that any transaction after stripping some nodes would result in a crash (by attempting to read nodes which were stripped). diff --git a/README b/README --- a/README +++ b/README @@ -119,6 +119,7 @@ - topic: have thg display topic name if possible, - obscache: more efficient update in the (rare) case of a transaction adding markers without changesets + - obscache: fix more cache invalidation propagation - obshashrange-cache: update incrementally in the (common) case of a transaction not affecting existing range, - obshashrange-cache: keep the cache mostly warm after each transaction. diff --git a/hgext3rd/evolve/obscache.py b/hgext3rd/evolve/obscache.py --- a/hgext3rd/evolve/obscache.py +++ b/hgext3rd/evolve/obscache.py @@ -482,6 +482,7 @@ def destroyed(self): if 'obsstore' in vars(self): self.obsstore.obscache.clear() +super(obscacherepo, self).destroyed() def transaction(self, *args, **kwargs): tr = super(obscacherepo, self).transaction(*args, **kwargs) ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH] fancyopts: making config defaults actually override defaults
# HG changeset patch # User Rodrigo Damazio# Date 1489274373 28800 # Sat Mar 11 15:19:33 2017 -0800 # Node ID 8c833b81a994e2d3304c3b06793f536355528aab # Parent 62939e0148f170b67ca8c7374f36c413b67fd387 fancyopts: making config defaults actually override defaults This introduces the new defaults format "command.option" which directly overrides the default of the option, instead of prepending the command-line option. diff -r 62939e0148f1 -r 8c833b81a994 mercurial/dispatch.py --- a/mercurial/dispatch.py Wed Mar 08 18:11:41 2017 -0500 +++ b/mercurial/dispatch.py Sat Mar 11 15:19:33 2017 -0800 @@ -470,10 +470,29 @@ ui.configbool("ui", "strict")) cmd = aliases[0] args = aliasargs(entry[0], args) + +# override old-style defaults from config file by prepending command-line +# flags defaults = ui.config("defaults", cmd) if defaults: -args = map(util.expandpath, pycompat.shlexsplit(defaults)) + args +args = [util.expandpath(x) for x +in pycompat.shlexsplit(defaults)] + args c = list(entry[1]) + +# override new-style defaults from config file by actually changing the +# option defaults. +for idx, opt in enumerate(c): +optname = opt[1] +shortname = opt[0] +defaulttype = type(opt[2]) +rawdefault = ui.config("defaults", "%s.%s" % (cmd, optname)) or ( + ui.config("defaults", "%s.%s" % (cmd, shortname))) +if rawdefault: +# parse the new default using the type of the original default. 
+default = fancyopts.parsevalue(optname, rawdefault, defaulttype, + util.parsebool(rawdefault)) +c[idx] = (opt[0], opt[1], default, opt[3]) + else: cmd = None c = [] diff -r 62939e0148f1 -r 8c833b81a994 mercurial/fancyopts.py --- a/mercurial/fancyopts.pyWed Mar 08 18:11:41 2017 -0500 +++ b/mercurial/fancyopts.pySat Mar 11 15:19:33 2017 -0800 @@ -142,18 +142,27 @@ t = type(obj) if callable(obj): state[name] = defmap[name](val) -elif t is type(1): -try: -state[name] = int(val) -except ValueError: -raise error.Abort(_('invalid value %r for option %s, ' - 'expected int') % (val, opt)) -elif t is type(''): -state[name] = val elif t is type([]): state[name].append(val) -elif t is type(None) or t is type(False): -state[name] = boolval +else: +# non-callable single value. +state[name] = parsevalue(name, val, t, boolval) # return unparsed args return args + +def parsevalue(name, val, typ, boolval=True): +if typ is type(1): +try: +return int(val) +except ValueError: +raise error.Abort(_('invalid value %r for option %s, ' +'expected int') % (val, name)) +elif typ is type(''): +return val +elif (typ is type(None) or typ is type(False)) and ( + type(boolval) is type(False)): +return boolval +else: +raise error.Abort(_('invalid value %r for option %s, ' +'unknown type') % (val, name)) diff -r 62939e0148f1 -r 8c833b81a994 tests/test-dispatch.t --- a/tests/test-dispatch.t Wed Mar 08 18:11:41 2017 -0500 +++ b/tests/test-dispatch.t Sat Mar 11 15:19:33 2017 -0800 @@ -8,8 +8,10 @@ $ hg -v log -v x $ echo a > a + $ echo b > b $ hg ci -Ama adding a + adding b Missing arg: @@ -34,6 +36,7 @@ $ hg cat a a + $ cp $HGRCPATH hgrc.bak $ cat >> $HGRCPATH < [defaults] > cat = -r null @@ -42,6 +45,54 @@ a: no such file in rev [1] +new-style [defaults] and overrides + + $ cp -f hgrc.bak $HGRCPATH + $ hg cat a + a + $ cat >> $HGRCPATH < [defaults] + > cat.r = null + > EOF + $ hg cat a + a: no such file in rev + [1] + + $ cp -f hgrc.bak $HGRCPATH + $ cat >> $HGRCPATH < [defaults] + > cat.rev = null 
+ > EOF + $ hg cat a + a: no such file in rev + [1] + + $ mv -f hgrc.bak $HGRCPATH + $ echo foo >> a + $ hg rm b + $ echo bar > c + $ hg add c + $ hg status + M a + A c + R b + $ cat >> $HGRCPATH < [defaults] + > status.removed = 1 + > EOF + $ hg status + R b + $ hg status --modified + M a + R b + $ hg status --modified --no-removed + M a + $ hg status --no-removed + M a + A c + R b + $ hg revert a b c + $ cd "$TESTTMP" OSError "No such file or directory" / "The system cannot find the path ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org
[PATCH 2 of 2 v2] match: making visitdir() deal with non-recursive entries
# HG changeset patch # User Rodrigo Damazio Bovendorp# Date 1487034194 28800 # Mon Feb 13 17:03:14 2017 -0800 # Node ID e90de197586d0749e64cef752613e6fe41d1c8e3 # Parent 94264a6e6672c917d42518f7ae9322445868d067 match: making visitdir() deal with non-recursive entries Primarily as an optimization to avoid recursing into directories that will never have a match inside, this classifies each matcher pattern's root as recursive or non-recursive (erring on the side of keeping it recursive, which may lead to wasteful directory or manifest walks that yield no matches). I measured the performance of "rootfilesin" in two repos: - The Firefox repo with tree manifests, with "hg files -r . -I rootfilesin:browser". The browser directory contains about 3K files across 249 subdirectories. - A specific Google-internal directory which contains 75K files across 19K subdirectories, with "hg files -r . -I rootfilesin:REDACTED". I tested with both cold and warm disk caches. Cold cache was produced by running "sync; echo 3 > /proc/sys/vm/drop_caches". Warm cache was produced by re-running the same command a few times. These were the results: Cold cache Warm cache Before After Before After firefox 0m5.1s 0m2.18s 0m0.22s 0m0.14s google3 dir 2m3.9s 0m1.57s 0m8.12s 0m0.16s Certain extensions, notably narrowhg, can depend on this for correctness (not trying to recurse into directories for which it has no information). diff -r 94264a6e6672 -r e90de197586d mercurial/match.py --- a/mercurial/match.pyMon Feb 13 15:39:29 2017 -0800 +++ b/mercurial/match.pyMon Feb 13 17:03:14 2017 -0800 @@ -125,9 +125,12 @@ self._always = False self._pathrestricted = bool(include or exclude or patterns) self._warn = warn + +# roots are directories which are recursively included/excluded. self._includeroots = set() +self._excluderoots = set() +# dirs are directories which are non-recursively included. 
self._includedirs = set(['.']) -self._excluderoots = set() if badfn is not None: self.bad = badfn @@ -137,14 +140,20 @@ kindpats = self._normalize(include, 'glob', root, cwd, auditor) self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)', listsubrepos, root) -self._includeroots.update(_roots(kindpats)) -self._includedirs.update(util.dirs(self._includeroots)) +roots, dirs = _rootsanddirs(kindpats) +self._includeroots.update(roots) +self._includedirs.update(dirs) matchfns.append(im) if exclude: kindpats = self._normalize(exclude, 'glob', root, cwd, auditor) self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)', listsubrepos, root) if not _anypats(kindpats): +# Only consider recursive excludes as such - if a non-recursive +# exclude is used, we must still recurse into the excluded +# directory, at least to find subdirectories. In such a case, +# the regex still won't match the non-recursively-excluded +# files. self._excluderoots.update(_roots(kindpats)) matchfns.append(lambda f: not em(f)) if exact: @@ -241,7 +250,7 @@ return 'all' if dir in self._excluderoots: return False -if (self._includeroots and +if ((self._includeroots or self._includedirs != set(['.'])) and '.' not in self._includeroots and dir not in self._includeroots and dir not in self._includedirs and @@ -422,7 +431,9 @@ # m.exact(file) must be based off of the actual user input, otherwise # inexact case matches are treated as exact, and not noted without -v. 
if self._files: -self._fileroots = set(_roots(self._kp)) +roots, dirs = _rootsanddirs(self._kp) +self._fileroots = set(roots) +self._fileroots.update(dirs) def _normalize(self, patterns, default, root, cwd, auditor): self._kp = super(icasefsmatcher, self)._normalize(patterns, default, @@ -621,19 +632,16 @@ raise error.Abort(_("invalid pattern (%s): %s") % (k, p)) raise error.Abort(_("invalid pattern")) -def _roots(kindpats): -'''return roots and exact explicitly listed files from patterns - ->>> _roots([('glob', 'g/*', ''), ('glob', 'g', ''), ('glob', 'g*', '')]) -['g', 'g', '.'] ->>> _roots([('rootfilesin', 'g', ''), ('rootfilesin', '', '')]) -['g', '.'] ->>> _roots([('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')]) -['r', 'p/p', '.'] ->>> _roots([('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr',
[PATCH 1 of 2 v2] match: adding support for matching files inside a directory
# HG changeset patch # User Rodrigo Damazio Bovendorp# Date 1487029169 28800 # Mon Feb 13 15:39:29 2017 -0800 # Node ID 94264a6e6672c917d42518f7ae9322445868d067 # Parent 72f25e17af9d6a206ea374c30f229ae9513f3f23 match: adding support for matching files inside a directory This adds a new "rootfilesin" matcher type which matches files inside a directory, but not any subdirectories (so it matches non-recursively). This has the "root" prefix per foozy's plan for other matchers (rootglob, rootpath, cwdre, etc.). diff -r 72f25e17af9d -r 94264a6e6672 mercurial/help/patterns.txt --- a/mercurial/help/patterns.txt Mon Feb 13 02:31:56 2017 -0800 +++ b/mercurial/help/patterns.txt Mon Feb 13 15:39:29 2017 -0800 @@ -13,7 +13,10 @@ To use a plain path name without any pattern matching, start it with ``path:``. These path names must completely match starting at the -current repository root. +current repository root, and when the path points to a directory, it is matched +recursively. To match all files in a directory non-recursively (not including +any files in subdirectories), ``rootfilesin:`` can be used, specifying an +absolute path (relative to the repository root). To use an extended glob, start a name with ``glob:``. Globs are rooted at the current directory; a glob such as ``*.c`` will only match files @@ -39,12 +42,15 @@ All patterns, except for ``glob:`` specified in command line (not for ``-I`` or ``-X`` options), can match also against directories: files under matched directories are treated as matched. +For ``-I`` and ``-X`` options, ``glob:`` will match directories recursively. 
Plain examples:: - path:foo/bar a name bar in a directory named foo in the root - of the repository - path:path:name a file or directory named "path:name" + path:foo/bara name bar in a directory named foo in the root + of the repository + path:path:name a file or directory named "path:name" + rootfilesin:foo/bar the files in a directory called foo/bar, but not any files + in its subdirectories and not a file bar in directory foo Glob examples:: @@ -52,6 +58,8 @@ *.cany name ending in ".c" in the current directory **.c any name ending in ".c" in any subdirectory of the current directory including itself. + foo/* any file in directory foo plus all its subdirectories, + recursively foo/*.cany name ending in ".c" in the directory foo foo/**.c any name ending in ".c" in any subdirectory of foo including itself. diff -r 72f25e17af9d -r 94264a6e6672 mercurial/match.py --- a/mercurial/match.pyMon Feb 13 02:31:56 2017 -0800 +++ b/mercurial/match.pyMon Feb 13 15:39:29 2017 -0800 @@ -104,7 +104,10 @@ a pattern is one of: 'glob:' - a glob relative to cwd 're:' - a regular expression -'path:' - a path relative to repository root +'path:' - a path relative to repository root, which is matched +recursively +'rootfilesin:' - a path relative to repository root, which is +matched non-recursively (will not match subdirectories) 'relglob:' - an unrooted glob (*.c matches C files in all dirs) 'relpath:' - a path relative to cwd 'relre:' - a regexp that needn't match the start of a name @@ -153,7 +156,7 @@ elif patterns: kindpats = self._normalize(patterns, default, root, cwd, auditor) if not _kindpatsalwaysmatch(kindpats): -self._files = _roots(kindpats) +self._files = _explicitfiles(kindpats) self._anypats = self._anypats or _anypats(kindpats) self.patternspat, pm = _buildmatch(ctx, kindpats, '$', listsubrepos, root) @@ -286,7 +289,7 @@ for kind, pat in [_patsplit(p, default) for p in patterns]: if kind in ('glob', 'relpath'): pat = pathutil.canonpath(root, cwd, pat, auditor) -elif 
kind in ('relglob', 'path'): +elif kind in ('relglob', 'path', 'rootfilesin'): pat = util.normpath(pat) elif kind in ('listfile', 'listfile0'): try: @@ -447,7 +450,8 @@ if ':' in pattern: kind, pat = pattern.split(':', 1) if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre', -'listfile', 'listfile0', 'set', 'include', 'subinclude'): +'listfile', 'listfile0', 'set', 'include', 'subinclude', +'rootfilesin'): return kind, pat return default, pattern @@ -540,6 +544,14 @@ if pat == '.': return '' return '^' + util.re.escape(pat) + '(?:/|$)' +if kind == 'rootfilesin': +if pat == '.':
[PATCH 2 of 2] match: making visitdir() deal with non-recursive entries
# HG changeset patch # User Rodrigo Damazio Bovendorp# Date 1486767895 28800 # Fri Feb 10 15:04:55 2017 -0800 # Node ID 10d1ea213c7dd22b0843970eb88220d69b7c84cb # Parent 2d9523f80c5b5fbace1b0566fb47bed7468369b0 match: making visitdir() deal with non-recursive entries Primarily as an optimization to avoid recursing into directories that will never have a match inside, this classifies each matcher pattern's root as recursive or non-recursive (erring on the side of keeping it recursive, which may lead to wasteful directory or manifest walks that yield no matches). I measured the performance of "rootfilesin" in two repos: - The Firefox repo with tree manifests, with "hg files -r . -I rootfilesin:browser". The browser directory contains about 3K files across 249 subdirectories. - A specific Google-internal directory which contains 75K files across 19K subdirectories, with "hg files -r . -I rootfilesin:REDACTED". I tested with both cold and warm disk caches. Cold cache was produced by running "sync; echo 3 > /proc/sys/vm/drop_caches". Warm cache was produced by re-running the same command a few times. These were the results: Cold cache Warm cache Before After Before After firefox 0m5.1s 0m2.18s 0m0.22s 0m0.14s google3 dir 2m3.9s 0m1.57s 0m8.12s 0m0.16s Certain extensions, notably narrowhg, can depend on this for correctness (not trying to recurse into directories for which it has no information). diff -r 2d9523f80c5b -r 10d1ea213c7d mercurial/match.py --- a/mercurial/match.pyFri Feb 10 15:12:00 2017 -0800 +++ b/mercurial/match.pyFri Feb 10 15:04:55 2017 -0800 @@ -125,9 +125,12 @@ self._always = False self._pathrestricted = bool(include or exclude or patterns) self._warn = warn + +# roots are directories which are recursively included/excluded. self._includeroots = set() +self._excluderoots = set() +# dirs are directories which are non-recursively included. 
self._includedirs = set(['.']) -self._excluderoots = set() if badfn is not None: self.bad = badfn @@ -137,15 +140,22 @@ kindpats = self._normalize(include, 'glob', root, cwd, auditor) self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)', listsubrepos, root) -self._includeroots.update(_roots(kindpats)) -self._includedirs.update(util.dirs(self._includeroots)) +roots, dirs = _roots(kindpats) +self._includeroots.update(roots) +self._includedirs.update(dirs) matchfns.append(im) if exclude: kindpats = self._normalize(exclude, 'glob', root, cwd, auditor) self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)', listsubrepos, root) if not _anypats(kindpats): -self._excluderoots.update(_roots(kindpats)) +# Only consider recursive excludes as such - if a non-recursive +# exclude is used, we must still recurse into the excluded +# directory, at least to find subdirectories. In such a case, +# the regex still won't match the non-recursively-excluded +# files. +roots, dirs = _roots(kindpats) +self._excluderoots.update(roots) matchfns.append(lambda f: not em(f)) if exact: if isinstance(patterns, list): @@ -156,6 +166,7 @@ elif patterns: kindpats = self._normalize(patterns, default, root, cwd, auditor) if not _kindpatsalwaysmatch(kindpats): +roots, dirs = _roots(kindpats) self._files = _explicitfiles(kindpats) self._anypats = self._anypats or _anypats(kindpats) self.patternspat, pm = _buildmatch(ctx, kindpats, '$', @@ -241,7 +252,7 @@ return 'all' if dir in self._excluderoots: return False -if (self._includeroots and +if ((self._includeroots or self._includedirs != set(['.'])) and '.' not in self._includeroots and dir not in self._includeroots and dir not in self._includedirs and @@ -422,7 +433,9 @@ # m.exact(file) must be based off of the actual user input, otherwise # inexact case matches are treated as exact, and not noted without -v. 
if self._files: -self._fileroots = set(_roots(self._kp)) +roots, dirs = _roots(self._kp) +self._fileroots = set(roots) +self._fileroots.update(dirs) def _normalize(self, patterns, default, root, cwd, auditor): self._kp = super(icasefsmatcher, self)._normalize(patterns, default, @@ -622,18 +635,22 @@ raise error.Abort(_("invalid
[PATCH 1 of 2] match: adding support for matching files inside a directory
# HG changeset patch # User Rodrigo Damazio Bovendorp# Date 1486768320 28800 # Fri Feb 10 15:12:00 2017 -0800 # Node ID 2d9523f80c5b5fbace1b0566fb47bed7468369b0 # Parent a95fc01aaffe805bcc4c02a822b82a1162fa35b9 match: adding support for matching files inside a directory This adds a new "rootfilesin" matcher type which matches files inside a directory, but not any subdirectories (so it matches non-recursively). This has the "root" prefix per foozy's plan for other matchers (rootglob, rootpath, cwdre, etc.). diff -r a95fc01aaffe -r 2d9523f80c5b mercurial/help/patterns.txt --- a/mercurial/help/patterns.txt Wed Feb 08 14:37:38 2017 -0800 +++ b/mercurial/help/patterns.txt Fri Feb 10 15:12:00 2017 -0800 @@ -13,7 +13,10 @@ To use a plain path name without any pattern matching, start it with ``path:``. These path names must completely match starting at the -current repository root. +current repository root, and when the path points to a directory, it is matched +recursively. To match all files in a directory non-recursively (not including +any files in subdirectories), ``rootfilesin:`` can be used, specifying an +absolute path (relative to the repository root). To use an extended glob, start a name with ``glob:``. Globs are rooted at the current directory; a glob such as ``*.c`` will only match files @@ -39,12 +42,15 @@ All patterns, except for ``glob:`` specified in command line (not for ``-I`` or ``-X`` options), can match also against directories: files under matched directories are treated as matched. +For ``-I`` and ``-X`` options, ``glob:`` will match directories recursively. 
Plain examples:: - path:foo/bar a name bar in a directory named foo in the root - of the repository - path:path:name a file or directory named "path:name" + path:foo/bara name bar in a directory named foo in the root + of the repository + path:path:name a file or directory named "path:name" + rootfilesin:foo/bar the files in a directory called foo/bar, but not any files + in its subdirectories and not a file bar in directory foo Glob examples:: @@ -52,6 +58,8 @@ *.cany name ending in ".c" in the current directory **.c any name ending in ".c" in any subdirectory of the current directory including itself. + foo/* any file in directory foo plus all its subdirectories, + recursively foo/*.cany name ending in ".c" in the directory foo foo/**.c any name ending in ".c" in any subdirectory of foo including itself. diff -r a95fc01aaffe -r 2d9523f80c5b mercurial/match.py --- a/mercurial/match.pyWed Feb 08 14:37:38 2017 -0800 +++ b/mercurial/match.pyFri Feb 10 15:12:00 2017 -0800 @@ -104,7 +104,10 @@ a pattern is one of: 'glob:' - a glob relative to cwd 're:' - a regular expression -'path:' - a path relative to repository root +'path:' - a path relative to repository root, which is matched +recursively +'rootfilesin:' - a path relative to repository root, which is +matched non-recursively (will not match subdirectories) 'relglob:' - an unrooted glob (*.c matches C files in all dirs) 'relpath:' - a path relative to cwd 'relre:' - a regexp that needn't match the start of a name @@ -153,7 +156,7 @@ elif patterns: kindpats = self._normalize(patterns, default, root, cwd, auditor) if not _kindpatsalwaysmatch(kindpats): -self._files = _roots(kindpats) +self._files = _explicitfiles(kindpats) self._anypats = self._anypats or _anypats(kindpats) self.patternspat, pm = _buildmatch(ctx, kindpats, '$', listsubrepos, root) @@ -286,7 +289,7 @@ for kind, pat in [_patsplit(p, default) for p in patterns]: if kind in ('glob', 'relpath'): pat = pathutil.canonpath(root, cwd, pat, auditor) -elif 
kind in ('relglob', 'path'): +elif kind in ('relglob', 'path', 'rootfilesin'): pat = util.normpath(pat) elif kind in ('listfile', 'listfile0'): try: @@ -447,7 +450,8 @@ if ':' in pattern: kind, pat = pattern.split(':', 1) if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre', -'listfile', 'listfile0', 'set', 'include', 'subinclude'): +'listfile', 'listfile0', 'set', 'include', 'subinclude', +'rootfilesin'): return kind, pat return default, pattern @@ -540,6 +544,14 @@ if pat == '.': return '' return '^' + util.re.escape(pat) + '(?:/|$)' +if kind == 'rootfilesin': +if pat == '.':
[PATCH] match: adding support for repository-root-based globs
# HG changeset patch # User Rodrigo Damazio Bovendorp# Date 1475944120 25200 # Sat Oct 08 09:28:40 2016 -0700 # Node ID 93434cce258a797fcc3997c0af994a524695e273 # Parent b032a7b676c6637b2ac6f3ef29431013b15a08f9 match: adding support for repository-root-based globs The broader plan is to add explicit base directories for all patterns: === === pattern type root-ed cwd-ed any-of-path === === wildcard rootglob cwdglob anyglob regexp rootre cwdre anyre raw string rootpath cwdpath anypath === === (table by foozy) I'm starting by adding rootglob. One important characteristic and difference from the older glob types is that rootglob does a *full* match, meaning that a * at the end will never match recursively, even when the glob is used as an include pattern. diff -r b032a7b676c6 -r 93434cce258a mercurial/help/patterns.txt --- a/mercurial/help/patterns.txt Tue Nov 01 18:54:03 2016 -0700 +++ b/mercurial/help/patterns.txt Sat Oct 08 09:28:40 2016 -0700 @@ -40,6 +40,11 @@ ``-I`` or ``-X`` options), can match also against directories: files under matched directories are treated as matched. +For ``-I`` and ``-X`` options, ``glob:`` will match directories recursively. +``rootglob:``, on the other hand, does a full match, meaning that all files, in +directories or subdirectories, will only match if the entire expression matches. +In that case, ``**`` can be used to obtain recursiveness. + Plain examples:: path:foo/bar a name bar in a directory named foo in the root @@ -48,13 +53,18 @@ Glob examples:: - glob:*.c any name ending in ".c" in the current directory - *.cany name ending in ".c" in the current directory - **.c any name ending in ".c" in any subdirectory of the - current directory including itself. - foo/*.cany name ending in ".c" in the directory foo - foo/**.c any name ending in ".c" in any subdirectory of foo - including itself. 
+ glob:*.c any name ending in ".c" in the current directory + *.c any name ending in ".c" in the current directory + **.c any name ending in ".c" in any subdirectory of the + current directory including itself. + foo/* any file in directory foo plus all its subdirectories, + recursively + foo/*.c any name ending in ".c" in the directory foo + foo/**.c any name ending in ".c" in any subdirectory of foo + including itself. + rootglob:*.c any name ending in ".c" in the repository root + rootglob:foo/* all files inside foo but not its subdirectories + rootglob:foo/** all files inside foo and its subdirectories Regexp examples:: diff -r b032a7b676c6 -r 93434cce258a mercurial/match.py --- a/mercurial/match.py Tue Nov 01 18:54:03 2016 -0700 +++ b/mercurial/match.py Sat Oct 08 09:28:40 2016 -0700 @@ -105,6 +105,8 @@ 'glob:' - a glob relative to cwd 're:' - a regular expression 'path:' - a path relative to repository root +'rootglob:' - a glob relative to repository root. Unlike glob, * +will never match subdirectories.
'relglob:' - an unrooted glob (*.c matches C files in all dirs) 'relpath:' - a path relative to cwd 'relre:' - a regexp that needn't match the start of a name @@ -286,7 +288,7 @@ for kind, pat in [_patsplit(p, default) for p in patterns]: if kind in ('glob', 'relpath'): pat = pathutil.canonpath(root, cwd, pat, auditor) -elif kind in ('relglob', 'path'): +elif kind in ('relglob', 'path', 'rootglob'): pat = util.normpath(pat) elif kind in ('listfile', 'listfile0'): try: @@ -447,7 +449,8 @@ if ':' in pattern: kind, pat = pattern.split(':', 1) if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre', -'listfile', 'listfile0', 'set', 'include', 'subinclude'): +'listfile', 'listfile0', 'set', 'include', 'subinclude', +'rootglob'): return kind, pat return default, pattern @@ -540,6 +543,8 @@ if pat == '.': return '' return '^' + util.re.escape(pat) + '(?:/|$)' +if kind == 'rootglob': +return '^' + _globre(pat) + '$' if kind == 'relglob': return '(?:|.*/)' + _globre(pat) + globsuffix if kind == 'relpath': @@ -614,6 +619,8 @@ >>> _roots([('glob', 'g/*', ''), ('glob', 'g', ''), ('glob', 'g*', '')]) ['g', 'g', '.'] +>>> _roots([('rootglob', 'g/*', ''), ('rootglob', 'g', '')]) +['g', 'g'] >>> _roots([('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')]) ['r', 'p/p', '.'] >>>
[PATCH] match: adding support for repository-root-based globs
# HG changeset patch # User Rodrigo Damazio Bovendorp # Date 1475944120 25200 # Sat Oct 08 09:28:40 2016 -0700 # Node ID 5a24706699632b6b91f1079549e7ddd0ea952267 # Parent b032a7b676c6637b2ac6f3ef29431013b15a08f9 match: adding support for repository-root-based globs The broader plan is to add explicit base directories for all patterns: === === pattern type root-ed cwd-ed any-of-path === === wildcard rootglob cwdglob anyglob regexp rootre cwdre anyre raw string rootpath cwdpath anypath === === (table by foozy) I'm starting by adding rootglob. One important characteristic and difference from the older glob types is that rootglob does a *full* match, meaning that a * at the end will never match recursively, even when the glob is used as an include pattern. diff -r b032a7b676c6 -r 5a2470669963 mercurial/help/patterns.txt --- a/mercurial/help/patterns.txt Tue Nov 01 18:54:03 2016 -0700 +++ b/mercurial/help/patterns.txt Sat Oct 08 09:28:40 2016 -0700 @@ -40,6 +40,11 @@ ``-I`` or ``-X`` options), can match also against directories: files under matched directories are treated as matched. +For ``-I`` and ``-X`` options, ``glob:`` will match directories recursively. +``rootglob:``, on the other hand, does a full match, meaning that all files, in +directories or subdirectories, will only match if the entire expression matches. +In that case, ``**`` can be used to obtain recursiveness. + Plain examples:: path:foo/bar a name bar in a directory named foo in the root @@ -48,13 +53,18 @@ Glob examples:: - glob:*.c any name ending in ".c" in the current directory - *.c any name ending in ".c" in the current directory - **.c any name ending in ".c" in any subdirectory of the - current directory including itself. - foo/*.c any name ending in ".c" in the directory foo - foo/**.c any name ending in ".c" in any subdirectory of foo - including itself.
+ glob:*.c any name ending in ".c" in the current directory + *.c any name ending in ".c" in the current directory + **.c any name ending in ".c" in any subdirectory of the + current directory including itself. + foo/* any file in directory foo plus all its subdirectories, + recursively + foo/*.c any name ending in ".c" in the directory foo + foo/**.c any name ending in ".c" in any subdirectory of foo + including itself. + rootglob:*.c any name ending in ".c" in the repository root + rootglob:foo/* all files inside foo but not its subdirectories + rootglob:foo/** all files inside foo and its subdirectories Regexp examples:: diff -r b032a7b676c6 -r 5a2470669963 mercurial/match.py --- a/mercurial/match.py Tue Nov 01 18:54:03 2016 -0700 +++ b/mercurial/match.py Sat Oct 08 09:28:40 2016 -0700 @@ -105,6 +105,8 @@ 'glob:' - a glob relative to cwd 're:' - a regular expression 'path:' - a path relative to repository root +'rootglob:' - a glob relative to repository root. Unlike glob, * +will never match subdirectories.
'relglob:' - an unrooted glob (*.c matches C files in all dirs) 'relpath:' - a path relative to cwd 'relre:' - a regexp that needn't match the start of a name @@ -286,7 +288,7 @@ for kind, pat in [_patsplit(p, default) for p in patterns]: if kind in ('glob', 'relpath'): pat = pathutil.canonpath(root, cwd, pat, auditor) -elif kind in ('relglob', 'path'): +elif kind in ('relglob', 'path', 'rootglob'): pat = util.normpath(pat) elif kind in ('listfile', 'listfile0'): try: @@ -447,7 +449,8 @@ if ':' in pattern: kind, pat = pattern.split(':', 1) if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre', -'listfile', 'listfile0', 'set', 'include', 'subinclude'): +'listfile', 'listfile0', 'set', 'include', 'subinclude', +'rootglob'): return kind, pat return default, pattern @@ -540,6 +543,8 @@ if pat == '.': return '' return '^' + util.re.escape(pat) + '(?:/|$)' +if kind == 'rootglob': +return '^' + _globre(pat) + '$' if kind == 'relglob': return '(?:|.*/)' + _globre(pat) + globsuffix if kind == 'relpath': @@ -614,6 +619,8 @@ >>> _roots([('glob', 'g/*', ''), ('glob', 'g', ''), ('glob', 'g*', '')]) ['g', 'g', '.'] +>>> _roots([('rootglob', 'g/*', ''), ('rootglob', 'g'), ('glob', 'g*', '')]) +['g', 'g', '.'] >>> _roots([('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')]) ['r', 'p/p', '.']
[PATCH] match: adding non-recursive directory matching
# HG changeset patch # User Rodrigo Damazio Bovendorp # Date 1475944120 25200 # Sat Oct 08 09:28:40 2016 -0700 # Node ID 545efe5a72efdce925a6a3fd3774b350c90b5c55 # Parent dbcef8918bbdd8a64d9f79a37bcfa284a26f3a39 match: adding non-recursive directory matching This allows one to match all files in a directory, without matching anything in subdirectories. It's implemented almost identically to path:, except for the regex termination, which doesn't allow more than one / after the directory name. diff --git a/mercurial/match.py b/mercurial/match.py --- a/mercurial/match.py +++ b/mercurial/match.py @@ -105,6 +105,9 @@ 'glob:' - a glob relative to cwd 're:' - a regular expression 'path:' - a path relative to repository root +'files:' - a path relative to repository root, which is matched + non-recursively (files inside the directory will match, + but subdirectories and files in them won't) 'relglob:' - an unrooted glob (*.c matches C files in all dirs) 'relpath:' - a path relative to cwd 'relre:' - a regexp that needn't match the start of a name @@ -286,7 +289,7 @@ for kind, pat in [_patsplit(p, default) for p in patterns]: if kind in ('glob', 'relpath'): pat = pathutil.canonpath(root, cwd, pat, auditor) -elif kind in ('relglob', 'path'): +elif kind in ('relglob', 'path', 'files'): pat = util.normpath(pat) elif kind in ('listfile', 'listfile0'): try: @@ -447,7 +450,8 @@ if ':' in pattern: kind, pat = pattern.split(':', 1) if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre', -'listfile', 'listfile0', 'set', 'include', 'subinclude'): +'listfile', 'listfile0', 'set', 'include', 'subinclude', +'files'): return kind, pat return default, pattern @@ -540,6 +544,19 @@ if pat == '.': return '' return '^' + util.re.escape(pat) + '(?:/|$)' +if kind == 'files': +# Match one of: +# For pat = 'some/dir': +# some/dir +# some/dir/ +# some/dir/filename +# For pat = '' or pat = '.': +# filename +if pat == '.': +escaped = '' +else: +escaped = util.re.escape(pat) +return '^' + 
escaped + '(?:^|/|$)[^/]*$' if kind == 'relglob': return '(?:|.*/)' + _globre(pat) + globsuffix if kind == 'relpath': @@ -628,7 +645,7 @@ break root.append(p) r.append('/'.join(root) or '.') -elif kind in ('relpath', 'path'): +elif kind in ('relpath', 'path', 'files'): r.append(pat or '.') else: # relglob, re, relre r.append('.') diff --git a/tests/test-locate.t b/tests/test-locate.t --- a/tests/test-locate.t +++ b/tests/test-locate.t @@ -52,6 +52,12 @@ t/b t/e.h t/x + $ hg locate files: + b + t.h + $ hg locate files:. + b + t.h $ hg locate -r 0 a a $ hg locate -r 0 NONEXISTENT @@ -119,6 +125,13 @@ ../t/e.h (glob) ../t/x (glob) + $ hg files files: + ../b (glob) + ../t.h (glob) + $ hg files files:. + ../b (glob) + ../t.h (glob) + $ hg locate b ../b (glob) ../t/b (glob) diff --git a/tests/test-walk.t b/tests/test-walk.t --- a/tests/test-walk.t +++ b/tests/test-walk.t @@ -112,6 +112,8 @@ f beans/navy ../beans/navy f beans/pinto ../beans/pinto f beans/turtle ../beans/turtle + $ hg debugwalk -I 'files:mammals' + f mammals/skunk skunk $ hg debugwalk . f mammals/Procyonidae/cacomistle Procyonidae/cacomistle f mammals/Procyonidae/coatimundi Procyonidae/coatimundi ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel