---------- Forwarded message --------- From: Martin von Zweigbergk <martinv...@google.com> Date: Fri, Jan 13, 2017, 16:19 Subject: Re: [PATCH] similar: remove caching from the module level To: Pierre-Yves David <pierre-yves.da...@ens-lyon.org>
On Fri, Jan 13, 2017 at 2:43 PM, Pierre-Yves David <pierre-yves.da...@ens-lyon.org> wrote: > > > On 01/13/2017 08:43 PM, Sean Farley wrote: >> >> # HG changeset patch >> # User Sean Farley <s...@farley.io> >> # Date 1484336556 28800 >> # Fri Jan 13 11:42:36 2017 -0800 >> # Node ID 7c54c9524a0954a3bd2f2fb4451028869f851f6d >> # Parent 8540967cd9e0909a9a73dbff458c4cedd4db26aa >> similar: remove caching from the module level >> >> To prevent Bad Things™ from happening, let's rework the logic to not use >> util.cachefunc. Queued with author changed to Pierre-Yves and with a bug fixed. > > > Looks good to me, but… since this is my code, that should probably have my > name of it ;-) > > https://www.mercurial-scm.org/pipermail/mercurial-devel/2017-January/092290.html > > (I saw with Sean on IRC and he is okay with changing the name) Can another > reviewer fix this in flight ? > > >> diff --git a/mercurial/similar.py b/mercurial/similar.py >> --- a/mercurial/similar.py >> +++ b/mercurial/similar.py >> @@ -11,11 +11,10 @@ import hashlib >> >> from .i18n import _ >> from . import ( >> bdiff, >> mdiff, >> - util, >> ) >> >> def _findexactmatches(repo, added, removed): >> '''find renamed files that have no changes >> >> @@ -41,20 +40,18 @@ def _findexactmatches(repo, added, remov >> yield (hashes[h], fctx) >> >> # Done >> repo.ui.progress(_('searching for exact renames'), None) >> >> -@util.cachefunc >> def _ctxdata(fctx): >> # lazily load text >> orig = fctx.data() >> return orig, mdiff.splitnewlines(orig) >> >> -@util.cachefunc >> -def score(fctx1, fctx2): >> - text = fctx1.data() >> - orig, lines = _ctxdata(fctx2) >> +def _score(fctx, otherdata): >> + orig, lines = otherdata >> + text = fctx.data() >> # bdiff.blocks() returns blocks of matching lines >> # count the number of bytes in each >> equal = 0 >> matches = bdiff.blocks(text, orig) >> for x1, x2, y1, y2 in matches: >> @@ -62,10 +59,13 @@ def score(fctx1, fctx2): >> equal += len(line) >> >> lengths = len(text) + len(orig) >> return equal * 2.0 / lengths >> >> +def score(fctx1, fctx2): >> + return _score(fctx1, _ctxdata(fctx2)) >> + >> def _findsimilarmatches(repo, added, removed, threshold): >> '''find potentially renamed files based on similar file content >> >> Takes a list of new filectxs and a list of removed filectxs, and >> yields >> (before, after, score) tuples of partial matches. >> @@ -73,13 +73,16 @@ def _findsimilarmatches(repo, added, rem >> copies = {} >> for i, r in enumerate(removed): >> repo.ui.progress(_('searching for similar files'), i, >> total=len(removed), unit=_('files')) >> >> + data = None >> for a in added: >> bestscore = copies.get(a, (None, threshold))[1] >> - myscore = score(a, r) >> + if data is None: >> + data = _ctxdata(r) >> + myscore = score(a, data) Tests didn't like this. Changed to _score(). >> if myscore >= bestscore: >> copies[a] = (r, myscore) >> repo.ui.progress(_('searching'), None) >> >> for dest, v in copies.iteritems(): >> _______________________________________________ >> Mercurial-devel mailing list >> Mercurial-devel@mercurial-scm.org >> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel >> > > -- > Pierre-Yves David > > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
_______________________________________________ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel