quark created this revision. Herald added a subscriber: mercurial-devel. Herald added a reviewer: hg-reviewers.
REVISION SUMMARY NOTE: I'm still reading xdiffi.c to understand whether this is a safe optimization or not. xdiff has a "xdl_trim_ends" function that removes common prefix and suffix. Previously, xdiff will build a hashtable for all lines. That is a waste of time for trimmed lines. This diff changes the logic so trimmed lines will be ignored when building the hashtable. Note: the hashtable is still needed for shifting purpose, so it does not blindly take whatever `xdl_trim_ends` says, but also looks around. For the following test case: #!python open('a','w').write(''.join('%s\n' % (i % 100000) for i in xrange(10000000))) open('b','w').write(''.join('%s\n' % (i % 100000) for i in xrange(10000001))) This series reduces xdiff's time for the above case from 1.1 seconds (https://phab.mercurial-scm.org/D2604) to 0.6 seconds. However, GNU diffutils can perform even better (<0.1 seconds), there are still things to catch up. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D2631 AFFECTED FILES mercurial/thirdparty/xdiff/xprepare.c CHANGE DETAILS diff --git a/mercurial/thirdparty/xdiff/xprepare.c b/mercurial/thirdparty/xdiff/xprepare.c --- a/mercurial/thirdparty/xdiff/xprepare.c +++ b/mercurial/thirdparty/xdiff/xprepare.c @@ -62,6 +62,7 @@ static int xdl_clean_mmatch(char const *dis, long i, long s, long e); static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2); static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2); +static long xdl_trim_reserved_lines(xdfile_t *xdf1, xdfile_t *xdf2); @@ -234,21 +235,32 @@ * unique. This makes future calculation faster - they can just compare "ha" * instead of comparing line content. */ -static int xdl_prepare_hashtable(unsigned int pass, mmfile_t *mf, - xpparam_t const *xpp, xdlclassifier_t *cf, xdfile_t *xdf) { +static int xdl_prepare_hashtable(unsigned int pass, long reserved, mmfile_t + *mf, xpparam_t const *xpp, xdlclassifier_t *cf, xdfile_t *xdf) +{ xrecord_t **rhash = NULL; - long nrec = xdf->nrec; + long nrec; + unsigned int hbits; long hsize; long i; + long start = xdf->dstart - reserved; + long end = xdf->dend + reserved; + + if (start < 0) + start = 0; + if (end >= xdf->nrec) + end = xdf->nrec - 1; + + nrec = end - start; hbits = xdl_hashbits((unsigned int) nrec); hsize = 1 << hbits; if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *)))) goto abort; memset(rhash, 0, hsize * sizeof(xrecord_t *)); - for (i = 0; i < nrec; ++i) { + for (i = start; i <= end; i++) { if (xdl_classify_record(pass, cf, rhash, hbits, xdf->recs[i]) < 0) goto abort; } @@ -275,7 +287,7 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, xdfenv_t *xe) { - long enl1, enl2, sample; + long enl1, enl2, sample, reserved; xdlclassifier_t cf; memset(&cf, 0, sizeof(cf)); @@ -307,13 +319,14 @@ return -1; } - if (xdl_prepare_hashtable(1, mf1, xpp, &cf, &xe->xdf1) < 0) { + reserved = xdl_trim_reserved_lines(&xe->xdf1, &xe->xdf2); + if (xdl_prepare_hashtable(1, reserved, mf1, xpp, &cf, &xe->xdf1) < 0) { xdl_free_ctx(&xe->xdf1); xdl_free_ctx(&xe->xdf2); xdl_free_classifier(&cf); return -1; } - if (xdl_prepare_hashtable(2, mf2, xpp, &cf, &xe->xdf2) < 0) { + if (xdl_prepare_hashtable(2, reserved, mf2, xpp, &cf, &xe->xdf2) < 0) { xdl_free_ctx(&xe->xdf1); xdl_free_ctx(&xe->xdf2); xdl_free_classifier(&cf); @@ -462,7 +475,6 @@ return 0; } - /* * Early trim initial and terminal matching records. */ @@ -500,3 +512,22 @@ return 0; } + + +/* + * Return "reserved lines" for possible hunk shifting. Normally, only look at + * lines in dstart..dend range. But hunk shifting also needs accurate line + * hashes. Estimated hunk size and reserve lines for shifting purpose. + * + * This would be used by xdl_prepare_hashtable, to build accurate hash values. + */ +static long xdl_trim_reserved_lines(xdfile_t *xdf1, xdfile_t *xdf2) { + long lines = 0; + if (xdf1->dend > xdf1->dstart) + lines += xdf1->dend - xdf1->dstart; + if (xdf2->dend > xdf2->dstart) + lines += xdf2->dend - xdf2->dstart; + return lines; +} + + To: quark, #hg-reviewers Cc: mercurial-devel _______________________________________________ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel