D2067: changegroup: do not delta lfs revisions
This revision was automatically updated to reflect the committed changes. Closed by commit rHGd031609b3cb7: changegroup: do not delta lfs revisions (authored by quark, committed by ). REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D2067?vs=6469&id=6670 REVISION DETAIL https://phab.mercurial-scm.org/D2067 AFFECTED FILES mercurial/changegroup.py mercurial/revlog.py tests/test-lfs-bundle.t tests/test-lfs.t CHANGE DETAILS diff --git a/tests/test-lfs.t b/tests/test-lfs.t --- a/tests/test-lfs.t +++ b/tests/test-lfs.t @@ -349,7 +349,7 @@ uncompressed size of bundle content: * (changelog) (glob) * (manifests) (glob) - * a (glob) + * a (glob) $ hg --config extensions.strip= strip -r 2 --no-backup --force -q $ hg -R bundle.hg log -p -T '{rev} {desc}\n' a 5 branching diff --git a/tests/test-lfs-bundle.t b/tests/test-lfs-bundle.t --- a/tests/test-lfs-bundle.t +++ b/tests/test-lfs-bundle.t @@ -95,6 +95,6 @@ 2 integrity errors encountered! (first damaged changeset appears to be 2) Applying src-lfs.bundle to dst-normal - CRASHED + OK Applying src-lfs.bundle to dst-lfs OK diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -77,6 +77,8 @@ REVIDX_EXTSTORED, ] REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER) +# bitmark for flags that could cause rawdata content change +REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED # max size of revlog with inline data _maxinline = 131072 @@ -96,7 +98,8 @@ """Register a flag processor on a revision data flag. Invariant: -- Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER. +- Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER, + and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext. - Only one flag processor can be registered on a specific flag. - flagprocessors must be 3-tuples of functions (read, write, raw) with the following signatures: @@ -713,6 +716,18 @@ except KeyError: return False +def candelta(self, baserev, rev): +"""whether two revisions (baserev, rev) can be delta-ed or not""" +# Disable delta if either rev requires a content-changing flag +# processor (ex. LFS). This is because such flag processor can alter +# the rawtext content that the delta will be based on, and two clients +# could have a same revlog node with different flags (i.e. different +# rawtext contents) and the delta could be incompatible. +if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) +or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)): +return False +return True + def clearcaches(self): self._cache = None self._chainbasecache.clear() diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py --- a/mercurial/changegroup.py +++ b/mercurial/changegroup.py @@ -770,6 +770,8 @@ progress(msgbundling, None) def deltaparent(self, revlog, rev, p1, p2, prev): +if not revlog.candelta(prev, rev): +raise error.ProgrammingError('cg1 should not be used in this case') return prev def revchunk(self, revlog, rev, prev, linknode): @@ -829,16 +831,19 @@ # expensive. The revlog caches should have prev cached, meaning # less CPU for changegroup generation. There is likely room to add # a flag and/or config option to control this behavior. -return prev +base = prev elif dp == nullrev: # revlog is configured to use full snapshot for a reason, # stick to full snapshot. -return nullrev +base = nullrev elif dp not in (p1, p2, prev): # Pick prev when we can't be sure remote has the base revision. return prev else: -return dp +base = dp +if base != nullrev and not revlog.candelta(base, rev): +base = nullrev +return base def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags): # Do nothing with flags, it is implicitly 0 in cg1 and cg2 To: quark, indygreg, #hg-reviewers, ryanmce Cc: martinvonz, durin42, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2067: changegroup: do not delta lfs revisions
quark added inline comments. INLINE COMMENTS > indygreg wrote in test-lfs.t:374 > Where did this change come from? The number grows from 3 digits to 4 digits because of increased size, and one of the space becomes a digit. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D2067 To: quark, indygreg, #hg-reviewers, ryanmce Cc: martinvonz, durin42, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2067: changegroup: do not delta lfs revisions
indygreg accepted this revision. indygreg added inline comments. INLINE COMMENTS > test-lfs.t:374 > * (manifests) (glob) > - * a (glob) > + * a (glob) >$ hg --config extensions.strip= strip -r 2 --no-backup --force -q Where did this change come from? REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D2067 To: quark, indygreg, #hg-reviewers, ryanmce Cc: martinvonz, durin42, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2067: changegroup: do not delta lfs revisions
quark updated this revision to Diff 6469. REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D2067?vs=5650&id=6469 REVISION DETAIL https://phab.mercurial-scm.org/D2067 AFFECTED FILES mercurial/changegroup.py mercurial/revlog.py tests/test-lfs-bundle.t tests/test-lfs.t CHANGE DETAILS diff --git a/tests/test-lfs.t b/tests/test-lfs.t --- a/tests/test-lfs.t +++ b/tests/test-lfs.t @@ -371,7 +371,7 @@ uncompressed size of bundle content: * (changelog) (glob) * (manifests) (glob) - * a (glob) + * a (glob) $ hg --config extensions.strip= strip -r 2 --no-backup --force -q $ hg -R bundle.hg log -p -T '{rev} {desc}\n' a 5 branching diff --git a/tests/test-lfs-bundle.t b/tests/test-lfs-bundle.t --- a/tests/test-lfs-bundle.t +++ b/tests/test-lfs-bundle.t @@ -95,6 +95,6 @@ 2 integrity errors encountered! (first damaged changeset appears to be 2) Applying src-lfs.bundle to dst-normal - CRASHED + OK Applying src-lfs.bundle to dst-lfs OK diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -77,6 +77,8 @@ REVIDX_EXTSTORED, ] REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER) +# bitmark for flags that could cause rawdata content change +REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED # max size of revlog with inline data _maxinline = 131072 @@ -96,7 +98,8 @@ """Register a flag processor on a revision data flag. Invariant: -- Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER. +- Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER, + and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext. - Only one flag processor can be registered on a specific flag. - flagprocessors must be 3-tuples of functions (read, write, raw) with the following signatures: @@ -738,6 +741,18 @@ except KeyError: return False +def candelta(self, baserev, rev): +"""whether two revisions (baserev, rev) can be delta-ed or not""" +# Disable delta if either rev requires a content-changing flag +# processor (ex. LFS). This is because such flag processor can alter +# the rawtext content that the delta will be based on, and two clients +# could have a same revlog node with different flags (i.e. different +# rawtext contents) and the delta could be incompatible. +if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) +or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)): +return False +return True + def clearcaches(self): self._cache = None self._chainbasecache.clear() diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py --- a/mercurial/changegroup.py +++ b/mercurial/changegroup.py @@ -774,6 +774,8 @@ progress(msgbundling, None) def deltaparent(self, revlog, rev, p1, p2, prev): +if not revlog.candelta(prev, rev): +raise error.ProgrammingError('cg1 should not be used in this case') return prev def revchunk(self, revlog, rev, prev, linknode): @@ -833,16 +835,19 @@ # expensive. The revlog caches should have prev cached, meaning # less CPU for changegroup generation. There is likely room to add # a flag and/or config option to control this behavior. -return prev +base = prev elif dp == nullrev: # revlog is configured to use full snapshot for a reason, # stick to full snapshot. -return nullrev +base = nullrev elif dp not in (p1, p2, prev): # Pick prev when we can't be sure remote has the base revision. return prev else: -return dp +base = dp +if base != nullrev and not revlog.candelta(base, rev): +base = nullrev +return base def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags): # Do nothing with flags, it is implicitly 0 in cg1 and cg2 To: quark, indygreg, #hg-reviewers, ryanmce Cc: martinvonz, durin42, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2067: changegroup: do not delta lfs revisions
quark added inline comments. INLINE COMMENTS > ryanmce wrote in revlog.py:746 > `prev` should be `baserev`? Should be `baserev`. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D2067 To: quark, indygreg, #hg-reviewers, ryanmce Cc: martinvonz, durin42, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2067: changegroup: do not delta lfs revisions
ryanmce added inline comments. INLINE COMMENTS > revlog.py:746 > +def candelta(self, baserev, rev): > +"""whether two revisions (prev, rev) can be delta-ed or not""" > +# Disable delta if either rev requires a content-changing flag `prev` should be `baserev`? REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D2067 To: quark, indygreg, #hg-reviewers, ryanmce Cc: martinvonz, durin42, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2067: changegroup: do not delta lfs revisions
quark updated this revision to Diff 5650. REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D2067?vs=5327&id=5650 REVISION DETAIL https://phab.mercurial-scm.org/D2067 AFFECTED FILES mercurial/changegroup.py mercurial/revlog.py tests/test-lfs-bundle.t tests/test-lfs.t CHANGE DETAILS diff --git a/tests/test-lfs.t b/tests/test-lfs.t --- a/tests/test-lfs.t +++ b/tests/test-lfs.t @@ -371,7 +371,7 @@ uncompressed size of bundle content: * (changelog) (glob) * (manifests) (glob) - * a (glob) + * a (glob) $ hg --config extensions.strip= strip -r 2 --no-backup --force -q $ hg -R bundle.hg log -p -T '{rev} {desc}\n' a 5 branching diff --git a/tests/test-lfs-bundle.t b/tests/test-lfs-bundle.t --- a/tests/test-lfs-bundle.t +++ b/tests/test-lfs-bundle.t @@ -95,6 +95,6 @@ 2 integrity errors encountered! (first damaged changeset appears to be 2) Applying src-lfs.bundle to dst-normal - CRASHED + OK Applying src-lfs.bundle to dst-lfs OK diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -78,6 +78,8 @@ REVIDX_EXTSTORED, ] REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER) +# bitmark for flags that could cause rawdata content change +REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED # max size of revlog with inline data _maxinline = 131072 @@ -97,7 +99,8 @@ """Register a flag processor on a revision data flag. Invariant: -- Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER. +- Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER, + and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext. - Only one flag processor can be registered on a specific flag. - flagprocessors must be 3-tuples of functions (read, write, raw) with the following signatures: @@ -739,6 +742,18 @@ except KeyError: return False +def candelta(self, baserev, rev): +"""whether two revisions (prev, rev) can be delta-ed or not""" +# Disable delta if either rev requires a content-changing flag +# processor (ex. LFS). This is because such flag processor can alter +# the rawtext content that the delta will be based on, and two clients +# could have a same revlog node with different flags (i.e. different +# rawtext contents) and the delta could be incompatible. +if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) +or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)): +return False +return True + def clearcaches(self): self._cache = None self._chainbasecache.clear() diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py --- a/mercurial/changegroup.py +++ b/mercurial/changegroup.py @@ -770,6 +770,8 @@ progress(msgbundling, None) def deltaparent(self, revlog, rev, p1, p2, prev): +if not revlog.candelta(prev, rev): +raise error.ProgrammingError('cg1 should not be used in this case') return prev def revchunk(self, revlog, rev, prev, linknode): @@ -829,16 +831,19 @@ # expensive. The revlog caches should have prev cached, meaning # less CPU for changegroup generation. There is likely room to add # a flag and/or config option to control this behavior. -return prev +base = prev elif dp == nullrev: # revlog is configured to use full snapshot for a reason, # stick to full snapshot. -return nullrev +base = nullrev elif dp not in (p1, p2, prev): # Pick prev when we can't be sure remote has the base revision. return prev else: -return dp +base = dp +if base != nullrev and not revlog.candelta(base, rev): +base = nullrev +return base def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags): # Do nothing with flags, it is implicitly 0 in cg1 and cg2 To: quark, indygreg, #hg-reviewers Cc: martinvonz, durin42, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2067: changegroup: do not delta lfs revisions
martinvonz added a comment. In https://phab.mercurial-scm.org/D2067#34758, @indygreg wrote: > This looks mostly good. I would like a change to address a future footgun though. > > I would also appreciate someone familiar with censor and narrow to weigh in on the implications of disabling delta generation for revisions that have the censor and ellipsis flags set. I'm pretty sure narrow will cope since it reimplements changegroup generation. Not sure how censor will react. (But I know we already have random code for detecting censored nodes during changegroup generation.) I think narrow would be functionally fine with it disabled, but it would be terrible for storage space. We don't set the ellipsis flag on files, but we do on manifests (and we use tree manifests). Some directories have tens of thousands of entries and tens of thousands of entries and tens of thousands of revisions. Storing them all in full would be bad. We get a compression of close to 2000x (not a typo) on these directories. I just created a pretty large clone for testing this and one directory's revlog there ended up being 9MB instead of 15GB. In https://phab.mercurial-scm.org/D2067#36959, @durin42 wrote: > In https://phab.mercurial-scm.org/D2067#34758, @indygreg wrote: > > > This looks mostly good. I would like a change to address a future footgun though. > > > > I would also appreciate someone familiar with censor and narrow to weigh in on the implications of disabling delta generation for revisions that have the censor and ellipsis flags set. I'm pretty sure narrow will cope since it reimplements changegroup generation. Not sure how censor will react. (But I know we already have random code for detecting censored nodes during changegroup generation.) > > > Hmm, I could swear we already don't generate deltas for ellipsis nodes, and I'm having trouble defending the idea that maybe we should. Censor I'm 99% sure disables deltas. I'll ask adgar, but if you don't hear from me assume I was right. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D2067 To: quark, indygreg, #hg-reviewers Cc: martinvonz, durin42, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2067: changegroup: do not delta lfs revisions
durin42 added a comment. In https://phab.mercurial-scm.org/D2067#34758, @indygreg wrote: > This looks mostly good. I would like a change to address a future footgun though. > > I would also appreciate someone familiar with censor and narrow to weigh in on the implications of disabling delta generation for revisions that have the censor and ellipsis flags set. I'm pretty sure narrow will cope since it reimplements changegroup generation. Not sure how censor will react. (But I know we already have random code for detecting censored nodes during changegroup generation.) Hmm, I could swear we already don't generate deltas for ellipsis nodes, and I'm having trouble defending the idea that maybe we should. Censor I'm 99% sure disables deltas. I'll ask adgar, but if you don't hear from me assume I was right. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D2067 To: quark, indygreg, #hg-reviewers Cc: durin42, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2067: changegroup: do not delta lfs revisions
quark updated this revision to Diff 5327. REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D2067?vs=5273&id=5327 REVISION DETAIL https://phab.mercurial-scm.org/D2067 AFFECTED FILES mercurial/changegroup.py mercurial/revlog.py tests/test-lfs-bundle.t tests/test-lfs.t CHANGE DETAILS diff --git a/tests/test-lfs.t b/tests/test-lfs.t --- a/tests/test-lfs.t +++ b/tests/test-lfs.t @@ -349,7 +349,7 @@ uncompressed size of bundle content: * (changelog) (glob) * (manifests) (glob) - * a (glob) + * a (glob) $ hg --config extensions.strip= strip -r 2 --no-backup --force -q $ hg -R bundle.hg log -p -T '{rev} {desc}\n' a 5 branching diff --git a/tests/test-lfs-bundle.t b/tests/test-lfs-bundle.t --- a/tests/test-lfs-bundle.t +++ b/tests/test-lfs-bundle.t @@ -95,6 +95,6 @@ 2 integrity errors encountered! (first damaged changeset appears to be 2) Applying src-lfs.bundle to dst-normal - CRASHED + OK Applying src-lfs.bundle to dst-lfs OK diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -713,6 +713,18 @@ except KeyError: return False +def candelta(self, baserev, rev): +"""whether two revisions (prev, rev) can be delta-ed or not""" +# Disable delta if either rev requires flag processor (ex. LFS) +# This is because a flag processor can alter the rawtext content that +# the delta will be based on, and two clients could have a same revlog +# node with different flags (i.e. different rawtext contents) and the +# delta could be incompatible. +if ((self.flags(baserev) & REVIDX_KNOWN_FLAGS) +or (self.flags(rev) & REVIDX_KNOWN_FLAGS)): +return False +return True + def clearcaches(self): self._cache = None self._chainbasecache.clear() diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py --- a/mercurial/changegroup.py +++ b/mercurial/changegroup.py @@ -770,6 +770,8 @@ progress(msgbundling, None) def deltaparent(self, revlog, rev, p1, p2, prev): +if not revlog.candelta(prev, rev): +raise error.ProgrammingError('cg1 should not be used in this case') return prev def revchunk(self, revlog, rev, prev, linknode): @@ -829,16 +831,19 @@ # expensive. The revlog caches should have prev cached, meaning # less CPU for changegroup generation. There is likely room to add # a flag and/or config option to control this behavior. -return prev +base = prev elif dp == nullrev: # revlog is configured to use full snapshot for a reason, # stick to full snapshot. -return nullrev +base = nullrev elif dp not in (p1, p2, prev): # Pick prev when we can't be sure remote has the base revision. return prev else: -return dp +base = dp +if base != nullrev and not revlog.candelta(base, rev): +base = nullrev +return base def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags): # Do nothing with flags, it is implicitly 0 in cg1 and cg2 To: quark, indygreg, #hg-reviewers Cc: mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2067: changegroup: do not delta lfs revisions
indygreg requested changes to this revision. indygreg added a comment. This revision now requires changes to proceed. This looks mostly good. I would like a change to address a future footgun though. I would also appreciate someone familiar with censor and narrow to weigh in on the implications of disabling delta generation for revisions that have the censor and ellipsis flags set. I'm pretty sure narrow will cope since it reimplements changegroup generation. Not sure how censor will react. (But I know we already have random code for detecting censored nodes during changegroup generation.) INLINE COMMENTS > revlog.py:719 > +# disable delta if either rev uses non-default flag (ex. LFS) > +if self.flags(baserev) or self.flags(rev): > +return False This logic assumes that revision flags will only ever be used to influence the presence of content. That is true today because our flags are for `REVIDX_ISCENSORED`, `REVIDX_ELLIPSIS`, and `REVIDX_EXTSTORED`. But if we ever introduced a new revision flag for e.g. compression strategy, then testing for non-empty revision flags would be wrong. I think we want to capture the bitmask of revision flags influencing delta generation explicitly. Or we want a big comment by the revision flags constants at the top of the file telling people to audit `candelta()` when adding new revision flags. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D2067 To: quark, indygreg, #hg-reviewers Cc: mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2067: changegroup: do not delta lfs revisions
quark updated this revision to Diff 5273. REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D2067?vs=5269&id=5273 REVISION DETAIL https://phab.mercurial-scm.org/D2067 AFFECTED FILES mercurial/changegroup.py mercurial/revlog.py tests/test-lfs-bundle.t tests/test-lfs.t CHANGE DETAILS diff --git a/tests/test-lfs.t b/tests/test-lfs.t --- a/tests/test-lfs.t +++ b/tests/test-lfs.t @@ -349,7 +349,7 @@ uncompressed size of bundle content: * (changelog) (glob) * (manifests) (glob) - * a (glob) + * a (glob) $ hg --config extensions.strip= strip -r 2 --no-backup --force -q $ hg -R bundle.hg log -p -T '{rev} {desc}\n' a 5 branching diff --git a/tests/test-lfs-bundle.t b/tests/test-lfs-bundle.t --- a/tests/test-lfs-bundle.t +++ b/tests/test-lfs-bundle.t @@ -95,6 +95,6 @@ 2 integrity errors encountered! (first damaged changeset appears to be 2) Applying src-lfs.bundle to dst-normal - CRASHED + OK Applying src-lfs.bundle to dst-lfs OK diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -713,6 +713,13 @@ except KeyError: return False +def candelta(self, baserev, rev): +"""whether two revisions (prev, rev) can be delta-ed or not""" +# disable delta if either rev uses non-default flag (ex. LFS) +if self.flags(baserev) or self.flags(rev): +return False +return True + def clearcaches(self): self._cache = None self._chainbasecache.clear() diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py --- a/mercurial/changegroup.py +++ b/mercurial/changegroup.py @@ -770,6 +770,8 @@ progress(msgbundling, None) def deltaparent(self, revlog, rev, p1, p2, prev): +if not revlog.candelta(prev, rev): +raise error.ProgrammingError('cg1 should not be used in this case') return prev def revchunk(self, revlog, rev, prev, linknode): @@ -829,16 +831,19 @@ # expensive. The revlog caches should have prev cached, meaning # less CPU for changegroup generation. There is likely room to add # a flag and/or config option to control this behavior. -return prev +base = prev elif dp == nullrev: # revlog is configured to use full snapshot for a reason, # stick to full snapshot. -return nullrev +base = nullrev elif dp not in (p1, p2, prev): # Pick prev when we can't be sure remote has the base revision. return prev else: -return dp +base = dp +if base != nullrev and not revlog.candelta(base, rev): +base = nullrev +return base def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags): # Do nothing with flags, it is implicitly 0 in cg1 and cg2 To: quark, indygreg, #hg-reviewers Cc: mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2067: changegroup: do not delta lfs revisions
quark updated this revision to Diff 5269. REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D2067?vs=5264&id=5269 REVISION DETAIL https://phab.mercurial-scm.org/D2067 AFFECTED FILES mercurial/changegroup.py mercurial/revlog.py tests/test-lfs-bundle.t tests/test-lfs.t CHANGE DETAILS diff --git a/tests/test-lfs.t b/tests/test-lfs.t --- a/tests/test-lfs.t +++ b/tests/test-lfs.t @@ -349,7 +349,7 @@ uncompressed size of bundle content: * (changelog) (glob) * (manifests) (glob) - * a (glob) + * a (glob) $ hg --config extensions.strip= strip -r 2 --no-backup --force -q $ hg -R bundle.hg log -p -T '{rev} {desc}\n' a 5 branching diff --git a/tests/test-lfs-bundle.t b/tests/test-lfs-bundle.t --- a/tests/test-lfs-bundle.t +++ b/tests/test-lfs-bundle.t @@ -95,6 +95,6 @@ 2 integrity errors encountered! (first damaged changeset appears to be 2) Applying src-lfs.bundle to dst-normal - CRASHED + OK Applying src-lfs.bundle to dst-lfs OK diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -713,6 +713,13 @@ except KeyError: return False +def candelta(self, baserev, rev): +"""whether two revisions (prev, rev) can be delta-ed or not""" +# disable delta if either rev uses non-default flag (ex. LFS) +if self.flags(baserev) or self.flags(rev): +return False +return True + def clearcaches(self): self._cache = None self._chainbasecache.clear() diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py --- a/mercurial/changegroup.py +++ b/mercurial/changegroup.py @@ -770,6 +770,8 @@ progress(msgbundling, None) def deltaparent(self, revlog, rev, p1, p2, prev): +if not revlog.candelta(prev, rev): +raise error.ProgrammingError('cg1 should not be used in this case') return prev def revchunk(self, revlog, rev, prev, linknode): @@ -829,16 +831,19 @@ # expensive. The revlog caches should have prev cached, meaning # less CPU for changegroup generation. There is likely room to add # a flag and/or config option to control this behavior. -return prev +base = prev elif dp == nullrev: # revlog is configured to use full snapshot for a reason, # stick to full snapshot. -return nullrev +base = nullrev elif dp not in (p1, p2, prev): # Pick prev when we can't be sure remote has the base revision. return prev else: -return dp +base = dp +if base != nullrev and not revlog.candelta(base, rev): +base = nullrev +return base def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags): # Do nothing with flags, it is implicitly 0 in cg1 and cg2 To: quark, indygreg, #hg-reviewers Cc: mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2067: changegroup: do not delta lfs revisions
quark created this revision. Herald added a reviewer: indygreg. Herald added a subscriber: mercurial-devel. Herald added a reviewer: hg-reviewers. REVISION SUMMARY There is no way to distinguish whether a delta base is LFS or non-LFS. If the delta is against LFS rawtext, and the client trying to apply it has the base revision stored as fulltext, the delta (aka. bundle) will fail to apply. This patch forbids using delta for LFS revisions in changegroup so bad deltas won't be transmitted. Note: this does not solve the problem entirely. It solves LFS delta applying to non-LFS base. But the other direction: non-LFS delta applying to LFS base is not solved yet. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D2067 AFFECTED FILES mercurial/changegroup.py mercurial/revlog.py tests/test-lfs-bundle.t tests/test-lfs.t CHANGE DETAILS diff --git a/tests/test-lfs.t b/tests/test-lfs.t --- a/tests/test-lfs.t +++ b/tests/test-lfs.t @@ -349,7 +349,7 @@ uncompressed size of bundle content: * (changelog) (glob) * (manifests) (glob) - * a (glob) + * a (glob) $ hg --config extensions.strip= strip -r 2 --no-backup --force -q $ hg -R bundle.hg log -p -T '{rev} {desc}\n' a 5 branching diff --git a/tests/test-lfs-bundle.t b/tests/test-lfs-bundle.t --- a/tests/test-lfs-bundle.t +++ b/tests/test-lfs-bundle.t @@ -96,6 +96,6 @@ 2 integrity errors encountered! (first damaged changeset appears to be 2) Applying src-lfs.bundle to dst-normal - CRASHED + OK Applying src-lfs.bundle to dst-lfs OK diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -713,6 +713,13 @@ except KeyError: return False +def candelta(self, baserev, rev): +"""whether two revisions (prev, rev) can be delta-ed or not""" +# disable delta if either rev uses non-default flag (ex. LFS) +if self.flags(baserev) or self.flags(rev): +return False +return True + def clearcaches(self): self._cache = None self._chainbasecache.clear() diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py --- a/mercurial/changegroup.py +++ b/mercurial/changegroup.py @@ -770,6 +770,8 @@ progress(msgbundling, None) def deltaparent(self, revlog, rev, p1, p2, prev): +if not revlog.candelta(prev, rev): +raise error.ProgrammingError('cg1 should not be used in this case') return prev def revchunk(self, revlog, rev, prev, linknode): @@ -829,16 +831,19 @@ # expensive. The revlog caches should have prev cached, meaning # less CPU for changegroup generation. There is likely room to add # a flag and/or config option to control this behavior. -return prev +base = prev elif dp == nullrev: # revlog is configured to use full snapshot for a reason, # stick to full snapshot. -return nullrev +base = nullrev elif dp not in (p1, p2, prev): # Pick prev when we can't be sure remote has the base revision. return prev else: -return dp +base = dp +if base != nullrev and not revlog.candelta(base, rev): +base = nullrev +return base def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags): # Do nothing with flags, it is implicitly 0 in cg1 and cg2 To: quark, indygreg, #hg-reviewers Cc: mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel