Re: [PATCH v2 08/11] fast-export: add --reference-excluded-parents option

2018-11-14 Thread Elijah Newren
On Wed, Nov 14, 2018 at 11:28 AM SZEDER Gábor  wrote:
>
> On Tue, Nov 13, 2018 at 04:25:57PM -0800, Elijah Newren wrote:
> > diff --git a/builtin/fast-export.c b/builtin/fast-export.c
> > index 2fef00436b..3cc98c31ad 100644
> > --- a/builtin/fast-export.c
> > +++ b/builtin/fast-export.c
> > @@ -37,6 +37,7 @@ static int fake_missing_tagger;
> >  static int use_done_feature;
> >  static int no_data;
> >  static int full_tree;
> > +static int reference_excluded_commits;
> >  static struct string_list extra_refs = STRING_LIST_INIT_NODUP;
> >  static struct string_list tag_refs = STRING_LIST_INIT_NODUP;
> >  static struct refspec refspecs = REFSPEC_INIT_FETCH;
> > @@ -596,7 +597,8 @@ static void handle_commit(struct commit *commit, struct 
> > rev_info *rev,
> >   message += 2;
> >
> >   if (commit->parents &&
> > - get_object_mark(&commit->parents->item->object) != 0 &&
> > + (get_object_mark(&commit->parents->item->object) != 0 ||
> > +  reference_excluded_commits) &&
> >   !full_tree) {
> >   parse_commit_or_die(commit->parents->item);
> >   diff_tree_oid(get_commit_tree_oid(commit->parents->item),
> > @@ -644,13 +646,21 @@ static void handle_commit(struct commit *commit, 
> > struct rev_info *rev,
> >   unuse_commit_buffer(commit, commit_buffer);
> >
> >   for (i = 0, p = commit->parents; p; p = p->next) {
> > - int mark = get_object_mark(&p->item->object);
> > - if (!mark)
> > + struct object *obj = &p->item->object;
> > + int mark = get_object_mark(obj);
> > +
> > + if (!mark && !reference_excluded_commits)
> >   continue;
> >   if (i == 0)
> > - printf("from :%d\n", mark);
> > + printf("from ");
> > + else
> > + printf("merge ");
> > + if (mark)
> > + printf(":%d\n", mark);
> >   else
> > - printf("merge :%d\n", mark);
> > + printf("%s\n", sha1_to_hex(anonymize ?
> > > +anonymize_sha1(&obj->oid) :
> > +obj->oid.hash));
>
> Since we intend to move away from SHA-1, would this be a good time to
> add an anonymize_oid() function, "while at it"?

Since I already need to add a cleanup commit to remove the
pre-existing sha1_to_hex() calls, I'll just
s/anonymize_sha1/anonymize_oid/ while at it in the same commit; it's
not called from any other file.

> >   i++;
> >   }
> >
> > @@ -931,13 +941,22 @@ static void handle_tags_and_duplicates(struct 
> > string_list *extras)
> >   /*
> >* Getting here means we have a commit which
> >* was excluded by a negative refspec (e.g.
> > -  * fast-export ^master master).  If the user
> > +  * fast-export ^master master).  If we are
> > +  * referencing excluded commits, set the ref
> > +  * to the exact commit.  Otherwise, the user
> >* wants the branch exported but every commit
> > -  * in its history to be deleted, that sounds
> > -  * like a ref deletion to me.
> > +  * in its history to be deleted, which basically
> > +  * just means deletion of the ref.
> >*/
> > - printf("reset %s\nfrom %s\n\n",
> > -name, sha1_to_hex(null_sha1));
> > + if (!reference_excluded_commits) {
> > + /* delete the ref */
> > + printf("reset %s\nfrom %s\n\n",
> > +name, sha1_to_hex(null_sha1));
> > + continue;
> > + }
> > + /* set ref to commit using oid, not mark */
> > + printf("reset %s\nfrom %s\n\n", name,
> > +sha1_to_hex(commit->object.oid.hash));
>
> Please use oid_to_hex(&commit->object.oid) instead.

Yeah, there were a couple others I introduced too.  I'll fix them all up.


Re: [PATCH v2 08/11] fast-export: add --reference-excluded-parents option

2018-11-14 Thread SZEDER Gábor
On Tue, Nov 13, 2018 at 04:25:57PM -0800, Elijah Newren wrote:
> diff --git a/builtin/fast-export.c b/builtin/fast-export.c
> index 2fef00436b..3cc98c31ad 100644
> --- a/builtin/fast-export.c
> +++ b/builtin/fast-export.c
> @@ -37,6 +37,7 @@ static int fake_missing_tagger;
>  static int use_done_feature;
>  static int no_data;
>  static int full_tree;
> +static int reference_excluded_commits;
>  static struct string_list extra_refs = STRING_LIST_INIT_NODUP;
>  static struct string_list tag_refs = STRING_LIST_INIT_NODUP;
>  static struct refspec refspecs = REFSPEC_INIT_FETCH;
> @@ -596,7 +597,8 @@ static void handle_commit(struct commit *commit, struct 
> rev_info *rev,
>   message += 2;
>  
>   if (commit->parents &&
> - get_object_mark(&commit->parents->item->object) != 0 &&
> + (get_object_mark(&commit->parents->item->object) != 0 ||
> +  reference_excluded_commits) &&
>   !full_tree) {
>   parse_commit_or_die(commit->parents->item);
>   diff_tree_oid(get_commit_tree_oid(commit->parents->item),
> @@ -644,13 +646,21 @@ static void handle_commit(struct commit *commit, struct 
> rev_info *rev,
>   unuse_commit_buffer(commit, commit_buffer);
>  
>   for (i = 0, p = commit->parents; p; p = p->next) {
> - int mark = get_object_mark(&p->item->object);
> - if (!mark)
> + struct object *obj = &p->item->object;
> + int mark = get_object_mark(obj);
> +
> + if (!mark && !reference_excluded_commits)
>   continue;
>   if (i == 0)
> - printf("from :%d\n", mark);
> + printf("from ");
> + else
> + printf("merge ");
> + if (mark)
> + printf(":%d\n", mark);
>   else
> - printf("merge :%d\n", mark);
> + printf("%s\n", sha1_to_hex(anonymize ?
> +anonymize_sha1(&obj->oid) :
> +obj->oid.hash));

Since we intend to move away from SHA-1, would this be a good time to
add an anonymize_oid() function, "while at it"?

>   i++;
>   }
>  
> @@ -931,13 +941,22 @@ static void handle_tags_and_duplicates(struct 
> string_list *extras)
>   /*
>* Getting here means we have a commit which
>* was excluded by a negative refspec (e.g.
> -  * fast-export ^master master).  If the user
> +  * fast-export ^master master).  If we are
> +  * referencing excluded commits, set the ref
> +  * to the exact commit.  Otherwise, the user
>* wants the branch exported but every commit
> -  * in its history to be deleted, that sounds
> -  * like a ref deletion to me.
> +  * in its history to be deleted, which basically
> +  * just means deletion of the ref.
>*/
> - printf("reset %s\nfrom %s\n\n",
> -name, sha1_to_hex(null_sha1));
> + if (!reference_excluded_commits) {
> + /* delete the ref */
> + printf("reset %s\nfrom %s\n\n",
> +name, sha1_to_hex(null_sha1));
> + continue;
> + }
> + /* set ref to commit using oid, not mark */
> + printf("reset %s\nfrom %s\n\n", name,
> +sha1_to_hex(commit->object.oid.hash));

Please use oid_to_hex(&commit->object.oid) instead.

>   continue;
>   }
>  


[PATCH v2 08/11] fast-export: add --reference-excluded-parents option

2018-11-13 Thread Elijah Newren
git filter-branch has a nifty feature allowing you to rewrite, e.g. just
the last 8 commits of a linear history
  git filter-branch $OPTIONS HEAD~8..HEAD

If you try the same with git fast-export, you instead get a history of
only 8 commits, with HEAD~7 being rewritten into a root commit.  There
are two alternatives:

  1) Don't use the negative revision specification, and when you're
 filtering the output to make modifications to the last 8 commits,
 just be careful to not modify any earlier commits somehow.

  2) First run 'git fast-export --export-marks=somefile HEAD~8', then
 run 'git fast-export --import-marks=somefile HEAD~8..HEAD'.

Both are more error prone than I'd like (the first for obvious reasons;
with the second option I have sometimes accidentally included too many
revisions in the first command and then found that the corresponding
extra revisions were not exported by the second command and thus were
not modified as I expected).  Also, both are poor from a performance
perspective.

Add a new --reference-excluded-parents option which will cause
fast-export to refer to commits outside the specified rev-list-args
range by their sha1sum.  Such a stream will only be useful in a
repository which already contains the necessary commits (much like the
restriction imposed when using --no-data).

Note from Peff:
  I think we might be able to do a little more optimization here. If
  we're exporting HEAD^..HEAD and there's an object in HEAD^ which is
  unchanged in HEAD, I think we'd still print it (because it would not
  be marked SHOWN), but we could omit it (by walking the tree of the
  boundary commits and marking them shown).  I don't think it's a
  blocker for what you're doing here, but just a possible future
  optimization.

Signed-off-by: Elijah Newren 
---
 Documentation/git-fast-export.txt | 17 +++--
 builtin/fast-export.c | 42 +++
 t/t9350-fast-export.sh| 11 
 3 files changed, 58 insertions(+), 12 deletions(-)

diff --git a/Documentation/git-fast-export.txt b/Documentation/git-fast-export.txt
index fda55b3284..f65026662a 100644
--- a/Documentation/git-fast-export.txt
+++ b/Documentation/git-fast-export.txt
@@ -110,6 +110,18 @@ marks the same across runs.
the shape of the history and stored tree.  See the section on
`ANONYMIZING` below.
 
+--reference-excluded-parents::
+   By default, running a command such as `git fast-export
+   master~5..master` will not include the commit master{tilde}5
+   and will make master{tilde}4 no longer have master{tilde}5 as
+   a parent (though both the old master{tilde}4 and new
+   master{tilde}4 will have all the same files).  Use
+   --reference-excluded-parents to instead have the stream
+   refer to commits in the excluded range of history by their
+   sha1sum.  Note that the resulting stream can only be used by a
+   repository which already contains the necessary parent
+   commits.
+
 --refspec::
Apply the specified refspec to each ref exported. Multiple of them can
be specified.
@@ -119,8 +131,9 @@ marks the same across runs.
'git rev-list', that specifies the specific objects and references
to export.  For example, `master~10..master` causes the
current master reference to be exported along with all objects
-   added since its 10th ancestor commit and all files common to
-   master{tilde}9 and master{tilde}10.
+   added since its 10th ancestor commit and (unless the
+   --reference-excluded-parents option is specified) all files
+   common to master{tilde}9 and master{tilde}10.
 
 EXAMPLES
 
diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index 2fef00436b..3cc98c31ad 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -37,6 +37,7 @@ static int fake_missing_tagger;
 static int use_done_feature;
 static int no_data;
 static int full_tree;
+static int reference_excluded_commits;
 static struct string_list extra_refs = STRING_LIST_INIT_NODUP;
 static struct string_list tag_refs = STRING_LIST_INIT_NODUP;
 static struct refspec refspecs = REFSPEC_INIT_FETCH;
@@ -596,7 +597,8 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
message += 2;
 
if (commit->parents &&
-   get_object_mark(&commit->parents->item->object) != 0 &&
+   (get_object_mark(&commit->parents->item->object) != 0 ||
+reference_excluded_commits) &&
!full_tree) {
parse_commit_or_die(commit->parents->item);
diff_tree_oid(get_commit_tree_oid(commit->parents->item),
@@ -644,13 +646,21 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
unuse_commit_buffer(commit, commit_buffer);
 
for (i = 0, p = commit->parents; p; p = p->next) {
-   int mark = get_object_mark(&p->item->object);
-   if (!mark)
+