Re: [PATCH 1/1] git-p4: add format-patch subcommand

2018-03-12 Thread Luke Diamand
On 26 February 2018 at 23:48, Eric Sunshine  wrote:
> On Mon, Feb 26, 2018 at 6:48 AM, Luke Diamand  wrote:
>> It takes a list of P4 changelists and generates a patch for
>> each one, using "p4 describe".
>> [...]

Just to say that I almost got this working reasonably well, but it
gets pretty nasty making binary files work - in order to generate the
git binary format I need a base85 encoder which exists in Python3, but
not in Python2. And while I could obviously write it in Python easily
enough, it starts to feel like an awful lot of code implementing (and
maintaining) a slightly second-rate "git diff". And while it's
tempting to just not support binary files, in reality Perforce repos
seem to end up with lots of them.

So I think I might have another go at the previous scheme. What I will
have to do is to first create a faked-up commit representing the point
prior to the shelved commit for each file ("for file@rev-1 in files"),
and then create the real commit ("for file@rev in files"). It's
probably not as grim as it sounds and then means that we end up with
git doing all the hard work of diffing files, rather than relying on
Perforce's diff engine in conjunction with some Python.


>> Signed-off-by: Luke Diamand 
>> ---
>> diff --git a/git-p4.py b/git-p4.py
>> @@ -3749,6 +3761,277 @@ class P4Branches(Command):
>> +class P4MakePatch(Command,P4UserMap):
>> +def run(self, args):
>> +if self.output and not os.path.isdir(self.output):
>> +sys.exit("output directory %s does not exist" % self.output)
>
> For consistency with "git format-patch", this could create the output
> directory automatically rather than erroring out.
>
>> +if self.strip_depot_prefix:
>> +self.clientSpec = getClientSpec()
>> +else:
>> +self.clientSpec = None
>> +
>> +self.loadUserMapFromCache()
>> +if len(args) < 1:
>> +return False
>
> Would it make sense to handle this no-arguments case earlier before
> doing work, such as loading the user map from cache?
>
>> +for change in args:
>> +self.make_patch(int(change))
>> +
>> +return True
>> +
>> +def p4_fetch_delta(self, change, files, shelved=False):
>> +cmd = ["describe"]
>> +if shelved:
>> +cmd += ["-S"]
>> +cmd += ["-du"]
>> +cmd += ["%s" % change]
>> +cmd = p4_build_cmd(cmd)
>> +
>> +p4 = subprocess.Popen(cmd, shell=False, stdout=subprocess.PIPE)
>> +try:
>> +result = p4.stdout.readlines()
>> +except EOFError:
>> +pass
>> +in_diff = False
>> +matcher = re.compile('\s+(.*)#(\d+)\s+\(text\)\s+')
>> +diffmatcher = re.compile("Differences ...")
>
> I'm not familiar with the output of "p4 describe", but does it include
> user-supplied text? If so, would it be safer to anchor 'diffmatcher',
> for instance, "^Diferences...$"?
>
>> +delta = ""
>> +skip_next_blank_line = False
>> +
>> +for line in result:
>> +if diffmatcher.match(line):
>
> Stepping back, does "Differences..." appear on a line of its own? If
> so, why does this need to be a regex at all? Can't it just do a direct
> string comparison?
>
>> +in_diff = True
>> +continue
>> +
>> +if in_diff:
>> +
>> +if skip_next_blank_line and \
>> +line.rstrip() == "":
>> +skip_next_blank_line = False
>> +continue
>> +
>> +m = matcher.match(line)
>> +if m:
>> +file = self.map_path(m.group(1))
>> +ver = m.group(2)
>> +delta += "diff --git a%s b%s\n" % (file, file)
>> +delta += "--- a%s\n" % file
>> +delta += "+++ b%s\n" % file
>> +skip_next_blank_line = True
>> +else:
>> +delta += line
>> +
>> +delta += "\n"
>> +
>> +exitCode = p4.wait()
>> +if exitCode != 0:
>> +raise IOError("p4 '%s' command failed" % str(cmd))
>
> Since p4.stdout.readlines() gathered all output from the command
> before massaging it into Git format, can't the p4.wait() be done
> earlier, just after the output has been read?
>
>> +return delta
>> +
>> +def make_patch(self, change):
>> +[...]
>> +# add new or deleted files
>> +for file in files:
>> +[...]
>> +if add or delete:
>> +if add:
>> +[...]
>> +else:
>> +[...]
>> +
>> +[...]
>> +
>> +if add:
>> +prefix = "+"
>> +else:
>> +prefix = "-"
>> +
>> +if delete:
>> +[...]
>> +

Re: [PATCH 1/1] git-p4: add format-patch subcommand

2018-02-26 Thread Eric Sunshine
On Mon, Feb 26, 2018 at 6:48 AM, Luke Diamand  wrote:
> It takes a list of P4 changelists and generates a patch for
> each one, using "p4 describe".
> [...]
> Signed-off-by: Luke Diamand 
> ---
> diff --git a/git-p4.py b/git-p4.py
> @@ -3749,6 +3761,277 @@ class P4Branches(Command):
> +class P4MakePatch(Command,P4UserMap):
> +def run(self, args):
> +if self.output and not os.path.isdir(self.output):
> +sys.exit("output directory %s does not exist" % self.output)

For consistency with "git format-patch", this could create the output
directory automatically rather than erroring out.

> +if self.strip_depot_prefix:
> +self.clientSpec = getClientSpec()
> +else:
> +self.clientSpec = None
> +
> +self.loadUserMapFromCache()
> +if len(args) < 1:
> +return False

Would it make sense to handle this no-arguments case earlier before
doing work, such as loading the user map from cache?

> +for change in args:
> +self.make_patch(int(change))
> +
> +return True
> +
> +def p4_fetch_delta(self, change, files, shelved=False):
> +cmd = ["describe"]
> +if shelved:
> +cmd += ["-S"]
> +cmd += ["-du"]
> +cmd += ["%s" % change]
> +cmd = p4_build_cmd(cmd)
> +
> +p4 = subprocess.Popen(cmd, shell=False, stdout=subprocess.PIPE)
> +try:
> +result = p4.stdout.readlines()
> +except EOFError:
> +pass
> +in_diff = False
> +matcher = re.compile('\s+(.*)#(\d+)\s+\(text\)\s+')
> +diffmatcher = re.compile("Differences ...")

I'm not familiar with the output of "p4 describe", but does it include
user-supplied text? If so, would it be safer to anchor 'diffmatcher',
for instance, "^Diferences...$"?

> +delta = ""
> +skip_next_blank_line = False
> +
> +for line in result:
> +if diffmatcher.match(line):

Stepping back, does "Differences..." appear on a line of its own? If
so, why does this need to be a regex at all? Can't it just do a direct
string comparison?

> +in_diff = True
> +continue
> +
> +if in_diff:
> +
> +if skip_next_blank_line and \
> +line.rstrip() == "":
> +skip_next_blank_line = False
> +continue
> +
> +m = matcher.match(line)
> +if m:
> +file = self.map_path(m.group(1))
> +ver = m.group(2)
> +delta += "diff --git a%s b%s\n" % (file, file)
> +delta += "--- a%s\n" % file
> +delta += "+++ b%s\n" % file
> +skip_next_blank_line = True
> +else:
> +delta += line
> +
> +delta += "\n"
> +
> +exitCode = p4.wait()
> +if exitCode != 0:
> +raise IOError("p4 '%s' command failed" % str(cmd))

Since p4.stdout.readlines() gathered all output from the command
before massaging it into Git format, can't the p4.wait() be done
earlier, just after the output has been read?

> +return delta
> +
> +def make_patch(self, change):
> +[...]
> +# add new or deleted files
> +for file in files:
> +[...]
> +if add or delete:
> +if add:
> +[...]
> +else:
> +[...]
> +
> +[...]
> +
> +if add:
> +prefix = "+"
> +else:
> +prefix = "-"
> +
> +if delete:
> +[...]
> +else:
> +# added
> +[...]
> +
> +(lines, delta_content) = self.read_file_contents(prefix, 
> path_rev)
> +
> +if add:
> +if lines > 0:
> +delta += "@@ -0,0 +1,%d @@\n" % lines
> +else:
> +delta += "@@ -1,%d +0,0 @@\n" % lines

It's not clear why you sometimes check 'add' but other times 'delete'.
Perhaps always used one or the other? For instance:

action = file["action"]
if action == "add" or action == "delete":
if action == "add":
before = "/dev/null"
...
else
...

delta += ...

if action == "add":
...

or something.

> +delta += delta_content
> +
> +if self.output:
> +with open("%s/%s.patch" % (self.output, change), "w") as f:
> +f.write(delta)
> +else:
> +print(delta)
> diff --git a/t/t9832-make-patch.sh b/t/t9832-make-patch.sh
> @@ -0,0 +1,135 @@
> +# Converting P4 changes into patches
> +#
> +# - added, deleted, modified files
> +# - regular commits, shelved commits
> 

Re: [PATCH 1/1] git-p4: add format-patch subcommand

2018-02-26 Thread Miguel Torroja
very nice subcommand, I tested it with a shelved CL and a submitted CL.
Find my comments inline

On Mon, Feb 26, 2018 at 12:48 PM, Luke Diamand  wrote:
> It takes a list of P4 changelists and generates a patch for
> each one, using "p4 describe".
>
> This is especially useful for applying shelved changelists
> to your git-p4 tree; the existing "git p4" subcommands do
> not handle these.
>
> That's because they "p4 print" the contents of each file at
> a given revision, and then use git-fastimport to generate the
> deltas. But in the case of a shelved changelist, there is no
> easy way to find out what the previous file state was - Perforce
> does not have the concept of a single repo-wide revision.
>
> Unfortunately, using "p4 describe" comes with a price: it cannot
> be used to reliably generate diffs for binary files (it tries to
> linebreak on LF characters) and it is also _much_ slower.
>
> Unicode character correctness is untested - in theory if
> "p4 describe" knows about the character encoding it shouldn't
> break unicode characters if they happen to contain LF, but I
> haven't tested this.
>
> Signed-off-by: Luke Diamand 
> ---
>  Documentation/git-p4.txt |  33 +
>  git-p4.py| 304 
> +--
>  t/t9832-make-patch.sh| 135 +
>  3 files changed, 462 insertions(+), 10 deletions(-)
>  create mode 100755 t/t9832-make-patch.sh
>
> diff --git a/Documentation/git-p4.txt b/Documentation/git-p4.txt
> index d8c8f11c9f..1908b00de2 100644
> --- a/Documentation/git-p4.txt
> +++ b/Documentation/git-p4.txt
> @@ -164,6 +164,28 @@ $ git p4 submit --shelve
>  $ git p4 submit --update-shelve 1234 --update-shelve 2345
>  
>
> +
> +format-patch
> +~~
> +This will attempt to create a unified diff (using the git patch variant) 
> which
> +can be passed to patch. This is generated using the output from "p4 
> describe".
> +
> +It includes the contents of added files (which "p4 describe" does not).
> +
> +Binary files cannot be handled correctly due to limitations in "p4 describe".
> +
> +It will handle both normal and shelved (pending) changelists.
> +
> +Because of the way this works, it will be much slower than the normal git-p4 
> clone
> +path.
> +
> +
> +$ git p4 format-patch 12345
> +$ git p4 format-patch --output patchdir 12345 12346 12347
> +$ git p4 format-patch --strip-depot-prefix 12348 > out.patch
> +$ git am out.patch
> +
> +
>  OPTIONS
>  ---
>
> @@ -337,6 +359,17 @@ These options can be used to modify 'git p4 rebase' 
> behavior.
>  --import-labels::
> Import p4 labels.
>
> +Format-patch options
> +
> +
> +--output::
> +Write patches to this directory (which must exist) instead of to
> +standard output.
> +
> +--strip-depot-prefix::
> +Strip the depot prefix from filenames in the patch.  This makes
> +it suitable for passing to tools such as "git am".
> +
>  DEPOT PATH SYNTAX
>  -
>  The p4 depot path argument to 'git p4 sync' and 'git p4 clone' can
> diff --git a/git-p4.py b/git-p4.py
> index 7bb9cadc69..a1e998e6f5 100755
> --- a/git-p4.py
> +++ b/git-p4.py
> @@ -26,6 +26,7 @@ import zipfile
>  import zlib
>  import ctypes
>  import errno
> +import time
>
>  try:
>  from subprocess import CalledProcessError
> @@ -316,12 +317,17 @@ def p4_last_change():
>  results = p4CmdList(["changes", "-m", "1"], skip_info=True)
>  return int(results[0]['change'])
>
> -def p4_describe(change):
> +def p4_describe(change, shelved=False):
>  """Make sure it returns a valid result by checking for
> the presence of field "time".  Return a dict of the
> results."""
>
> -ds = p4CmdList(["describe", "-s", str(change)], skip_info=True)
> +cmd = ["describe", "-s"]
> +if shelved:
> +cmd += ["-S"]
> +cmd += [str(change)]
> +
> +ds = p4CmdList(cmd, skip_info=True)
>  if len(ds) != 1:
>  die("p4 describe -s %d did not return 1 result: %s" % (change, 
> str(ds)))
>
> @@ -372,7 +378,14 @@ def split_p4_type(p4type):
>  mods = ""
>  if len(s) > 1:
>  mods = s[1]
> -return (base, mods)
> +
> +git_mode = "100644"
> +if "x" in mods:
> +git_mode = "100755"
> +if base == "symlink":
> +git_mode = "12"
> +
> +return (base, mods, git_mode)
>
>  #
>  # return the raw p4 type of a file (text, text+ko, etc)
> @@ -413,7 +426,7 @@ def p4_keywords_regexp_for_file(file):
>  if not os.path.exists(file):
>  return None
>  else:
> -(type_base, type_mods) = split_p4_type(p4_type(file))
> +(type_base, type_mods, _) = split_p4_type(p4_type(file))
>  return p4_keywords_regexp_for_type(type_base, type_mods)
>
>  def setP4ExecBit(file, mode):
> @@ -1208,6 +1221,9 @@ class P4UserMap:
>  else:
>  return True
>
> +def getP4UsernameEmail(self, userid):
> +return 

[PATCH 1/1] git-p4: add format-patch subcommand

2018-02-26 Thread Luke Diamand
It takes a list of P4 changelists and generates a patch for
each one, using "p4 describe".

This is especially useful for applying shelved changelists
to your git-p4 tree; the existing "git p4" subcommands do
not handle these.

That's because they "p4 print" the contents of each file at
a given revision, and then use git-fastimport to generate the
deltas. But in the case of a shelved changelist, there is no
easy way to find out what the previous file state was - Perforce
does not have the concept of a single repo-wide revision.

Unfortunately, using "p4 describe" comes with a price: it cannot
be used to reliably generate diffs for binary files (it tries to
linebreak on LF characters) and it is also _much_ slower.

Unicode character correctness is untested - in theory if
"p4 describe" knows about the character encoding it shouldn't
break unicode characters if they happen to contain LF, but I
haven't tested this.

Signed-off-by: Luke Diamand 
---
 Documentation/git-p4.txt |  33 +
 git-p4.py| 304 +--
 t/t9832-make-patch.sh| 135 +
 3 files changed, 462 insertions(+), 10 deletions(-)
 create mode 100755 t/t9832-make-patch.sh

diff --git a/Documentation/git-p4.txt b/Documentation/git-p4.txt
index d8c8f11c9f..1908b00de2 100644
--- a/Documentation/git-p4.txt
+++ b/Documentation/git-p4.txt
@@ -164,6 +164,28 @@ $ git p4 submit --shelve
 $ git p4 submit --update-shelve 1234 --update-shelve 2345
 
 
+
+format-patch
+~~
+This will attempt to create a unified diff (using the git patch variant) which
+can be passed to patch. This is generated using the output from "p4 describe".
+
+It includes the contents of added files (which "p4 describe" does not).
+
+Binary files cannot be handled correctly due to limitations in "p4 describe".
+
+It will handle both normal and shelved (pending) changelists.
+
+Because of the way this works, it will be much slower than the normal git-p4 
clone
+path.
+
+
+$ git p4 format-patch 12345
+$ git p4 format-patch --output patchdir 12345 12346 12347
+$ git p4 format-patch --strip-depot-prefix 12348 > out.patch
+$ git am out.patch
+
+
 OPTIONS
 ---
 
@@ -337,6 +359,17 @@ These options can be used to modify 'git p4 rebase' 
behavior.
 --import-labels::
Import p4 labels.
 
+Format-patch options
+
+
+--output::
+Write patches to this directory (which must exist) instead of to
+standard output.
+
+--strip-depot-prefix::
+Strip the depot prefix from filenames in the patch.  This makes
+it suitable for passing to tools such as "git am".
+
 DEPOT PATH SYNTAX
 -
 The p4 depot path argument to 'git p4 sync' and 'git p4 clone' can
diff --git a/git-p4.py b/git-p4.py
index 7bb9cadc69..a1e998e6f5 100755
--- a/git-p4.py
+++ b/git-p4.py
@@ -26,6 +26,7 @@ import zipfile
 import zlib
 import ctypes
 import errno
+import time
 
 try:
 from subprocess import CalledProcessError
@@ -316,12 +317,17 @@ def p4_last_change():
 results = p4CmdList(["changes", "-m", "1"], skip_info=True)
 return int(results[0]['change'])
 
-def p4_describe(change):
+def p4_describe(change, shelved=False):
 """Make sure it returns a valid result by checking for
the presence of field "time".  Return a dict of the
results."""
 
-ds = p4CmdList(["describe", "-s", str(change)], skip_info=True)
+cmd = ["describe", "-s"]
+if shelved:
+cmd += ["-S"]
+cmd += [str(change)]
+
+ds = p4CmdList(cmd, skip_info=True)
 if len(ds) != 1:
 die("p4 describe -s %d did not return 1 result: %s" % (change, 
str(ds)))
 
@@ -372,7 +378,14 @@ def split_p4_type(p4type):
 mods = ""
 if len(s) > 1:
 mods = s[1]
-return (base, mods)
+
+git_mode = "100644"
+if "x" in mods:
+git_mode = "100755"
+if base == "symlink":
+git_mode = "12"
+
+return (base, mods, git_mode)
 
 #
 # return the raw p4 type of a file (text, text+ko, etc)
@@ -413,7 +426,7 @@ def p4_keywords_regexp_for_file(file):
 if not os.path.exists(file):
 return None
 else:
-(type_base, type_mods) = split_p4_type(p4_type(file))
+(type_base, type_mods, _) = split_p4_type(p4_type(file))
 return p4_keywords_regexp_for_type(type_base, type_mods)
 
 def setP4ExecBit(file, mode):
@@ -1208,6 +1221,9 @@ class P4UserMap:
 else:
 return True
 
+def getP4UsernameEmail(self, userid):
+return self.users[userid]
+
 def getUserCacheFilename(self):
 home = os.environ.get("HOME", os.environ.get("USERPROFILE"))
 return home + "/.gitp4-usercache.txt"
@@ -2570,13 +2586,9 @@ class P4Sync(Command, P4UserMap):
 sys.stdout.write('\r%s --> %s (%i MB)\n' % (file['depotFile'], 
relPath, size/1024/1024))
 sys.stdout.flush()
 
-(type_base, type_mods) = split_p4_type(file["type"])
+(type_base,