Re: [PATCH v3] git-p4: Use -m when running p4 changes
On 18/04/15 00:11, Lex Spoon wrote: Simply running p4 changes on a large branch can result in a too many rows scanned error from the Perforce server. It is better to use a sequence of smaller calls to p4 changes, using the -m option to limit the size of each call. Signed-off-by: Lex Spoon l...@lexspoon.org Reviewed-by: Junio C Hamano gits...@pobox.com Reviewed-by: Luke Diamand l...@diamand.org I could be wrong about this, but it looks like importNewBranches() is taking an extra argument, but that isn't reflected in the place where it gets called. I think it just got missed. As a result, t9801-git-p4-branch.sh fails with this error: Importing revision 3 (37%) Importing new branch depot/branch1 Traceback (most recent call last): File /home/lgd/git/git/git-p4, line 3327, in module main() File /home/lgd/git/git/git-p4, line 3321, in main if not cmd.run(args): File /home/lgd/git/git/git-p4, line 3195, in run if not P4Sync.run(self, depotPaths): File /home/lgd/git/git/git-p4, line 3057, in run self.importChanges(changes) File /home/lgd/git/git/git-p4, line 2692, in importChanges if self.importNewBranch(branch, change - 1): TypeError: importNewBranch() takes exactly 4 arguments (3 given) rm: cannot remove `/home/lgd/git/git/t/trash directory.t9801-git-p4-branch/git/.git/objects/pack': Directory not empty not ok 8 - import depot, branch detection, branchList branch definition Thanks! Luke --- Updated as suggested: - documentation added - avoided touch(1) - used test_seq - used || exit for test commands inside for loops - more tabs - fewer line breaks - expanded commit message Documentation/git-p4.txt | 17 ++--- git-p4.py| 54 +++- t/t9818-git-p4-block.sh | 64 3 files changed, 120 insertions(+), 15 deletions(-) create mode 100755 t/t9818-git-p4-block.sh diff --git a/Documentation/git-p4.txt b/Documentation/git-p4.txt index a1664b9..82aa5d6 100644 --- a/Documentation/git-p4.txt +++ b/Documentation/git-p4.txt @@ -225,9 +225,20 @@ Git repository: they can find the p4 branches in refs/heads. --max-changes n:: - Limit the number of imported changes to 'n'. Useful to - limit the amount of history when using the '@all' p4 revision - specifier. + Import at most 'n' changes, rather than the entire range of + changes included in the given revision specifier. A typical + usage would be use '@all' as the revision specifier, but then + to use '--max-changes 1000' to import only the last 1000 + revisions rather than the entire revision history. + +--changes-block-size n:: + The internal block size to use when converting a revision + specifier such as '@all' into a list of specific change + numbers. Instead of using a single call to 'p4 changes' to + find the full list of changes for the conversion, there are a + sequence of calls to 'p4 changes -m', each of which requests + one block of changes of the given size. The default block size + is 500, which should usually be suitable. --keep-path:: The mapping of file names from the p4 depot path to Git, by diff --git a/git-p4.py b/git-p4.py index 549022e..1fba3aa 100755 --- a/git-p4.py +++ b/git-p4.py @@ -740,17 +740,43 @@ def createOrUpdateBranchesFromOrigin(localRefPrefix = refs/remotes/p4/, silent def originP4BranchesExist(): return gitBranchExists(origin) or gitBranchExists(origin/p4) or gitBranchExists(origin/p4/master) -def p4ChangesForPaths(depotPaths, changeRange): +def p4ChangesForPaths(depotPaths, changeRange, block_size): assert depotPaths -cmd = ['changes'] -for p in depotPaths: -cmd += [%s...%s % (p, changeRange)] -output = p4_read_pipe_lines(cmd) +assert block_size + +# Parse the change range into start and end +if changeRange is None or changeRange == '': +changeStart = '@1' +changeEnd = '#head' +else: +parts = changeRange.split(',') +assert len(parts) == 2 +changeStart = parts[0] +changeEnd = parts[1] +# Accumulate change numbers in a dictionary to avoid duplicates changes = {} -for line in output: -changeNum = int(line.split( )[1]) -changes[changeNum] = True + +for p in depotPaths: +# Retrieve changes a block at a time, to prevent running +# into a MaxScanRows error from the server. +start = changeStart +end = changeEnd +get_another_block = True +while get_another_block: +new_changes = [] +cmd = ['changes'] +cmd += ['-m', str(block_size)] +cmd += [%s...%s,%s % (p, start, end)] +for line in p4_read_pipe_lines(cmd): +changeNum = int(line.split( )[1]) +new_changes.append(changeNum) +
Re: [PATCH v3] git-p4: Use -m when running p4 changes
On Mon, Apr 20, 2015 at 5:53 AM, Luke Diamand l...@diamand.org wrote: I could be wrong about this, but it looks like importNewBranches() is taking an extra argument, but that isn't reflected in the place where it gets called. I think it just got missed. As a result, t9801-git-p4-branch.sh fails with this error: Oh dear, definitely. The argument can in fact be dropped, because it's already already available via a field of the same object. I post an update with that change. -Lex -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3] git-p4: Use -m when running p4 changes
Simply running p4 changes on a large branch can result in a too many rows scanned error from the Perforce server. It is better to use a sequence of smaller calls to p4 changes, using the -m option to limit the size of each call. Signed-off-by: Lex Spoon l...@lexspoon.org Reviewed-by: Junio C Hamano gits...@pobox.com Reviewed-by: Luke Diamand l...@diamand.org --- Updated as suggested: - documentation added - avoided touch(1) - used test_seq - used || exit for test commands inside for loops - more tabs - fewer line breaks - expanded commit message Documentation/git-p4.txt | 17 ++--- git-p4.py| 54 +++- t/t9818-git-p4-block.sh | 64 3 files changed, 120 insertions(+), 15 deletions(-) create mode 100755 t/t9818-git-p4-block.sh diff --git a/Documentation/git-p4.txt b/Documentation/git-p4.txt index a1664b9..82aa5d6 100644 --- a/Documentation/git-p4.txt +++ b/Documentation/git-p4.txt @@ -225,9 +225,20 @@ Git repository: they can find the p4 branches in refs/heads. --max-changes n:: - Limit the number of imported changes to 'n'. Useful to - limit the amount of history when using the '@all' p4 revision - specifier. + Import at most 'n' changes, rather than the entire range of + changes included in the given revision specifier. A typical + usage would be use '@all' as the revision specifier, but then + to use '--max-changes 1000' to import only the last 1000 + revisions rather than the entire revision history. + +--changes-block-size n:: + The internal block size to use when converting a revision + specifier such as '@all' into a list of specific change + numbers. Instead of using a single call to 'p4 changes' to + find the full list of changes for the conversion, there are a + sequence of calls to 'p4 changes -m', each of which requests + one block of changes of the given size. The default block size + is 500, which should usually be suitable. --keep-path:: The mapping of file names from the p4 depot path to Git, by diff --git a/git-p4.py b/git-p4.py index 549022e..1fba3aa 100755 --- a/git-p4.py +++ b/git-p4.py @@ -740,17 +740,43 @@ def createOrUpdateBranchesFromOrigin(localRefPrefix = refs/remotes/p4/, silent def originP4BranchesExist(): return gitBranchExists(origin) or gitBranchExists(origin/p4) or gitBranchExists(origin/p4/master) -def p4ChangesForPaths(depotPaths, changeRange): +def p4ChangesForPaths(depotPaths, changeRange, block_size): assert depotPaths -cmd = ['changes'] -for p in depotPaths: -cmd += [%s...%s % (p, changeRange)] -output = p4_read_pipe_lines(cmd) +assert block_size + +# Parse the change range into start and end +if changeRange is None or changeRange == '': +changeStart = '@1' +changeEnd = '#head' +else: +parts = changeRange.split(',') +assert len(parts) == 2 +changeStart = parts[0] +changeEnd = parts[1] +# Accumulate change numbers in a dictionary to avoid duplicates changes = {} -for line in output: -changeNum = int(line.split( )[1]) -changes[changeNum] = True + +for p in depotPaths: +# Retrieve changes a block at a time, to prevent running +# into a MaxScanRows error from the server. +start = changeStart +end = changeEnd +get_another_block = True +while get_another_block: +new_changes = [] +cmd = ['changes'] +cmd += ['-m', str(block_size)] +cmd += [%s...%s,%s % (p, start, end)] +for line in p4_read_pipe_lines(cmd): +changeNum = int(line.split( )[1]) +new_changes.append(changeNum) +changes[changeNum] = True +if len(new_changes) == block_size: +get_another_block = True +end = '@' + str(min(new_changes)) +else: +get_another_block = False changelist = changes.keys() changelist.sort() @@ -1911,7 +1937,10 @@ class P4Sync(Command, P4UserMap): optparse.make_option(--import-labels, dest=importLabels, action=store_true), optparse.make_option(--import-local, dest=importIntoRemotes, action=store_false, help=Import into refs/heads/ , not refs/remotes), -optparse.make_option(--max-changes, dest=maxChanges), +optparse.make_option(--max-changes, dest=maxChanges, + help=Maximum number of changes to import), +optparse.make_option(--changes-block-size, dest=changes_block_size, type=int, + help=Internal block size to use when iteratively calling p4 changes), optparse.make_option(--keep-path,