[PATCH 06/22] make sure partially read index is not changed

2013-07-07 Thread Thomas Gummerer
A partially read index file currently cannot be written to disk.  Make
sure that never happens by re-reading the index file in full before
changing the in-memory index whenever it was only partially read.

Signed-off-by: Thomas Gummerer 
---
 builtin/update-index.c | 4 
 cache.h| 4 +++-
 read-cache-v2.c| 3 +++
 read-cache.c   | 8 
 4 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/builtin/update-index.c b/builtin/update-index.c
index 5c7762e..03f6426 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -49,6 +49,8 @@ static int mark_ce_flags(const char *path, int flag, int mark)
int namelen = strlen(path);
int pos = cache_name_pos(path, namelen);
if (0 <= pos) {
+   if (active_cache_partially_read)
+   cache_change_filter_opts(NULL);
if (mark)
active_cache[pos]->ce_flags |= flag;
else
@@ -253,6 +255,8 @@ static void chmod_path(int flip, const char *path)
pos = cache_name_pos(path, strlen(path));
if (pos < 0)
goto fail;
+   if (active_cache_partially_read)
+   cache_change_filter_opts(NULL);
ce = active_cache[pos];
mode = ce->ce_mode;
if (!S_ISREG(mode))
diff --git a/cache.h b/cache.h
index d38dfbd..f6c3407 100644
--- a/cache.h
+++ b/cache.h
@@ -293,7 +293,8 @@ struct index_state {
struct cache_tree *cache_tree;
struct cache_time timestamp;
unsigned name_hash_initialized : 1,
-initialized : 1;
+initialized : 1,
+partially_read : 1;
struct hash_table name_hash;
struct hash_table dir_hash;
struct index_ops *ops;
@@ -315,6 +316,7 @@ extern void free_name_hash(struct index_state *istate);
 #define active_alloc (the_index.cache_alloc)
 #define active_cache_changed (the_index.cache_changed)
 #define active_cache_tree (the_index.cache_tree)
+#define active_cache_partially_read (the_index.partially_read)
 
 #define read_cache() read_index(&the_index)
 #define read_cache_from(path) read_index_from(&the_index, (path))
diff --git a/read-cache-v2.c b/read-cache-v2.c
index 1ed640d..2cc792d 100644
--- a/read-cache-v2.c
+++ b/read-cache-v2.c
@@ -273,6 +273,7 @@ static int read_index_v2(struct index_state *istate, void 
*mmap,
src_offset += 8;
src_offset += extsize;
}
+   istate->partially_read = 0;
return 0;
 unmap:
munmap(mmap, mmap_size);
@@ -495,6 +496,8 @@ static int write_index_v2(struct index_state *istate, int 
newfd)
struct stat st;
struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
 
+   if (istate->partially_read)
+   die("BUG: index: cannot write a partially read index");
for (i = removed = extended = 0; i < entries; i++) {
if (cache[i]->ce_flags & CE_REMOVE)
removed++;
diff --git a/read-cache.c b/read-cache.c
index b30ee75..4529fab 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -30,6 +30,8 @@ static void replace_index_entry(struct index_state *istate, 
int nr, struct cache
 {
struct cache_entry *old = istate->cache[nr];
 
+   if (istate->partially_read)
+   index_change_filter_opts(istate, NULL);
remove_name_hash(istate, old);
set_index_entry(istate, nr, ce);
istate->cache_changed = 1;
@@ -467,6 +469,8 @@ int remove_index_entry_at(struct index_state *istate, int 
pos)
 {
struct cache_entry *ce = istate->cache[pos];
 
+   if (istate->partially_read)
+   index_change_filter_opts(istate, NULL);
record_resolve_undo(istate, ce);
remove_name_hash(istate, ce);
istate->cache_changed = 1;
@@ -978,6 +982,8 @@ int add_index_entry(struct index_state *istate, struct 
cache_entry *ce, int opti
 {
int pos;
 
+   if (istate->partially_read)
+   index_change_filter_opts(istate, NULL);
if (option & ADD_CACHE_JUST_APPEND)
pos = istate->cache_nr;
else {
@@ -1184,6 +1190,8 @@ int refresh_index(struct index_state *istate, unsigned 
int flags, const char **p
/* If we are doing --really-refresh that
 * means the index is not valid anymore.
 */
+   if (istate->partially_read)
+   index_change_filter_opts(istate, NULL);
ce->ce_flags &= ~CE_VALID;
istate->cache_changed = 1;
}
-- 
1.8.3.453.g1dfc63d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 04/22] read-cache: Re-read index if index file changed

2013-07-07 Thread Thomas Gummerer
Add the possibility of re-reading the index file, if it changed
while reading.

The index file might change during the read, causing outdated
information to be displayed. We check whether the index file changed
by using its stat data as a heuristic.

Helped-by: Ramsay Jones 
Signed-off-by: Thomas Gummerer 
---
 read-cache.c | 91 +---
 1 file changed, 57 insertions(+), 34 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 1e7ffc2..3e3a0e2 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1275,11 +1275,31 @@ int read_index(struct index_state *istate)
return read_index_from(istate, get_index_file());
 }
 
+static int index_changed(struct stat *st_old, struct stat *st_new)
+{
+   if (st_old->st_mtime != st_new->st_mtime ||
+#if !defined (__CYGWIN__)
+   st_old->st_uid   != st_new->st_uid ||
+   st_old->st_gid   != st_new->st_gid ||
+   st_old->st_ino   != st_new->st_ino ||
+#endif
+#if USE_NSEC
+   ST_MTIME_NSEC(*st_old) != ST_MTIME_NSEC(*st_new) ||
+#endif
+#if USE_STDEV
+   st_old->st_dev != st_new->st_dev ||
+#endif
+   st_old->st_size != st_new->st_size)
+   return 1;
+
+   return 0;
+}
+
 /* remember to discard_cache() before reading a different cache! */
 int read_index_from(struct index_state *istate, const char *path)
 {
-   int fd;
-   struct stat st;
+   int fd, err, i;
+   struct stat st_old, st_new;
struct cache_version_header *hdr;
void *mmap;
size_t mmap_size;
@@ -1291,41 +1311,44 @@ int read_index_from(struct index_state *istate, const 
char *path)
errno = ENOENT;
istate->timestamp.sec = 0;
istate->timestamp.nsec = 0;
+   for (i = 0; i < 50; i++) {
+   err = 0;
+   fd = open(path, O_RDONLY);
+   if (fd < 0) {
+   if (errno == ENOENT)
+   return 0;
+   die_errno("index file open failed");
+   }
 
-   fd = open(path, O_RDONLY);
-   if (fd < 0) {
-   if (errno == ENOENT)
-   return 0;
-   die_errno("index file open failed");
+   if (fstat(fd, &st_old))
+   die_errno("cannot stat the open index");
+
+   errno = EINVAL;
+   mmap_size = xsize_t(st_old.st_size);
+   mmap = xmmap(NULL, mmap_size, PROT_READ | PROT_WRITE, 
MAP_PRIVATE, fd, 0);
+   close(fd);
+   if (mmap == MAP_FAILED)
+   die_errno("unable to map index file");
+
+   hdr = mmap;
+   if (verify_hdr_version(istate, hdr, mmap_size) < 0)
+   err = 1;
+
+   if (istate->ops->verify_hdr(mmap, mmap_size) < 0)
+   err = 1;
+
+   if (istate->ops->read_index(istate, mmap, mmap_size) < 0)
+   err = 1;
+   istate->timestamp.sec = st_old.st_mtime;
+   istate->timestamp.nsec = ST_MTIME_NSEC(st_old);
+   if (lstat(path, &st_new))
+   die_errno("cannot stat the open index");
+
+   munmap(mmap, mmap_size);
+   if (!index_changed(&st_old, &st_new) && !err)
+   return istate->cache_nr;
}
 
-   if (fstat(fd, &st))
-   die_errno("cannot stat the open index");
-
-   errno = EINVAL;
-   mmap_size = xsize_t(st.st_size);
-   mmap = xmmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 
0);
-   close(fd);
-   if (mmap == MAP_FAILED)
-   die_errno("unable to map index file");
-
-   hdr = mmap;
-   if (verify_hdr_version(istate, hdr, mmap_size) < 0)
-   goto unmap;
-
-   if (istate->ops->verify_hdr(mmap, mmap_size) < 0)
-   goto unmap;
-
-   if (istate->ops->read_index(istate, mmap, mmap_size) < 0)
-   goto unmap;
-   istate->timestamp.sec = st.st_mtime;
-   istate->timestamp.nsec = ST_MTIME_NSEC(st);
-
-   munmap(mmap, mmap_size);
-   return istate->cache_nr;
-
-unmap:
-   munmap(mmap, mmap_size);
die("index file corrupt");
 }
 
-- 
1.8.3.453.g1dfc63d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 01/22] t2104: Don't fail for index versions other than [23]

2013-07-07 Thread Thomas Gummerer
t2104 currently checks for the exact index version 2 or 3,
depending on whether there is a skip-worktree flag. Other
index versions do not use extended flags and thus cannot
be tested for version changes.

Make this test update the index to version 2 at the beginning
of the test. Testing the skip-worktree flags for the default
index format is still covered by t7011 and t7012.

Signed-off-by: Thomas Gummerer 
---
 t/t2104-update-index-skip-worktree.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/t/t2104-update-index-skip-worktree.sh 
b/t/t2104-update-index-skip-worktree.sh
index 1d0879b..bd9644f 100755
--- a/t/t2104-update-index-skip-worktree.sh
+++ b/t/t2104-update-index-skip-worktree.sh
@@ -22,6 +22,7 @@ H sub/2
 EOF
 
 test_expect_success 'setup' '
+   git update-index --index-version=2 &&
mkdir sub &&
touch ./1 ./2 sub/1 sub/2 &&
git add 1 2 sub/1 sub/2 &&
-- 
1.8.3.453.g1dfc63d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 00/22] Index v5

2013-07-07 Thread Thomas Gummerer
Hi,

This is a follow-up for last year's Google Summer of Code (late I know
:-) ), which wasn't merged back then.  The previous rounds of the
series are at $gmane/202752, $gmane/202923, $gmane/203088 and
$gmane/203517.

Since then I added an index reading API, which allows certain parts of
Git to take advantage of the partial reading capability of the new
index file format now.  In this series, grep, ls-files, and the code
paths used by them are switched to the new API.

Another goal for the api is to hide the open coded loops and accesses
to the in-memory format, to make it simpler to change the in-memory
format to a version that fits the new on-disk format better.

Apart from the new patches, it is mostly the "read-cache: read index-v5"
patch that changed, because the ability to read the index partially was
added.

The first patch for t2104 makes sense without the rest of the series,
as it fixes running the test-suite with index-v4 as the default index
format.

Below are the timings for the WebKit repository.  c4b2d88 is the
revision before adding anything, while HEAD shows the times at the last
patch in the series.  The slower times in update-index come from the
update-index patch so they are no problem (in c4b2d88 the index is
only read, while in HEAD it's read and written).  The increase in time
in the ls-files test comes from not having the prune_cache function
in the index API.

I have not added this function as it only seems of use in ls-files,
but it can still be added if this increase is a problem.

Testc4b2d88   HEAD  
 
-
0003.2: v[23]: update-index 0.11(0.06+0.04)   0.22(0.15+0.05) 
+100.0%
0003.3: v[23]: grep nonexistent -- subdir   0.12(0.08+0.03)   0.12(0.09+0.02) 
+0.0%  
0003.4: v[23]: ls-files -- subdir   0.11(0.08+0.01)   0.12(0.08+0.03) 
+9.1%  
0003.6: v4: update-index0.09(0.06+0.02)   0.18(0.14+0.03) 
+100.0%
0003.7: v4: grep nonexistent -- subdir  0.10(0.08+0.02)   0.10(0.07+0.02) 
+0.0%  
0003.8: v4: ls-files -- subdir  0.09(0.07+0.01)   0.10(0.08+0.01) 
+11.1% 
0003.10: v5: update-index0.15(0.10+0.03)   
 
0003.11: v5: grep nonexistent -- subdir  0.01(0.00+0.00)   
 
0003.12: v5: ls-files -- subdir  0.01(0.01+0.00)   
 

And for reference the times for a synthetic repository with a 470MB
index file, just to demonstrate the improvements in large repositories.

Testc4b2d88   HEAD  
 
-
0003.2: v[23]: update-index 1.50(1.18+0.30)   3.18(2.55+0.60) 
+112.0%
0003.3: v[23]: grep nonexistent -- subdir   1.62(1.28+0.32)   1.66(1.28+0.36) 
+2.5%  
0003.4: v[23]: ls-files -- subdir   1.49(1.21+0.26)   1.62(1.28+0.32) 
+8.7%  
0003.6: v4: update-index1.18(0.89+0.28)   2.68(2.22+0.44) 
+127.1%
0003.7: v4: grep nonexistent -- subdir  1.29(1.00+0.28)   1.30(1.04+0.24) 
+0.8%  
0003.8: v4: ls-files -- subdir  1.20(0.95+0.23)   1.30(0.98+0.30) 
+8.3%  
0003.10: v5: update-index2.12(1.63+0.48)   
 
0003.11: v5: grep nonexistent -- subdir  0.08(0.04+0.02)   
 
0003.12: v5: ls-files -- subdir  0.07(0.05+0.01)   
 


Thomas Gummerer (21):
  t2104: Don't fail for index versions other than [23]
  read-cache: split index file version specific functionality
  read-cache: move index v2 specific functions to their own file
  read-cache: Re-read index if index file changed
  read-cache: add index reading api
  make sure partially read index is not changed
  dir.c: use index api
  tree.c: use index api
  name-hash.c: use index api
  grep.c: Use index api
  ls-files.c: use the index api
  read-cache: make read_blob_data_from_index use index api
  documentation: add documentation of the index-v5 file format
  read-cache: make in-memory format aware of stat_crc
  read-cache: read index-v5
  read-cache: read resolve-undo data
  read-cache: read cache-tree in index-v5
  read-cache: write index-v5
  read-cache: write index-v5 cache-tree data
  read-cache: write resolve-undo data for index-v5
  update-index.c: rewrite index when index-version is given

Thomas Rast (1):
  p0003-index.sh: add perf test for the index formats

 Documentation/technical/index-file-format-v5.txt |  296 +
 Makefile |3 +
 builtin/grep.c   |   71 +-
 builtin/ls-files.c   |  213 ++-
 builtin/update-index.c   |8 +-
 cache-tree.c |2 +-
 cache-tree.h |6 +
 cache.h 

[PATCH 02/22] read-cache: split index file version specific functionality

2013-07-07 Thread Thomas Gummerer
Split index file version specific functionality to their own functions,
to prepare for moving the index file version specific parts to their own
file.  This makes it easier to add a new index file format later.

Signed-off-by: Thomas Gummerer 
---
 cache.h  |   5 +-
 read-cache.c | 130 +--
 test-index-version.c |   2 +-
 3 files changed, 90 insertions(+), 47 deletions(-)

diff --git a/cache.h b/cache.h
index c288678..7af853b 100644
--- a/cache.h
+++ b/cache.h
@@ -100,9 +100,12 @@ unsigned long git_deflate_bound(git_zstream *, unsigned 
long);
  */
 
 #define CACHE_SIGNATURE 0x44495243 /* "DIRC" */
-struct cache_header {
+struct cache_version_header {
unsigned int hdr_signature;
unsigned int hdr_version;
+};
+
+struct cache_header {
unsigned int hdr_entries;
 };
 
diff --git a/read-cache.c b/read-cache.c
index d5201f9..93947bf 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1268,10 +1268,8 @@ struct ondisk_cache_entry_extended {
ondisk_cache_entry_extended_size(ce_namelen(ce)) : \
ondisk_cache_entry_size(ce_namelen(ce)))
 
-static int verify_hdr(struct cache_header *hdr, unsigned long size)
+static int verify_hdr_version(struct cache_version_header *hdr, unsigned long 
size)
 {
-   git_SHA_CTX c;
-   unsigned char sha1[20];
int hdr_version;
 
if (hdr->hdr_signature != htonl(CACHE_SIGNATURE))
@@ -1279,10 +1277,22 @@ static int verify_hdr(struct cache_header *hdr, 
unsigned long size)
hdr_version = ntohl(hdr->hdr_version);
if (hdr_version < INDEX_FORMAT_LB || INDEX_FORMAT_UB < hdr_version)
return error("bad index version %d", hdr_version);
+   return 0;
+}
+
+static int verify_hdr(void *mmap, unsigned long size)
+{
+   git_SHA_CTX c;
+   unsigned char sha1[20];
+
+   if (size < sizeof(struct cache_version_header)
+   + sizeof(struct cache_header) + 20)
+   die("index file smaller than expected");
+
git_SHA1_Init(&c);
-   git_SHA1_Update(&c, hdr, size - 20);
+   git_SHA1_Update(&c, mmap, size - 20);
git_SHA1_Final(sha1, &c);
-   if (hashcmp(sha1, (unsigned char *)hdr + size - 20))
+   if (hashcmp(sha1, (unsigned char *)mmap + size - 20))
return error("bad index file sha1 signature");
return 0;
 }
@@ -1424,47 +1434,19 @@ static struct cache_entry *create_from_disk(struct 
ondisk_cache_entry *ondisk,
return ce;
 }
 
-/* remember to discard_cache() before reading a different cache! */
-int read_index_from(struct index_state *istate, const char *path)
+static int read_index_v2(struct index_state *istate, void *mmap, unsigned long 
mmap_size)
 {
-   int fd, i;
-   struct stat st;
+   int i;
unsigned long src_offset;
-   struct cache_header *hdr;
-   void *mmap;
-   size_t mmap_size;
+   struct cache_version_header *hdr;
+   struct cache_header *hdr_v2;
struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
 
-   if (istate->initialized)
-   return istate->cache_nr;
-
-   istate->timestamp.sec = 0;
-   istate->timestamp.nsec = 0;
-   fd = open(path, O_RDONLY);
-   if (fd < 0) {
-   if (errno == ENOENT)
-   return 0;
-   die_errno("index file open failed");
-   }
-
-   if (fstat(fd, &st))
-   die_errno("cannot stat the open index");
-
-   mmap_size = xsize_t(st.st_size);
-   if (mmap_size < sizeof(struct cache_header) + 20)
-   die("index file smaller than expected");
-
-   mmap = xmmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 
0);
-   if (mmap == MAP_FAILED)
-   die_errno("unable to map index file");
-   close(fd);
-
hdr = mmap;
-   if (verify_hdr(hdr, mmap_size) < 0)
-   goto unmap;
+   hdr_v2 = (struct cache_header *)((char *)mmap + sizeof(*hdr));
 
istate->version = ntohl(hdr->hdr_version);
-   istate->cache_nr = ntohl(hdr->hdr_entries);
+   istate->cache_nr = ntohl(hdr_v2->hdr_entries);
istate->cache_alloc = alloc_nr(istate->cache_nr);
istate->cache = xcalloc(istate->cache_alloc, sizeof(*istate->cache));
istate->initialized = 1;
@@ -1474,7 +1456,7 @@ int read_index_from(struct index_state *istate, const 
char *path)
else
previous_name = NULL;
 
-   src_offset = sizeof(*hdr);
+   src_offset = sizeof(*hdr) + sizeof(*hdr_v2);
for (i = 0; i < istate->cache_nr; i++) {
struct ondisk_cache_entry *disk_ce;
struct cache_entry *ce;
@@ -1487,8 +1469,6 @@ int read_index_from(struct index_state *istate, const 
char *path)
src_offset += consumed;
}
strbuf_release(&previous_name_buf);
-   istate->timestamp.sec = st.st_mtime;
-   istate->t

[PATCH 05/22] read-cache: add index reading api

2013-07-07 Thread Thomas Gummerer
Add an api for access to the index file.  Currently there is only a very
basic api for accessing the index file, which only allows a full read of
the index, and lets the users of the data filter it.  The new index api
gives the users the possibility to use only part of the index and
provides functions for iterating over and accessing cache entries.

This simplifies future improvements to the in-memory format, as changes
will be concentrated on one file, instead of the whole git source code.

Signed-off-by: Thomas Gummerer 
---
 cache.h |  57 +-
 read-cache-v2.c |  96 +++--
 read-cache.c| 108 
 read-cache.h|  12 ++-
 4 files changed, 263 insertions(+), 10 deletions(-)

diff --git a/cache.h b/cache.h
index 5082b34..d38dfbd 100644
--- a/cache.h
+++ b/cache.h
@@ -127,7 +127,8 @@ struct cache_entry {
unsigned int ce_flags;
unsigned int ce_namelen;
unsigned char sha1[20];
-   struct cache_entry *next;
+   struct cache_entry *next; /* used by name_hash */
+   struct cache_entry *next_ce; /* used to keep a list of cache entries */
char name[FLEX_ARRAY]; /* more */
 };
 
@@ -258,6 +259,32 @@ static inline unsigned int canon_mode(unsigned int mode)
 
 #define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1)
 
+/*
+ * Options by which the index should be filtered when read partially.
+ *
+ * pathspec: The pathspec which the index entries have to match
+ * seen: Used to return the seen parameter from match_pathspec()
+ * max_prefix, max_prefix_len: These variables are set to the longest
+ * common prefix, the length of the longest common prefix of the
+ * given pathspec
+ *
+ * read_staged: used to indicate if the conflicted entries (entries
+ * with a stage) should be included
+ * read_cache_tree: used to indicate if the cache-tree should be read
+ * read_resolve_undo: used to indicate if the resolve undo data should
+ * be read
+ */
+struct filter_opts {
+   const char **pathspec;
+   char *seen;
+   char *max_prefix;
+   int max_prefix_len;
+
+   int read_staged;
+   int read_cache_tree;
+   int read_resolve_undo;
+};
+
 struct index_state {
struct cache_entry **cache;
unsigned int version;
@@ -270,6 +297,8 @@ struct index_state {
struct hash_table name_hash;
struct hash_table dir_hash;
struct index_ops *ops;
+   struct internal_ops *internal_ops;
+   struct filter_opts *filter_opts;
 };
 
 extern struct index_state the_index;
@@ -311,6 +340,17 @@ extern void free_name_hash(struct index_state *istate);
 #define unmerge_cache_entry_at(at) unmerge_index_entry_at(&the_index, at)
 #define unmerge_cache(pathspec) unmerge_index(&the_index, pathspec)
 #define read_blob_data_from_cache(path, sz) 
read_blob_data_from_index(&the_index, (path), (sz))
+
+/* index api */
+#define read_cache_filtered(opts) read_index_filtered(&the_index, (opts))
+#define read_cache_filtered_from(path, opts) 
read_index_filtered_from(&the_index, (path), (opts))
+#define get_cache_entry_by_name(name, namelen, ce) \
+   get_index_entry_by_name(&the_index, (name), (namelen), (ce))
+#define for_each_cache_entry(fn, cb_data) \
+   for_each_index_entry(&the_index, (fn), (cb_data))
+#define next_cache_entry(ce) next_index_entry(ce)
+#define cache_change_filter_opts(opts) index_change_filter_opts(&the_index, 
(opts))
+#define sort_cache() sort_index(&the_index)
 #endif
 
 enum object_type {
@@ -438,6 +478,21 @@ extern int init_db(const char *template_dir, unsigned int 
flags);
} \
} while (0)
 
+/* index api */
+extern int read_index_filtered(struct index_state *, struct filter_opts *opts);
+extern int read_index_filtered_from(struct index_state *, const char *path, 
struct filter_opts *opts);
+extern int get_index_entry_by_name(struct index_state *, const char *name, int 
namelen,
+  struct cache_entry **ce);
+extern struct cache_entry *next_index_entry(struct cache_entry *ce);
+void index_change_filter_opts(struct index_state *istate, struct filter_opts 
*opts);
+void sort_index(struct index_state *istate);
+
+typedef int each_cache_entry_fn(struct cache_entry *ce, void *);
+
+extern int for_each_index_entry(struct index_state *istate,
+   each_cache_entry_fn, void *);
+
+
 /* Initialize and use the cache information */
 extern int read_index(struct index_state *);
 extern int read_index_preload(struct index_state *, const char **pathspec);
diff --git a/read-cache-v2.c b/read-cache-v2.c
index a6883c3..1ed640d 100644
--- a/read-cache-v2.c
+++ b/read-cache-v2.c
@@ -3,6 +3,7 @@
 #include "resolve-undo.h"
 #include "cache-tree.h"
 #include "varint.h"
+#include "dir.h"
 
 /* Mask for the name length in ce_flags in the on-disk index */
 #define CE_NAMEMASK  (0x0fff)
@@

[PATCH 07/22] dir.c: use index api

2013-07-07 Thread Thomas Gummerer
Signed-off-by: Thomas Gummerer 
---
 dir.c | 33 +++--
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/dir.c b/dir.c
index 897c874..f4919ba 100644
--- a/dir.c
+++ b/dir.c
@@ -468,19 +468,19 @@ void add_exclude(const char *string, const char *base,
 
 static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
 {
-   int pos, len;
+   int len;
unsigned long sz;
enum object_type type;
void *data;
struct index_state *istate = &the_index;
+   struct cache_entry *ce;
 
len = strlen(path);
-   pos = index_name_pos(istate, path, len);
-   if (pos < 0)
+   if (!get_index_entry_by_name(istate, path, len, &ce))
return NULL;
-   if (!ce_skip_worktree(istate->cache[pos]))
+   if (!ce_skip_worktree(ce))
return NULL;
-   data = read_sha1_file(istate->cache[pos]->sha1, &type, &sz);
+   data = read_sha1_file(ce->sha1, &type, &sz);
if (!data || type != OBJ_BLOB) {
free(data);
return NULL;
@@ -968,16 +968,13 @@ static enum exist_status 
directory_exists_in_index_icase(const char *dirname, in
  */
 static enum exist_status directory_exists_in_index(const char *dirname, int 
len)
 {
-   int pos;
+   struct cache_entry *ce;
 
if (ignore_case)
return directory_exists_in_index_icase(dirname, len);
 
-   pos = cache_name_pos(dirname, len);
-   if (pos < 0)
-   pos = -pos-1;
-   while (pos < active_nr) {
-   struct cache_entry *ce = active_cache[pos++];
+   get_cache_entry_by_name(dirname, len, &ce);
+   while (ce) {
unsigned char endchar;
 
if (strncmp(ce->name, dirname, len))
@@ -989,6 +986,7 @@ static enum exist_status directory_exists_in_index(const 
char *dirname, int len)
return index_directory;
if (!endchar && S_ISGITLINK(ce->ce_mode))
return index_gitdir;
+   ce = next_cache_entry(ce);
}
return index_nonexistent;
 }
@@ -1114,7 +1112,6 @@ static int exclude_matches_pathspec(const char *path, int 
len,
 
 static int get_index_dtype(const char *path, int len)
 {
-   int pos;
struct cache_entry *ce;
 
ce = cache_name_exists(path, len, 0);
@@ -1131,18 +1128,18 @@ static int get_index_dtype(const char *path, int len)
}
 
/* Try to look it up as a directory */
-   pos = cache_name_pos(path, len);
-   if (pos >= 0)
+   if (get_cache_entry_by_name(path, len, &ce));
return DT_UNKNOWN;
-   pos = -pos-1;
-   while (pos < active_nr) {
-   ce = active_cache[pos++];
+
+   while (ce) {
if (strncmp(ce->name, path, len))
break;
if (ce->name[len] > '/')
break;
-   if (ce->name[len] < '/')
+   if (ce->name[len] < '/') {
+   ce = next_cache_entry(ce);
continue;
+   }
if (!ce_uptodate(ce))
break;  /* continue? */
return DT_DIR;
-- 
1.8.3.453.g1dfc63d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 08/22] tree.c: use index api

2013-07-07 Thread Thomas Gummerer
Signed-off-by: Thomas Gummerer 
---
 tree.c | 38 --
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/tree.c b/tree.c
index 62fed63..5cd43f4 100644
--- a/tree.c
+++ b/tree.c
@@ -128,20 +128,28 @@ int read_tree_recursive(struct tree *tree,
return ret;
 }
 
-static int cmp_cache_name_compare(const void *a_, const void *b_)
+
+struct read_tree_data {
+   read_tree_fn_t fn;
+   int stage;
+};
+
+int get_read_tree_fn(struct cache_entry *ce, void *cb_data)
 {
-   const struct cache_entry *ce1, *ce2;
+   struct read_tree_data *data = cb_data;
 
-   ce1 = *((const struct cache_entry **)a_);
-   ce2 = *((const struct cache_entry **)b_);
-   return cache_name_stage_compare(ce1->name, ce1->ce_namelen, 
ce_stage(ce1),
- ce2->name, ce2->ce_namelen, ce_stage(ce2));
+   if (ce_stage(ce) == data->stage) {
+   data->fn = read_one_entry;
+   return 0;
+   }
+   return 1;
 }
 
 int read_tree(struct tree *tree, int stage, struct pathspec *match)
 {
read_tree_fn_t fn = NULL;
-   int i, err;
+   int err;
+   struct read_tree_data rtd;
 
/*
 * Currently the only existing callers of this function all
@@ -158,11 +166,10 @@ int read_tree(struct tree *tree, int stage, struct 
pathspec *match)
 * do it the original slow way, otherwise, append and then
 * sort at the end.
 */
-   for (i = 0; !fn && i < active_nr; i++) {
-   struct cache_entry *ce = active_cache[i];
-   if (ce_stage(ce) == stage)
-   fn = read_one_entry;
-   }
+   rtd.fn = fn;
+   rtd.stage = stage;
+   for_each_cache_entry(get_read_tree_fn, &rtd);
+   fn = rtd.fn;
 
if (!fn)
fn = read_one_entry_quick;
@@ -170,12 +177,7 @@ int read_tree(struct tree *tree, int stage, struct 
pathspec *match)
if (fn == read_one_entry || err)
return err;
 
-   /*
-* Sort the cache entry -- we need to nuke the cache tree, though.
-*/
-   cache_tree_free(&active_cache_tree);
-   qsort(active_cache, active_nr, sizeof(active_cache[0]),
- cmp_cache_name_compare);
+   sort_cache();
return 0;
 }
 
-- 
1.8.3.453.g1dfc63d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 10/22] grep.c: Use index api

2013-07-07 Thread Thomas Gummerer
Signed-off-by: Thomas Gummerer 
---
 builtin/grep.c | 71 ++
 1 file changed, 37 insertions(+), 34 deletions(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index a419cda..2a1c8f4 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -368,41 +368,33 @@ static void run_pager(struct grep_opt *opt, const char 
*prefix)
free(argv);
 }
 
-static int grep_cache(struct grep_opt *opt, const struct pathspec *pathspec, 
int cached)
+struct grep_opts {
+   struct grep_opt *opt;
+   const struct pathspec *pathspec;
+   int cached;
+   int hit;
+};
+
+static int grep_cache(struct cache_entry *ce, void *cb_data)
 {
-   int hit = 0;
-   int nr;
-   read_cache();
+   struct grep_opts *opts = cb_data;
 
-   for (nr = 0; nr < active_nr; nr++) {
-   struct cache_entry *ce = active_cache[nr];
-   if (!S_ISREG(ce->ce_mode))
-   continue;
-   if (!match_pathspec_depth(pathspec, ce->name, ce_namelen(ce), 
0, NULL))
-   continue;
-   /*
-* If CE_VALID is on, we assume worktree file and its cache 
entry
-* are identical, even if worktree file has been modified, so 
use
-* cache version instead
-*/
-   if (cached || (ce->ce_flags & CE_VALID) || 
ce_skip_worktree(ce)) {
-   if (ce_stage(ce))
-   continue;
-   hit |= grep_sha1(opt, ce->sha1, ce->name, 0, ce->name);
-   }
-   else
-   hit |= grep_file(opt, ce->name);
-   if (ce_stage(ce)) {
-   do {
-   nr++;
-   } while (nr < active_nr &&
-!strcmp(ce->name, active_cache[nr]->name));
-   nr--; /* compensate for loop control */
-   }
-   if (hit && opt->status_only)
-   break;
-   }
-   return hit;
+   if (!S_ISREG(ce->ce_mode))
+   return 0;
+   if (!match_pathspec_depth(opts->pathspec, ce->name, ce_namelen(ce), 0, 
NULL))
+   return 0;
+   /*
+* If CE_VALID is on, we assume worktree file and its cache entry
+* are identical, even if worktree file has been modified, so use
+* cache version instead
+*/
+   if (opts->cached || (ce->ce_flags & CE_VALID) || ce_skip_worktree(ce))
+   opts->hit |= grep_sha1(opts->opt, ce->sha1, ce->name, 0, 
ce->name);
+   else
+   opts->hit |= grep_file(opts->opt, ce->name);
+   if (opts->hit && opts->opt->status_only)
+   return 1;
+   return 0;
 }
 
 static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec,
@@ -895,10 +887,21 @@ int cmd_grep(int argc, const char **argv, const char 
*prefix)
} else if (0 <= opt_exclude) {
die(_("--[no-]exclude-standard cannot be used for tracked 
contents."));
} else if (!list.nr) {
+   struct grep_opts opts;
+   struct filter_opts *filter_opts = xmalloc(sizeof(*filter_opts));
+
if (!cached)
setup_work_tree();
 
-   hit = grep_cache(&opt, &pathspec, cached);
+   memset(filter_opts, 0, sizeof(*filter_opts));
+   filter_opts->pathspec = pathspec.raw;
+   opts.opt = &opt;
+   opts.pathspec = &pathspec;
+   opts.cached = cached;
+   opts.hit = 0;
+   read_cache_filtered(filter_opts);
+   for_each_cache_entry(grep_cache, &opts);
+   hit = opts.hit;
} else {
if (cached)
die(_("both --cached and trees are given."));
-- 
1.8.3.453.g1dfc63d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 12/22] read-cache: make read_blob_data_from_index use index api

2013-07-07 Thread Thomas Gummerer
Signed-off-by: Thomas Gummerer 
---
 read-cache.c | 22 ++
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 4529fab..c81e643 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1588,29 +1588,27 @@ int index_name_is_other(const struct index_state 
*istate, const char *name,
 
 void *read_blob_data_from_index(struct index_state *istate, const char *path, 
unsigned long *size)
 {
-   int pos, len;
+   int ret, len;
unsigned long sz;
enum object_type type;
void *data;
+   struct cache_entry *ce;
 
len = strlen(path);
-   pos = index_name_pos(istate, path, len);
-   if (pos < 0) {
+   ret = get_index_entry_by_name(istate, path, len, &ce);
+   if (!ret) {
/*
 * We might be in the middle of a merge, in which
 * case we would read stage #2 (ours).
 */
-   int i;
-   for (i = -pos - 1;
-(pos < 0 && i < istate->cache_nr &&
- !strcmp(istate->cache[i]->name, path));
-i++)
-   if (ce_stage(istate->cache[i]) == 2)
-   pos = i;
+   for (; !ret && ce && !strcmp(ce->name, path); ce = 
next_index_entry(ce))
+   if (ce_stage(ce) == 2)
+   ret = 1;
+
}
-   if (pos < 0)
+   if (!ret)
return NULL;
-   data = read_sha1_file(istate->cache[pos]->sha1, &type, &sz);
+   data = read_sha1_file(ce->sha1, &type, &sz);
if (!data || type != OBJ_BLOB) {
free(data);
return NULL;
-- 
1.8.3.453.g1dfc63d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 11/22] ls-files.c: use the index api

2013-07-07 Thread Thomas Gummerer
Signed-off-by: Thomas Gummerer 
---
 builtin/ls-files.c | 213 +
 1 file changed, 100 insertions(+), 113 deletions(-)

diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index 08d9786..82857d4 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -88,36 +88,35 @@ static void show_killed_files(struct dir_struct *dir)
for (i = 0; i < dir->nr; i++) {
struct dir_entry *ent = dir->entries[i];
char *cp, *sp;
-   int pos, len, killed = 0;
+   int len, killed = 0;
 
for (cp = ent->name; cp - ent->name < ent->len; cp = sp + 1) {
+   struct cache_entry *ce;
+
sp = strchr(cp, '/');
if (!sp) {
/* If ent->name is prefix of an entry in the
 * cache, it will be killed.
 */
-   pos = cache_name_pos(ent->name, ent->len);
-   if (0 <= pos)
+   if (get_cache_entry_by_name(ent->name, 
ent->len, &ce))
die("bug in show-killed-files");
-   pos = -pos - 1;
-   while (pos < active_nr &&
-  ce_stage(active_cache[pos]))
-   pos++; /* skip unmerged */
-   if (active_nr <= pos)
+   while (ce && ce_stage(ce))
+   ce = next_cache_entry(ce);
+   if (!ce)
break;
/* pos points at a name immediately after
 * ent->name in the cache.  Does it expect
 * ent->name to be a directory?
 */
-   len = ce_namelen(active_cache[pos]);
+   len = ce_namelen(ce);
if ((ent->len < len) &&
-   !strncmp(active_cache[pos]->name,
+   !strncmp(ce->name,
 ent->name, ent->len) &&
-   active_cache[pos]->name[ent->len] == '/')
+   ce->name[ent->len] == '/')
killed = 1;
break;
}
-   if (0 <= cache_name_pos(ent->name, sp - ent->name)) {
+   if (get_cache_entry_by_name(ent->name, sp - ent->name, 
&ce)) {
/* If any of the leading directories in
 * ent->name is registered in the cache,
 * ent->name will be killed.
@@ -213,10 +212,43 @@ static int ce_excluded(struct dir_struct *dir, struct 
cache_entry *ce)
return is_excluded(dir, ce->name, &dtype);
 }
 
-static void show_files(struct dir_struct *dir)
+static int show_cached_stage(struct cache_entry *ce, void *cb_data)
 {
-   int i;
+   struct dir_struct *dir = cb_data;
+
+   if ((dir->flags & DIR_SHOW_IGNORED) && !ce_excluded(dir, ce))
+   return 0;
+   if (show_unmerged && !ce_stage(ce))
+   return 0;
+   if (ce->ce_flags & CE_UPDATE)
+   return 0;
+   show_ce_entry(ce_stage(ce) ? tag_unmerged :
+   (ce_skip_worktree(ce) ? tag_skip_worktree : tag_cached), ce);
+   return 0;
+}
 
+static int show_deleted_modified(struct cache_entry *ce, void *cb_data)
+{
+   struct stat st;
+   int err;
+   struct dir_struct *dir = cb_data;
+
+   if ((dir->flags & DIR_SHOW_IGNORED) && !ce_excluded(dir, ce))
+   return 0;
+   if (ce->ce_flags & CE_UPDATE)
+   return 0;
+   if (ce_skip_worktree(ce))
+   return 0;
+   err = lstat(ce->name, &st);
+   if (show_deleted && err)
+   show_ce_entry(tag_removed, ce);
+   if (show_modified && ce_modified(ce, &st, 0))
+   show_ce_entry(tag_modified, ce);
+   return 0;
+}
+
+static void show_files(struct dir_struct *dir)
+{
/* For cached/deleted files we don't need to even do the readdir */
if (show_others || show_killed) {
fill_directory(dir, pathspec);
@@ -225,66 +257,18 @@ static void show_files(struct dir_struct *dir)
if (show_killed)
show_killed_files(dir);
}
-   if (show_cached || show_stage) {
-   for (i = 0; i < active_nr; i++) {
-   struct cache_entry *ce = active_cache[i];
-   if ((dir->flags & DIR_SHOW_IGNORED) &&
-   !ce_excluded(dir, ce))
-  

[PATCH 09/22] name-hash.c: use index api

2013-07-07 Thread Thomas Gummerer
Signed-off-by: Thomas Gummerer 
---
 name-hash.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/name-hash.c b/name-hash.c
index 617c86c..6551849 100644
--- a/name-hash.c
+++ b/name-hash.c
@@ -144,16 +144,19 @@ static void hash_index_entry(struct index_state *istate, 
struct cache_entry *ce)
add_dir_entry(istate, ce);
 }
 
-static void lazy_init_name_hash(struct index_state *istate)
+static int hash_entry(struct cache_entry *ce, void *istate)
 {
-   int nr;
+   hash_index_entry((struct index_state *)istate, ce);
+   return 0;
+}
 
+static void lazy_init_name_hash(struct index_state *istate)
+{
if (istate->name_hash_initialized)
return;
if (istate->cache_nr)
preallocate_hash(&istate->name_hash, istate->cache_nr);
-   for (nr = 0; nr < istate->cache_nr; nr++)
-   hash_index_entry(istate, istate->cache[nr]);
+   for_each_index_entry(istate, hash_entry, istate);
istate->name_hash_initialized = 1;
 }
 
-- 
1.8.3.453.g1dfc63d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 16/22] read-cache: read resolve-undo data

2013-07-07 Thread Thomas Gummerer
Make git read the resolve-undo data from the index.

Since the resolve-undo data is joined with the conflicts in
the ondisk format of the index file version 5, conflicts and
resolved data are read at the same time, and the resolve-undo
data is then converted to the in-memory format.

Helped-by: Thomas Rast 
Signed-off-by: Thomas Gummerer 
---
 read-cache-v5.c | 39 +++
 1 file changed, 39 insertions(+)

diff --git a/read-cache-v5.c b/read-cache-v5.c
index e319f30..193970a 100644
--- a/read-cache-v5.c
+++ b/read-cache-v5.c
@@ -1,5 +1,6 @@
 #include "cache.h"
 #include "read-cache.h"
+#include "string-list.h"
 #include "resolve-undo.h"
 #include "cache-tree.h"
 #include "dir.h"
@@ -447,6 +448,43 @@ static int read_conflicts(struct conflict_entry **head,
return 0;
 }
 
+static void resolve_undo_convert_v5(struct index_state *istate,
+   struct conflict_entry *conflict)
+{
+   int i;
+
+   while (conflict) {
+   struct string_list_item *lost;
+   struct resolve_undo_info *ui;
+   struct conflict_part *cp;
+
+   if (conflict->entries &&
+   (conflict->entries->flags & CONFLICT_CONFLICTED) != 0) {
+   conflict = conflict->next;
+   continue;
+   }
+   if (!istate->resolve_undo) {
+   istate->resolve_undo = xcalloc(1, sizeof(struct 
string_list));
+   istate->resolve_undo->strdup_strings = 1;
+   }
+
+   lost = string_list_insert(istate->resolve_undo, conflict->name);
+   if (!lost->util)
+   lost->util = xcalloc(1, sizeof(*ui));
+   ui = lost->util;
+
+   cp = conflict->entries;
+   for (i = 0; i < 3; i++)
+   ui->mode[i] = 0;
+   while (cp) {
+   ui->mode[conflict_stage(cp) - 1] = cp->entry_mode;
+   hashcpy(ui->sha1[conflict_stage(cp) - 1], cp->sha1);
+   cp = cp->next;
+   }
+   conflict = conflict->next;
+   }
+}
+
 static int read_entries(struct index_state *istate, struct directory_entry 
**de,
unsigned int *entry_offset, void **mmap,
unsigned long mmap_size, unsigned int *nr,
@@ -460,6 +498,7 @@ static int read_entries(struct index_state *istate, struct 
directory_entry **de,
conflict_queue = NULL;
if (read_conflicts(&conflict_queue, *de, mmap, mmap_size) < 0)
return -1;
+   resolve_undo_convert_v5(istate, conflict_queue);
for (i = 0; i < (*de)->de_nfiles; i++) {
if (read_entry(&ce,
   *de,
-- 
1.8.3.453.g1dfc63d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 20/22] read-cache: write resolve-undo data for index-v5

2013-07-07 Thread Thomas Gummerer
Make git write the resolve-undo data to the index.

Since the resolve-undo data is joined with the conflicts in
the ondisk format of the index file version 5, conflicts and
resolved data are written at the same time, after the in-memory
resolve-undo data has been converted to the ondisk format.

Helped-by: Thomas Rast 
Signed-off-by: Thomas Gummerer 
---
 read-cache-v5.c | 94 +
 1 file changed, 94 insertions(+)

diff --git a/read-cache-v5.c b/read-cache-v5.c
index 306de30..412db53 100644
--- a/read-cache-v5.c
+++ b/read-cache-v5.c
@@ -1011,6 +1011,99 @@ static void cache_tree_to_ondisk_v5(struct hash_table 
*table, struct cache_tree
convert_one_to_ondisk_v5(table, root, "", 0, 0);
 }
 
+static void resolve_undo_to_ondisk_v5(struct hash_table *table,
+ struct string_list *resolve_undo,
+ unsigned int *ndir, int *total_dir_len,
+ struct directory_entry *de)
+{
+   struct string_list_item *item;
+   struct directory_entry *search;
+
+   if (!resolve_undo)
+   return;
+   for_each_string_list_item(item, resolve_undo) {
+   struct conflict_entry *conflict_entry;
+   struct resolve_undo_info *ui = item->util;
+   char *super;
+   int i, dir_len, len;
+   uint32_t crc;
+   struct directory_entry *found, *current, *new_tree;
+
+   if (!ui)
+   continue;
+
+   super = super_directory(item->string);
+   if (!super)
+   dir_len = 0;
+   else
+   dir_len = strlen(super);
+   crc = crc32(0, (Bytef*)super, dir_len);
+   found = lookup_hash(crc, table);
+   current = NULL;
+   new_tree = NULL;
+
+   while (!found) {
+   struct directory_entry *new;
+
+   new = init_directory_entry(super, dir_len);
+   if (!current)
+   current = new;
+   insert_directory_entry(new, table, total_dir_len, ndir, 
crc);
+   if (new_tree != NULL)
+   new->de_nsubtrees = 1;
+   new->next = new_tree;
+   new_tree = new;
+   super = super_directory(super);
+   if (!super)
+   dir_len = 0;
+   else
+   dir_len = strlen(super);
+   crc = crc32(0, (Bytef*)super, dir_len);
+   found = lookup_hash(crc, table);
+   }
+   search = found;
+   while (search->next_hash && strcmp(super, search->pathname) != 
0)
+   search = search->next_hash;
+   if (search && !current)
+   current = search;
+   if (!search && !current)
+   current = new_tree;
+   if (!super && new_tree) {
+   new_tree->next = de->next;
+   de->next = new_tree;
+   de->de_nsubtrees++;
+   } else if (new_tree) {
+   struct directory_entry *temp;
+
+   search = de->next;
+   while (strcmp(super, search->pathname))
+   search = search->next;
+   temp = new_tree;
+   while (temp->next)
+   temp = temp->next;
+   search->de_nsubtrees++;
+   temp->next = search->next;
+   search->next = new_tree;
+   }
+
+   len = strlen(item->string);
+   conflict_entry = create_new_conflict(item->string, len, 
current->de_pathlen);
+   add_conflict_to_directory_entry(current, conflict_entry);
+   for (i = 0; i < 3; i++) {
+   if (ui->mode[i]) {
+   struct conflict_part *cp;
+
+   cp = xmalloc(sizeof(struct conflict_part));
+   cp->flags = (i + 1) << CONFLICT_STAGESHIFT;
+   cp->entry_mode = ui->mode[i];
+   cp->next = NULL;
+   hashcpy(cp->sha1, ui->sha1[i]);
+   add_part_to_conflict_entry(current, 
conflict_entry, cp);
+   }
+   }
+   }
+}
+
 static struct directory_entry *compile_directory_data(struct index_state 
*istate,
int nfile,
unsigned int *ndir,
@@ -1118,6 +1211,7 @@ static struct directory_entry 
*compile_directory_data(str

[PATCH 15/22] read-cache: read index-v5

2013-07-07 Thread Thomas Gummerer
Make git read the index file version 5 without complaining.

This version of the reader reads neither the cache-tree
nor the resolve-undo data, but doesn't choke on an index that
includes such data.

Helped-by: Junio C Hamano 
Helped-by: Nguyen Thai Ngoc Duy 
Helped-by: Thomas Rast 
Signed-off-by: Thomas Gummerer 
---
 Makefile|   1 +
 cache.h |  75 ++-
 read-cache-v5.c | 658 
 read-cache.h|   1 +
 4 files changed, 734 insertions(+), 1 deletion(-)
 create mode 100644 read-cache-v5.c

diff --git a/Makefile b/Makefile
index 73369ae..80e35f5 100644
--- a/Makefile
+++ b/Makefile
@@ -856,6 +856,7 @@ LIB_OBJS += quote.o
 LIB_OBJS += reachable.o
 LIB_OBJS += read-cache.o
 LIB_OBJS += read-cache-v2.o
+LIB_OBJS += read-cache-v5.o
 LIB_OBJS += reflog-walk.o
 LIB_OBJS += refs.o
 LIB_OBJS += remote.o
diff --git a/cache.h b/cache.h
index d77af5e..e110ec8 100644
--- a/cache.h
+++ b/cache.h
@@ -99,7 +99,7 @@ unsigned long git_deflate_bound(git_zstream *, unsigned long);
 #define CACHE_SIGNATURE 0x44495243 /* "DIRC" */
 
 #define INDEX_FORMAT_LB 2
-#define INDEX_FORMAT_UB 4
+#define INDEX_FORMAT_UB 5
 
 /*
  * The "cache_time" is just the low 32 bits of the
@@ -121,6 +121,15 @@ struct stat_data {
unsigned int sd_size;
 };
 
+/*
+ * The *next pointer is used in read_entries_v5 for holding
+ * all the elements of a directory, and points to the next
+ * cache_entry in a directory.
+ *
+ * It is reset by the add_name_hash call in set_index_entry
+ * to set it to point to the next cache_entry in the
+ * correct in-memory format ordering.
+ */
 struct cache_entry {
struct stat_data ce_stat_data;
unsigned int ce_mode;
@@ -133,11 +142,59 @@ struct cache_entry {
char name[FLEX_ARRAY]; /* more */
 };
 
+struct directory_entry {
+   struct directory_entry *next;
+   struct directory_entry *next_hash;
+   struct cache_entry *ce;
+   struct cache_entry *ce_last;
+   struct conflict_entry *conflict;
+   struct conflict_entry *conflict_last;
+   unsigned int conflict_size;
+   unsigned int de_foffset;
+   unsigned int de_cr;
+   unsigned int de_ncr;
+   unsigned int de_nsubtrees;
+   unsigned int de_nfiles;
+   unsigned int de_nentries;
+   unsigned char sha1[20];
+   unsigned short de_flags;
+   unsigned int de_pathlen;
+   char pathname[FLEX_ARRAY];
+};
+
+struct conflict_part {
+   struct conflict_part *next;
+   unsigned short flags;
+   unsigned short entry_mode;
+   unsigned char sha1[20];
+};
+
+struct conflict_entry {
+   struct conflict_entry *next;
+   unsigned int nfileconflicts;
+   struct conflict_part *entries;
+   unsigned int namelen;
+   unsigned int pathlen;
+   char name[FLEX_ARRAY];
+};
+
+struct ondisk_conflict_part {
+   unsigned short flags;
+   unsigned short entry_mode;
+   unsigned char sha1[20];
+};
+
+#define CE_NAMEMASK  (0x0fff)
 #define CE_STAGEMASK (0x3000)
 #define CE_EXTENDED  (0x4000)
 #define CE_VALID (0x8000)
+#define CE_SMUDGED   (0x0400) /* index v5 only flag */
 #define CE_STAGESHIFT 12
 
+#define CONFLICT_CONFLICTED (0x8000)
+#define CONFLICT_STAGESHIFT 13
+#define CONFLICT_STAGEMASK (0x6000)
+
 /*
  * Range 0x in ce_flags is divided into
  * two parts: in-memory flags and on-disk ones.
@@ -174,6 +231,18 @@ struct cache_entry {
 #define CE_EXTENDED_FLAGS (CE_INTENT_TO_ADD | CE_SKIP_WORKTREE)
 
 /*
+ * Representation of the extended on-disk flags in the v5 format.
+ * They must not collide with the ordinary on-disk flags, and need to
+ * fit in 16 bits.  Note however that v5 does not save the name
+ * length.
+ */
+#define CE_INTENT_TO_ADD_V5  (0x4000)
+#define CE_SKIP_WORKTREE_V5  (0x0800)
+#if (CE_VALID|CE_STAGEMASK) & (CE_INTENT_TO_ADD_V5|CE_SKIP_WORKTREE_V5)
+#error "v5 on-disk flags collide with ordinary on-disk flags"
+#endif
+
+/*
  * Safeguard to avoid saving wrong flags:
  *  - CE_EXTENDED2 won't get saved until its semantic is known
  *  - Bits in 0x have been saved in ce_flags already
@@ -212,6 +281,8 @@ static inline unsigned create_ce_flags(unsigned stage)
 #define ce_skip_worktree(ce) ((ce)->ce_flags & CE_SKIP_WORKTREE)
 #define ce_mark_uptodate(ce) ((ce)->ce_flags |= CE_UPTODATE)
 
+#define conflict_stage(c) ((CONFLICT_STAGEMASK & (c)->flags) >> 
CONFLICT_STAGESHIFT)
+
 #define ce_permissions(mode) (((mode) & 0100) ? 0755 : 0644)
 static inline unsigned int create_ce_mode(unsigned int mode)
 {
@@ -259,6 +330,8 @@ static inline unsigned int canon_mode(unsigned int mode)
 }
 
 #define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1)
+#define directory_entry_size(len) (offsetof(struct directory_entry,pathname) + 
(len) + 1)
+#define conflict_entry_size(len) (offsetof(struct conflict_entry,name) + (len) 
+ 1)
 
 /*
  * Options by which the index should be filtered when read partially.
diff --git a/read-cache-v5

[PATCH 17/22] read-cache: read cache-tree in index-v5

2013-07-07 Thread Thomas Gummerer
Since the cache-tree data is saved as part of the directory data,
we already read it at the beginning of the index. The cache-tree
is only converted from this directory data.

The cache-tree data is arranged in a tree, with the children sorted by
pathlen at each node, while the ondisk format is sorted lexically.
So we have to rebuild this format from the on-disk directory list.

Signed-off-by: Thomas Gummerer 
---
 cache-tree.c|   2 +-
 cache-tree.h|   6 
 read-cache-v5.c | 100 
 3 files changed, 107 insertions(+), 1 deletion(-)

diff --git a/cache-tree.c b/cache-tree.c
index 37e4d00..f4b0917 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -31,7 +31,7 @@ void cache_tree_free(struct cache_tree **it_p)
*it_p = NULL;
 }
 
-static int subtree_name_cmp(const char *one, int onelen,
+int subtree_name_cmp(const char *one, int onelen,
const char *two, int twolen)
 {
if (onelen < twolen)
diff --git a/cache-tree.h b/cache-tree.h
index 55d0f59..9aac493 100644
--- a/cache-tree.h
+++ b/cache-tree.h
@@ -21,10 +21,16 @@ struct cache_tree {
struct cache_tree_sub **down;
 };
 
+struct directory_queue {
+   struct directory_queue *down;
+   struct directory_entry *de;
+};
+
 struct cache_tree *cache_tree(void);
 void cache_tree_free(struct cache_tree **);
 void cache_tree_invalidate_path(struct cache_tree *, const char *);
 struct cache_tree_sub *cache_tree_sub(struct cache_tree *, const char *);
+int subtree_name_cmp(const char *, int, const char *, int);
 
 void cache_tree_write(struct strbuf *, struct cache_tree *root);
 struct cache_tree *cache_tree_read(const char *buffer, unsigned long size);
diff --git a/read-cache-v5.c b/read-cache-v5.c
index 193970a..f1ad132 100644
--- a/read-cache-v5.c
+++ b/read-cache-v5.c
@@ -448,6 +448,103 @@ static int read_conflicts(struct conflict_entry **head,
return 0;
 }
 
+static struct cache_tree *convert_one(struct directory_queue *queue, int dirnr)
+{
+   int i, subtree_nr;
+   struct cache_tree *it;
+   struct directory_queue *down;
+
+   it = cache_tree();
+   it->entry_count = queue[dirnr].de->de_nentries;
+   subtree_nr = queue[dirnr].de->de_nsubtrees;
+   if (0 <= it->entry_count)
+   hashcpy(it->sha1, queue[dirnr].de->sha1);
+
+   /*
+* Just a heuristic -- we do not add directories that often but
+* we do not want to have to extend it immediately when we do,
+* hence +2.
+*/
+   it->subtree_alloc = subtree_nr + 2;
+   it->down = xcalloc(it->subtree_alloc, sizeof(struct cache_tree_sub *));
+   down = queue[dirnr].down;
+   for (i = 0; i < subtree_nr; i++) {
+   struct cache_tree *sub;
+   struct cache_tree_sub *subtree;
+   char *buf, *name;
+
+   name = "";
+   buf = strtok(down[i].de->pathname, "/");
+   while (buf) {
+   name = buf;
+   buf = strtok(NULL, "/");
+   }
+   sub = convert_one(down, i);
+   if(!sub)
+   goto free_return;
+   subtree = cache_tree_sub(it, name);
+   subtree->cache_tree = sub;
+   }
+   if (subtree_nr != it->subtree_nr)
+   die("cache-tree: internal error");
+   return it;
+ free_return:
+   cache_tree_free(&it);
+   return NULL;
+}
+
+static int compare_cache_tree_elements(const void *a, const void *b)
+{
+   const struct directory_entry *de1, *de2;
+
+   de1 = ((const struct directory_queue *)a)->de;
+   de2 = ((const struct directory_queue *)b)->de;
+   return subtree_name_cmp(de1->pathname, de1->de_pathlen,
+   de2->pathname, de2->de_pathlen);
+}
+
+static struct directory_entry *sort_directories(struct directory_entry *de,
+   struct directory_queue *queue)
+{
+   int i, nsubtrees;
+
+   nsubtrees = de->de_nsubtrees;
+   for (i = 0; i < nsubtrees; i++) {
+   struct directory_entry *new_de;
+   de = de->next;
+   new_de = xmalloc(directory_entry_size(de->de_pathlen));
+   memcpy(new_de, de, directory_entry_size(de->de_pathlen));
+   queue[i].de = new_de;
+   if (de->de_nsubtrees) {
+   queue[i].down = xcalloc(de->de_nsubtrees,
+   sizeof(struct directory_queue));
+   de = sort_directories(de,
+   queue[i].down);
+   }
+   }
+   qsort(queue, nsubtrees, sizeof(struct directory_queue),
+   compare_cache_tree_elements);
+   return de;
+}
+
+/*
+ * This function modifies the directory argument that is given to it.
+ * Don't use it if the directory entries are still needed after.
+ */
+stat

[PATCH 21/22] update-index.c: rewrite index when index-version is given

2013-07-07 Thread Thomas Gummerer
Make update-index always rewrite the index when an index-version
is given, even if the index already has the right version.
This option is used for performance testing the writer and
reader.

Signed-off-by: Thomas Gummerer 
---
 builtin/update-index.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/builtin/update-index.c b/builtin/update-index.c
index 03f6426..7954ddb 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -6,6 +6,7 @@
 #include "cache.h"
 #include "quote.h"
 #include "cache-tree.h"
+#include "read-cache.h"
 #include "tree-walk.h"
 #include "builtin.h"
 #include "refs.h"
@@ -863,8 +864,7 @@ int cmd_update_index(int argc, const char **argv, const 
char *prefix)
preferred_index_format,
INDEX_FORMAT_LB, INDEX_FORMAT_UB);
 
-   if (the_index.version != preferred_index_format)
-   active_cache_changed = 1;
+   active_cache_changed = 1;
the_index.version = preferred_index_format;
}
 
-- 
1.8.3.453.g1dfc63d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 03/22] read-cache: move index v2 specific functions to their own file

2013-07-07 Thread Thomas Gummerer
Move index version 2 specific functions to their own file. The functions
that are not specific to an index version stay in read-cache.c, while the
index version 2 specific functions move to read-cache-v2.c.

Helped-by: Nguyen Thai Ngoc Duy 
Signed-off-by: Thomas Gummerer 
---
 Makefile |   2 +
 cache.h  |  16 +-
 read-cache-v2.c  | 556 +
 read-cache.c | 575 ---
 read-cache.h |  57 +
 test-index-version.c |   5 +
 6 files changed, 661 insertions(+), 550 deletions(-)
 create mode 100644 read-cache-v2.c
 create mode 100644 read-cache.h

diff --git a/Makefile b/Makefile
index 5a68fe5..73369ae 100644
--- a/Makefile
+++ b/Makefile
@@ -711,6 +711,7 @@ LIB_H += progress.h
 LIB_H += prompt.h
 LIB_H += quote.h
 LIB_H += reachable.h
+LIB_H += read-cache.h
 LIB_H += reflog-walk.h
 LIB_H += refs.h
 LIB_H += remote.h
@@ -854,6 +855,7 @@ LIB_OBJS += prompt.o
 LIB_OBJS += quote.o
 LIB_OBJS += reachable.o
 LIB_OBJS += read-cache.o
+LIB_OBJS += read-cache-v2.o
 LIB_OBJS += reflog-walk.o
 LIB_OBJS += refs.o
 LIB_OBJS += remote.o
diff --git a/cache.h b/cache.h
index 7af853b..5082b34 100644
--- a/cache.h
+++ b/cache.h
@@ -95,19 +95,8 @@ unsigned long git_deflate_bound(git_zstream *, unsigned 
long);
  */
 #define DEFAULT_GIT_PORT 9418
 
-/*
- * Basic data structures for the directory cache
- */
 
 #define CACHE_SIGNATURE 0x44495243 /* "DIRC" */
-struct cache_version_header {
-   unsigned int hdr_signature;
-   unsigned int hdr_version;
-};
-
-struct cache_header {
-   unsigned int hdr_entries;
-};
 
 #define INDEX_FORMAT_LB 2
 #define INDEX_FORMAT_UB 4
@@ -280,6 +269,7 @@ struct index_state {
 initialized : 1;
struct hash_table name_hash;
struct hash_table dir_hash;
+   struct index_ops *ops;
 };
 
 extern struct index_state the_index;
@@ -489,8 +479,8 @@ extern void *read_blob_data_from_index(struct index_state 
*, const char *, unsig
 #define CE_MATCH_RACY_IS_DIRTY 02
 /* do stat comparison even if CE_SKIP_WORKTREE is true */
 #define CE_MATCH_IGNORE_SKIP_WORKTREE  04
-extern int ie_match_stat(const struct index_state *, const struct cache_entry 
*, struct stat *, unsigned int);
-extern int ie_modified(const struct index_state *, const struct cache_entry *, 
struct stat *, unsigned int);
+extern int ie_match_stat(struct index_state *, const struct cache_entry *, 
struct stat *, unsigned int);
+extern int ie_modified(struct index_state *, const struct cache_entry *, 
struct stat *, unsigned int);
 
 #define PATHSPEC_ONESTAR 1 /* the pathspec pattern sastisfies GFNM_ONESTAR 
*/
 
diff --git a/read-cache-v2.c b/read-cache-v2.c
new file mode 100644
index 000..a6883c3
--- /dev/null
+++ b/read-cache-v2.c
@@ -0,0 +1,556 @@
+#include "cache.h"
+#include "read-cache.h"
+#include "resolve-undo.h"
+#include "cache-tree.h"
+#include "varint.h"
+
+/* Mask for the name length in ce_flags in the on-disk index */
+#define CE_NAMEMASK  (0x0fff)
+
+struct cache_header {
+   unsigned int hdr_entries;
+};
+
+/*
+ * Index File I/O
+ */
+
+/*
+ * dev/ino/uid/gid/size are also just tracked to the low 32 bits
+ * Again - this is just a (very strong in practice) heuristic that
+ * the inode hasn't changed.
+ *
+ * We save the fields in big-endian order to allow using the
+ * index file over NFS transparently.
+ */
+struct ondisk_cache_entry {
+   struct cache_time ctime;
+   struct cache_time mtime;
+   unsigned int dev;
+   unsigned int ino;
+   unsigned int mode;
+   unsigned int uid;
+   unsigned int gid;
+   unsigned int size;
+   unsigned char sha1[20];
+   unsigned short flags;
+   char name[FLEX_ARRAY]; /* more */
+};
+
+/*
+ * This struct is used when CE_EXTENDED bit is 1
+ * The struct must match ondisk_cache_entry exactly from
+ * ctime till flags
+ */
+struct ondisk_cache_entry_extended {
+   struct cache_time ctime;
+   struct cache_time mtime;
+   unsigned int dev;
+   unsigned int ino;
+   unsigned int mode;
+   unsigned int uid;
+   unsigned int gid;
+   unsigned int size;
+   unsigned char sha1[20];
+   unsigned short flags;
+   unsigned short flags2;
+   char name[FLEX_ARRAY]; /* more */
+};
+
+/* These are only used for v3 or lower */
+#define align_flex_name(STRUCT,len) ((offsetof(struct STRUCT,name) + (len) + 
8) & ~7)
+#define ondisk_cache_entry_size(len) align_flex_name(ondisk_cache_entry,len)
+#define ondisk_cache_entry_extended_size(len) 
align_flex_name(ondisk_cache_entry_extended,len)
+#define ondisk_ce_size(ce) (((ce)->ce_flags & CE_EXTENDED) ? \
+   ondisk_cache_entry_extended_size(ce_namelen(ce)) : \
+   ondisk_cache_entry_size(ce_namelen(ce)))
+
+static int veri

[PATCH 19/22] read-cache: write index-v5 cache-tree data

2013-07-07 Thread Thomas Gummerer
Write the cache-tree data for the index version 5 file format. The
in-memory cache-tree data is converted to the ondisk format, by adding
it to the directory entries, that were compiled from the cache-entries
in the step before.

Signed-off-by: Thomas Gummerer 
---
 read-cache-v5.c | 53 +
 1 file changed, 53 insertions(+)

diff --git a/read-cache-v5.c b/read-cache-v5.c
index f056f6b..306de30 100644
--- a/read-cache-v5.c
+++ b/read-cache-v5.c
@@ -960,6 +960,57 @@ static struct conflict_entry 
*create_conflict_entry_from_ce(struct cache_entry *
return create_new_conflict(ce->name, ce_namelen(ce), pathlen);
 }
 
+static void convert_one_to_ondisk_v5(struct hash_table *table, struct 
cache_tree *it,
+   const char *path, int pathlen, uint32_t crc)
+{
+   int i;
+   struct directory_entry *found, *search;
+
+   crc = crc32(crc, (Bytef*)path, pathlen);
+   found = lookup_hash(crc, table);
+   search = found;
+   while (search && strcmp(path, search->pathname + search->de_pathlen - 
strlen(path)) != 0)
+   search = search->next_hash;
+   if (!search)
+   return;
+   /*
+* The number of subtrees is already calculated by
+* compile_directory_data, therefore we only need to
+* add the entry_count
+*/
+   search->de_nentries = it->entry_count;
+   if (0 <= it->entry_count)
+   hashcpy(search->sha1, it->sha1);
+   if (strcmp(path, "") != 0)
+   crc = crc32(crc, (Bytef*)"/", 1);
+
+#if DEBUG
+   if (0 <= it->entry_count)
+   fprintf(stderr, "cache-tree <%.*s> (%d ent, %d subtree) %s\n",
+   pathlen, path, it->entry_count, it->subtree_nr,
+   sha1_to_hex(it->sha1));
+   else
+   fprintf(stderr, "cache-tree <%.*s> (%d subtree) invalid\n",
+   pathlen, path, it->subtree_nr);
+#endif
+
+   for (i = 0; i < it->subtree_nr; i++) {
+   struct cache_tree_sub *down = it->down[i];
+   if (i) {
+   struct cache_tree_sub *prev = it->down[i-1];
+   if (subtree_name_cmp(down->name, down->namelen,
+prev->name, prev->namelen) <= 0)
+   die("fatal - unsorted cache subtree");
+   }
+   convert_one_to_ondisk_v5(table, down->cache_tree, down->name, 
down->namelen, crc);
+   }
+}
+
+static void cache_tree_to_ondisk_v5(struct hash_table *table, struct 
cache_tree *root)
+{
+   convert_one_to_ondisk_v5(table, root, "", 0, 0);
+}
+
 static struct directory_entry *compile_directory_data(struct index_state 
*istate,
int nfile,
unsigned int *ndir,
@@ -1065,6 +1116,8 @@ static struct directory_entry 
*compile_directory_data(struct index_state *istate
previous_entry->next = no_subtrees;
}
}
+   if (istate->cache_tree)
+   cache_tree_to_ondisk_v5(&table, istate->cache_tree);
return de;
 }
 
-- 
1.8.3.453.g1dfc63d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 18/22] read-cache: write index-v5

2013-07-07 Thread Thomas Gummerer
Write the index version 5 file format to disk. This version doesn't
write the cache-tree data and resolve-undo data to the file.

The main work is done when filtering out the directories from the
current in-memory format; the conflicts and the file data are
calculated in the same pass.

Helped-by: Nguyen Thai Ngoc Duy 
Helped-by: Thomas Rast 
Signed-off-by: Thomas Gummerer 
---
 cache.h |   8 +
 read-cache-v5.c | 594 +++-
 read-cache.c|  11 +-
 read-cache.h|   1 +
 4 files changed, 611 insertions(+), 3 deletions(-)

diff --git a/cache.h b/cache.h
index e110ec8..a92b490 100644
--- a/cache.h
+++ b/cache.h
@@ -581,6 +581,7 @@ extern int unmerged_index(const struct index_state *);
 extern int verify_path(const char *path);
 extern struct cache_entry *index_name_exists(struct index_state *istate, const 
char *name, int namelen, int igncase);
 extern int index_name_pos(const struct index_state *, const char *name, int 
namelen);
+extern struct directory_entry *init_directory_entry(char *pathname, int len);
 #define ADD_CACHE_OK_TO_ADD 1  /* Ok to add */
 #define ADD_CACHE_OK_TO_REPLACE 2  /* Ok to replace file/directory */
 #define ADD_CACHE_SKIP_DFCHECK 4   /* Ok to skip DF conflict checks */
@@ -1379,6 +1380,13 @@ static inline ssize_t write_str_in_full(int fd, const 
char *str)
return write_in_full(fd, str, strlen(str));
 }
 
+/* index-v5 helper functions */
+extern char *super_directory(const char *filename);
+extern void insert_directory_entry(struct directory_entry *, struct hash_table 
*, int *, unsigned int *, uint32_t);
+extern void add_conflict_to_directory_entry(struct directory_entry *, struct 
conflict_entry *);
+extern void add_part_to_conflict_entry(struct directory_entry *, struct 
conflict_entry *, struct conflict_part *);
+extern struct conflict_entry *create_new_conflict(char *, int, int);
+
 /* pager.c */
 extern void setup_pager(void);
 extern const char *pager_program;
diff --git a/read-cache-v5.c b/read-cache-v5.c
index f1ad132..f056f6b 100644
--- a/read-cache-v5.c
+++ b/read-cache-v5.c
@@ -788,10 +788,602 @@ static void index_change_filter_opts_v5(struct 
index_state *istate, struct filte
read_index_filtered(istate, opts);
 }
 
+#define WRITE_BUFFER_SIZE 8192
+static unsigned char write_buffer[WRITE_BUFFER_SIZE];
+static unsigned long write_buffer_len;
+
+static int ce_write_flush(int fd)
+{
+   unsigned int buffered = write_buffer_len;
+   if (buffered) {
+   if (write_in_full(fd, write_buffer, buffered) != buffered)
+   return -1;
+   write_buffer_len = 0;
+   }
+   return 0;
+}
+
+static int ce_write(uint32_t *crc, int fd, void *data, unsigned int len)
+{
+   if (crc)
+   *crc = crc32(*crc, (Bytef*)data, len);
+   while (len) {
+   unsigned int buffered = write_buffer_len;
+   unsigned int partial = WRITE_BUFFER_SIZE - buffered;
+   if (partial > len)
+   partial = len;
+   memcpy(write_buffer + buffered, data, partial);
+   buffered += partial;
+   if (buffered == WRITE_BUFFER_SIZE) {
+   write_buffer_len = buffered;
+   if (ce_write_flush(fd))
+   return -1;
+   buffered = 0;
+   }
+   write_buffer_len = buffered;
+   len -= partial;
+   data = (char *) data + partial;
+   }
+   return 0;
+}
+
+static int ce_flush(int fd)
+{
+   unsigned int left = write_buffer_len;
+
+   if (left)
+   write_buffer_len = 0;
+
+   if (write_in_full(fd, write_buffer, left) != left)
+   return -1;
+
+   return 0;
+}
+
+static void ce_smudge_racily_clean_entry(struct cache_entry *ce)
+{
+   /*
+* This method shall only be called if the timestamp of ce
+* is racy (check with is_racy_timestamp). If the timestamp
+* is racy, the writer will set the CE_SMUDGED flag.
+*
+* The reader (match_stat_basic) will then take care
+* of checking if the entry is really changed or not, by
+* taking into account the size and the stat_crc and if
+* that hasn't changed checking the sha1.
+*/
+   ce->ce_flags |= CE_SMUDGED;
+}
+
+char *super_directory(const char *filename)
+{
+   char *slash;
+
+   slash = strrchr(filename, '/');
+   if (slash)
+   return xmemdupz(filename, slash-filename);
+   return NULL;
+}
+
+struct directory_entry *init_directory_entry(char *pathname, int len)
+{
+   struct directory_entry *de = xmalloc(directory_entry_size(len));
+
+   memcpy(de->pathname, pathname, len);
+   de->pathname[len] = '\0';
+   de->de_flags  = 0;
+   de->de_foffset= 0;
+   de->de_cr = 0;
+   de->de_ncr= 0;
+  

[PATCH 13/22] documentation: add documentation of the index-v5 file format

2013-07-07 Thread Thomas Gummerer
Add documentation of the index file format version 5 to
Documentation/technical.

Helped-by: Michael Haggerty 
Helped-by: Junio C Hamano 
Helped-by: Thomas Rast 
Helped-by: Nguyen Thai Ngoc Duy 
Helped-by: Robin Rosenberg 
Signed-off-by: Thomas Gummerer 
---
 Documentation/technical/index-file-format-v5.txt | 296 +++
 1 file changed, 296 insertions(+)
 create mode 100644 Documentation/technical/index-file-format-v5.txt

diff --git a/Documentation/technical/index-file-format-v5.txt 
b/Documentation/technical/index-file-format-v5.txt
new file mode 100644
index 000..4213087
--- /dev/null
+++ b/Documentation/technical/index-file-format-v5.txt
@@ -0,0 +1,296 @@
+GIT index format
+
+
+== The git index
+
+   The git index file (.git/index) documents the status of the files
+ in the git staging area.
+
+   The staging area is used for preparing commits, merging, etc.
+
+== The git index file format
+
+   All binary numbers are in network byte order. Version 5 is described
+ here. The index file consists of various sections. They appear in
+ the following order in the file.
+
+   - header: the description of the index format, including its signature,
+ version and various other fields that are used internally.
+
+   - diroffsets (ndir entries of "directory offset"): A 4-byte offset
+   relative to the beginning of the "direntries block" (see below)
+   for each of the ndir directories in the index, sorted by pathname
+   (of the directory it's pointing to). [1]
+
+   - direntries (ndir entries of "directory entry"): A directory entry
+   for each of the ndir directories in the index, sorted by pathname
+   (see below). [2]
+
+   - fileoffsets (nfile entries of "file offset"): A 4-byte offset
+   relative to the beginning of the fileentries block (see below)
+   for each of the nfile files in the index. [1]
+
+   - fileentries (nfile entries of "file entry"): A file entry for
+   each of the nfile files in the index (see below).
+
+   - crdata: A number of entries for conflicted data/resolved conflicts
+   (see below).
+
+   - Extensions (Currently none, see below in the future)
+
+ Extensions are identified by signature. Optional extensions can
+ be ignored if GIT does not understand them.
+
+ GIT supports an arbitrary number of extensions, but currently none
+ is implemented. [3]
+
+ extsig (32-bits): extension signature. If the first byte is 'A'..'Z'
+ the extension is optional and can be ignored.
+
+ extsize (32-bits): size of the extension, excluding the header
+   (extsig, extsize, extchecksum).
+
+ extchecksum (32-bits): crc32 checksum of the extension signature
+   and size.
+
+- Extension data.
+
+== Header
+   sig (32-bits): Signature:
+ The signature is { 'D', 'I', 'R', 'C' } (stands for "dircache")
+
+   vnr (32-bits): Version number:
+ The current supported versions are 2, 3, 4 and 5.
+
+   ndir (32-bits): number of directories in the index.
+
+   nfile (32-bits): number of file entries in the index.
+
+   fblockoffset (32-bits): offset to the file block, relative to the
+ beginning of the file.
+
+   - Offset to the extensions.
+
+ nextensions (32-bits): number of extensions.
+
+ extoffset (32-bits): offset to the extension. (Possibly none, as
+   many as indicated in the 4-byte number of extensions)
+
+   headercrc (32-bits): crc checksum including the header and the
+ offsets to the extensions.
+
+
+== Directory offsets (diroffsets)
+
+  diroffset (32-bits): offset to the directory relative to the beginning
+of the index file. There are ndir + 1 offsets in the diroffset table,
+the last is pointing to the end of the last direntry. With this last
+entry, we are able to replace the strlen when reading the directory
+name, by calculating it from diroffset[n+1]-diroffset[n]-61.  61 is the
+size of the directory data, which follows each directory + the
+crc sum + the NUL byte.
+
+  This part is needed for making the directory entries bisectable and
+thus allowing a binary search.
+
+== Directory entry (direntries)
+
+  Directory entries are sorted in lexicographic order by the name
+of their path starting with the root.
+
+  pathname (variable length, nul terminated): relative to top level
+directory (without the leading slash). '/' is used as path
+separator. A string of length 0 ('') indicates the root directory.
+The special path components ".", and ".." (without quotes) are
+disallowed. The path also includes a trailing slash. [9]
+
+  foffset (32-bits): offset to the lexicographically first file in
+the file offsets (fileoffsets), relative to the beginning of
+the fileoffset block.
+
+  cr (32-bits): offset to conflicted/resolved data at the end of the
+index. 0 if there is no such data. [4]
+
+  ncr (32-bits): number of conflicted/resolved data entries at the
+end 

[PATCH 14/22] read-cache: make in-memory format aware of stat_crc

2013-07-07 Thread Thomas Gummerer
Make the in-memory format aware of the stat_crc used by index-v5.
It is simply ignored by index versions prior to v5.

Signed-off-by: Thomas Gummerer 
---
 cache.h  |  1 +
 read-cache.c | 25 +
 2 files changed, 26 insertions(+)

diff --git a/cache.h b/cache.h
index f6c3407..d77af5e 100644
--- a/cache.h
+++ b/cache.h
@@ -127,6 +127,7 @@ struct cache_entry {
unsigned int ce_flags;
unsigned int ce_namelen;
unsigned char sha1[20];
+   uint32_t ce_stat_crc;
struct cache_entry *next; /* used by name_hash */
struct cache_entry *next_ce; /* used to keep a list of cache entries */
char name[FLEX_ARRAY]; /* more */
diff --git a/read-cache.c b/read-cache.c
index c81e643..5ec0222 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -108,6 +108,29 @@ int match_stat_data(const struct stat_data *sd, struct 
stat *st)
return changed;
 }
 
+static uint32_t calculate_stat_crc(struct cache_entry *ce)
+{
+   unsigned int ctimens = 0;
+   uint32_t stat, stat_crc;
+
+   stat = htonl(ce->ce_stat_data.sd_ctime.sec);
+   stat_crc = crc32(0, (Bytef*)&stat, 4);
+#ifdef USE_NSEC
+   ctimens = ce->ce_stat_data.sd_ctime.nsec;
+#endif
+   stat = htonl(ctimens);
+   stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
+   stat = htonl(ce->ce_stat_data.sd_ino);
+   stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
+   stat = htonl(ce->ce_stat_data.sd_dev);
+   stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
+   stat = htonl(ce->ce_stat_data.sd_uid);
+   stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
+   stat = htonl(ce->ce_stat_data.sd_gid);
+   stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
+   return stat_crc;
+}
+
 /*
  * This only updates the "non-critical" parts of the directory
  * cache, ie the parts that aren't tracked by GIT, and only used
@@ -122,6 +145,8 @@ void fill_stat_cache_info(struct cache_entry *ce, struct 
stat *st)
 
if (S_ISREG(st->st_mode))
ce_mark_uptodate(ce);
+
+   ce->ce_stat_crc = calculate_stat_crc(ce);
 }
 
 static int ce_compare_data(const struct cache_entry *ce, struct stat *st)
-- 
1.8.3.453.g1dfc63d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 22/22] p0003-index.sh: add perf test for the index formats

2013-07-07 Thread Thomas Gummerer
From: Thomas Rast 

Add a performance test for index version [23]/4/5 by using
git update-index --index-version=x, thus testing both the reader
and the writer speed of all index formats.

Signed-off-by: Thomas Rast 
Signed-off-by: Thomas Gummerer 
---
 t/perf/p0003-index.sh | 59 +++
 1 file changed, 59 insertions(+)
 create mode 100755 t/perf/p0003-index.sh

diff --git a/t/perf/p0003-index.sh b/t/perf/p0003-index.sh
new file mode 100755
index 000..3e02868
--- /dev/null
+++ b/t/perf/p0003-index.sh
@@ -0,0 +1,59 @@
+#!/bin/sh
+
+test_description="Tests index versions [23]/4/5"
+
+. ./perf-lib.sh
+
+test_perf_large_repo
+
+test_expect_success "convert to v3" "
+   git update-index --index-version=2
+"
+
+test_perf "v[23]: update-index" "
+   git update-index --index-version=2 >/dev/null
+"
+
+subdir=$(git ls-files | sed 's#/[^/]*$##' | grep -v '^$' | uniq | tail -n 30 | 
head -1)
+
+test_perf "v[23]: grep nonexistent -- subdir" "
+   test_must_fail git grep nonexistent -- $subdir >/dev/null
+"
+
+test_perf "v[23]: ls-files -- subdir" "
+   git ls-files $subdir >/dev/null
+"
+
+test_expect_success "convert to v4" "
+   git update-index --index-version=4
+"
+
+test_perf "v4: update-index" "
+   git update-index --index-version=4 >/dev/null
+"
+
+test_perf "v4: grep nonexistent -- subdir" "
+   test_must_fail git grep nonexistent -- $subdir >/dev/null
+"
+
+test_perf "v4: ls-files -- subdir" "
+   git ls-files $subdir >/dev/null
+"
+
+test_expect_success "convert to v5" "
+   git update-index --index-version=5
+"
+
+test_perf "v5: update-index" "
+   git update-index --index-version=5 >/dev/null
+"
+
+test_perf "v5: grep nonexistent -- subdir" "
+   test_must_fail git grep nonexistent -- $subdir >/dev/null
+"
+
+test_perf "v5: ls-files -- subdir" "
+   git ls-files $subdir >/dev/null
+"
+
+test_done
-- 
1.8.3.453.g1dfc63d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH/RFC] blame: accept multiple -L ranges

2013-07-07 Thread Eric Sunshine
git-blame accepts only zero or one -L option. Clients requiring blame
information for multiple disjoint ranges are therefore forced either to
invoke git-blame multiple times, once for each range, or only once with
no -L option to cover the entire file, which can be costly. Teach
git-blame to accept multiple -L ranges.

Overlapping and out-of-order ranges are accepted and handled gracefully.
For example:

  git blame -L 3,+4 -L 91,+7 -L 2,3 -L 89,100 source.c

emits blame information for lines 2-6 and 89-100.

Signed-off-by: Eric Sunshine 
---

This is RFC because it lacks documentation and test updates, and because
I want to make sure the approach is sound and not abusive of the blame
machinery.

Rather than sorting and coalescing input -L ranges manually, existing
add_blame_range() and coalesce() are (ab)used to normalize the input.
This requires a small change to coalesce() to deal with potentially
overlapping ranges since it never otherwise encounters overlap during
normal blame operation.

This patch is somewhat less scary when whitespace changes are ignored.


 builtin/blame.c | 70 +
 1 file changed, 36 insertions(+), 34 deletions(-)

diff --git a/builtin/blame.c b/builtin/blame.c
index 079dcd3..f26ff44 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -278,8 +278,11 @@ static void coalesce(struct scoreboard *sb)
for (ent = sb->ent; ent && (next = ent->next); ent = next) {
if (same_suspect(ent->suspect, next->suspect) &&
ent->guilty == next->guilty &&
-   ent->s_lno + ent->num_lines == next->s_lno) {
-   ent->num_lines += next->num_lines;
+   ent->s_lno + ent->num_lines >= next->s_lno) {
+   int ent_top = ent->lno + ent->num_lines;
+   int next_top = next->lno + next->num_lines;
+   if (ent_top < next_top)
+   ent->num_lines = next_top - ent->s_lno;
ent->next = next->next;
if (ent->next)
ent->next->prev = ent;
@@ -2245,17 +2248,6 @@ static int blame_move_callback(const struct option 
*option, const char *arg, int
return 0;
 }
 
-static int blame_bottomtop_callback(const struct option *option, const char 
*arg, int unset)
-{
-   const char **bottomtop = option->value;
-   if (!arg)
-   return -1;
-   if (*bottomtop)
-   die("More than one '-L n,m' option given");
-   *bottomtop = arg;
-   return 0;
-}
-
 int cmd_blame(int argc, const char **argv, const char *prefix)
 {
struct rev_info revs;
@@ -2263,11 +2255,11 @@ int cmd_blame(int argc, const char **argv, const char 
*prefix)
struct scoreboard sb;
struct origin *o;
struct blame_entry *ent;
-   long dashdash_pos, bottom, top, lno;
+   long dashdash_pos, lno;
const char *final_commit_name = NULL;
enum object_type type;
 
-   static const char *bottomtop = NULL;
+   static struct string_list ranges;
static int output_option = 0, opt = 0;
static int show_stats = 0;
static const char *revs_file = NULL;
@@ -2293,13 +2285,14 @@ int cmd_blame(int argc, const char **argv, const char 
*prefix)
OPT_STRING(0, "contents", &contents_from, N_("file"), N_("Use 
's contents as the final image")),
{ OPTION_CALLBACK, 'C', NULL, &opt, N_("score"), N_("Find line 
copies within and across files"), PARSE_OPT_OPTARG, blame_copy_callback },
{ OPTION_CALLBACK, 'M', NULL, &opt, N_("score"), N_("Find line 
movements within and across files"), PARSE_OPT_OPTARG, blame_move_callback },
-   OPT_CALLBACK('L', NULL, &bottomtop, N_("n,m"), N_("Process only 
line range n,m, counting from 1"), blame_bottomtop_callback),
+   OPT_STRING_LIST('L', NULL, &ranges, N_("n,m"), N_("Process only 
line range n,m, counting from 1")),
OPT__ABBREV(&abbrev),
OPT_END()
};
 
struct parse_opt_ctx_t ctx;
int cmd_is_annotate = !strcmp(argv[0], "annotate");
+   unsigned int range_i;
 
git_config(git_blame_config, NULL);
init_revisions(&revs, NULL);
@@ -2492,24 +2485,33 @@ parse_done:
num_read_blob++;
lno = prepare_lines(&sb);
 
-   bottom = top = 0;
-   if (bottomtop)
-   prepare_blame_range(&sb, bottomtop, lno, &bottom, &top);
-   if (bottom < 1)
-   bottom = 1;
-   if (top < 1)
-   top = lno;
-   bottom--;
-   if (lno < top || lno < bottom)
-   die("file %s has only %lu lines", path, lno);
-
-   ent = xcalloc(1, sizeof(*ent));
-   ent->lno = bottom;
-   ent->num_lines = top - bottom;
-   ent->suspect = o;
-   ent->s_lno = bottom;
-
-   sb.ent = ent;
+   if (!ranges.nr)
+   string

Re: [PATCH v2 2/2] send-email: introduce sendemail.smtpsslcertpath

2013-07-07 Thread John Keeping
On Sat, Jul 06, 2013 at 09:12:31PM -0700, Junio C Hamano wrote:
> John Keeping  writes:
> 
> > @@ -1096,19 +1101,18 @@ sub smtp_auth_maybe {
> >  # Helper to come up with SSL/TLS certification validation params
> >  # and warn when doing no verification
> >  sub ssl_verify_params {
> > -   use IO::Socket::SSL qw(SSL_VERIFY_PEER SSL_VERIFY_NONE);
> > -
> > -   if (!defined $smtp_ssl_cert_path) {
> > -   $smtp_ssl_cert_path = "/etc/ssl/certs";
> > +   if ($smtp_ssl_verify == 0) {
> > +   return (SSL_verify_mode => IO::Socket::SSL->SSL_VERIFY_NONE);
> 
> I do not see any "use IO::Socket::SSL" anywhere after applying this
> patch.  Is this expected to work?

I don't get any errors about unknown variables when running it.  Do we
get IO::Socket::SSL imported through Net::SMTP::SSL, which extends it?
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] test-lib.sh - cygwin does not have usable FIFOs

2013-07-07 Thread Torsten Bögershausen
On 2013-07-07 02.55, Jonathan Nieder wrote:
> Mark Levedahl wrote:
> 
>> Do not use FIFOs on cygwin, they do not work. Cygwin includes
>> coreutils, so has mkfifo, and that command does something. However,
>> the resultant named pipe is known (on the Cygwin mailing list at
>> least) to not work correctly.
> 
> Hm.  How would you recommend going about writing a script that takes
> output from a command, transforms it, and then feeds it back into
> that command's input?  Are sockets a more reliable way to do this kind
> of IPC on Cygwin?
> 
> See reinit_git and try_dump from t9010-svn-fe.sh for context.
> 
> Thanks,
> Jonathan

t9010 needs PIPE in most test cases as a prerequisite.
And if PIPE isn't available, the tests can not be run.

Are you suggesting to replace the named pipes with a TCP socket?

Disabling PIPE under cygwin seems to be the right thing to do,
or do I miss something ?

/Torsten
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 09/16] documentation: add documentation for the bitmap format

2013-07-07 Thread Jeff King
On Mon, Jul 01, 2013 at 11:47:32AM -0700, Colby Ranger wrote:

> > But I think we are comparing
> > apples to steaks here, Vincent is (rightfully) concerned about process
> > startup performance, whereas our timings were assuming the process was
> > already running.
> >
> 
> I did some timing on loading the reverse index for the kernel and it
> is pretty slow (~1200ms). I just submitted a fix to do a bucket sort
> and reduced that to ~450ms, which is still slow but much better:

On my machine, loading the kernel revidx in C git is about ~830ms. I
switched the qsort() call to a radix/bucket sort, and have it down to
~200ms. So definitely much better, though that still leaves a bit to be
desired for quick commands. E.g., "git rev-list --count A..B" should
become fairly instantaneous with bitmaps, but in many cases the revindex
loading will take longer than it would have to simply do the actual
traversal.

-Peff
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] git-config: update doc for --get with multiple values

2013-07-07 Thread John Keeping
On Wed, Jul 03, 2013 at 11:47:50AM -0700, Junio C Hamano wrote:
> John Keeping  writes:
> 
> > Since commit 00b347d (git-config: do not complain about duplicate
> > entries, 2012-10-23), "git config --get" does not exit with an error if
> > there are multiple values for the specified key but instead returns the
> > last value.  Update the documentation to reflect this.
> >
> > Signed-off-by: John Keeping 
> > ---
> >  Documentation/git-config.txt | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/Documentation/git-config.txt b/Documentation/git-config.txt
> > index 19a7be0..fbad05e 100644
> > --- a/Documentation/git-config.txt
> > +++ b/Documentation/git-config.txt
> > @@ -82,7 +82,7 @@ OPTIONS
> >  --get::
> > Get the value for a given key (optionally filtered by a regex
> > matching the value). Returns error code 1 if the key was not
> > -   found and error code 2 if multiple key values were found.
> > +   found and the last value if multiple key values were found.
> >  
> >  --get-all::
> > Like get, but does not fail if the number of values for the key
> 
> Thanks.
> 
> I wondered if we should explain the significance of "last" a bit
> more (like "this results in the value from the most specific
> configuration file to be used, the ones in $GIT_DIR/config
> overriding what is in $HOME/.gitconfig"), but I do not have a strong
> opinion either way.  Let's queue this for 'maint' for now.

I don't think that change belongs here.  How about doing something like
this in the FILES section (the first two hunks are just reordering the
existing list, only the last hunk changes the content):

-- >8 --
diff --git a/Documentation/git-config.txt b/Documentation/git-config.txt
index fbad05e..99dc497 100644
--- a/Documentation/git-config.txt
+++ b/Documentation/git-config.txt
@@ -206,12 +206,8 @@ FILES
 If not set explicitly with '--file', there are four files where
 'git config' will search for configuration options:
 
-$GIT_DIR/config::
-   Repository specific configuration file.
-
-~/.gitconfig::
-   User-specific configuration file. Also called "global"
-   configuration file.
+$(prefix)/etc/gitconfig::
+   System-wide configuration file.
 
 $XDG_CONFIG_HOME/git/config::
Second user-specific configuration file. If $XDG_CONFIG_HOME is not set
@@ -221,8 +217,12 @@ $XDG_CONFIG_HOME/git/config::
you sometimes use older versions of Git, as support for this
file was added fairly recently.
 
-$(prefix)/etc/gitconfig::
-   System-wide configuration file.
+~/.gitconfig::
+   User-specific configuration file. Also called "global"
+   configuration file.
+
+$GIT_DIR/config::
+   Repository specific configuration file.
 
 If no further options are given, all reading options will read all of these
 files that are available. If the global or the system-wide configuration
@@ -230,6 +230,10 @@ file are not available they will be ignored. If the 
repository configuration
 file is not available or readable, 'git config' will exit with a non-zero
 error code. However, in neither case will an error message be issued.
 
+The files are read in the order given above, with last value found taking
+precedence over values read earlier.  When multiple values are taken then all
+values of a key from all files will be used.
+
 All writing options will per default write to the repository specific
 configuration file. Note that this also affects options like '--replace-all'
 and '--unset'. *'git config' will only ever change one file at a time*.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH/RFC] blame: accept multiple -L ranges

2013-07-07 Thread Junio C Hamano
Eric Sunshine  writes:

> git-blame accepts only zero or one -L option. Clients requiring blame
> information for multiple disjoint ranges are therefore forced either to
> invoke git-blame multiple times, once for each range, or only once with
> no -L option to cover the entire file, which can be costly. Teach
> git-blame to accept multiple -L ranges.
>
> Overlapping and out-of-order ranges are accepted and handled gracefully.
> For example:
>
>   git blame -L 3,+4 -L 91,+7 -L 2,3 -L 89,100 source.c
>
> emits blame information for lines 2-6 and 89-100.
>
> Signed-off-by: Eric Sunshine 
> ---
>
> This is RFC because it lacks documentation and test updates, and because
> I want to make sure the approach is sound and not abusive of the blame
> machinery.

Thanks.  Procrastination (and being down sick) pays off.

A few comments (without reading too deep in the patch, so do not
take any of these as complaint---if you did it the way I said "I'd
prefer", take it as a praise ;-).

 - The general concept to start from not just one but multiple blame
   entries that share the same source (and initial suspect) is the
   right way to implement multiple ranges.

 - I'd prefer to see the command parser for multiple -L options to
   ensure that they are in strictly increasing order without
   overlap.  Error out with a message if the input ranges are out of
   order or with overlap.  Doing it that way, it would be easier to
   explain to the users how "blame -L /A/,/B/ -L /C/,/D/" should
   work.  It would find the first line that matches C _after_ the
   end of the first range.  This is in line with the way we find the
   end of the range (e.g. the line that matches B) starting from the
   last line previously specified (e.g. the line that matches A).

 - I'd be somewhat unhappy to see coalesce() butchered to blindly
   accept overlapping ranges (if anything, I'd rather see it
   tightened to detect such input as a programming error), but this
   is a minor point.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC/PATCH 0/4] cat-file --batch-disk-sizes

2013-07-07 Thread Jeff King
When I work with alternates repositories that have the objects for many
individual forks inter-mixed, one of the questions I want to ask git is
how much space particular forks are taking up in the object database.
This is easy enough to script with `rev-list --objects $fork1 --not
$fork2`, as long as you can convert the object names into their on-disk
sizes.

Unfortunately, it's hard to get the on-disk object sizes for packs. You
can do it directly with `verify-pack -v`, which is incredibly slow. Or
you can sort and subtract offsets from the output of `show-index` (i.e.,
the same thing the pack-revindex code does internally). Instead, this
patch series exposes the revindex-generated sizes on the command line.

The fourth patch does not need to be built on top of this series, but
the early parts provide a convenient way to measure the revindex code.

  [1/4]: zero-initialize object_info structs
  [2/4]: teach sha1_object_info_extended a "disk_size" query
  [3/4]: cat-file: add --batch-disk-sizes option
  [4/4]: pack-revindex: radix-sort the revindex

-Peff
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 2/2] send-email: introduce sendemail.smtpsslcertpath

2013-07-07 Thread Junio C Hamano
Jeff King  writes:

> On Fri, Jul 05, 2013 at 08:29:48PM +, brian m. carlson wrote:
>
>> On Fri, Jul 05, 2013 at 10:20:11AM -0700, Junio C Hamano wrote:
>> > +# Helper to come up with SSL/TLS certification validation params
>> > +# and warn when doing no verification
>> > +sub ssl_verify_params {
>> > +  use IO::Socket::SSL qw(SSL_VERIFY_PEER SSL_VERIFY_NONE);
>> 
>> You might as well put this at the top of the file, because all use
>> statements happen at compile time anyway, regardless of their location.
>> If you want to lazy-load this, you need to do:
>> 
>> require IO::Socket::SSL;
>> IO::Socket::SSL->import(qw(SSL_VERIFY_PEER SSL_VERIFY_NONE));
>> 
>> which is equivalent to "use" except that it happens at runtime.
>
> I think we _must_ lazy load this, or else we are breaking git-send-email
> users on platforms that do not have IO::Socket::SSL (and do not plan on
> using SSL themselves).
>
> The same goes for the "use" in patch 1/2.

A very good point.  Thanks.

Also it appears that people seem to be seeing different behaviours
depending on the versions of IO::Socket::SSL they have; we may need
to conditionalize what our code does depending on $PACKAGE::Version
after we do that lazy loading.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/4] zero-initialize object_info structs

2013-07-07 Thread Jeff King
The sha1_object_info_extended function expects the caller to
provide a "struct object_info" which contains pointers to
"query" items that will be filled in. The purpose of
providing pointers rather than storing the response directly
in the struct is so that callers can choose not to incur the
expense in finding particular fields that they do not care
about.

Right now the only query item is "sizep", and all callers
set it explicitly to choose whether or not to query it; they
can then leave the rest of the struct uninitialized.

However, as we add new query items, each caller will have to
be updated to explicitly turn off the new ones (by setting
them to NULL).  Instead, let's teach each caller to
zero-initialize the struct, so that they do not have to
learn about each new query item added.

Signed-off-by: Jeff King 
---
Obviously I plan to add a new query type in the next patch, but this
initialization is probably a reasonable thing to be doing anyway.

 sha1_file.c | 2 +-
 streaming.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/sha1_file.c b/sha1_file.c
index 0af19c0..de06a97 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2428,7 +2428,7 @@ int sha1_object_info(const unsigned char *sha1, unsigned 
long *sizep)
 
 int sha1_object_info(const unsigned char *sha1, unsigned long *sizep)
 {
-   struct object_info oi;
+   struct object_info oi = {0};
 
oi.sizep = sizep;
return sha1_object_info_extended(sha1, &oi);
diff --git a/streaming.c b/streaming.c
index cabcd9d..cac282f 100644
--- a/streaming.c
+++ b/streaming.c
@@ -135,7 +135,7 @@ struct git_istream *open_istream(const unsigned char *sha1,
 struct stream_filter *filter)
 {
struct git_istream *st;
-   struct object_info oi;
+   struct object_info oi = {0};
const unsigned char *real = lookup_replace_object(sha1);
enum input_source src = istream_source(real, type, &oi);
 
-- 
1.8.3.rc3.24.gec82cb9

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/4] teach sha1_object_info_extended a "disk_size" query

2013-07-07 Thread Jeff King
Using sha1_object_info_extended, a caller can find out the
type of an object, its size, and information about where it
is stored. In addition to the object's "true" size, it can
also be useful to know the size that the object takes on
disk (e.g., to generate statistics about which refs consume
space).

This patch adds a "disk_sizep" field to "struct object_info",
and fills it in during sha1_object_info_extended if it is
non-NULL.

Signed-off-by: Jeff King 
---
 cache.h |  1 +
 sha1_file.c | 20 
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/cache.h b/cache.h
index dd0fb33..2d06169 100644
--- a/cache.h
+++ b/cache.h
@@ -1130,6 +1130,7 @@ struct object_info {
 struct object_info {
/* Request */
unsigned long *sizep;
+   unsigned long *disk_sizep;
 
/* Response */
enum {
diff --git a/sha1_file.c b/sha1_file.c
index de06a97..4c2365f 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1697,7 +1697,8 @@ static int packed_object_info(struct packed_git *p, off_t 
obj_offset,
 #define POI_STACK_PREALLOC 64
 
 static int packed_object_info(struct packed_git *p, off_t obj_offset,
- unsigned long *sizep, int *rtype)
+ unsigned long *sizep, int *rtype,
+ unsigned long *disk_sizep)
 {
struct pack_window *w_curs = NULL;
unsigned long size;
@@ -1731,6 +1732,11 @@ static int packed_object_info(struct packed_git *p, 
off_t obj_offset,
}
}
 
+   if (disk_sizep) {
+   struct revindex_entry *revidx = find_pack_revindex(p, 
obj_offset);
+   *disk_sizep = revidx[1].offset - obj_offset;
+   }
+
while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
off_t base_offset;
/* Push the object we're going to leave behind */
@@ -2357,7 +2363,8 @@ struct packed_git *find_sha1_pack(const unsigned char 
*sha1,
 
 }
 
-static int sha1_loose_object_info(const unsigned char *sha1, unsigned long 
*sizep)
+static int sha1_loose_object_info(const unsigned char *sha1, unsigned long 
*sizep,
+ unsigned long *disk_sizep)
 {
int status;
unsigned long mapsize, size;
@@ -2368,6 +2375,8 @@ static int sha1_loose_object_info(const unsigned char 
*sha1, unsigned long *size
map = map_sha1_file(sha1, &mapsize);
if (!map)
return -1;
+   if (disk_sizep)
+   *disk_sizep = mapsize;
if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
status = error("unable to unpack %s header",
   sha1_to_hex(sha1));
@@ -2391,13 +2400,15 @@ int sha1_object_info_extended(const unsigned char 
*sha1, struct object_info *oi)
if (co) {
if (oi->sizep)
*(oi->sizep) = co->size;
+   if (oi->disk_sizep)
+   *(oi->disk_sizep) = 0;
oi->whence = OI_CACHED;
return co->type;
}
 
if (!find_pack_entry(sha1, &e)) {
/* Most likely it's a loose object. */
-   status = sha1_loose_object_info(sha1, oi->sizep);
+   status = sha1_loose_object_info(sha1, oi->sizep, 
oi->disk_sizep);
if (status >= 0) {
oi->whence = OI_LOOSE;
return status;
@@ -2409,7 +2420,8 @@ int sha1_object_info_extended(const unsigned char *sha1, 
struct object_info *oi)
return status;
}
 
-   status = packed_object_info(e.p, e.offset, oi->sizep, &rtype);
+   status = packed_object_info(e.p, e.offset, oi->sizep, &rtype,
+   oi->disk_sizep);
if (status < 0) {
mark_bad_packed_object(e.p, sha1);
status = sha1_object_info_extended(sha1, oi);
-- 
1.8.3.rc3.24.gec82cb9

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/4] cat-file: add --batch-disk-sizes option

2013-07-07 Thread Jeff King
This option is just like --batch-check, but shows the
on-disk size rather than the true object size. In other
words, it makes the "disk_size" query of sha1_object_info_extended
available via the command-line.

This can be used for rough attribution of disk usage to
particular refs, though see the caveats in the
documentation.

This patch does not include any tests, as the exact numbers
returned are volatile and subject to zlib and packing
decisions.

Signed-off-by: Jeff King 
---
I sort of tacked this onto the --batch-check format by replacing the
"real" object size with the on-disk size when this option is used. I'm
open to suggestions. Two other things I considered were:

  1. Having the option simply output an extra field with the on-disk
 size. But then you are paying for the true object size lookup, even
 if you don't necessarily care.

  2. Simply outputting the disk-size and object name. For my purposes, I
 do not care about the object type, and finding the type takes non-trivial
 resources (we have to walk delta chains to find the true type).

Perhaps we need

  git cat-file --batch-format="%(disk-size) %(object)"

or similar.

 Documentation/git-cat-file.txt | 16 
 builtin/cat-file.c |  9 +
 2 files changed, 25 insertions(+)

diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt
index 30d585a..d4af1fc 100644
--- a/Documentation/git-cat-file.txt
+++ b/Documentation/git-cat-file.txt
@@ -65,6 +65,22 @@ OPTIONS
Print the SHA-1, type, and size of each object provided on stdin. May 
not
be combined with any other options or arguments.
 
+--batch-disk-sizes::
+   Like `--batch-check`, but print the on-disk size of each object
+   (including zlib and delta compression) rather than the object's
+   true size. May not be combined with any other options or
+   arguments.
++
+NOTE: The on-disk size reported is accurate, but care should be taken in
+drawing conclusions about which refs or objects are responsible for disk
+usage. The size of a packed non-delta object may be much larger than the
+size of objects which delta against it, but the choice of which object
+is the base and which is the delta is arbitrary and is subject to change
+during a repack. Note also that multiple copies of an object may be
+present in the object database; in this case, it is undefined which
+copy's size will be reported.
+
+
 OUTPUT
 --
 If '-t' is specified, one of the <type>.
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 045cee7..5112c64 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -15,6 +15,7 @@
 
 #define BATCH 1
 #define BATCH_CHECK 2
+#define BATCH_DISK_SIZES 3
 
 static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
 {
@@ -135,6 +136,11 @@ static int batch_one_object(const char *obj_name, int 
print_contents)
 
if (print_contents == BATCH)
contents = read_sha1_file(sha1, &type, &size);
+   else if (print_contents == BATCH_DISK_SIZES) {
+   struct object_info oi = {0};
+   oi.disk_sizep = &size;
+   type = sha1_object_info_extended(sha1, &oi);
+   }
else
type = sha1_object_info(sha1, &size);
 
@@ -206,6 +212,9 @@ int cmd_cat_file(int argc, const char **argv, const char 
*prefix)
OPT_SET_INT(0, "batch-check", &batch,
N_("show info about objects fed from the standard 
input"),
BATCH_CHECK),
+   OPT_SET_INT(0, "batch-disk-sizes", &batch,
+   N_("show on-disk size of objects fed from standard 
input"),
+   BATCH_DISK_SIZES),
OPT_END()
};
 
-- 
1.8.3.rc3.24.gec82cb9

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/4] pack-revindex: radix-sort the revindex

2013-07-07 Thread Jeff King
The pack revindex stores the offsets of the objects in the
pack in sorted order, allowing us to easily find the on-disk
size of each object. To compute it, we populate an array
with the offsets from the sha1-sorted idx file, and then use
qsort to order it by offsets.

That does O(n log n) offset comparisons, and profiling shows
that we spend most of our time in cmp_offset. However, since
we are sorting on a simple off_t, we can use numeric sorts
that perform better. A radix sort can run in O(k*n), where k
is the number of "digits" in our number. For a 64-bit off_t,
using 16-bit "digits" gives us k=4.

On the linux.git repo, with about 3M objects to sort, this
yields a 400% speedup. Here are the best-of-five numbers for
running "echo HEAD | git cat-file --batch-disk-sizes", which
is dominated by time spent building the pack revindex:

  before after
  real0m0.834s   0m0.204s
  user0m0.788s   0m0.164s
  sys 0m0.040s   0m0.036s

On a smaller repo, the radix sort would not be
as impressive (and could even be worse), as we are trading
the log(n) factor for the k=4 of the radix sort. However,
even on git.git, with 173K objects, it shows some
improvement:

  before after
  real0m0.046s   0m0.017s
  user0m0.036s   0m0.012s
  sys 0m0.008s   0m0.000s

Signed-off-by: Jeff King 
---
I think there are probably still two potential issues here:

  1. My while() loop termination probably has issues when we have to use
 all 64 bits to represent the pack offset (not likely, but...)

  2. We put "int pos[65536]" on the stack. This is a little big, but is
 probably OK, as I think the usual small stack problems we have seen
 are always in threaded code. But it would not be a big deal to heap
 allocate it (it would happen once per radix step, which is only 4
 times for the whole sort).

 pack-revindex.c | 77 +
 1 file changed, 72 insertions(+), 5 deletions(-)

diff --git a/pack-revindex.c b/pack-revindex.c
index 77a0465..d2adf36 100644
--- a/pack-revindex.c
+++ b/pack-revindex.c
@@ -59,11 +59,78 @@ static int cmp_offset(const void *a_, const void *b_)
/* revindex elements are lazily initialized */
 }
 
-static int cmp_offset(const void *a_, const void *b_)
+/*
+ * This is a least-significant-digit radix sort using a 16-bit "digit".
+ */
+static void sort_revindex(struct revindex_entry *entries, int n, off_t max)
 {
-   const struct revindex_entry *a = a_;
-   const struct revindex_entry *b = b_;
-   return (a->offset < b->offset) ? -1 : (a->offset > b->offset) ? 1 : 0;
+   /*
+* We need O(n) temporary storage, so we sort back and forth between
+* the real array and our tmp storage. To keep them straight, we always
+* sort from "a" into buckets in "b".
+*/
+   struct revindex_entry *tmp = xcalloc(n, sizeof(*tmp));
+   struct revindex_entry *a = entries, *b = tmp;
+   int digits = 0;
+
+   /*
+* We want to know the bucket that a[i] will go into when we are using
+* the digit that is N bits from the (least significant) end.
+*/
+#define BUCKET_FOR(a, i, digits) ((a[i].offset >> digits) & 0xffff)
+
+   while (max / (((off_t)1) << digits)) {
+   struct revindex_entry *swap;
+   int i;
+   int pos[65536] = {0};
+
+   /*
+* We want pos[i] to store the index of the last element that
+* will go in bucket "i" (actually one past the last element).
+* To do this, we first count the items that will go in each
+* bucket, which gives us a relative offset from the last
+* bucket. We can then cumulatively add the index from the
+* previous bucket to get the true index.
+*/
+   for (i = 0; i < n; i++)
+   pos[BUCKET_FOR(a, i, digits)]++;
+   for (i = 1; i < ARRAY_SIZE(pos); i++)
+   pos[i] += pos[i-1];
+
+   /*
+* Now we can drop the elements into their correct buckets (in
+* our temporary array).  We iterate the pos counter backwards
+* to avoid using an extra index to count up. And since we are
+* going backwards there, we must also go backwards through the
+* array itself, to keep the sort stable.
+*/
+   for (i = n - 1; i >= 0; i--)
+   b[--pos[BUCKET_FOR(a, i, digits)]] = a[i];
+
+   /*
+* Now "b" contains the most sorted list, so we swap "a" and
+* "b" for the next iteration.
+*/
+   swap = a;
+   a = b;
+   b = swap;
+
+   /* And bump our digits for the next round. */
+   digits += 16;
+   }
+
+   /*
+* If we ended with our data 

Re: [PATCH] lockfile: fix buffer overflow in path handling

2013-07-07 Thread Michael Haggerty
On 07/07/2013 06:12 AM, Jeff King wrote:
> On Sat, Jul 06, 2013 at 09:48:52PM +0200, Michael Haggerty wrote:
> 
>> When and if resolve_symlink() is called, then that function is
>> correctly told to treat the buffer as (PATH_MAX - 5) characters long.
>> This part is correct.  However:
>>
>> * If LOCK_NODEREF was specified, then resolve_symlink() is never
>>   called.
>>
>> * If resolve_symlink() is called but the path is not a symlink, then
>>   the length check is never applied.
>>
>> So it is possible for a path with length (PATH_MAX - 5 <= len <
>> PATH_MAX) to make it through the checks.  When ".lock" is strcat()ted
>> to such a path, the lock_file::filename buffer is overflowed.
> 
> Thanks for posting this. I independently discovered this about a month
> ago while working on an unrelated series, and then let it languish
> unseen and forgotten at the base of that almost-done series.
> 
> So definitely a problem, and my patch looked almost identical to
> yours. The only difference is:
> 
>>  static int lock_file(struct lock_file *lk, const char *path, int flags)
>>  {
>> -if (strlen(path) >= sizeof(lk->filename))
>> -return -1;
>> -strcpy(lk->filename, path);
>>  /*
>>   * subtract 5 from size to make sure there's room for adding
>>   * ".lock" for the lock file name
>>   */
>> +if (strlen(path) >= sizeof(lk->filename)-5)
>> +return -1;
>> +strcpy(lk->filename, path);
>>  if (!(flags & LOCK_NODEREF))
>>  resolve_symlink(lk->filename, sizeof(lk->filename)-5);
> 
> It might be worth consolidating the magic "-5" into a constant near the
> comment, like this:
> 
> diff --git a/lockfile.c b/lockfile.c
> index c6fb77b..2aeb2bb 100644
> --- a/lockfile.c
> +++ b/lockfile.c
> @@ -124,15 +124,16 @@ static int lock_file(struct lock_file *lk, const char 
> *path, int flags)
>  
>  static int lock_file(struct lock_file *lk, const char *path, int flags)
>  {
> - if (strlen(path) >= sizeof(lk->filename))
> - return -1;
> - strcpy(lk->filename, path);
>   /*
>* subtract 5 from size to make sure there's room for adding
>* ".lock" for the lock file name
>*/
> + static const size_t max_path_len = sizeof(lk->filename) - 5;
> + if (strlen(path) >= max_path_len)
> + return -1;
> + strcpy(lk->filename, path);
>   if (!(flags & LOCK_NODEREF))
> - resolve_symlink(lk->filename, sizeof(lk->filename)-5);
> + resolve_symlink(lk->filename, max_path_len);
>   strcat(lk->filename, ".lock");
>   lk->fd = open(lk->filename, O_RDWR | O_CREAT | O_EXCL, 0666);
>   if (0 <= lk->fd) {
> 
> But either way, the fix looks good to me.

Yes, the constant is an improvement and Peff's version is also fine with me.

Michael

-- 
Michael Haggerty
mhag...@alum.mit.edu
http://softwareswirl.blogspot.com/
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/3] name-rev doc: rewrite --stdin paragraph

2013-07-07 Thread Ramkumar Ramachandra
Signed-off-by: Ramkumar Ramachandra 
---
 Documentation/git-name-rev.txt | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Documentation/git-name-rev.txt b/Documentation/git-name-rev.txt
index 7cde4b3..94bded8 100644
--- a/Documentation/git-name-rev.txt
+++ b/Documentation/git-name-rev.txt
@@ -32,8 +32,10 @@ OPTIONS
List all commits reachable from all refs
 
 --stdin::
-   Read from stdin, append "(<rev_name>)" to all sha1's of nameable
-   commits, and pass to stdout
+   Transform stdin by substituting all the 40-character SHA-1
+   hexes (say $hex) with "$hex ($rev_name)".  When used with
+   --name-only, substitute with "$rev_name", omitting $hex
+   altogether.  Intended for the scripter's use.
 
 --name-only::
Instead of printing both the SHA-1 and the name, print only
-- 
1.8.3.2.737.gcbc076a.dirty

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/3] name-rev: fix assumption about --name-only usage

2013-07-07 Thread Ramkumar Ramachandra
236157 (Teach git-describe how to run name-rev, 2007-05-21) introduced
`git name-rev --name-only`, with the intent of using it to implement
`git describe --contains`.  According to the message, users wanted to
use describe to figure out which tags contain a specific commit.
name-rev already did this, but didn't print out in the same format as
describe:

  $ git describe v1.8.3~1
  v1.8.3-rc3-8-g5e49f30

  $ git name-rev v1.8.3~1
  v1.8.3~1 tags/v1.8.3~1

There are two problems with using the output of name-rev in describe:
first, it prints out the given argument before describing it.  Second,
it prefixes "tags/" to the tag description.  To eliminate these two
problems, 236157 proposed that the --name-only option would strip these
things when used with --tags, to match the describe output more closely:

  $ git name-rev --name-only --tags v1.8.3~1
  v1.8.3~1

236157 did not anticipate a problem with always combining --name-only
with --tags, because it was primarily intended to be used from describe,
where it hard-coded these two arguments in the execv() of name-rev.

Later, 3f7701 (make 'git describe --all --contains' work, 2007-12-19)
noticed that describe didn't work with --contains and --all.  This is
because --contains implied a call to name-rev (in which --tags was
hard-coded), but --all implied that any ref should be used to describe
the given argument (not just tags).  3f7701 took the band-aid approach,
and made --all disable --tags when calling name-rev.  As a result, while

  $ git describe --contains v1.8.3~1
  v1.8.3~1

would get name-rev to print output in the same format as describe,

  $ git describe --contains --all v1.8.3~1
  tags/v1.8.3~1

would not strip the leading "tags/".

The bug exists in git to this day.  Fix it by removing the assumption
that name-rev --name-only is only intended to be used with --tags.  Also
update some tests.

Users and scripts have learnt to live with 3f7701, and it will continue
to be a small quirk.  Even after this patch, notice

  $ git checkout -b foom v1.8.3
  $ git describe --contains @~1
  v1.8.3~1
  $ git describe --contains --all @~1
  foom~1

In other words, --contains implies --tags in name-rev, which gives
precedence to tags; --all cancels that effect thereby giving precedence
to branches in the case of a tie.

Signed-off-by: Ramkumar Ramachandra 
---
 Documentation/git-name-rev.txt   |  6 +++---
 builtin/name-rev.c   |  3 +++
 t/t4202-log.sh   |  8 
 t/t6007-rev-list-cherry-pick-file.sh | 32 
 4 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/Documentation/git-name-rev.txt b/Documentation/git-name-rev.txt
index 6b0f1ba..7cde4b3 100644
--- a/Documentation/git-name-rev.txt
+++ b/Documentation/git-name-rev.txt
@@ -37,9 +37,9 @@ OPTIONS
 
 --name-only::
Instead of printing both the SHA-1 and the name, print only
-   the name.  If given with --tags the usual tag prefix of
-   "tags/" is also omitted from the name, matching the output
-   of `git-describe` more closely.
+   the name.  The usual tag prefix of "tags/" is also omitted
+   from the name, matching the output of `git-describe` more
+   closely.
 
 --no-undefined::
Die with error code != 0 when a reference is undefined,
diff --git a/builtin/name-rev.c b/builtin/name-rev.c
index 87d4854..37207a9 100644
--- a/builtin/name-rev.c
+++ b/builtin/name-rev.c
@@ -138,6 +138,9 @@ static int name_ref(const char *path, const unsigned char 
*sha1, int flags, void
path = shorten_unambiguous_ref(path, 0);
else if (!prefixcmp(path, "refs/heads/"))
path = path + 11;
+   else if (data->name_only
+   && !prefixcmp(path, "refs/tags/"))
+   path = path + 10;
else if (!prefixcmp(path, "refs/"))
path = path + 5;
 
diff --git a/t/t4202-log.sh b/t/t4202-log.sh
index cb03d28..9bec360 100755
--- a/t/t4202-log.sh
+++ b/t/t4202-log.sh
@@ -302,7 +302,7 @@ cat > expect <<\EOF
 | |
 | | side-2
 | |
-| * commit tags/side-1
+| * commit side-1
 | | Author: A U Thor 
 | |
 | | side-1
@@ -327,17 +327,17 @@ cat > expect <<\EOF
 |
 |   fourth
 |
-* commit tags/side-1~1
+* commit side-1~1
 | Author: A U Thor 
 |
 | third
 |
-* commit tags/side-1~2
+* commit side-1~2
 | Author: A U Thor 
 |
 | second
 |
-* commit tags/side-1~3
+* commit side-1~3
   Author: A U Thor 
 
   initial
diff --git a/t/t6007-rev-list-cherry-pick-file.sh 
b/t/t6007-rev-list-cherry-pick-file.sh
index 28d4f6b..5a8175e 100755
--- a/t/t6007-rev-list-cherry-pick-file.sh
+++ b/t/t6007-rev-list-cherry-pick-file.sh
@@ -49,8 +49,8 @@ test_expect_success setup '
 '
 
 cat >expect expect C
 EOF
 
 test_expect_success '-

[PATCH 0/3] Iron output of describe --contains --all

2013-07-07 Thread Ramkumar Ramachandra
Hi,

I actually sent these patches to the list last month, but nobody
seemed to be interested.  This is an unedited resend.

I looked into adding tests, but decided that it was a lost cause: the
output is too loosely defined for any scripts to rely on it strongly.
[1/3] already shows a race between branches and tags.  For another
example, consider two tags are pointing to same commit (D and R in
t6120-describe.sh).  Run the following on the two tags and see what
happens for yourself:

  $ git describe --contains
  $ git describe --all
  $ git describe --tags

Now think about various combinations of these options.  I'm not saying
that it's a Bad Thing (TM), but that nobody has bothered tightening
the output.

My main motivation for doing this series is my prompt: I don't want to
see

  artagnon|(tags/v1.8.3^0)~/src/git$

when

  artagnon|(v1.8.3)~/src/git$

is so much more pleasant and consistent.  Obviously, hacking around
this problem in the prompt script is the Wrong thing to do.

Thanks.

Ramkumar Ramachandra (3):
  name-rev: fix assumption about --name-only usage
  name-rev: strip trailing ^0 in when --name-only
  name-rev doc: rewrite --stdin paragraph

 Documentation/git-name-rev.txt   | 12 +++-
 builtin/name-rev.c   | 12 +++-
 t/t4202-log.sh   |  8 
 t/t6007-rev-list-cherry-pick-file.sh | 32 
 4 files changed, 38 insertions(+), 26 deletions(-)

-- 
1.8.3.2.737.gcbc076a.dirty

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3] name-rev: strip trailing ^0 in when --name-only

2013-07-07 Thread Ramkumar Ramachandra
236157 (Teach git-describe how to run name-rev, 2007-05-21) introduced
`git name-rev --name-only`, with the intent of using it to implement
`git describe --contains`.  According to the message, one of the primary
objectives of --name-only was to make the output of name-rev match that
of describe.

  $ git describe --contains --all master
  master

  $ git describe --contains --all master~1
  master~1

  $ git describe --contains --all v1.8.3~1
  v1.8.3~1

  $ git describe --contains --all v1.8.3
  v1.8.3^0

The last invocation unnecessarily prints a trailing "^0" (--stdin does
not suffer from this defect).  Fix this.

Signed-off-by: Ramkumar Ramachandra 
---
 builtin/name-rev.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/builtin/name-rev.c b/builtin/name-rev.c
index 37207a9..8ba5d72 100644
--- a/builtin/name-rev.c
+++ b/builtin/name-rev.c
@@ -186,7 +186,14 @@ static void show_name(const struct object *obj,
if (!name_only)
printf("%s ", caller_name ? caller_name : sha1_to_hex(sha1));
name = get_rev_name(obj);
-   if (name)
+
+   if (name && name_only) {
+   /* strip possible trailing ^0 from name */
+   int len = strlen(name);
+   if (len > 2 && !strcmp(name + len - 2, "^0"))
+   len -= 2;
+   printf("%.*s\n", len, name);
+   } else if (name)
printf("%s\n", name);
else if (allow_undefined)
printf("undefined\n");
-- 
1.8.3.2.737.gcbc076a.dirty

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: git subtree push-all and pull-all

2013-07-07 Thread Fredrik Gustafsson
On Wed, Jul 03, 2013 at 03:56:36PM -0400, Gareth Collins wrote:
> Hello,
> 
> I see over the last year (on the web and in this mailing list) there
> was some activity to extend subtree with a .gittrees file and
> push-all/pull-all commands.
> 
> Perhaps I missed it, but looking through the latest git code on the
> github mirror I can't find any reference to the .gittrees file or
> these commands.
> 
> Does anyone know the status of this feature? Was it decided that this
> was a bad idea and the feature has been rejected? Or is this a feature
> still "cooking"...which will likely make it into git mainline at some
> point?
> 
> I ask because I would like to use something like this to be able to
> keep a combined repository and separate project repositories in sync.
> Of course, if it was decided that this feature is fundamentally a bad
> idea then I will do something different.
> 
> Any pointers would be a big help.
> 
> thanks in advance,
> Gareth Collins

Still no answer to this? I suggest that you CC the persons discussing
this the last time.

-- 
Med vänliga hälsningar
Fredrik Gustafsson

tel: 0733-608274
e-post: iv...@iveqy.com
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] prompt: do not double-discriminate detached HEAD

2013-07-07 Thread Ramkumar Ramachandra
When GIT_PS1_SHOWCOLORHINTS is turned on, there is no need to put a
detached HEAD within parentheses: the color can be used to discriminate
the detached HEAD.

Signed-off-by: Ramkumar Ramachandra 
---
 For cuteness :)

 contrib/completion/git-prompt.sh | 5 -
 t/t9903-bash-prompt.sh   | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/contrib/completion/git-prompt.sh b/contrib/completion/git-prompt.sh
index a81ef5a..37e66a2 100644
--- a/contrib/completion/git-prompt.sh
+++ b/contrib/completion/git-prompt.sh
@@ -372,7 +372,10 @@ __git_ps1 ()
esac 2>/dev/null)" ||
 
b="$short_sha..."
-   b="($b)"
+   # if there is no color, use
+   # parenthesis to indicate that the
+   # HEAD is detached
+   test -n "${GIT_PS1_SHOWCOLORHINTS-}" || b="($b)"
fi
fi
fi
diff --git a/t/t9903-bash-prompt.sh b/t/t9903-bash-prompt.sh
index 3c3e4e8..c44b1a6 100755
--- a/t/t9903-bash-prompt.sh
+++ b/t/t9903-bash-prompt.sh
@@ -450,7 +450,7 @@ test_expect_success 'prompt - bash color pc mode - branch 
name' '
 '
 
 test_expect_success 'prompt - bash color pc mode - detached head' '
-   printf "BEFORE: (${c_red}(%s...)${c_clear}):AFTER" $(git log -1 
--format="%h" b1^) >expected &&
+   printf "BEFORE: (${c_red}%s...${c_clear}):AFTER" $(git log -1 
--format="%h" b1^) >expected &&
git checkout b1^ &&
test_when_finished "git checkout master" &&
(
-- 
1.8.3.2.737.gcbc076a.dirty

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] range_set: fix coalescing bug when range is a subset of another

2013-07-07 Thread Eric Sunshine
When coalescing ranges, sort_and_merge_range_set() unconditionally
assumes that the end of a range being folded into a preceding range
should become the end of the coalesced range. This assumption, however,
is invalid when one range is a subset of another.  For example, given
ranges 1-5 and 2-3 added via range_set_append_unsafe(),
sort_and_merge_range_set() incorrectly coalesces them to range 1-3
rather than the correct union range 1-5. Fix this bug.

Signed-off-by: Eric Sunshine 
---

Presumably, this problem does not happen in practice, so it's not clear
if the patch should be applied.  I discovered it when teaching git-blame
to accept multiple -L options, one iteration of which (after making the
range_set API public) employed sort_and_merge_range_set() to sort and
coalesce input -L ranges added via range_set_append_unsafe().

It might make sense to apply this patch in order to future-proof
sort_and_merge_range_set() in case the range_set API ever becomes
public.


 line-log.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/line-log.c b/line-log.c
index 4bbb09b..8cc29a0 100644
--- a/line-log.c
+++ b/line-log.c
@@ -116,7 +116,8 @@ static void sort_and_merge_range_set(struct range_set *rs)
 
for (i = 1; i < rs->nr; i++) {
if (rs->ranges[i].start <= rs->ranges[o-1].end) {
-   rs->ranges[o-1].end = rs->ranges[i].end;
+   if (rs->ranges[o-1].end < rs->ranges[i].end)
+   rs->ranges[o-1].end = rs->ranges[i].end;
} else {
rs->ranges[o].start = rs->ranges[i].start;
rs->ranges[o].end = rs->ranges[i].end;
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] prompt: do not double-discriminate detached HEAD

2013-07-07 Thread John Szakmeister
On Sun, Jul 7, 2013 at 8:52 AM, Ramkumar Ramachandra  wrote:
> When GIT_PS1_SHOWCOLORHINTS is turned on, there is no need to put a
> detached HEAD within parenthesis: the color can be used to discriminate
> the detached HEAD.
>
> Signed-off-by: Ramkumar Ramachandra 
> ---
>  For cuteness :)

Personally, I'd rather see the parens kept.  Not everyone sees red
very well--I know several people who can't see it at all, and it keeps
it consistent with non-colored output.

-John
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] git-svn: allow git-svn fetching to work using serf

2013-07-07 Thread Daniel Shahaf
Kyle McKay wrote on Sat, Jul 06, 2013 at 19:46:40 -0700:
> On Jul 6, 2013, at 19:23, Jonathan Nieder wrote:
>> Kyle McKay wrote:
>>
>>> Unless bulk updates are disabled when using the serf access method
>>> (the only one available with svn 1.8) for https?: urls,
>>> apply_textdelta does indeed get called multiple times in a row
>>> without an intervening temp_release.
>>
>> You mean "Unless bulk updates are enabled" and "without an intervening
>> close_file", right?
>
> The problem seems to be skelta mode although it may just be the fact  
> that ra_serf has multiple connections outstanding and since ra_neon only 
> ever has one it can't happen over ra_neon.
>
> If the server disables bulk updates (SVNAllowBulkUpdates Off) all  
> clients are forced to use skelta mode, even ra_neon clients.

As Brane and I have pointed out, git-svn can instruct libsvn_* to use
bulk updates regardless of the server version, by setting
SVN_CONFIG_OPTION_HTTP_BULK_UPDATES (new in 1.8).

If you have questions about that, though, please address them to
us...@subversion.apache.org (the proper list for API usage questions),
not to me personally.

Cheers,

Daniel
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] git-svn: allow git-svn fetching to work using serf

2013-07-07 Thread David Rothenberger
On 7/7/2013 6:39 AM, Daniel Shahaf wrote:
> Kyle McKay wrote on Sat, Jul 06, 2013 at 19:46:40 -0700:
>> On Jul 6, 2013, at 19:23, Jonathan Nieder wrote:
>>> Kyle McKay wrote:
>>>
>>>> Unless bulk updates are disabled when using the serf access method
>>>> (the only one available with svn 1.8) for https?: urls,
>>>> apply_textdelta does indeed get called multiple times in a row
>>>> without an intervening temp_release.
>>>
>>> You mean "Unless bulk updates are enabled" and "without an intervening
>>> close_file", right?
>>
>> The problem seems to be skelta mode although it may just be the fact  
>> that ra_serf has multiple connections outstanding and since ra_neon only 
>> ever has one it can't happen over ra_neon.
>>
>> If the server disables bulk updates (SVNAllowBulkUpdates Off) all  
>> clients are forced to use skelta mode, even ra_neon clients.
> 
> As Brane and I have pointed out, git-svn can instruct libsvn_* to use
> bulk updates regardless of the server version, by setting
> SVN_CONFIG_OPTION_HTTP_BULK_UPDATES (new in 1.8).

According to the table in the release notes [1], Skelta mode will be
used if the 1.7 or 1.8 server sets SVNAllowBulkUpdates to Off,
regardless of what the client sets in the configuration.

Is that not true?

[1] https://subversion.apache.org/docs/release-notes/1.8.html#neon-deleted

-- 
David Rothenberger    daver...@acm.org
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] diffcore-pickaxe: simplify has_changes and contains

2013-07-07 Thread Junio C Hamano
Jeff King  writes:

> Before, if (!one && !two) we would call contains(two, ...), and now we
> will simply assume it is zero. Which I think is an improvement, as we
> would have segfaulted before. I don't think it is a bug in the current
> code (we would not ever feed the function two NULLs), but it is nice to
> be more defensive.
>
> Acked-by: Jeff King 

Thanks, both.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 09/16] documentation: add documentation for the bitmap format

2013-07-07 Thread Shawn Pearce
On Sun, Jul 7, 2013 at 2:46 AM, Jeff King  wrote:
> On Mon, Jul 01, 2013 at 11:47:32AM -0700, Colby Ranger wrote:
>
>> > But I think we are comparing
>> > apples to steaks here, Vincent is (rightfully) concerned about process
>> > startup performance, whereas our timings were assuming the process was
>> > already running.
>> >
>>
>> I did some timing on loading the reverse index for the kernel and it
>> is pretty slow (~1200ms). I just submitted a fix to do a bucket sort
>> and reduced that to ~450ms, which is still slow but much better:
>
> On my machine, loading the kernel revidx in C git is about ~830ms. I
> switched the qsort() call to a radix/bucket sort, and have it down to
> ~200ms. So definitely much better,

This is a very nice reduction. pack-objects would benefit from it even
without bitmaps. Since it doesn't require a data format change this is
a pretty harmless patch to include in Git. We may later conclude
caching the revidx is worthwhile, but until then a bucket sort doesn't
hurt. :-)

> though that still leaves a bit to be
> desired for quick commands. E.g., "git rev-list --count A..B" should
> become fairly instantaneous with bitmaps, but in many cases the revindex
> loading will take longer than it would have to simply do the actual
> traversal.

Yea, we don't know of a way around this. In a few cases the bitmap
code in JGit is slower than the naive traversal, but these are only on
small segments of history. I wonder if you could guess which algorithm
to use by looking at the offsets of A and B using the idx file. If
they are near each other in the pack, run the naive algorithm without
bitmaps and revidx. If they are farther apart assume the bitmap would
help more than traversal and use bitmap+revidx.

Working out what the correct "distance" should be before switching
algorithms is hard. A and B could be megabytes apart in the pack but A
could be B's grandparent and traversed in milliseconds. I wonder how
often that happens in practice; certainly, if A and B are within a few
hundred kilobytes of each other, the naive traversal should be almost
instant.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] lockfile: fix buffer overflow in path handling

2013-07-07 Thread Junio C Hamano
Michael Haggerty  writes:

>> But either way, the fix looks good to me.
>
> Yes, the constant is an improvement and Peff's version is also fine with me.

OK, will squash in.  Thanks both.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] git-config: update doc for --get with multiple values

2013-07-07 Thread Junio C Hamano
John Keeping  writes:

>> I wondered if we should explain the significance of "last" a bit
>> more (like "this results in the value from the most specific
>> configuration file to be used, the ones in $GIT_DIR/config
>> overriding what is in $HOME/.gitconfig"), but I do not have a strong
>> opinion either way.  Let's queue this for 'maint' for now.
>
> I don't think that change belongs here.  How about doing something like
> this in the FILES section (the first two hunks are just reordering the
> existing list, only the last hunk changes the content):

Sounds like a good change to me ;-).

> -- >8 --
> diff --git a/Documentation/git-config.txt b/Documentation/git-config.txt
> index fbad05e..99dc497 100644
> --- a/Documentation/git-config.txt
> +++ b/Documentation/git-config.txt
> @@ -206,12 +206,8 @@ FILES
>  If not set explicitly with '--file', there are four files where
>  'git config' will search for configuration options:
>  
> -$GIT_DIR/config::
> - Repository specific configuration file.
> -
> -~/.gitconfig::
> - User-specific configuration file. Also called "global"
> - configuration file.
> +$(prefix)/etc/gitconfig::
> + System-wide configuration file.
>  
>  $XDG_CONFIG_HOME/git/config::
>   Second user-specific configuration file. If $XDG_CONFIG_HOME is not set
> @@ -221,8 +217,12 @@ $XDG_CONFIG_HOME/git/config::
>   you sometimes use older versions of Git, as support for this
>   file was added fairly recently.
>  
> -$(prefix)/etc/gitconfig::
> - System-wide configuration file.
> +~/.gitconfig::
> + User-specific configuration file. Also called "global"
> + configuration file.
> +
> +$GIT_DIR/config::
> + Repository specific configuration file.
>  
>  If no further options are given, all reading options will read all of these
>  files that are available. If the global or the system-wide configuration
> @@ -230,6 +230,10 @@ file are not available they will be ignored. If the 
> repository configuration
>  file is not available or readable, 'git config' will exit with a non-zero
>  error code. However, in neither case will an error message be issued.
>  
> +The files are read in the order given above, with the last value found taking
> +precedence over values read earlier.  When multiple values are taken then all
> +values of a key from all files will be used.
> +
>  All writing options will per default write to the repository specific
>  configuration file. Note that this also affects options like '--replace-all'
>  and '--unset'. *'git config' will only ever change one file at a time*.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/4] zero-initialize object_info structs

2013-07-07 Thread Junio C Hamano
Jeff King  writes:

> Obviously I plan to add a new query type in the next patch, but this
> initialization is probably a reasonable thing to be doing anyway.

Yes. Thanks.

>
>  sha1_file.c | 2 +-
>  streaming.c | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/sha1_file.c b/sha1_file.c
> index 0af19c0..de06a97 100644
> --- a/sha1_file.c
> +++ b/sha1_file.c
> @@ -2428,7 +2428,7 @@ int sha1_object_info(const unsigned char *sha1, 
> unsigned long *sizep)
>  
>  int sha1_object_info(const unsigned char *sha1, unsigned long *sizep)
>  {
> - struct object_info oi;
> + struct object_info oi = {0};
>  
>   oi.sizep = sizep;
>   return sha1_object_info_extended(sha1, &oi);
> diff --git a/streaming.c b/streaming.c
> index cabcd9d..cac282f 100644
> --- a/streaming.c
> +++ b/streaming.c
> @@ -135,7 +135,7 @@ struct git_istream *open_istream(const unsigned char 
> *sha1,
>struct stream_filter *filter)
>  {
>   struct git_istream *st;
> - struct object_info oi;
> + struct object_info oi = {0};
>   const unsigned char *real = lookup_replace_object(sha1);
>   enum input_source src = istream_source(real, type, &oi);
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/2] allow git-svn fetching to work using serf

2013-07-07 Thread Jonathan Nieder
(cc-ing subversion's users@ list for advice)
Kyle McKay wrote:
> On Jul 6, 2013, at 18:37, Jonathan Nieder wrote:
>> Kyle McKay wrote:
>>> Begin forwarded message:

>>>> [2] http://subversion.tigris.org/issues/show_bug.cgi?id=2932
>>
>> Ah, thanks for the context.
>>
>> It's still not clear to me how we know that ra_serf driving the editor
>> in a non depth-first manner is the problem here.  Has that explanation
>> been confirmed somehow?
[...]
> Since ra_serf makes multiple connections to the server (hard-coded
> to 4 prior to svn 1.8, defaults to 4 in svn 1.8 but can be set to
> between 1 and 8) it makes sense there would be multiple active calls
> to apply_textdelta if processing is done as results are received on
> the multiple connections.

Ah, that's worrisome.  Do I understand you correctly that to work with
ra_serf in skelta mode, callers need to make their apply_textdelta
callback thread-safe?

Or do you just mean that the traversal order is based on the order in
which results are received?  That would be fine, as long as after each
apply_textdelta call, close_file is called before the next
apply_textdelta.

Jonathan
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/4] cat-file: add --batch-disk-sizes option

2013-07-07 Thread Junio C Hamano
Jeff King  writes:

> Perhaps we need
>
>   git cat-file --batch-format="%(disk-size) %(object)"
>
> or similar.

I agree with your reasoning.  It may be simpler to give an interface
to ask for which pieces of info, e.g. --batch-cols=size,disksize,
without giving the readers a flexible "format".

> +NOTE: The on-disk size reported is accurate, but care should be taken in
> +drawing conclusions about which refs or objects are responsible for disk
> +usage. The size of a packed non-delta object may be much larger than the
> +size of objects which delta against it, but the choice of which object
> +is the base and which is the delta is arbitrary and is subject to change
> +during a repack. Note also that multiple copies of an object may be
> +present in the object database; in this case, it is undefined which
> +copy's size will be reported.

This is a good note to leave to the readers. I was wondering how
valid it is to claim that B is taking a lot of space compared to C when
you have three objects A, B and C (in decreasing order of on-disk
footprint) when A is huge and C is a small delta against A and B is
independent.  The role of A and C in their delta chain could easily
be swapped during the next full repack and then C will appear a lot
larger than B.

It might be interesting to measure the total disk footprint of an
entire delta "family" (the objects that delta against the same
base).  You may find out that hello.c, with a manageable size, has
very many revisions and overall has a larger on-disk footprint than
a single copy of unchanging help.mov clip used in the documentation
does, which may be an interesting observation to make.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/2] allow git-svn fetching to work using serf

2013-07-07 Thread Jonathan Nieder
(cc-ing users@ as requested by danielsh)
David Rothenberger wrote:
> On 7/6/2013 5:28 PM, Jonathan Nieder wrote:

>> Is there a simple explanation of why violating the depth-first
>> constraint would lead to multiple blob (i.e., file, not directory)
>> deltas being opened in a row without an intervening close?
>
> I believe serf is doing the following for a number of files in parallel:
>  1. open_file
>  2. apply_textdelta
>  3. change_file_prop, change_file_prop, ...
>  4. close_file

Ah, that makes more sense.  It is not about traversal order but about
processing multiple non-directory files in parallel, and step (3)
potentially involving a large number of property changes means that it
can make sense not to take a lock.

Perhaps the reference documentation could warn about this?

On the git-svn side, it looks like we have enough information to make
a more complete commit message or in-code comment so the reason for
multiple git_blob tempfiles is not forgotten.  Thanks for your patient
explanations.

Jonathan
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] prompt: do not double-discriminate detached HEAD

2013-07-07 Thread Junio C Hamano
John Szakmeister  writes:

> On Sun, Jul 7, 2013 at 8:52 AM, Ramkumar Ramachandra  
> wrote:
>> When GIT_PS1_SHOWCOLORHINTS is turned on, there is no need to put a
>> detached HEAD within parenthesis: the color can be used to discriminate
>> the detached HEAD.
>>
>> Signed-off-by: Ramkumar Ramachandra 
>> ---
>>  For cuteness :)
>
> Personally, I'd rather see the parens kept.  Not everyone sees red
> very well--I know several people who can't see it at all, and it keeps
> it consistent with non-colored output.

+1; I myself find red on many terminal emulators too dark to tell
apart from black, especially in a small font.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/3] merge-recursive: remove dead conditional in update_stages()

2013-07-07 Thread Thomas Rast
650467c (merge-recursive: Consolidate different update_stages
functions, 2011-08-11) changed the former argument 'clear' to always
be true.  Remove the useless conditional.

Signed-off-by: Thomas Rast 
---
 merge-recursive.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/merge-recursive.c b/merge-recursive.c
index ea9dbd3..0b9cafb 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -544,11 +544,9 @@ static int update_stages(const char *path, const struct 
diff_filespec *o,
 * would_lose_untracked).  Instead, reverse the order of the calls
 * (executing update_file first and then update_stages).
 */
-   int clear = 1;
int options = ADD_CACHE_OK_TO_ADD | ADD_CACHE_SKIP_DFCHECK;
-   if (clear)
-   if (remove_file_from_cache(path))
-   return -1;
+   if (remove_file_from_cache(path))
+   return -1;
if (o)
if (add_cacheinfo(o->mode, o->sha1, path, 1, 0, options))
return -1;
-- 
1.8.3.2.908.gbd0dbd0

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/3] merge -Xindex-only

2013-07-07 Thread Thomas Rast
[Michael, sorry for the double mail -- I typoed the list address on
the first round.]

I recently looked into making merge-recursive more useful as a modular
piece in various tasks, e.g. Michael's git-imerge and the experiments
I made in showing evil merges.

This miniseries is the extremely low-hanging fruit.  If it makes a
good first step for git-imerge, perhaps it can go in like this.  It's
not a big speedup (about 2.2s vs 2.4s in a sample conflicting merge in
git.git), but it does feel much cleaner to avoid touching the worktree
unless actually necessary.

Otherwise it's probably not worth it just yet; for what I want to do
with it, we need some more reshuffling of things.



Thomas Rast (3):
  merge-recursive: remove dead conditional in update_stages()
  merge-recursive: untangle double meaning of o->call_depth
  merge-recursive: -Xindex-only to leave worktree unchanged

 Documentation/merge-strategies.txt |  4 
 merge-recursive.c  | 46 +-
 merge-recursive.h  |  1 +
 t/t3030-merge-recursive.sh | 13 +++
 4 files changed, 43 insertions(+), 21 deletions(-)

-- 
1.8.3.2.908.gbd0dbd0

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/3] merge-recursive: -Xindex-only to leave worktree unchanged

2013-07-07 Thread Thomas Rast
Using the new no_worktree flag from the previous commit, we can teach
merge-recursive to leave the worktree untouched.  Expose this with a
new strategy option so that scripts can use it.
---
 Documentation/merge-strategies.txt |  4 
 merge-recursive.c  |  2 ++
 t/t3030-merge-recursive.sh | 13 +
 3 files changed, 19 insertions(+)

diff --git a/Documentation/merge-strategies.txt 
b/Documentation/merge-strategies.txt
index 49a9a7d..b663a2e 100644
--- a/Documentation/merge-strategies.txt
+++ b/Documentation/merge-strategies.txt
@@ -92,6 +92,10 @@ subtree[=];;
is prefixed (or stripped from the beginning) to make the shape of
two trees to match.
 
+index-only;;
+   Write the merge result only to the index; do not touch the
+   worktree.
+
 octopus::
This resolves cases with more than two heads, but refuses to do
a complex merge that needs manual resolution.  It is
diff --git a/merge-recursive.c b/merge-recursive.c
index b93b762..a58d691 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -2094,6 +2094,8 @@ int parse_merge_opt(struct merge_options *o, const char 
*s)
if ((o->rename_score = parse_rename_score(&score)) == -1 || 
*score != 0)
return -1;
}
+   else if (!strcmp(s, "index-only"))
+   o->no_worktree = 1;
else
return -1;
return 0;
diff --git a/t/t3030-merge-recursive.sh b/t/t3030-merge-recursive.sh
index 2f96100..2f3a16c 100755
--- a/t/t3030-merge-recursive.sh
+++ b/t/t3030-merge-recursive.sh
@@ -296,6 +296,19 @@ test_expect_success 'merge-recursive result' '
 
 '
 
+test_expect_success 'merge-recursive --index-only' '
+
+   rm -fr [abcd] &&
+   git checkout -f "$c2" &&
+   test_expect_code 1 git merge-recursive --index-only "$c0" -- "$c2" 
"$c1" &&
+   git ls-files -s >actual &&
+   # reuses "expected" from previous test!
+   test_cmp expected actual &&
+   git diff HEAD >actual-diff &&
+   : >expected-diff &&
+   test_cmp expected-diff actual-diff
+'
+
 test_expect_success 'fail if the index has unresolved entries' '
 
rm -fr [abcd] &&
-- 
1.8.3.2.908.gbd0dbd0

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3] merge-recursive: untangle double meaning of o->call_depth

2013-07-07 Thread Thomas Rast
o->call_depth has a double function: a nonzero call_depth means we
want to construct virtual merge bases, but it also means we want to
avoid touching the worktree.  Introduce a new flag o->no_worktree for
the latter.

Signed-off-by: Thomas Rast 
---
 merge-recursive.c | 38 +-
 merge-recursive.h |  1 +
 2 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/merge-recursive.c b/merge-recursive.c
index 0b9cafb..b93b762 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -407,10 +407,10 @@ static void record_df_conflict_files(struct merge_options 
*o,
int i;
 
/*
-* If we're merging merge-bases, we don't want to bother with
-* any working directory changes.
+* If we're working in-core only (e.g., merging merge-bases),
+* we don't want to bother with any working directory changes.
 */
-   if (o->call_depth)
+   if (o->no_worktree)
return;
 
/* Ensure D/F conflicts are adjacent in the entries list. */
@@ -723,7 +723,7 @@ static void update_file_flags(struct merge_options *o,
  int update_cache,
  int update_wd)
 {
-   if (o->call_depth)
+   if (o->no_worktree)
update_wd = 0;
 
if (update_wd) {
@@ -930,7 +930,7 @@ static struct merge_file_info merge_file_1(struct 
merge_options *o,
result.clean = merge_submodule(result.sha,
   one->path, one->sha1,
   a->sha1, b->sha1,
-  !o->call_depth);
+  !o->no_worktree);
} else if (S_ISLNK(a->mode)) {
hashcpy(result.sha, a->sha1);
 
@@ -1002,7 +1002,7 @@ static void handle_change_delete(struct merge_options *o,
 const char *change, const char *change_past)
 {
char *renamed = NULL;
-   if (dir_in_way(path, !o->call_depth)) {
+   if (dir_in_way(path, !o->no_worktree)) {
renamed = unique_path(o, path, a_sha ? o->branch1 : o->branch2);
}
 
@@ -1127,10 +1127,10 @@ static void handle_file(struct merge_options *o,
char *add_name = unique_path(o, rename->path, other_branch);
update_file(o, 0, add->sha1, add->mode, add_name);
 
-   remove_file(o, 0, rename->path, 0);
+   remove_file(o, 0, rename->path, o->no_worktree);
dst_name = unique_path(o, rename->path, cur_branch);
} else {
-   if (dir_in_way(rename->path, !o->call_depth)) {
+   if (dir_in_way(rename->path, !o->no_worktree)) {
dst_name = unique_path(o, rename->path, cur_branch);
output(o, 1, _("%s is a directory in %s adding as %s 
instead"),
   rename->path, other_branch, dst_name);
@@ -1237,7 +1237,7 @@ static void conflict_rename_rename_2to1(struct 
merge_options *o,
 * merge base just undo the renames; they can be detected
 * again later for the non-recursive merge.
 */
-   remove_file(o, 0, path, 0);
+   remove_file(o, 0, path, o->no_worktree);
update_file(o, 0, mfi_c1.sha, mfi_c1.mode, a->path);
update_file(o, 0, mfi_c2.sha, mfi_c2.mode, b->path);
} else {
@@ -1245,7 +1245,7 @@ static void conflict_rename_rename_2to1(struct 
merge_options *o,
char *new_path2 = unique_path(o, path, ci->branch2);
output(o, 1, _("Renaming %s to %s and %s to %s instead"),
   a->path, new_path1, b->path, new_path2);
-   remove_file(o, 0, path, 0);
+   remove_file(o, 0, path, o->no_worktree);
update_file(o, 0, mfi_c1.sha, mfi_c1.mode, new_path1);
update_file(o, 0, mfi_c2.sha, mfi_c2.mode, new_path2);
free(new_path2);
@@ -1404,7 +1404,7 @@ static int process_renames(struct merge_options *o,
 * add-source case).
 */
remove_file(o, 1, ren1_src,
-   renamed_stage == 2 || 
!was_tracked(ren1_src));
+   o->no_worktree || renamed_stage == 2 || 
!was_tracked(ren1_src));
 
hashcpy(src_other.sha1, 
ren1->src_entry->stages[other_stage].sha);
src_other.mode = 
ren1->src_entry->stages[other_stage].mode;
@@ -1600,7 +1600,7 @@ static int merge_content(struct merge_options *o,
 o->branch2 == rename_conflict_info->branch1) ?
pair1->two->path : pair1->one->path;
 
-   if (dir_in_way(path, !o->call_depth))
+   if (dir_in_way(path, !o->no_worktr

Re: [PATCH 3/3] name-rev doc: rewrite --stdin paragraph

2013-07-07 Thread Junio C Hamano
Ramkumar Ramachandra  writes:

> Signed-off-by: Ramkumar Ramachandra 
> ---
>  Documentation/git-name-rev.txt | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/Documentation/git-name-rev.txt b/Documentation/git-name-rev.txt
> index 7cde4b3..94bded8 100644
> --- a/Documentation/git-name-rev.txt
> +++ b/Documentation/git-name-rev.txt
> @@ -32,8 +32,10 @@ OPTIONS
>   List all commits reachable from all refs
>  
>  --stdin::
> - Read from stdin, append "()" to all sha1's of nameable
> - commits, and pass to stdout
> + Transform stdin by substituting all the 40-character SHA-1
> + hexes (say $hex) with "$hex ($rev_name)".  When used with
> + --name-only, substitute with "$rev_name", omitting $hex
> + altogether.  Intended for the scripter's use.
>  
>  --name-only::
>   Instead of printing both the SHA-1 and the name, print only

Looks obviously correct and more helpful description.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] name-rev: strip trailing ^0 in when --name-only

2013-07-07 Thread Junio C Hamano
Ramkumar Ramachandra  writes:

> 236157 (Teach git-describe how to run name-rev, 2007-05-21) introduced
> `git name-rev --name-only`, with the intent of using it to implement
> `git describe --contains`.  According to the message, one of the primary
> objectives of --name-only was to make the output of name-rev match that
> of describe.
>
>   $ git describe --contains --all master
>   master
>
>   $ git describe --contains --all master~1
>   master~1
>
>   $ git describe --contains --all v1.8.3~1
>   v1.8.3~1
>
>   $ git describe --contains --all v1.8.3
>   v1.8.3^0

WRT "describe --contains", I do agree that both of these

$ git describe $(git rev-parse v1.8.3^0)
$ git describe --contains $(git rev-parse v1.8.3^0)

should just say "v1.8.3" without ~0/^0/~0~0~0 etc. and the last
example you showed will be improved by dropping ^0 at the end.

However.

I was a bit bothered by the description talking _only_ about
describe, but the actual change is to modify what name-rev gives its
direct users as well.  And that made me realize that the patch
itself has an undesirable side effect.

"describe" is _only_ about commit history graph, so in its context
v1.8.3 means the same thing as v1.8.3^0 (we never want to get a tag;
we always want a commit).  But I do not think "name-rev" is limited
to commits, in the sense that you would see this:

$ git rev-parse v1.8.3 v1.8.3^0 | git name-rev --stdin
8af06057d0c31a24e8737ae846ac2e116e8bafb9
edca4152560522a431a51fc0a06147fc680b5b18 (tags/v1.8.3^0)

The second object is _not_ v1.8.3 but is v1.8.3^0 in the context of
name-rev, whose purpose is to give you a string you can feed
"rev-parse" and get the object name back.  "rev-parse v1.8.3" will
not give you the commit object name, so you need to keep "^0".
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/3] name-rev: fix assumption about --name-only usage

2013-07-07 Thread Junio C Hamano
Ramkumar Ramachandra  writes:

> 236157 (Teach git-describe how to run name-rev, 2007-05-21) introduced
> `git name-rev --name-only`, with the intent of using it to implement
> `git describe --contains`.  According to the message, users wanted to
> use describe to figure out which tags contains a specific commit.
> name-rev already did this, but didn't print out in the same format as
> describe:
>
>   $ git describe v1.8.3~1
>   v1.8.3-rc3-8-g5e49f30
>
> ...  As a result, while
>
>   $ git describe --contains v1.8.3~1
>   v1.8.3~1

The above two look consistent, yes.

>
> would get name-rev to print output in the same format as describe,
>
>   $ git describe --contains --all v1.8.3~1
>   tags/v1.8.3~1
>
> would not strip the leading "tags/".

If you _know_ v1.8.3 does not appear outside "tags/", this does look
inconsistent, but I do not think the code checks it.  And if the
code does not, I am not sure not stripping "tags/" is necessarily a
bad thing, because "--all" allows names to come outside "tags/"
hierarchy.

Also how should this interact with v1.8.3-1-g98c5c4a that changed
the rule somewhat so that the common prefix is stripped when we know
the result is not ambiguous?
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/4] cat-file: add --batch-disk-sizes option

2013-07-07 Thread Jeff King
On Sun, Jul 07, 2013 at 10:49:46AM -0700, Junio C Hamano wrote:

> Jeff King  writes:
> 
> > Perhaps we need
> >
> >   git cat-file --batch-format="%(disk-size) %(object)"
> >
> > or similar.
> 
> I agree with your reasoning.  It may be simpler to give an interface
> to ask for which pieces of info, e.g. --batch-cols=size,disksize,
> without giving the readers a flexible "format".

Yeah, that is probably a lot more sane. That would be sufficient for my
use, I doubt anyone really wants the full format, and it would be easy
to add it later if we are wrong. It would also be easy to add other
items from the sha1_object_info_extended list, too (e.g.,
loose/cached/packed).

I'll do that in my re-roll.

> > +NOTE: The on-disk size reported is accurate, but care should be taken in
> > +drawing conclusions about which refs or objects are responsible for disk
> > +usage. [...]
> 
> This is a good note to leave to the readers. I was wondering how
> valid it is to claim that B is taking a lot of space compared to C when
> you have three objects A, B and C (in decreasing order of on-disk
> footprint) when A is huge and C is a small delta against A and B is
> independent.  The role of A and C in their delta chain could easily
> be swapped during the next full repack and then C will appear a lot
> larger than B.

Yeah. I exercise a lot of human analysis when I use this tool myself.
What I am usually looking for is that somebody has forked a 100M repo,
and then dumped 2G of extra data on top. Those cases are not all that
hard to spot, and would not usually change too much in a repack.

> It might be interesting to measure the total disk footprint of an
> entire delta "family" (the objects that delta against the same
> base).  You may find out that hello.c with a manageable size has
> very many revisions and overall has a larger on-disk footprint than
> a single copy of unchanging help.mov clip used in the documentation
> does, which may be an interesting observation to make.

Yeah, that is an interesting stat, though I have not had a need for it
myself. Certainly you could do:

  git rev-list --objects --all |
  grep ' hello.c$' |
  cut -d' ' -f1 |
  git cat-file --batch-disk-sizes

to see hello.c's size. But I cannot think offhand of a way to get the
list of objects that are in a delta chain together (potentially crossing
path boundaries), short of parsing verify-pack output myself. I think it
is orthogonal to this patch, though. This exposes more information about
objects themselves; it would be up to another patch to help discover and
narrow the list of interesting objects.

-Peff
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] git-svn: allow git-svn fetching to work using serf

2013-07-07 Thread Kyle McKay
I forwarded the "SVNAllowBulkUpdates Off" question to the us...@subversion.apache.org 
 list and here's the reply:


On Jul 7, 2013, at 11:11, Lieven Govaerts wrote:

On Sun, Jul 7, 2013 at 4:48 PM, Kyle McKay  wrote:

On Jul 7, 2013, at 06:39, Daniel Shahaf wrote:


Kyle McKay wrote on Sat, Jul 06, 2013 at 19:46:40 -0700:


On Jul 6, 2013, at 19:23, Jonathan Nieder wrote:


Kyle McKay wrote:

Unless bulk updates are disabled when using the serf access  
method

(the only one available with svn 1.8) for https?: urls,
apply_textdelta does indeed get called multiple times in a row
without an intervening temp_release.



You mean "Unless bulk updates are enabled" and "without an  
intervening

close_file", right?



The problem seems to be skelta mode although it may just be the  
fact
that ra_serf has multiple connections outstanding and since  
ra_neon only

ever has one it can't happen over ra_neon.

If the server disables bulk updates (SVNAllowBulkUpdates Off) all
clients are forced to use skelta mode, even ra_neon clients.



As Brane and I have pointed out, git-svn can instruct libsvn_* to  
use

bulk updates regardless of the server version, by setting
SVN_CONFIG_OPTION_HTTP_BULK_UPDATES (new in 1.8).

If you have questions about that, though, please address them to
us...@subversion.apache.org (the proper list for API usage  
questions),

not to me personally.



According to the table at
,
if the server sets SVNAllowBulkUpdates Off, the client will be  
forced to use

skelta no matter what the client setting is.


Indeed, the server admin has the final say in which mode is actually
used. SVNAllowBulkUpdates Off is only advised if the server admin
wants a log line per accessed resource. I doubt it's used a lot, but
the option is there.



Is that table incorrect?


No, that table is correct.

Lieven


So the final say on whether or not bulk updates are allowed rests with  
the server side, which means git-svn really needs to handle skelta mode  
on the client side properly when using ra-serf to guarantee  
functionality with all subversion server configurations.


Kyle
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] prompt: do not double-discriminate detached HEAD

2013-07-07 Thread Kyle McKay

On Jul 7, 2013, at 10:53, Junio C Hamano wrote:

John Szakmeister  writes:

On Sun, Jul 7, 2013 at 8:52 AM, Ramkumar Ramachandra > wrote:

When GIT_PS1_SHOWCOLORHINTS is turned on, there is no need to put a
detached HEAD within parenthesis: the color can be used to  
discriminate

the detached HEAD.

Signed-off-by: Ramkumar Ramachandra 
---
For cuteness :)


Personally, I'd rather see the parens kept.  Not everyone sees red
very well--I know several people who can't see it at all, and it  
keeps

it consistent with non-colored output.


+1; I find red on many terminal emulators to be too dark to tell,
especially in a small font, from black myself.


+1; me too for the same reason.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] merge-recursive: untangle double meaning of o->call_depth

2013-07-07 Thread Junio C Hamano
Thomas Rast  writes:

> o->call_depth has a double function: a nonzero call_depth means we
> want to construct virtual merge bases, but it also means we want to
> avoid touching the worktree.  Introduce a new flag o->no_worktree for
> the latter.

I do remember discussing this with you the other day, and while I do
agree that we may not want to touch the working tree in the
outermost merge (i.e. o->call_depth is zero) in some applications, I
cannot think of a situation where you _do_ want to touch the working
tree while performing the inner merge.  I'd feel safer if the code
said that "no matter what no-worktree option says, we won't touch
the working tree if o->call_depth is not zero" clearly in some way,
e.g. 

if (o->call_depth || o->in_index_merge)
return; /* leave without touching working tree */

In other words, I do not like the part of the code that pretends
these two are independent options, when what we really want is to
have two modes for the outermost (o->call_depth == 0) case.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/2] Avoid suggestions to "merge" remote changes

2013-07-07 Thread John Keeping
As another aspect of the change to make git-pull die when remote changes
do not fast-forward, this series changes the advice messages in git-push to
avoid implying that the user wants to merge remote changes.

I've chosen the word "integrate" because it does not carry any special
meaning in Git (in terms of being a command) and seems to cover the
merge and rebase cases nicely.

John Keeping (2):
  push: avoid suggesting "merging" remote changes
  pull: change the description to "integrate" changes

 Documentation/git-pull.txt |  2 +-
 builtin/push.c | 12 ++--
 git-pull.sh|  2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

-- 
1.8.3.2.855.gbc9faed

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/2] push: avoid suggesting "merging" remote changes

2013-07-07 Thread John Keeping
With some workflows, it is more suitable to rebase on top of remote
changes when a push does not fast-forward.  Change the advice messages
in git-push to suggest that a user "integrate the remote changes"
instead of "merge the remote changes" to make this slightly clearer.

Also change the suggested 'git pull' to 'git pull ...' to hint to users
that they may want to add other parameters.

Suggested-by: Philip Oakley 
Signed-off-by: John Keeping 
---
 builtin/push.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/builtin/push.c b/builtin/push.c
index 2d84d10..44e53cd 100644
--- a/builtin/push.c
+++ b/builtin/push.c
@@ -211,8 +211,8 @@ static void setup_default_push_refspecs(struct remote 
*remote)
 
 static const char message_advice_pull_before_push[] =
N_("Updates were rejected because the tip of your current branch is 
behind\n"
-  "its remote counterpart. Merge the remote changes (e.g. 'git 
pull')\n"
-  "before pushing again.\n"
+  "its remote counterpart. Integrate the remote changes (e.g.\n"
+  "'git pull ...') before pushing again.\n"
   "See the 'Note about fast-forwards' in 'git push --help' for 
details.");
 
 static const char message_advice_use_upstream[] =
@@ -223,15 +223,15 @@ static const char message_advice_use_upstream[] =
 
 static const char message_advice_checkout_pull_push[] =
N_("Updates were rejected because a pushed branch tip is behind its 
remote\n"
-  "counterpart. Check out this branch and merge the remote changes\n"
-  "(e.g. 'git pull') before pushing again.\n"
+  "counterpart. Check out this branch and integrate the remote 
changes\n"
+  "(e.g. 'git pull ...') before pushing again.\n"
   "See the 'Note about fast-forwards' in 'git push --help' for 
details.");
 
 static const char message_advice_ref_fetch_first[] =
N_("Updates were rejected because the remote contains work that you 
do\n"
   "not have locally. This is usually caused by another repository 
pushing\n"
-  "to the same ref. You may want to first merge the remote changes 
(e.g.,\n"
-  "'git pull') before pushing again.\n"
+  "to the same ref. You may want to first integrate the remote 
changes\n"
+  "(e.g., 'git pull ...') before pushing again.\n"
   "See the 'Note about fast-forwards' in 'git push --help' for 
details.");
 
 static const char message_advice_ref_already_exists[] =
-- 
1.8.3.2.855.gbc9faed

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] pull: change the description to "integrate" changes

2013-07-07 Thread John Keeping
Since git-pull learned the --rebase option it has not just been about
merging changes from a remote repository (where "merge" is in the sense
of "git merge").  Change the description to use "integrate" instead of
"merge" in order to reflect this.

Signed-off-by: John Keeping 
---
 Documentation/git-pull.txt | 2 +-
 git-pull.sh| 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/git-pull.txt b/Documentation/git-pull.txt
index 24ab07a..6ef8d59 100644
--- a/Documentation/git-pull.txt
+++ b/Documentation/git-pull.txt
@@ -3,7 +3,7 @@ git-pull(1)
 
 NAME
 
-git-pull - Fetch from and merge with another repository or a local branch
+git-pull - Fetch from and integrate with another repository or a local branch
 
 
 SYNOPSIS
diff --git a/git-pull.sh b/git-pull.sh
index 6828e2c..ecf0011 100755
--- a/git-pull.sh
+++ b/git-pull.sh
@@ -5,7 +5,7 @@
 # Fetch one or more remote refs and merge it/them into the current HEAD.
 
 USAGE='[-n | --no-stat] [--[no-]commit] [--[no-]squash] [--[no-]ff] [-s 
strategy]... []  ...'
-LONG_USAGE='Fetch one or more remote refs and merge it/them into the current 
HEAD.'
+LONG_USAGE='Fetch one or more remote refs and integrate it/them into the 
current HEAD.'
 SUBDIRECTORY_OK=Yes
 OPTIONS_SPEC=
 . git-sh-setup
-- 
1.8.3.2.855.gbc9faed

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] git-config(1): clarify precedence of multiple values

2013-07-07 Thread John Keeping
In order to clarify which value is used when there are multiple values
defined for a key, re-order the list of file locations so that it runs
from least specific to most specific.  Then add a paragraph which simply
says that the last value will be used.

Signed-off-by: John Keeping 
---
On Sun, Jul 07, 2013 at 10:31:38AM -0700, Junio C Hamano wrote:
> John Keeping  writes:
> 
> >> I wondered if we should explain the significance of "last" a bit
> >> more (like "this results in the value from the most specific
> >> configuration file to be used, the ones in $GIT_DIR/config
> >> overriding what is in $HOME/.gitconfig"), but I do not have a strong
> >> opinion either way.  Let's queue this for 'maint' for now.
> >
> > I don't think that change belongs here.  How about doing something like
> > this in the FILES section (the first two hunks are just reordering the
> > existing list, only the last hunk changes the content):
> 
> Sounds like a good change to me ;-).

So here it is as a proper patch :-)

 Documentation/git-config.txt | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/Documentation/git-config.txt b/Documentation/git-config.txt
index fbad05e..99dc497 100644
--- a/Documentation/git-config.txt
+++ b/Documentation/git-config.txt
@@ -206,12 +206,8 @@ FILES
 If not set explicitly with '--file', there are four files where
 'git config' will search for configuration options:
 
-$GIT_DIR/config::
-   Repository specific configuration file.
-
-~/.gitconfig::
-   User-specific configuration file. Also called "global"
-   configuration file.
+$(prefix)/etc/gitconfig::
+   System-wide configuration file.
 
 $XDG_CONFIG_HOME/git/config::
Second user-specific configuration file. If $XDG_CONFIG_HOME is not set
@@ -221,8 +217,12 @@ $XDG_CONFIG_HOME/git/config::
you sometimes use older versions of Git, as support for this
file was added fairly recently.
 
-$(prefix)/etc/gitconfig::
-   System-wide configuration file.
+~/.gitconfig::
+   User-specific configuration file. Also called "global"
+   configuration file.
+
+$GIT_DIR/config::
+   Repository specific configuration file.
 
 If no further options are given, all reading options will read all of these
 files that are available. If the global or the system-wide configuration
@@ -230,6 +230,10 @@ file are not available they will be ignored. If the 
repository configuration
 file is not available or readable, 'git config' will exit with a non-zero
 error code. However, in neither case will an error message be issued.
 
+The files are read in the order given above, with last value found taking
+precedence over values read earlier.  When multiple values are taken then all
+values of a key from all files will be used.
+
 All writing options will per default write to the repository specific
 configuration file. Note that this also affects options like '--replace-all'
 and '--unset'. *'git config' will only ever change one file at a time*.
-- 
1.8.3.2.855.gbc9faed

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] git-config(1): clarify precedence of multiple values

2013-07-07 Thread Jeff King
On Sun, Jul 07, 2013 at 08:49:56PM +0100, John Keeping wrote:

> In order to clarify which value is used when there are multiple values
> defined for a key, re-order the list of file locations so that it runs
> from least specific to most specific.  Then add a paragraph which simply
> says that the last value will be used.

Makes sense.

>  file is not available or readable, 'git config' will exit with a non-zero
>  error code. However, in neither case will an error message be issued.
>  
> +The files are read in the order given above, with last value found taking
> +precedence over values read earlier.  When multiple values are taken then all
> +values of a key from all files will be used.

What order do the values come in --get-all?

If you read carefully, I think it is implied by the "files are read in
order given above", but it took me a minute to think about it. Maybe add
at the end:

  ...values of a key from all files will be shown, in the order the
  files are processed (least specific to most specific).

Or something (I do not like "least specific" because we have not
introduced that term earlier, but I can't think of another succinct way
to say it).

But even without that, the patch is certainly an improvement.

-Peff
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 15/22] read-cache: read index-v5

2013-07-07 Thread Eric Sunshine
On Sun, Jul 7, 2013 at 4:11 AM, Thomas Gummerer  wrote:
> Make git read the index file version 5 without complaining.
>
> This version of the reader reads neither the cache-tree
> nor the resolve undo data, but doesn't choke on an index that
> includes such data.
> ---
> diff --git a/read-cache-v5.c b/read-cache-v5.c
> new file mode 100644
> index 000..e319f30
> --- /dev/null
> +++ b/read-cache-v5.c
> @@ -0,0 +1,658 @@
> +static struct directory_entry *read_directories(unsigned int *dir_offset,
> +   unsigned int *dir_table_offset,
> +   void *mmap,
> +   int mmap_size)
> +{
> +   int i, ondisk_directory_size;
> +   uint32_t *filecrc, *beginning, *end;
> +   struct directory_entry *current = NULL;
> +   struct ondisk_directory_entry *disk_de;
> +   struct directory_entry *de;
> +   unsigned int data_len, len;
> +   char *name;
> +
> +   /* Length of pathname + nul byte for termination + size of
> +* members of ondisk_directory_entry. (Just using the size
> +* of the stuct doesn't work, because there may be padding

s/stuct/struct/

> +* bytes for the struct)
> +*/

Also:

  /*
   * Format multi-line comment
   * like this.
   */

Remaining multi-line comments appear to be formatted correctly.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 17/22] read-cache: read cache-tree in index-v5

2013-07-07 Thread Eric Sunshine
On Sun, Jul 7, 2013 at 4:11 AM, Thomas Gummerer  wrote:
> Since the cache-tree data is saved as part of the directory data,
> we already read it at the beginning of the index. The cache-tree
> is only converted from this directory data.
>
> The cache-tree data is arranged in a tree, with the children sorted by
> pathlen at each node, while the ondisk format is sorted lexically.
> So we have to rebuild this format from the on-disk directory list.
>
> Signed-off-by: Thomas Gummerer 
> ---
> diff --git a/read-cache-v5.c b/read-cache-v5.c
> index 193970a..f1ad132 100644
> --- a/read-cache-v5.c
> +++ b/read-cache-v5.c
> @@ -448,6 +448,103 @@ static int read_conflicts(struct conflict_entry **head,
> return 0;
>  }
>
> +/*
> + * This function modifys the directory argument that is given to it.

s/modifys/modifies/

> + * Don't use it if the directory entries are still needed after.
> + */
> +static struct cache_tree *cache_tree_convert_v5(struct directory_entry *de)
> +{
> +   struct directory_queue *queue;
> +
> +   if (!de->de_nentries)
> +   return NULL;
> +   queue = xcalloc(1, sizeof(struct directory_queue));
> +   queue[0].de = de;
> +   queue[0].down = xcalloc(de->de_nsubtrees, sizeof(struct 
> directory_queue));
> +
> +   sort_directories(de, queue[0].down);
> +   return convert_one(queue, 0);
> +}
> +
>  static void resolve_undo_convert_v5(struct index_state *istate,
> struct conflict_entry *conflict)
>  {
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 18/22] read-cache: write index-v5

2013-07-07 Thread Eric Sunshine
On Sun, Jul 7, 2013 at 4:11 AM, Thomas Gummerer  wrote:
> Write the index version 5 file format to disk. This version doesn't
> write the cache-tree data and resolve-undo data to the file.
>
> The main work is done when filtering out the directories from the
> current in-memory format, where in the same turn also the conflicts
> and the file data is calculated.
>
> Helped-by: Nguyen Thai Ngoc Duy 
> Helped-by: Thomas Rast 
> Signed-off-by: Thomas Gummerer 
> ---
> diff --git a/read-cache-v5.c b/read-cache-v5.c
> index f1ad132..f056f6b 100644
> --- a/read-cache-v5.c
> +++ b/read-cache-v5.c
> +static int write_index_v5(struct index_state *istate, int newfd)
> +{
> +   struct cache_version_header hdr;
> +   struct cache_header hdr_v5;
> +   struct cache_entry **cache = istate->cache;
> +   struct directory_entry *de;
> +   struct ondisk_directory_entry *ondisk;
> +   int entries = istate->cache_nr;
> +   int i, removed, non_conflicted, total_dir_len, ondisk_directory_size;
> +   int total_file_len, conflict_offset, offset_to_offset;
> +   unsigned int ndir;
> +   uint32_t crc;
> +
> +   if (istate->partially_read)
> +   die("BUG: index: cannot write a partially read index");
> +
> +   for (i = removed = 0; i < entries; i++) {
> +   if (cache[i]->ce_flags & CE_REMOVE)
> +   removed++;
> +   }
> +   hdr.hdr_signature = htonl(CACHE_SIGNATURE);
> +   hdr.hdr_version = htonl(istate->version);
> +   hdr_v5.hdr_nfile = htonl(entries - removed);
> +   hdr_v5.hdr_nextension = htonl(0); /* Currently no extensions are 
> supported */
> +
> +   non_conflicted = 0;
> +   total_dir_len = 0;
> +   total_file_len = 0;
> +   de = compile_directory_data(istate, entries, &ndir, &non_conflicted,
> +   &total_dir_len, &total_file_len);
> +   hdr_v5.hdr_ndir = htonl(ndir);
> +
> +   /*
> +* This is needed because the compiler aligns structs to sizes multipe

s/multipe/multiple/

> +* of 4
> +*/
> +   ondisk_directory_size = sizeof(ondisk->flags)
> +   + sizeof(ondisk->foffset)
> +   + sizeof(ondisk->cr)
> +   + sizeof(ondisk->ncr)
> +   + sizeof(ondisk->nsubtrees)
> +   + sizeof(ondisk->nfiles)
> +   + sizeof(ondisk->nentries)
> +   + sizeof(ondisk->sha1);
> +   hdr_v5.hdr_fblockoffset = htonl(sizeof(hdr) + sizeof(hdr_v5) + 4
> +   + (ndir + 1) * 4
> +   + total_dir_len
> +   + ndir * (ondisk_directory_size + 4)
> +   + (non_conflicted + 1) * 4);
> +
> +   crc = 0;
> +   if (ce_write(&crc, newfd, &hdr, sizeof(hdr)) < 0)
> +   return -1;
> +   if (ce_write(&crc, newfd, &hdr_v5, sizeof(hdr_v5)) < 0)
> +   return -1;
> +   crc = htonl(crc);
> +   if (ce_write(NULL, newfd, &crc, 4) < 0)
> +   return -1;
> +
> +   conflict_offset = sizeof(hdr) + sizeof(hdr_v5) + 4
> +   + (ndir + 1) * 4
> +   + total_dir_len
> +   + ndir * (ondisk_directory_size + 4)
> +   + (non_conflicted + 1) * 4
> +   + total_file_len
> +   + non_conflicted * (sizeof(struct ondisk_cache_entry) + 4);
> +   if (write_directories(de, newfd, conflict_offset) < 0)
> +   return -1;
> +   offset_to_offset = sizeof(hdr) + sizeof(hdr_v5) + 4
> +   + (ndir + 1) * 4
> +   + total_dir_len
> +   + ndir * (ondisk_directory_size + 4);
> +   if (write_entries(istate, de, entries, newfd, offset_to_offset) < 0)
> +   return -1;
> +   if (write_conflicts(istate, de, newfd) < 0)
> +   return -1;
> +   return ce_flush(newfd);
> +}
> +
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/4] cat-file: add --batch-disk-sizes option

2013-07-07 Thread brian m. carlson
On Sun, Jul 07, 2013 at 06:09:49AM -0400, Jeff King wrote:
> +NOTE: The on-disk size reported is accurate, but care should be taken in
> +drawing conclusions about which refs or objects are responsible for disk
> +usage. The size of a packed non-delta object be much larger than the

You probably meant "may be" here.   ^

-- 
brian m. carlson / brian with sandals: Houston, Texas, US
+1 832 623 2791 | http://www.crustytoothpaste.net/~bmc | My opinion only
OpenPGP: RSA v4 4096b: 88AC E9B2 9196 305B A994 7552 F1BA 225C 0223 B187


signature.asc
Description: Digital signature


Re: [PATCH 2/3] name-rev: strip trailing ^0 in when --name-only

2013-07-07 Thread Junio C Hamano
Junio C Hamano  writes:

> WRT "describe --contains", I do agree that both of these
>
> $ git describe $(git rev-parse v1.8.3^0)
> $ git describe --contains $(git rev-parse v1.8.3^0)
> 
> should just say "v1.8.3" without ~0/^0/~0~0~0 etc. and the last
> example you showed will be improved by dropping ^0 at the end.
>
> However.
>
> I was a bit bothered by the description talking _only_ about
> describe, but the actual change is to modify what name-rev gives its
> direct users as well.  And that made me realize that the patch
> itself has an undesirable side effect.
>
> "describe" is _only_ about commit history graph, so in its context
> v1.8.3 means the same thing as v1.8.3^0 (we never want to get a tag;
> we always want a commit).  But I do not think "name-rev" is limited
> to commits, in the sense that you would see this:
>
> $ git rev-parse v1.8.3 v1.8.3^0 | git name-rev --stdin
> 8af06057d0c31a24e8737ae846ac2e116e8bafb9
> edca4152560522a431a51fc0a06147fc680b5b18 (tags/v1.8.3^0)
>
> The second object is _not_ v1.8.3 but is v1.8.3^0 in the context of
> name-rev, whose purpose is to give you a string you can feed
> "rev-parse" and get the object name back.  "rev-parse v1.8.3" will
> not give you the commit object name, so you need to keep "^0".

Well, the code in "name-rev" other than --stdin mode is already
broken (and the documentation half-describes this breakage) in that
it describes the peeled commit and rejects anything other than
commit objects.  The reason I say "half-describes" is that it only
says that the command takes "commit-ish" and leaves it unclear if it
comes up with a name for the tag itself that happens to be
commit-ish, or it does so for the commit that is referred by the
tag.

I'll send out a WIP to fix that, and also help the topic to strip
unnecessary "^0" suffix when name-rev is run as an implementation
detail of "describe" shortly.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Documentation: finding $(prefix)/etc/gitconfig when prefix = /usr

2013-07-07 Thread Robin Rosenberg
Signed-off-by: Robin Rosenberg 
---
 Documentation/git-config.txt | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Documentation/git-config.txt b/Documentation/git-config.txt
index 9ae2508..3198d52 100644
--- a/Documentation/git-config.txt
+++ b/Documentation/git-config.txt
@@ -107,7 +107,8 @@ See also <>.
 
 --system::
For writing options: write to system-wide $(prefix)/etc/gitconfig
-   rather than the repository .git/config.
+   rather than the repository .git/config. However, $(prefix) is /usr
+   then /etc/gitconfig is used.
 +
 For reading options: read only from system-wide $(prefix)/etc/gitconfig
 rather than from all available files.
@@ -214,7 +215,8 @@ $XDG_CONFIG_HOME/git/config::
file was added fairly recently.
 
 $(prefix)/etc/gitconfig::
-   System-wide configuration file.
+   System-wide configuration file, unless $(prefix) is /usr. In the
+   latter case /etc/gitconfig is used.
 
 If no further options are given, all reading options will read all of these
 files that are available. If the global or the system-wide configuration
-- 
1.8.3.rc0.19.g7e6a0cc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Documentation: finding $(prefix)/etc/gitconfig when prefix = /usr

2013-07-07 Thread John Keeping
On Mon, Jul 08, 2013 at 12:00:02AM +0200, Robin Rosenberg wrote:
> Signed-off-by: Robin Rosenberg 
> ---
>  Documentation/git-config.txt | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/git-config.txt b/Documentation/git-config.txt
> index 9ae2508..3198d52 100644
> --- a/Documentation/git-config.txt
> +++ b/Documentation/git-config.txt
> @@ -107,7 +107,8 @@ See also <>.
>  
>  --system::
>   For writing options: write to system-wide $(prefix)/etc/gitconfig
> - rather than the repository .git/config.
> + rather than the repository .git/config. However, $(prefix) is /usr
> + then /etc/gitconfig is used.

That's a build time condition, not something that's decided at runtime
so I'm not sure that this logic belongs in the user facing
documentation.  The technically correct change would be to use
"$(sysconfdir)/gitconfig" but I think that will just confuse users more.

Since we have a build step for the documentation, I wonder if it's
possible to replace these with the correct directory at build time.

>  +
>  For reading options: read only from system-wide $(prefix)/etc/gitconfig
>  rather than from all available files.
> @@ -214,7 +215,8 @@ $XDG_CONFIG_HOME/git/config::
>   file was added fairly recently.
>  
>  $(prefix)/etc/gitconfig::
> - System-wide configuration file.
> + System-wide configuration file, unless $(prefix) is /usr. In the
> + latter case /etc/gitconfig is used.
>  
>  If no further options are given, all reading options will read all of these
>  files that are available. If the global or the system-wide configuration
> -- 
> 1.8.3.rc0.19.g7e6a0cc
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Documentation: finding $(prefix)/etc/gitconfig when prefix = /usr

2013-07-07 Thread Robin Rosenberg

I guess this isn't true either. Does anyone have a better way of
specifying where the system-wide config file is read from,
or a user-parseable definition of $(prefix)?

-- robin

- Ursprungligt meddelande -
> Signed-off-by: Robin Rosenberg 
> ---
>  Documentation/git-config.txt | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/git-config.txt
> b/Documentation/git-config.txt
> index 9ae2508..3198d52 100644
> --- a/Documentation/git-config.txt
> +++ b/Documentation/git-config.txt
> @@ -107,7 +107,8 @@ See also <>.
>  
>  --system::
>   For writing options: write to system-wide $(prefix)/etc/gitconfig
> - rather than the repository .git/config.
> + rather than the repository .git/config. However, $(prefix) is /usr
> + then /etc/gitconfig is used.
>  +
>  For reading options: read only from system-wide
>  $(prefix)/etc/gitconfig
>  rather than from all available files.
> @@ -214,7 +215,8 @@ $XDG_CONFIG_HOME/git/config::
>   file was added fairly recently.
>  
>  $(prefix)/etc/gitconfig::
> - System-wide configuration file.
> + System-wide configuration file, unless $(prefix) is /usr. In the
> + latter case /etc/gitconfig is used.
>  
>  If no further options are given, all reading options will read all
>  of these
>  files that are available. If the global or the system-wide
>  configuration
> --
> 1.8.3.rc0.19.g7e6a0cc
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Documentation: finding $(prefix)/etc/gitconfig when prefix = /usr

2013-07-07 Thread Robin Rosenberg


- Ursprungligt meddelande -
> On Mon, Jul 08, 2013 at 12:00:02AM +0200, Robin Rosenberg wrote:
> > Signed-off-by: Robin Rosenberg 
> > ---
> >  Documentation/git-config.txt | 6 --
> >  1 file changed, 4 insertions(+), 2 deletions(-)
> > 
> > diff --git a/Documentation/git-config.txt
> > b/Documentation/git-config.txt
> > index 9ae2508..3198d52 100644
> > --- a/Documentation/git-config.txt
> > +++ b/Documentation/git-config.txt
> > @@ -107,7 +107,8 @@ See also <>.
> >  
> >  --system::
> > For writing options: write to system-wide $(prefix)/etc/gitconfig
> > -   rather than the repository .git/config.
> > +   rather than the repository .git/config. However, $(prefix) is
> > /usr
> > +   then /etc/gitconfig is used.
> 
> That's a build time condition, not something that's decided at
> runtime
> so I'm not sure that this logic belongs in the user facing
> documentation.  The technically correct change would be to use
> "$(sysconfdir)/gitconfig" but I think that will just confuse users
> more.
> 
> Since we have a build step for the documentation, I wonder if it's
> possible to replace these with the correct directory at build time.

Might work for documentation. I'd like a run-time variant for my
particular problem, but the one I found now, e.g. "git rev-parse --show-prefix",
does not work in the general case. I want to tell JGit where the system-wide
configuration file is, even before we have a git repository.

-- robin
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/4] describe: use argv-array

2013-07-07 Thread Junio C Hamano
Instead of using a hand allocated args[] array, use argv-array API
to manage the dynamically created list of arguments when invoking
name-rev.

Signed-off-by: Junio C Hamano 
---
 builtin/describe.c | 31 ---
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/builtin/describe.c b/builtin/describe.c
index 4e675c3..b5434e4 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -7,6 +7,7 @@
 #include "parse-options.h"
 #include "diff.h"
 #include "hash.h"
+#include "argv-array.h"
 
 #define SEEN   (1u<<0)
 #define MAX_TAGS   (FLAG_BITS - 1)
@@ -442,24 +443,24 @@ int cmd_describe(int argc, const char **argv, const char 
*prefix)
die(_("--long is incompatible with --abbrev=0"));
 
if (contains) {
-   const char **args = xmalloc((7 + argc) * sizeof(char *));
-   int i = 0;
-   args[i++] = "name-rev";
-   args[i++] = "--name-only";
-   args[i++] = "--no-undefined";
+   struct argv_array args;
+
+   argv_array_init(&args);
+   argv_array_push(&args, "name-rev");
+   argv_array_push(&args, "--name-only");
+   argv_array_push(&args, "--no-undefined");
if (always)
-   args[i++] = "--always";
+   argv_array_push(&args, "--always");
if (!all) {
-   args[i++] = "--tags";
-   if (pattern) {
-   char *s = xmalloc(strlen("--refs=refs/tags/") + 
strlen(pattern) + 1);
-   sprintf(s, "--refs=refs/tags/%s", pattern);
-   args[i++] = s;
-   }
+   argv_array_push(&args, "--tags");
+   if (pattern)
+   argv_array_pushf(&args, "--refs=refs/tags/%s", 
pattern);
+   }
+   while (*argv) {
+   argv_array_push(&args, *argv);
+   argv++;
}
-   memcpy(args + i, argv, argc * sizeof(char *));
-   args[i + argc] = NULL;
-   return cmd_name_rev(i + argc, args, prefix);
+   return cmd_name_rev(args.argc, args.argv, prefix);
}
 
init_hash(&names);
-- 
1.8.3.2-853-ga8cbcc9

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/4] Make "git name-rev $(git rev-parse v1.8.3)" work

2013-07-07 Thread Junio C Hamano
So here is a set of small preparatory steps to help the other topic
to allow "git describe --contains v1.8.3" to omit the trailing "^0" from its
output.  We do not want to prevent people from allowing "name-rev"
to convert object names other than commit-ishes.

The series should apply on 96ffd4ca (Merge branch
'nk/name-rev-abbreviated-refs', 2013-06-30).

Junio C Hamano (4):
  name-ref: factor out name shortening logic from name_ref()
  name-rev: allow converting the exact object name at the tip of a ref
  describe: use argv-array
  describe/name-rev: tell name-rev to peel the incoming object to commit first

 builtin/describe.c |  32 ---
 builtin/name-rev.c | 113 -
 2 files changed, 112 insertions(+), 33 deletions(-)

-- 
1.8.3.2-853-ga8cbcc9

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/4] name-ref: factor out name shortening logic from name_ref()

2013-07-07 Thread Junio C Hamano
The logic will be used in a new codepath for showing exact matches.

Signed-off-by: Junio C Hamano 
---
 builtin/name-rev.c | 19 ---
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/builtin/name-rev.c b/builtin/name-rev.c
index 87d4854..1234ebb 100644
--- a/builtin/name-rev.c
+++ b/builtin/name-rev.c
@@ -96,6 +96,17 @@ static int subpath_matches(const char *path, const char 
*filter)
return -1;
 }
 
+static const char *name_ref_abbrev(const char *refname, int 
shorten_unambiguous)
+{
+   if (shorten_unambiguous)
+   refname = shorten_unambiguous_ref(refname, 0);
+   else if (!prefixcmp(refname, "refs/heads/"))
+   refname = refname + 11;
+   else if (!prefixcmp(refname, "refs/"))
+   refname = refname + 5;
+   return refname;
+}
+
 struct name_ref_data {
int tags_only;
int name_only;
@@ -134,13 +145,7 @@ static int name_ref(const char *path, const unsigned char 
*sha1, int flags, void
if (o && o->type == OBJ_COMMIT) {
struct commit *commit = (struct commit *)o;
 
-   if (can_abbreviate_output)
-   path = shorten_unambiguous_ref(path, 0);
-   else if (!prefixcmp(path, "refs/heads/"))
-   path = path + 11;
-   else if (!prefixcmp(path, "refs/"))
-   path = path + 5;
-
+   path = name_ref_abbrev(path, can_abbreviate_output);
name_rev(commit, xstrdup(path), 0, 0, deref);
}
return 0;
-- 
1.8.3.2-853-ga8cbcc9

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/4] name-rev: allow converting the exact object name at the tip of a ref

2013-07-07 Thread Junio C Hamano
"git name-rev" is supposed to convert 40-hex object names into
strings that name the same objects based on refs, that can be fed to
"git rev-parse" to get the same object names back, so

$ git rev-parse v1.8.3 v1.8.3^0 | git name-rev --stdin
8af06057d0c31a24e8737ae846ac2e116e8bafb9
edca4152560522a431a51fc0a06147fc680b5b18 (tags/v1.8.3^0)

has to have "^0" at the end, as "edca41" is a commit, not the tag
that references it.

The command however did not bother to see if the object is at the
tip of some ref, and failed to convert a tag object.

Teach it to show this instead:

$ git rev-parse v1.8.3 v1.8.3^0 | git name-rev --stdin
8af06057d0c31a24e8737ae846ac2e116e8bafb9 (tags/v1.8.3)
edca4152560522a431a51fc0a06147fc680b5b18 (tags/v1.8.3^0)

Signed-off-by: Junio C Hamano 
---
 builtin/name-rev.c | 59 +-
 1 file changed, 58 insertions(+), 1 deletion(-)

diff --git a/builtin/name-rev.c b/builtin/name-rev.c
index 1234ebb..29a6f56 100644
--- a/builtin/name-rev.c
+++ b/builtin/name-rev.c
@@ -4,6 +4,7 @@
 #include "tag.h"
 #include "refs.h"
 #include "parse-options.h"
+#include "sha1-lookup.h"
 
 #define CUTOFF_DATE_SLOP 86400 /* one day */
 
@@ -113,6 +114,34 @@ struct name_ref_data {
const char *ref_filter;
 };
 
+static struct tip_table {
+   struct tip_table_entry {
+   unsigned char sha1[20];
+   const char *refname;
+   } *table;
+   int nr;
+   int alloc;
+   int sorted;
+} tip_table;
+
+static void add_to_tip_table(const unsigned char *sha1, const char *refname,
+int shorten_unambiguous)
+{
+   refname = name_ref_abbrev(refname, shorten_unambiguous);
+
+   ALLOC_GROW(tip_table.table, tip_table.nr + 1, tip_table.alloc);
+   hashcpy(tip_table.table[tip_table.nr].sha1, sha1);
+   tip_table.table[tip_table.nr].refname = xstrdup(refname);
+   tip_table.nr++;
+   tip_table.sorted = 0;
+}
+
+static int tipcmp(const void *a_, const void *b_)
+{
+   const struct tip_table_entry *a = a_, *b = b_;
+   return hashcmp(a->sha1, b->sha1);
+}
+
 static int name_ref(const char *path, const unsigned char *sha1, int flags, 
void *cb_data)
 {
struct object *o = parse_object(sha1);
@@ -135,6 +164,8 @@ static int name_ref(const char *path, const unsigned char 
*sha1, int flags, void
}
}
 
+   add_to_tip_table(sha1, path, can_abbreviate_output);
+
while (o && o->type == OBJ_TAG) {
struct tag *t = (struct tag *) o;
if (!t->tagged)
@@ -151,6 +182,32 @@ static int name_ref(const char *path, const unsigned char 
*sha1, int flags, void
return 0;
 }
 
+static const unsigned char *nth_tip_table_ent(size_t ix, void *table_)
+{
+   struct tip_table_entry *table = table_;
+   return table[ix].sha1;
+}
+
+static const char *get_exact_ref_match(const struct object *o)
+{
+   int found;
+
+   if (!tip_table.table || !tip_table.nr)
+   return NULL;
+
+   if (!tip_table.sorted) {
+   qsort(tip_table.table, tip_table.nr, sizeof(*tip_table.table),
+ tipcmp);
+   tip_table.sorted = 1;
+   }
+
+   found = sha1_pos(o->sha1, tip_table.table, tip_table.nr,
+nth_tip_table_ent);
+   if (0 <= found)
+   return tip_table.table[found].refname;
+   return NULL;
+}
+
 /* returns a static buffer */
 static const char *get_rev_name(const struct object *o)
 {
@@ -159,7 +216,7 @@ static const char *get_rev_name(const struct object *o)
struct commit *c;
 
if (o->type != OBJ_COMMIT)
-   return NULL;
+   return get_exact_ref_match(o);
c = (struct commit *) o;
n = c->util;
if (!n)
-- 
1.8.3.2-853-ga8cbcc9

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/4] describe/name-rev: tell name-rev to peel the incoming object to commit first

2013-07-07 Thread Junio C Hamano
With this on top of the other patches in this series, you would get:

$ git describe --contains $(git rev-parse v1.8.3 v1.8.3^0)
v1.8.3
v1.8.3

while you can still differentiate tags and the commits they point at
with:

$ git name-rev --refs=tags/\* --name-only $(git rev-parse v1.8.3 v1.8.3^0)
v1.8.3
v1.8.3^0

The difference in these two behaviours is achieved by adding --peel-to-commit
option to "name-rev" and using it when "describe" internally calls it.

Signed-off-by: Junio C Hamano 
---
 builtin/describe.c |  1 +
 builtin/name-rev.c | 35 +--
 2 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/builtin/describe.c b/builtin/describe.c
index b5434e4..f7adda6 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -447,6 +447,7 @@ int cmd_describe(int argc, const char **argv, const char 
*prefix)
 
argv_array_init(&args);
argv_array_push(&args, "name-rev");
+   argv_array_push(&args, "--peel-to-commit");
argv_array_push(&args, "--name-only");
argv_array_push(&args, "--no-undefined");
if (always)
diff --git a/builtin/name-rev.c b/builtin/name-rev.c
index 29a6f56..fa37731 100644
--- a/builtin/name-rev.c
+++ b/builtin/name-rev.c
@@ -15,6 +15,7 @@ typedef struct rev_name {
 } rev_name;
 
 static long cutoff = LONG_MAX;
+static int peel_to_commit;
 
 /* How many generations are maximally preferred over _one_ merge traversal? */
 #define MERGE_TRAVERSAL_WEIGHT 65535
@@ -33,7 +34,7 @@ static void name_rev(struct commit *commit,
if (commit->date < cutoff)
return;
 
-   if (deref) {
+   if (deref && !peel_to_commit) {
char *new_name = xmalloc(strlen(tip_name)+3);
strcpy(new_name, tip_name);
strcat(new_name, "^0");
@@ -320,6 +321,8 @@ int cmd_name_rev(int argc, const char **argv, const char 
*prefix)
OPT_BOOLEAN(0, "undefined", &allow_undefined, N_("allow to 
print `undefined` names")),
OPT_BOOLEAN(0, "always", &always,
   N_("show abbreviated commit object as fallback")),
+   OPT_BOOLEAN(0, "peel-to-commit", &peel_to_commit,
+   N_("peel tag object names in the input to a 
commmit")),
OPT_END(),
};
 
@@ -334,7 +337,7 @@ int cmd_name_rev(int argc, const char **argv, const char 
*prefix)
 
for (; argc; argc--, argv++) {
unsigned char sha1[20];
-   struct object *o;
+   struct object *object;
struct commit *commit;
 
if (get_sha1(*argv, sha1)) {
@@ -343,17 +346,29 @@ int cmd_name_rev(int argc, const char **argv, const char 
*prefix)
continue;
}
 
-   o = deref_tag(parse_object(sha1), *argv, 0);
-   if (!o || o->type != OBJ_COMMIT) {
+   commit = NULL;
+   object = parse_object(sha1);
+   if (object) {
+   struct object *peeled = deref_tag(object, *argv, 0);
+   if (peeled && peeled->type == OBJ_COMMIT)
+   commit = (struct commit *) peeled;
+   }
+
+   if (!object) {
+   fprintf(stderr, "Could not get object for %s. 
Skipping.\n",
+   *argv);
+   continue;
+   }
+   if (peel_to_commit && !commit) {
fprintf(stderr, "Could not get commit for %s. 
Skipping.\n",
-   *argv);
+   *argv);
continue;
}
-
-   commit = (struct commit *)o;
-   if (cutoff > commit->date)
-   cutoff = commit->date;
-   add_object_array((struct object *)commit, *argv, &revs);
+   if (commit) {
+   if (cutoff > commit->date)
+   cutoff = commit->date;
+   }
+   add_object_array(object, *argv, &revs);
}
 
if (cutoff)
-- 
1.8.3.2-853-ga8cbcc9

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Documentation: finding $(prefix)/etc/gitconfig when prefix = /usr

2013-07-07 Thread Junio C Hamano
Robin Rosenberg  writes:

> I guess this isn't true either. Anyone has a better way of
> specifiying where the system wide config file is read from,
> or a user-parseable definition of $(prefix) ?

... the system-wide configuration file (typically
/etc/gitconfig), rather than the repository specific
configuration file (.git/config)

should be sufficient.

>
> -- robin
>
> - Ursprungligt meddelande -
>> Signed-off-by: Robin Rosenberg 
>> ---
>>  Documentation/git-config.txt | 6 --
>>  1 file changed, 4 insertions(+), 2 deletions(-)
>> 
>> diff --git a/Documentation/git-config.txt
>> b/Documentation/git-config.txt
>> index 9ae2508..3198d52 100644
>> --- a/Documentation/git-config.txt
>> +++ b/Documentation/git-config.txt
>> @@ -107,7 +107,8 @@ See also <>.
>>  
>>  --system::
>>  For writing options: write to system-wide $(prefix)/etc/gitconfig
>> -rather than the repository .git/config.
>> +rather than the repository .git/config. However, $(prefix) is /usr
>> +then /etc/gitconfig is used.
>>  +
>>  For reading options: read only from system-wide
>>  $(prefix)/etc/gitconfig
>>  rather than from all available files.
>> @@ -214,7 +215,8 @@ $XDG_CONFIG_HOME/git/config::
>>  file was added fairly recently.
>>  
>>  $(prefix)/etc/gitconfig::
>> -System-wide configuration file.
>> +System-wide configuration file, unless $(prefix) is /usr. In the
>> +latter case /etc/gitconfig is used.
>>  
>>  If no further options are given, all reading options will read all
>>  of these
>>  files that are available. If the global or the system-wide
>>  configuration
>> --
>> 1.8.3.rc0.19.g7e6a0cc
>> 
>> 
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/4] pack-revindex: radix-sort the revindex

2013-07-07 Thread Shawn Pearce
On Sun, Jul 7, 2013 at 3:14 AM, Jeff King  wrote:
> The pack revindex stores the offsets of the objects in the
> pack in sorted order, allowing us to easily find the on-disk
> size of each object. To compute it, we populate an array
> with the offsets from the sha1-sorted idx file, and then use
> qsort to order it by offsets.
>
> That does O(n log n) offset comparisons, and profiling shows
> that we spend most of our time in cmp_offset. However, since
> we are sorting on a simple off_t, we can use numeric sorts
> that perform better. A radix sort can run in O(k*n), where k
> is the number of "digits" in our number. For a 64-bit off_t,
> using 16-bit "digits" gives us k=4.

Did you try the simple bucket sort Colby now uses in JGit?

The sort is pretty simple:

  bucket_size = pack_length / object_count;
  buckets[] = malloc(object_count * sizeof(int));

  foreach obj in idx:
push_chain(buckets[obj.offset / bucket_size], obj.idx_nth);

  foreach bucket:
insertion sort by offset

https://eclipse.googlesource.com/jgit/jgit/+/master/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndex.java

We observed on linux.git that most buckets have an average number of
objects. IIRC the bucket_size was ~201 bytes and most buckets had very
few objects each. For lookups we keep the bucket_size parameter and a
bucket index table. This arrangement uses 8 bytes per object in the
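reverse index, making it very memory efficient. Searches are typically
below O(log N) time because each bucket has [...]
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html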


Re: [PATCH] prompt: do not double-discriminate detached HEAD

2013-07-07 Thread Eduardo R. D'Avila
I think color in terminals should be used to highlight and make it easier to see
textual information, not to replace it. So I would keep the parentheses.

> +   test -n "${GIT_PS1_SHOWCOLORHINTS-}" || 
> b="($b)"

Also, the proposed change has a side effect because color is not possible in
non-pc mode, even if GIT_PS1_SHOWCOLORHINTS is defined. (Non-pc mode
with GIT_PS1_SHOWCOLORHINTS defined would make the detached HEAD be
shown neither in red nor within parentheses.)

Regards,

Eduardo R. D'Avila
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 05/22] read-cache: add index reading api

2013-07-07 Thread Duy Nguyen
On Sun, Jul 7, 2013 at 3:11 PM, Thomas Gummerer  wrote:
> Add an api for access to the index file.  Currently there is only a very
> basic api for accessing the index file, which only allows a full read of
> the index, and lets the users of the data filter it.  The new index api
> gives the users the possibility to use only part of the index and
> provides functions for iterating over and accessing cache entries.
>
> This simplifies future improvements to the in-memory format, as changes
> will be concentrated on one file, instead of the whole git source code.
>
> Signed-off-by: Thomas Gummerer 
> ---
>  cache.h |  57 +-
>  read-cache-v2.c |  96 +++--
>  read-cache.c| 108 
> 
>  read-cache.h|  12 ++-
>  4 files changed, 263 insertions(+), 10 deletions(-)
>
> diff --git a/cache.h b/cache.h
> index 5082b34..d38dfbd 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -127,7 +127,8 @@ struct cache_entry {
> unsigned int ce_flags;
> unsigned int ce_namelen;
> unsigned char sha1[20];
> -   struct cache_entry *next;
> +   struct cache_entry *next; /* used by name_hash */
> +   struct cache_entry *next_ce; /* used to keep a list of cache entries 
> */
> char name[FLEX_ARRAY]; /* more */
>  };

From what I read, doing

ce = start;
while (ce) { do(something); ce = next_cache_entry(ce); }

is the same as

i = start_index;
while (i < active_nr) { ce = active_cache[i]; do(something); i++; }

What's the advantage of using the former over the latter? Do you plan
to eliminate the latter loop (by hiding "struct cache_entry **cache;"
from the public index_state structure)?
--
Duy
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 05/22] read-cache: add index reading api

2013-07-07 Thread Duy Nguyen
On Sun, Jul 7, 2013 at 3:11 PM, Thomas Gummerer  wrote:
> +/*
> + * Options by which the index should be filtered when read partially.
> + *
> + * pathspec: The pathspec which the index entries have to match
> + * seen: Used to return the seen parameter from match_pathspec()
> + * max_prefix, max_prefix_len: These variables are set to the longest
> + * common prefix, the length of the longest common prefix of the
> + * given pathspec
> + *
> + * read_staged: used to indicate if the conflicted entries (entries
> + * with a stage) should be included
> + * read_cache_tree: used to indicate if the cache-tree should be read
> + * read_resolve_undo: used to indicate if the resolve undo data should
> + * be read
> + */
> +struct filter_opts {
> +   const char **pathspec;
> +   char *seen;
> +   char *max_prefix;
> +   int max_prefix_len;
> +
> +   int read_staged;
> +   int read_cache_tree;
> +   int read_resolve_undo;
> +};
> +
>  struct index_state {
> struct cache_entry **cache;
> unsigned int version;
> @@ -270,6 +297,8 @@ struct index_state {
> struct hash_table name_hash;
> struct hash_table dir_hash;
> struct index_ops *ops;
> +   struct internal_ops *internal_ops;
> +   struct filter_opts *filter_opts;
>  };

...

> -/* remember to discard_cache() before reading a different cache! */
> -int read_index_from(struct index_state *istate, const char *path)
> +
> +int read_index_filtered_from(struct index_state *istate, const char *path,
> +struct filter_opts *opts)
>  {
> int fd, err, i;
> struct stat st_old, st_new;
> @@ -1337,7 +1425,7 @@ int read_index_from(struct index_state *istate, const 
> char *path)
> if (istate->ops->verify_hdr(mmap, mmap_size) < 0)
> err = 1;
>
> -   if (istate->ops->read_index(istate, mmap, mmap_size) < 0)
> +   if (istate->ops->read_index(istate, mmap, mmap_size, opts) < 
> 0)
> err = 1;
> istate->timestamp.sec = st_old.st_mtime;
> istate->timestamp.nsec = ST_MTIME_NSEC(st_old);
> @@ -1345,6 +1433,7 @@ int read_index_from(struct index_state *istate, const 
> char *path)
> die_errno("cannot stat the open index");
>
> munmap(mmap, mmap_size);
> +   istate->filter_opts = opts;
> if (!index_changed(&st_old, &st_new) && !err)
> return istate->cache_nr;
> }

Putting filter_opts in index_state feels like a bad design. Iterator
information should be separated from the iterated object, so that two
callers can walk through the same index without stepping on each other
(I'm not talking about multithreading, a caller may walk a bit, then
the other caller starts walking, then the former caller resumes
walking again in a call chain).
--
Duy
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


A local shared clone is now much slower

2013-07-07 Thread Stephen Rothwell
Hi guys,

So commit 0433ad128c59 ("clone: run check_everything_connected") (which
turned up with v1.8.3) added a large traversal to clone which (as the
comment said) makes a clone much slower.  It is especially noticeable on
"git clone -s -l -n" which I use every day and used to be almost
instant.  Is there any thought to making it fast again, please?

The above clone is very useful for working with different branches in one
tree without touching every file in the main branch you are working
with (and consequent issues with rebuilding at least).  As linux-next
maintainer, you can imagine that I do this a bit.

I am sure one of Linus' points about branches was that being able to make
a fast local clone of a tree to use more than one branch was a feature.

-- 
Cheers,
Stephen Rothwell                    s...@canb.auug.org.au


pgpA3zkp3VAx8.pgp
Description: PGP signature


Re: A local shared clone is now much slower

2013-07-07 Thread Duy Nguyen
On Mon, Jul 8, 2013 at 10:03 AM, Stephen Rothwell  wrote:
> Hi guys,
>
> So commit 0433ad128c59 ("clone: run check_everything_connected") (which
> turned up with v1.8.3) added a large traversal to clone which (as the
> comment said) makes a clone much slower.  It is especially noticeable on
> "git clone -s -l -n" which I use every day and used to be almost
> instant.  Is there any thought to making it fast again, please?

It's done that way as a security measure against repo corruption.
Although I wonder if we could do connectivity check in background
instead (reports are stored in .git and picked up by git-status). The
same mechanism could be used for "git gc --auto". If the repo turns
out corrupted, the user may lose the last ~10 minutes of work, not
really bad for the speed trade off. This mode is not the default, of
course. The user has to be aware of the risk when choosing this route.
--
Duy
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: A local shared clone is now much slower

2013-07-07 Thread Stephen Rothwell
Hi Duy,

On Mon, 8 Jul 2013 10:20:22 +0700 Duy Nguyen  wrote:
>
> On Mon, Jul 8, 2013 at 10:03 AM, Stephen Rothwell  
> wrote:
> >
> > So commit 0433ad128c59 ("clone: run check_everything_connected") (which
> > turned up with v1.8.3) added a large traversal to clone which (as the
> > comment said) makes a clone much slower.  It is especially noticeable on
> > "git clone -s -l -n" which I use every day and used to be almost
> > instant.  Is there any thought to making it fast again, please?
> 
> It's done that way as a security measure against repo corruption.
> Although I wonder if we could do connectivity check in background
> instead (reports are stored in .git and picked up by git-status). The
> same mechanism could be used for "git gc --auto". If the repo turns
> out corrupted, the user may lose the last ~10 minutes of work, not
> really bad for the speed trade off. This mode is not the default, of
> course. The user has to be aware of the risk when choosing this route.

Thanks for the explanation.  Now, is there some way I can turn it off
just for the local shared case?  In my case, I check my repo regularly,
so don't need or want this going on while I am working ...

-- 
Cheers,
Stephen Rothwell                    s...@canb.auug.org.au


pgpK5FEJKmn52.pgp
Description: PGP signature