Re: [PATCH v4 12/24] read-cache: read index-v5

2013-11-30 Thread Duy Nguyen
On Wed, Nov 27, 2013 at 7:00 PM, Thomas Gummerer t.gumme...@gmail.com wrote:
 --- a/cache.h
 +++ b/cache.h
 @@ -132,11 +141,17 @@ struct cache_entry {
 char name[FLEX_ARRAY]; /* more */
  };

 +#define CE_NAMEMASK  (0x0fff)

CE_NAMEMASK is redefined in read-cache-v2.c in read-cache: move index
v2 specific functions to their own file. My gcc is smart enough to
see the two defines are about the same value and does not warn me. But
we should remove one (likely this one as I see no use of this macro
outside read-cache-v2.c)

  #define CE_STAGEMASK (0x3000)
  #define CE_EXTENDED  (0x4000)
  #define CE_VALID (0x8000)
 +#define CE_SMUDGED   (0x0400) /* index v5 only flag */
  #define CE_STAGESHIFT 12

 +#define CONFLICT_CONFLICTED (0x8000)
 +#define CONFLICT_STAGESHIFT 13
 +#define CONFLICT_STAGEMASK (0x6000)
 +
  /*
   * Range 0x in ce_flags is divided into
   * two parts: in-memory flags and on-disk ones.

 diff --git a/read-cache-v5.c b/read-cache-v5.c
 new file mode 100644
 index 000..9d8c8f0
 --- /dev/null
 +++ b/read-cache-v5.c
 +static int read_index_v5(struct index_state *istate, void *mmap,
 +unsigned long mmap_size, struct filter_opts *opts)
 +{
 +   unsigned int entry_offset, foffsetblock, nr = 0, *extoffsets;
 +   unsigned int dir_offset, dir_table_offset;
 +   int need_root = 0, i;
 +   uint32_t *offset;
 +   struct directory_entry *root_directory, *de, *last_de;
 +   const char **paths = NULL;
 +   struct pathspec adjusted_pathspec;
 +   struct cache_header *hdr;
 +   struct cache_header_v5 *hdr_v5;
 +
 +   hdr = mmap;
 +   hdr_v5 = ptr_add(mmap, sizeof(*hdr));
 +   istate-cache_alloc = alloc_nr(ntohl(hdr-hdr_entries));
 +   istate-cache = xcalloc(istate-cache_alloc, sizeof(struct 
 cache_entry *));
 +   extoffsets = xcalloc(ntohl(hdr_v5-hdr_nextension), sizeof(int));
 +   for (i = 0; i  ntohl(hdr_v5-hdr_nextension); i++) {
 +   offset = ptr_add(mmap, sizeof(*hdr) + sizeof(*hdr_v5));
 +   extoffsets[i] = htonl(*offset);
 +   }
 +
 +   /* Skip size of the header + crc sum + size of offsets to extensions 
 + size of offsets */
 +   dir_offset = sizeof(*hdr) + sizeof(*hdr_v5) + 
 ntohl(hdr_v5-hdr_nextension) * 4 + 4
 +   + (ntohl(hdr_v5-hdr_ndir) + 1) * 4;
 +   dir_table_offset = sizeof(*hdr) + sizeof(*hdr_v5) + 
 ntohl(hdr_v5-hdr_nextension) * 4 + 4;
 +   root_directory = read_directories(dir_offset, dir_table_offset,
 + mmap, mmap_size);
 +
 +   entry_offset = ntohl(hdr_v5-hdr_fblockoffset);
 +   foffsetblock = dir_offset;
 +
 +   if (opts  opts-pathspec  opts-pathspec-nr) {
 +   paths = xmalloc((opts-pathspec-nr + 1)*sizeof(char *));
 +   paths[opts-pathspec-nr] = NULL;

Put this statement here

GUARD_PATHSPEC(opts-pathspec,
  PATHSPEC_FROMTOP |
  PATHSPEC_MAXDEPTH |
  PATHSPEC_LITERAL |
  PATHSPEC_GLOB |
  PATHSPEC_ICASE);

This says the mentioned magic is safe in this code. New magic may or
may not be and needs to be checked (soonest by me, I'm going to add
negative pathspec and I'll need to look into how it should be handled
in this code block).

 +   for (i = 0; i  opts-pathspec-nr; i++) {
 +   char *super = strdup(opts-pathspec-items[i].match);
 +   int len = strlen(super);

You should only check as far as items[i].nowildcard_len, not strlen().
The rest could be wildcards and stuff and not so reliable.

 +   while (len  super[len - 1] == '/'  super[len - 2] 
 == '/')
 +   super[--len] = '\0'; /* strip all but one 
 trailing slash */
 +   while (len  super[--len] != '/')
 +   ; /* scan backwards to next / */
 +   if (len = 0)
 +   super[len--] = '\0';
 +   if (len = 0) {
 +   need_root = 1;
 +   break;
 +   }
 +   paths[i] = super;
 +   }

And maybe put the comment FIXME: consider merging this code with
create_simplify() in dir.c somewhere. It's for me to look for things
to do when I'm bored ;-)

 +   }
 +
 +   if (!need_root)
 +   parse_pathspec(adjusted_pathspec, PATHSPEC_ALL_MAGIC, 
 PATHSPEC_PREFER_CWD, NULL, paths);

I would go with PATHSPEC_PREFER_FULL instead of _CWD as it's safer.
Looking only at this function without caller context, it's hard to say
if _CWD is the right choice.

 +
 +   de = root_directory;
 +   last_de = de;

This statement is redundant. last_de is only used in one code block
below and it's always re-initialized before entering the loop to skip
subdirs.

 +   while (de) {
 +   if (need_root ||
 +   

Re: [PATCH v4 12/24] read-cache: read index-v5

2013-11-30 Thread Thomas Gummerer
Duy Nguyen pclo...@gmail.com writes:

 On Wed, Nov 27, 2013 at 7:00 PM, Thomas Gummerer t.gumme...@gmail.com wrote:
 --- a/cache.h
 +++ b/cache.h
 @@ -132,11 +141,17 @@ struct cache_entry {
 char name[FLEX_ARRAY]; /* more */
  };

 +#define CE_NAMEMASK  (0x0fff)

 CE_NAMEMASK is redefined in read-cache-v2.c in read-cache: move index
 v2 specific functions to their own file. My gcc is smart enough to
 see the two defines are about the same value and does not warn me. But
 we should remove one (likely this one as I see no use of this macro
 outside read-cache-v2.c)

Thanks for catching that, there's no need to have it here.  I'll remove
it in the re-roll.

  #define CE_STAGEMASK (0x3000)
  #define CE_EXTENDED  (0x4000)
  #define CE_VALID (0x8000)
 +#define CE_SMUDGED   (0x0400) /* index v5 only flag */
  #define CE_STAGESHIFT 12

 +#define CONFLICT_CONFLICTED (0x8000)
 +#define CONFLICT_STAGESHIFT 13
 +#define CONFLICT_STAGEMASK (0x6000)
 +
  /*
   * Range 0x in ce_flags is divided into
   * two parts: in-memory flags and on-disk ones.

 diff --git a/read-cache-v5.c b/read-cache-v5.c
 new file mode 100644
 index 000..9d8c8f0
 --- /dev/null
 +++ b/read-cache-v5.c
 +static int read_index_v5(struct index_state *istate, void *mmap,
 +unsigned long mmap_size, struct filter_opts *opts)
 +{
 +   unsigned int entry_offset, foffsetblock, nr = 0, *extoffsets;
 +   unsigned int dir_offset, dir_table_offset;
 +   int need_root = 0, i;
 +   uint32_t *offset;
 +   struct directory_entry *root_directory, *de, *last_de;
 +   const char **paths = NULL;
 +   struct pathspec adjusted_pathspec;
 +   struct cache_header *hdr;
 +   struct cache_header_v5 *hdr_v5;
 +
 +   hdr = mmap;
 +   hdr_v5 = ptr_add(mmap, sizeof(*hdr));
 +   istate-cache_alloc = alloc_nr(ntohl(hdr-hdr_entries));
 +   istate-cache = xcalloc(istate-cache_alloc, sizeof(struct 
 cache_entry *));
 +   extoffsets = xcalloc(ntohl(hdr_v5-hdr_nextension), sizeof(int));
 +   for (i = 0; i  ntohl(hdr_v5-hdr_nextension); i++) {
 +   offset = ptr_add(mmap, sizeof(*hdr) + sizeof(*hdr_v5));
 +   extoffsets[i] = htonl(*offset);
 +   }
 +
 +   /* Skip size of the header + crc sum + size of offsets to extensions 
 + size of offsets */
 +   dir_offset = sizeof(*hdr) + sizeof(*hdr_v5) + 
 ntohl(hdr_v5-hdr_nextension) * 4 + 4
 +   + (ntohl(hdr_v5-hdr_ndir) + 1) * 4;
 +   dir_table_offset = sizeof(*hdr) + sizeof(*hdr_v5) + 
 ntohl(hdr_v5-hdr_nextension) * 4 + 4;
 +   root_directory = read_directories(dir_offset, dir_table_offset,
 + mmap, mmap_size);
 +
 +   entry_offset = ntohl(hdr_v5-hdr_fblockoffset);
 +   foffsetblock = dir_offset;
 +
 +   if (opts  opts-pathspec  opts-pathspec-nr) {
 +   paths = xmalloc((opts-pathspec-nr + 1)*sizeof(char *));
 +   paths[opts-pathspec-nr] = NULL;

 Put this statement here

 GUARD_PATHSPEC(opts-pathspec,
   PATHSPEC_FROMTOP |
   PATHSPEC_MAXDEPTH |
   PATHSPEC_LITERAL |
   PATHSPEC_GLOB |
   PATHSPEC_ICASE);

 This says the mentioned magic is safe in this code. New magic may or
 may not be and needs to be checked (soonest by me, I'm going to add
 negative pathspec and I'll need to look into how it should be handled
 in this code block).

Thanks, I'll add the statement in the re-roll.

 +   for (i = 0; i  opts-pathspec-nr; i++) {
 +   char *super = strdup(opts-pathspec-items[i].match);
 +   int len = strlen(super);

 You should only check as far as items[i].nowildcard_len, not strlen().
 The rest could be wildcards and stuff and not so reliable.

Ok, will change it in the re-roll.

 +   while (len  super[len - 1] == '/'  super[len - 
 2] == '/')
 +   super[--len] = '\0'; /* strip all but one 
 trailing slash */
 +   while (len  super[--len] != '/')
 +   ; /* scan backwards to next / */
 +   if (len = 0)
 +   super[len--] = '\0';
 +   if (len = 0) {
 +   need_root = 1;
 +   break;
 +   }
 +   paths[i] = super;
 +   }

 And maybe put the comment FIXME: consider merging this code with
 create_simplify() in dir.c somewhere. It's for me to look for things
 to do when I'm bored ;-)

Heh, thanks, will do.

 +   }
 +
 +   if (!need_root)
 +   parse_pathspec(adjusted_pathspec, PATHSPEC_ALL_MAGIC, 
 PATHSPEC_PREFER_CWD, NULL, paths);

 I would go with PATHSPEC_PREFER_FULL instead of _CWD as it's safer.
 Looking only at this function without caller context, it's hard to say
 if _CWD is the right choice.

Ok, thanks, will change.

 +
 +   de = root_directory;

Re: [PATCH v4 12/24] read-cache: read index-v5

2013-11-30 Thread Antoine Pelisse
On Wed, Nov 27, 2013 at 1:00 PM, Thomas Gummerer t.gumme...@gmail.com wrote:
 Make git read the index file version 5 without complaining.

 This version of the reader reads neither the cache-tree
 nor the resolve undo data, however, it won't choke on an
 index that includes such data.

 Helped-by: Junio C Hamano gits...@pobox.com
 Helped-by: Nguyen Thai Ngoc Duy pclo...@gmail.com
 Helped-by: Thomas Rast tr...@student.ethz.ch
 Signed-off-by: Thomas Gummerer t.gumme...@gmail.com
 ---
 [...]
 +static struct directory_entry *read_directories(unsigned int *dir_offset,
 +   unsigned int 
 *dir_table_offset,
 +   void *mmap, int mmap_size)

Minor nit: why is this mmap_size int while all others are unsigned long ?

 [...]
 +static int read_entry(struct cache_entry **ce, char *pathname, size_t 
 pathlen,
 + void *mmap, unsigned long mmap_size,
 + unsigned int first_entry_offset,
 + unsigned int foffsetblock)
 [...]
 +static int read_entries(struct index_state *istate, struct directory_entry 
 *de,
 +   unsigned int first_entry_offset, void *mmap,
 +   unsigned long mmap_size, unsigned int *nr,
 +   unsigned int foffsetblock)
 [...]
 +static int read_index_v5(struct index_state *istate, void *mmap,
 +unsigned long mmap_size, struct filter_opts *opts)
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 12/24] read-cache: read index-v5

2013-11-30 Thread Antoine Pelisse
On Wed, Nov 27, 2013 at 1:00 PM, Thomas Gummerer t.gumme...@gmail.com wrote:
 +static int verify_hdr(void *mmap, unsigned long size)
 +{
 +   uint32_t *filecrc;
 +   unsigned int header_size;
 +   struct cache_header *hdr;
 +   struct cache_header_v5 *hdr_v5;
 +
 +   if (size  sizeof(struct cache_header)
 +   + sizeof (struct cache_header_v5) + 4)
 +   die(index file smaller than expected);
 +
 +   hdr = mmap;
 +   hdr_v5 = ptr_add(mmap, sizeof(*hdr));
 +   /* Size of the header + the size of the extensionoffsets */
 +   header_size = sizeof(*hdr) + sizeof(*hdr_v5) + hdr_v5-hdr_nextension 
 * 4;
 +   /* Initialize crc */
 +   filecrc = ptr_add(mmap, header_size);
 +   if (!check_crc32(0, hdr, header_size, ntohl(*filecrc)))
 +   return error(bad index file header crc signature);
 +   return 0;
 +}

I find it curious that we actually need a value from the header (and
use it for pointer arithmetic) to check that the header is valid. The
application will crash before the crc is checked if
hdr_v5-hdr_nextensions is corrupted. Or am I missing something ?
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 12/24] read-cache: read index-v5

2013-11-30 Thread Thomas Gummerer
Antoine Pelisse apeli...@gmail.com writes:

 On Wed, Nov 27, 2013 at 1:00 PM, Thomas Gummerer t.gumme...@gmail.com wrote:
 Make git read the index file version 5 without complaining.

 This version of the reader reads neither the cache-tree
 nor the resolve undo data, however, it won't choke on an
 index that includes such data.

 Helped-by: Junio C Hamano gits...@pobox.com
 Helped-by: Nguyen Thai Ngoc Duy pclo...@gmail.com
 Helped-by: Thomas Rast tr...@student.ethz.ch
 Signed-off-by: Thomas Gummerer t.gumme...@gmail.com
 ---
 [...]
 +static struct directory_entry *read_directories(unsigned int *dir_offset,
 +   unsigned int 
 *dir_table_offset,
 +   void *mmap, int mmap_size)

 Minor nit: why is this mmap_size int while all others are unsigned long ?

Thanks for catching that.  It should be unsigned long here to.  Will
fix in the re-roll.

 [...]
 +static int read_entry(struct cache_entry **ce, char *pathname, size_t 
 pathlen,
 + void *mmap, unsigned long mmap_size,
 + unsigned int first_entry_offset,
 + unsigned int foffsetblock)
 [...]
 +static int read_entries(struct index_state *istate, struct directory_entry 
 *de,
 +   unsigned int first_entry_offset, void *mmap,
 +   unsigned long mmap_size, unsigned int *nr,
 +   unsigned int foffsetblock)
 [...]
 +static int read_index_v5(struct index_state *istate, void *mmap,
 +unsigned long mmap_size, struct filter_opts *opts)
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 12/24] read-cache: read index-v5

2013-11-30 Thread Thomas Gummerer
Antoine Pelisse apeli...@gmail.com writes:

 On Wed, Nov 27, 2013 at 1:00 PM, Thomas Gummerer t.gumme...@gmail.com wrote:
 +static int verify_hdr(void *mmap, unsigned long size)
 +{
 +   uint32_t *filecrc;
 +   unsigned int header_size;
 +   struct cache_header *hdr;
 +   struct cache_header_v5 *hdr_v5;
 +
 +   if (size  sizeof(struct cache_header)
 +   + sizeof (struct cache_header_v5) + 4)
 +   die(index file smaller than expected);
 +
 +   hdr = mmap;
 +   hdr_v5 = ptr_add(mmap, sizeof(*hdr));
 +   /* Size of the header + the size of the extensionoffsets */
 +   header_size = sizeof(*hdr) + sizeof(*hdr_v5) + 
 hdr_v5-hdr_nextension * 4;
 +   /* Initialize crc */
 +   filecrc = ptr_add(mmap, header_size);
 +   if (!check_crc32(0, hdr, header_size, ntohl(*filecrc)))
 +   return error(bad index file header crc signature);
 +   return 0;
 +}

 I find it curious that we actually need a value from the header (and
 use it for pointer arithmetic) to check that the header is valid. The
 application will crash before the crc is checked if
 hdr_v5-hdr_nextensions is corrupted. Or am I missing something ?

Good catch, I'm the one that was missing something here.  We still need
to use the value from the header before calculating the crc, but should
check if header_size - 4 is less than the total size of the index file.
Then even if the header is corrupted we won't read anything that is not
mmap'ed and thus won't crash.

This guard should also be included for everything else that checks the
crc checksum, as that has the same problems and the calculated place in
the file for the crc might be after the end of the file.

Thanks, will fix in the re-roll.
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 12/24] read-cache: read index-v5

2013-11-27 Thread Thomas Gummerer
Make git read the index file version 5 without complaining.

This version of the reader reads neither the cache-tree
nor the resolve undo data, however, it won't choke on an
index that includes such data.

Helped-by: Junio C Hamano gits...@pobox.com
Helped-by: Nguyen Thai Ngoc Duy pclo...@gmail.com
Helped-by: Thomas Rast tr...@student.ethz.ch
Signed-off-by: Thomas Gummerer t.gumme...@gmail.com
---
 Makefile|   1 +
 cache.h |  32 -
 read-cache-v5.c | 417 
 read-cache.h|   1 +
 4 files changed, 450 insertions(+), 1 deletion(-)
 create mode 100644 read-cache-v5.c

diff --git a/Makefile b/Makefile
index 5c28777..6a1b054 100644
--- a/Makefile
+++ b/Makefile
@@ -851,6 +851,7 @@ LIB_OBJS += quote.o
 LIB_OBJS += reachable.o
 LIB_OBJS += read-cache.o
 LIB_OBJS += read-cache-v2.o
+LIB_OBJS += read-cache-v5.o
 LIB_OBJS += reflog-walk.o
 LIB_OBJS += refs.o
 LIB_OBJS += remote.o
diff --git a/cache.h b/cache.h
index 8c2ccc4..65171e4 100644
--- a/cache.h
+++ b/cache.h
@@ -99,7 +99,7 @@ unsigned long git_deflate_bound(git_zstream *, unsigned long);
 #define CACHE_SIGNATURE 0x44495243 /* DIRC */
 
 #define INDEX_FORMAT_LB 2
-#define INDEX_FORMAT_UB 4
+#define INDEX_FORMAT_UB 5
 
 /*
  * The cache_time is just the low 32 bits of the
@@ -121,6 +121,15 @@ struct stat_data {
unsigned int sd_size;
 };
 
+/*
+ * The *next_ce pointer is used in read_entries_v5 for holding
+ * all the elements of a directory, and points to the next
+ * cache_entry in a directory.
+ *
+ * It is reset by the add_name_hash call in set_index_entry
+ * to set it to point to the next cache_entry in the
+ * correct in-memory format ordering.
+ */
 struct cache_entry {
struct stat_data ce_stat_data;
unsigned int ce_mode;
@@ -132,11 +141,17 @@ struct cache_entry {
char name[FLEX_ARRAY]; /* more */
 };
 
+#define CE_NAMEMASK  (0x0fff)
 #define CE_STAGEMASK (0x3000)
 #define CE_EXTENDED  (0x4000)
 #define CE_VALID (0x8000)
+#define CE_SMUDGED   (0x0400) /* index v5 only flag */
 #define CE_STAGESHIFT 12
 
+#define CONFLICT_CONFLICTED (0x8000)
+#define CONFLICT_STAGESHIFT 13
+#define CONFLICT_STAGEMASK (0x6000)
+
 /*
  * Range 0x in ce_flags is divided into
  * two parts: in-memory flags and on-disk ones.
@@ -173,6 +188,19 @@ struct cache_entry {
 #define CE_EXTENDED_FLAGS (CE_INTENT_TO_ADD | CE_SKIP_WORKTREE)
 
 /*
+ * Representation of the extended on-disk flags in the v5 format.
+ * They must not collide with the ordinary on-disk flags, and need to
+ * fit in 16 bits.  Note however that v5 does not save the name
+ * length.
+ */
+#define CE_INTENT_TO_ADD_V5  (0x4000)
+#define CE_SKIP_WORKTREE_V5  (0x0800)
+#define CE_INVALID_V5(0x0200)
+#if (CE_VALID|CE_STAGEMASK)  
(CE_INTENTTOADD_V5|CE_SKIPWORKTREE_V5|CE_INVALID_V5)
+#error v5 on-disk flags collide with ordinary on-disk flags
+#endif
+
+/*
  * Safeguard to avoid saving wrong flags:
  *  - CE_EXTENDED2 won't get saved until its semantic is known
  *  - Bits in 0x have been saved in ce_flags already
@@ -213,6 +241,8 @@ static inline unsigned create_ce_flags(unsigned stage)
 #define ce_skip_worktree(ce) ((ce)-ce_flags  CE_SKIP_WORKTREE)
 #define ce_mark_uptodate(ce) ((ce)-ce_flags |= CE_UPTODATE)
 
+#define conflict_stage(c) ((CONFLICT_STAGEMASK  (c)-flags)  
CONFLICT_STAGESHIFT)
+
 #define ce_permissions(mode) (((mode)  0100) ? 0755 : 0644)
 static inline unsigned int create_ce_mode(unsigned int mode)
 {
diff --git a/read-cache-v5.c b/read-cache-v5.c
new file mode 100644
index 000..9d8c8f0
--- /dev/null
+++ b/read-cache-v5.c
@@ -0,0 +1,417 @@
+#include cache.h
+#include read-cache.h
+#include resolve-undo.h
+#include cache-tree.h
+#include dir.h
+#include pathspec.h
+
+#define ptr_add(x,y) ((void *)(((char *)(x)) + (y)))
+
+struct cache_header_v5 {
+   uint32_t hdr_ndir;
+   uint32_t hdr_fblockoffset;
+   uint32_t hdr_nextension;
+};
+
+struct directory_entry {
+   struct directory_entry **sub;
+   struct directory_entry *next;
+   struct directory_entry *next_hash;
+   struct cache_entry *ce;
+   struct cache_entry *ce_last;
+   uint32_t conflict_size;
+   uint32_t de_foffset;
+   uint32_t de_nsubtrees;
+   uint32_t de_nfiles;
+   uint32_t de_nentries;
+   unsigned char sha1[20];
+   uint16_t de_flags;
+   uint32_t de_pathlen;
+   char pathname[FLEX_ARRAY];
+};
+
+struct conflict_part {
+   struct conflict_part *next;
+   uint16_t flags;
+   uint16_t entry_mode;
+   unsigned char sha1[20];
+};
+
+struct conflict_entry {
+   struct conflict_entry *next;
+   uint32_t nfileconflicts;
+   struct conflict_part *entries;
+   uint32_t namelen;
+   uint32_t pathlen;
+   char name[FLEX_ARRAY];
+};
+
+/*
+ * Index File I/O
+ */
+
+struct