Re: Storing permissions

2005-04-17 Thread David A. Wheeler
Linus Torvalds wrote:
On Sat, 16 Apr 2005, Paul Jackson wrote:
Morten wrote:
It makes some sense in principle, but without storing what they mean
(i.e., group==?) it certainly makes no sense. 
There's no they there.
I think Martin's proposal, to which I agreed, was to store a _single_
bit.  If any of the execute permissions of the incoming file are set,
then the bit is stored ON, else it is stored OFF.  On 'checkout', if the
bit is ON, then the file permission is set mode 0777 (modulo umask),
else it is set mode 0666 (modulo umask).

I think I agree.
Anybody willing to send me a patch? One issue is that if done the obvious
way it's an incompatible change, and old tree objects won't be valid any
more. It might be ok to just change the compare cache check to only care
about a few bits, though: S_IXUSR and S_IFDIR.
There's a minor reason to write out ALL the perm bit data, but
only care about a few bits coming back in: Some people use
SCM systems as a generalized backup system, so you can back up
your system to an arbitrary known state in the past
(e.g., Change my /etc files to the state I was at
just before I installed that *#@ program!).
For more on this, see:
 http://www.onlamp.com/pub/a/onlamp/2005/01/06/svn_homedir.html
If you store all the bits, then you CAN restore things
more exactly the way they were.  This is imperfect, since
it doesn't cover more exotic permission
values from SELinux, xattrs, whatever.  For some, that's enough.
Yeah, I know, not the main purpose of git.  But what the heck,
I _like_ flexible infrastructures.
--- David A. Wheeler
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


active_cache leaks

2005-04-17 Thread Brad Roberts
Ok.. so there's been a couple attempts to patch the leak that were all
wrong due to mixed memory management for that array.  Here's a seed for
discussion on how to plug that leak.  Some would argue that it's not
leaking enough to fix, but for those that want to turn git into a library,
the lifetime of the cache could end up not being short any more, so it's
worth discussing how to fix it.

The q&d fix in this patch isn't elegant, but gets the job done.  More
interesting could be to have the entry itself contain a state bit, though
that wastes storage space.

Two basic changes:

1) introduce a set_cache_entry() api and change all 'active_cache[i] = ce'
   assignments to use it.
2) add an active_cache_malloced array to parallel the active_cache array.

I don't like #2, but see that q&d comment. :)

It's only lightly tested as I'm still trying to wrap my head around how to
actually use git and git-pasky.

I was tempted to add a get_cache_entry api as well, so that nothing
outside of read-cache.c touched active_cache directly, but that can come
next.

Later,
Brad

--- cache.h
+++ cache.h 2005-04-16 23:08:37.0 -0700
@@ -88,6 +88,7 @@
 extern int read_cache(void);
 extern int write_cache(int newfd, struct cache_entry **cache, int entries);
 extern int cache_name_pos(const char *name, int namelen);
+extern int set_cache_entry(struct cache_entry *ce, int pos, int 
malloced_entry);
 extern int add_cache_entry(struct cache_entry *ce, int ok_to_add);
 extern int remove_file_from_cache(char *path);
 extern int cache_match_stat(struct cache_entry *ce, struct stat *st);
--- read-cache.c
+++ read-cache.c2005-04-16 23:32:34.0 -0700
@@ -8,6 +8,7 @@
 
 const char *sha1_file_directory = NULL;
 struct cache_entry **active_cache = NULL;
+static int * active_cache_malloced = NULL;
 unsigned int active_nr = 0, active_alloc = 0;
 
 void usage(const char *err)
@@ -381,6 +382,15 @@
return ce_namelen(b) == len  !memcmp(a-name, b-name, len);
 }
 
+int set_cache_entry(struct cache_entry *ce, int pos, int malloced_entry)
+{
+   if (active_cache_malloced[pos])
+   free(active_cache[pos]);
+   active_cache[pos] = ce;
+   active_cache_malloced[pos] = malloced_entry;
+   return 0;
+}
+
 int add_cache_entry(struct cache_entry *ce, int ok_to_add)
 {
int pos;
@@ -389,7 +399,7 @@
 
/* existing match? Just replace it */
if (pos = 0) {
-   active_cache[pos] = ce;
+   set_cache_entry(ce, pos, 0);
return 0;
}
pos = -pos-1;
@@ -414,13 +424,16 @@
if (active_nr == active_alloc) {
active_alloc = alloc_nr(active_alloc);
active_cache = realloc(active_cache, active_alloc * 
sizeof(struct cache_entry *));
+   active_cache_malloced = realloc(active_cache, active_alloc * 
sizeof(int));
}
 
/* Add it in.. */
active_nr++;
-   if (active_nr  pos)
+   if (active_nr  pos) {
memmove(active_cache + pos + 1, active_cache + pos, (active_nr 
- pos - 1) * sizeof(ce));
-   active_cache[pos] = ce;
+   memmove(active_cache_malloced + pos + 1, active_cache_malloced 
+ pos, (active_nr - pos - 1) * sizeof(int));
+   }
+   set_cache_entry(ce, pos, 1);
return 0;
 }
 
@@ -482,12 +495,13 @@
active_nr = ntohl(hdr-hdr_entries);
active_alloc = alloc_nr(active_nr);
active_cache = calloc(active_alloc, sizeof(struct cache_entry *));
+   active_cache_malloced = calloc(active_alloc, sizeof(int));
 
offset = sizeof(*hdr);
for (i = 0; i  active_nr; i++) {
struct cache_entry *ce = map + offset;
offset = offset + ce_size(ce);
-   active_cache[i] = ce;
+   set_cache_entry(ce, i, 0);
}
return active_nr;
 
--- update-cache.c
+++ update-cache.c  2005-04-16 23:33:28.0 -0700
@@ -199,11 +199,14 @@
struct cache_entry *ce = active_cache[i];
struct cache_entry *new = refresh_entry(ce);
 
+   if (new == ce)
+   continue;
+
if (!new) {
printf(%s: needs update\n, ce-name);
continue;
}
-   active_cache[i] = new;
+   set_cache_entry(new, i, 1);
}
 }
 


Re: [PATCH] libgit

2005-04-17 Thread Brad Roberts
 Converted git to libgit. Moved all the main() calls into a single
 multi-call binary - git-main.
 Made extern a bunch of functions that were static. Verified it at least
 still minimally worked.
 Note: this is only a first step towards creating a generic library.
 Figuring out what functions and variables *truly* need to be exported,
 renaming them to a git_function api, making it thread safe
 ... and not least of all, keeping up with everybody working out of the
 base tree... are problems that remain. Also - cleaning up the UI.

Why do all that static and main rearrangement?  It would be a lot simpler
and cleaner to simply rename the main's in place and add the new
dispatcher by itself.

Later,
Brad

-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Storing permissions

2005-04-17 Thread Paul Jackson
David wrote:
 There's a minor reason to write out ALL the perm bit data, but

There's always the 'configurable option' approach.

Someone, I doubt Linus will have any interest in it, could volunteer to
make the masks of st_mode, used when storing and recovering file
permissions, be configurable by some environment variable settings,
which default to whatever Linus provided.

But, in general, if you want a generalized backup system, git is not it.

Git skips all files whose name begins with the dot '.' character, and
anything that is not a regular file or directory.  Git makes no
concessions to working adequately on file systems lacking normal inode
numbers (such as smb, fat, vfat).  Git obscures the archive format a
modest amount, for pure speed and to encourage use only via appropriate
wrappers.  Git is tuned for blazing speed at the operations that Linus
needs, not for trivial recovery, using the most basic tools, under harsh
circumstances.

The basic idea of using such an 'object database' (though I dislike that
term -- too high falutin vague) of files stored by their hash is a
good one.  But a different core implementation is needed for backups.

I have one that I use for my own backups, but it is written in Python,
and uses MD5, one or the other of which likely disqualifies it from
further consideration by half the readers of this list.

-- 
  I won't rest till it's the best ...
  Programmer, Linux Scalability
  Paul Jackson [EMAIL PROTECTED] 1.650.933.1373, 
1.925.600.0401
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Yet another base64 patch

2005-04-17 Thread Paul Jackson
David wrote:
 My list would be:
 ext2, ext3, NFS, and Windows' NTFS (stupid short filenames,
 case-insensitive/case-preserving).

I'm no mind reader, but I'd bet a pretty penny that what you have in
mind and what Linus has in mind have no overlaps in their solution sets.

Happy coding ...

-- 
  I won't rest till it's the best ...
  Programmer, Linux Scalability
  Paul Jackson [EMAIL PROTECTED] 1.650.933.1373, 
1.925.600.0401
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Better error message from checkout-cache for unmerged files.

2005-04-17 Thread Junio C Hamano
The checkout-cache command says file is not in the cache when
an unmerged path is given.  This patch adds code to distinguish
the unmerged and the nonexistent cases and gives an appropriate
error message.

Signed-off-by: Junio C Hamano [EMAIL PROTECTED]
---

 checkout-cache.c |   11 +--
 1 files changed, 9 insertions(+), 2 deletions(-)

checkout-cache.c: e16619c9d099367c224fb485f5525d66267bbd92
--- checkout-cache.c
+++ checkout-cache.c2005-04-17 01:49:01.0 -0700
@@ -121,8 +121,15 @@
 {
int pos = cache_name_pos(name, strlen(name));
if (pos  0) {
-   if (!quiet)
-   fprintf(stderr, checkout-cache: %s is not in the 
cache\n, name);
+   if (!quiet) {
+   pos = -pos - 1;
+   fprintf(stderr,
+   checkout-cache: %s is %s.\n,
+   name,
+   (pos  active_nr 
+!strcmp(active_cache[pos]-name, name)) ?
+   unmerged : not in the cache);
+   }
return -1;
}
return checkout_entry(active_cache[pos]);

-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Summary of read-tree -m O A B mechanism

2005-04-17 Thread Junio C Hamano
Earlier I wrote down a list of issues your recent merge
stage changes have introduced to the rest of the plumbing, with
a set of suggested adaptions.  I think all of them are cleared
now (you have a pile of patches from me in your mailbox).

I do not know what percentage of people on this list are using
git without the Cogito part, but I suspect that the number might
be quite small.  I also suspect, from the description Petr gave
us on how the merging in Cogito works, Cogito does not currently
use the read-tree -m O A B mechanism, and those majority who
do not deal with the low level tools themselves would not have
to know about the merge issues yet.  But I think it is a good
time, now things have started to settle down, to summarize how
various commands work when they see those funny dircache
entries created after read-tree -m O A B has run.  Of course,
people working on Cogito need to know them, once they decide to
use the read-tree -m O A B mechanism.

 * read-tree -m O A B

   - For description on how this works, the definitive reading
 is [*R1*].  In short:

 - unlike ordinary read-tree, -m form reads up to three
   trees and creates paths that are unmerged.  

 - trivial merges are done by read-tree itself.  only
   conflicting paths will be in unmerged state when
   read-tree returns.

 * write-tree

 - write-tree refuses to give you a tree until all the
   unmerged paths are resolved.

 * show-files

   - show-files --unmerged and show-files --stage can be
 used to examine detailed information on unmerged paths.
 For an unmerged path, instead of recording a single
 mode/SHA1 pair, the dircache records up to three such
 pairs; one from tree O in stage 1, A in stage 2, and B in
 stage 3.  This information can be used by the user (or
 Cogito) to see what should eventually be recorded at the
 path.

 * update-cache

   - An explicit update-cache [--add] path or update-cache
 [--add] --cacheinfo mode SHA1 path tells the plumbing that
 the user (or Cogito) wants to resolve it by storing
 mode/SHA1 of the given working file or mode SHA1 specified
 on the command line.  The path ceases to be in unmerged
 state after this happens.

 Similarly, update-cache --remove path resolves the
 unmerged state and the merge result is not having anything
 at that path.

   - update-cache --refresh, in addition to the needs update
 message people are now familiar with, says needs merge
 for unmerged paths.

 * show-diff

   - show-diff on an unmerged path simply says unmerged (the
 plumbing would not know what to diff with what among three
 stages and the working file).  

 * checkout-cache

   - checkout-cache -a warns about unmerged paths and checks
 out only the merged paths.

   - checkout-cache [-f] path on an unmerged path says
 Unmerged, just like the same command on non-existent path
 says not in the cache, and does not touch the working
 file.
 

I hope the descriptions in this summary are correct enough to be
useful to somebody.


[Reference]

*R1* http://marc.theaimsgroup.com/?l=gitm=111363270608902w=2

-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Remove unused arguments from index_fd()

2005-04-17 Thread Junio C Hamano
The function index_fd() in update-cache.c takes 5 arguments, but
two are not necessary and one that is a pointer to a structure
really needs to be a pointer to one member of that structure.
This patch cleans it up.

Also it removes printf() apparently left after initial
debugging.

Signed-off-by: Junio C Hamano [EMAIL PROTECTED]
---

 update-cache.c |   15 ++-
 1 files changed, 6 insertions(+), 9 deletions(-)

update-cache.c: 5afecd1a4fd90d2505753ce2d5044d780fe69a7f
--- update-cache.c
+++ update-cache.c  2005-04-17 03:22:39.0 -0700
@@ -14,13 +14,13 @@
  */
 static int allow_add = 0, allow_remove = 0;
 
-static int index_fd(const char *path, int namelen, struct cache_entry *ce, int 
fd, struct stat *st)
+static int index_fd(unsigned char *sha1, int fd, struct stat *st)
 {
z_stream stream;
unsigned long size = st-st_size;
-   int max_out_bytes = namelen + size + 200;
+   int max_out_bytes = size + 200;
void *out = malloc(max_out_bytes);
-   void *metadata = malloc(namelen + 200);
+   void *metadata = malloc(200);
void *in;
SHA_CTX c;
 
@@ -56,9 +56,9 @@

SHA1_Init(c);
SHA1_Update(c, out, stream.total_out);
-   SHA1_Final(ce-sha1, c);
+   SHA1_Final(sha1, c);
 
-   return write_sha1_buffer(ce-sha1, out, stream.total_out);
+   return write_sha1_buffer(sha1, out, stream.total_out);
 }
 
 /*
@@ -109,7 +109,7 @@
ce-ce_mode = create_ce_mode(st.st_mode);
ce-ce_flags = htons(namelen);
 
-   if (index_fd(path, namelen, ce, fd, st)  0)
+   if (index_fd(ce-sha1, fd, st)  0)
return -1;
 
return add_cache_entry(ce, allow_add);
@@ -244,13 +244,10 @@
 
if (sscanf(arg1, %o, mode) != 1)
return -1;
-   printf(got mode %o\n, mode);
if (get_sha1_hex(arg2, sha1))
return -1;
-   printf(got sha1 %s\n, sha1_to_hex(sha1));
if (!verify_path(arg3))
return -1;
-   printf(got path %s\n, arg3);
 
len = strlen(arg3);
size = cache_entry_size(len);

-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch] fix for memory leak in write-tree.c

2005-04-17 Thread Brad Roberts
I've made my tree rsync'able:

rsync://gameboy2.puremagic.com/git


commit 1cdbc0a19b8d9b68f1f42735e2f14f1289823a63
tree 0e060fed0642337e675368913ec8d4f910a9f019
parent 11ed64c1b141c9ba397a1ca76aef2cd250976007
author Brad Roberts,,, [EMAIL PROTECTED] 1113736632 -0700
committer Brad Roberts,,, [EMAIL PROTECTED] 1113736632 -0700

Fix a memory leak in write-tree.c, not freeing the directory buffer.

Index: write-tree.c
===
--- 51b1bddbbc05e50d5bbf1f9662e503c2e85d5e96/write-tree.c  (mode:100644 
sha1:34a351b20fb38ea588f34bd9634f101b9dc533cb)
+++ 0e060fed0642337e675368913ec8d4f910a9f019/write-tree.c  (mode:100644 
sha1:fb046aa6ce6b9fce6a523a1e36ff43adab9bdd93)
@@ -93,10 +93,8 @@
i -= 5;
memcpy(buffer+i, tree , 5);

-   buffer += i;
-   offset -= i;
-
-   write_sha1_file(buffer, offset, returnsha1);
+   write_sha1_file(buffer + i, offset - i, returnsha1);
+   free(buffer);
return nr;
 }



-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Fix +x-related show-diff false positives

2005-04-17 Thread Petr Baudis
  Hi,

  the following patch fixes show-diff listing all +x files as differing.

Signed-off-by: Petr Baudis [EMAIL PROTECTED]
--- read-cache.c
+++ read-cache.c2005-04-17 13:42:13.322938268 +0200
@@ -306,7 +306,7 @@
ce-ce_gid != htonl(st-st_gid))
changed |= OWNER_CHANGED;
/* We consider only the owner x bit to be relevant for mode changes */
-   if (0100  (ntohs(ce-ce_mode) ^ st-st_mode))
+   if (0100  (ntohl(ce-ce_mode) ^ st-st_mode))
changed |= MODE_CHANGED;
if (ce-ce_dev != htonl(st-st_dev) ||
ce-ce_ino != htonl(st-st_ino))

  (It is against my tree, but I think it should apply cleanly to yours
too; perhaps with a tiny offset.)

  Kind regards,

-- 
Petr Pasky Baudis
Stuff: http://pasky.or.cz/
C++: an octopus made by nailing extra legs onto a dog. -- Steve Taylor
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch] fork optional branch point normazilation

2005-04-17 Thread Brad Roberts
(ok, author looks better, but committer doesn't obey the AUTHOR_ vars yet)

This might not be how you intended git fork to behave, but without doing
_something_ to protect the head parameter a bit, this is just asking for a
corrupted .git/HEAD file.

commit 76faec069dfeae59c3ce5faaad10bdcded0cc908
tree c291316b28eff4042c80850cd93445345a606835
parent 1cdbc0a19b8d9b68f1f42735e2f14f1289823a63
author Brad Roberts [EMAIL PROTECTED] 1113738584 -0700
committer Brad Roberts,,, [EMAIL PROTECTED] 1113738584 -0700

gitfork needs to normalize the optional third parameter before using it.

Index: gitfork.sh
===
--- 51b1bddbbc05e50d5bbf1f9662e503c2e85d5e96/gitfork.sh  (mode:100755 
sha1:e5692ea9bdbc39b028fe1e1205381da632541bab)
+++ c291316b28eff4042c80850cd93445345a606835/gitfork.sh  (mode:100755 
sha1:386148ae9a99739d06a09742ff4157d0f7e4e223)
@@ -37,6 +37,7 @@
 [ -e $destdir ]  die $destdir already exists

 [ $head ] || head=$(commit-id)
+head=$(gitXnormid.sh -c $head)

 git lntree $destdir
 echo $head .git/heads/$name



-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: fork optional branch point normazilation

2005-04-17 Thread Brad Roberts
 
  Index: gitfork.sh
  ===
  --- 51b1bddbbc05e50d5bbf1f9662e503c2e85d5e96/gitfork.sh  (mode:100755 
  sha1:e5692ea9bdbc39b028fe1e1205381da632541bab)
  +++ c291316b28eff4042c80850cd93445345a606835/gitfork.sh  (mode:100755 
  sha1:386148ae9a99739d06a09742ff4157d0f7e4e223)
  @@ -37,6 +37,7 @@
   [ -e $destdir ]  die $destdir already exists
 
   [ $head ] || head=$(commit-id)
  +head=$(gitXnormid.sh -c $head)
 
   git lntree $destdir
   echo $head .git/heads/$name

 commit-id always returns the normalized commit ID.

 --
   Petr Pasky Baudis
 Stuff: http://pasky.or.cz/
 C++: an octopus made by nailing extra legs onto a dog. -- Steve Taylor


This feels better to me.  Diffed against my previous commit.  The problem
was that commit-id wasn't called if a branch point was specified nor was
that value checked for validity.

Index: gitfork.sh
===
--- c9ccaa172ccab8e56f2fe621ee24896bfddacf26/gitfork.sh  (mode:100755 
sha1:386148ae9a99739d06a09742ff4157d0f7e4e223)
+++ f9e06a309f63ac6858d019b51f2172283378d2ef/gitfork.sh  (mode:100755 
sha1:dbb508b8431368fc95cc9516eada52f5bf0f8bc1)
@@ -16,7 +16,7 @@

 name=$1
 destdir=$2
-head=$3
+head=$(gitXnormid.sh -c $3)

 die () {
echo gitfork.sh: $@ 2
@@ -36,9 +36,6 @@

 [ -e $destdir ]  die $destdir already exists

-[ $head ] || head=$(commit-id)
-head=$(gitXnormid.sh -c $head)
-
 git lntree $destdir
 echo $head .git/heads/$name
 ln -s heads/$name $destdir/.git/HEAD

-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Merge with git-pasky II.

2005-04-17 Thread David Woodhouse
On Sat, 2005-04-16 at 17:33 +0200, Johannes Schindelin wrote:
  But if it can be done cheaply enough at a later date even though we end
  up repeating ourselves, and if it can be done _well_ enough that we
  shouldn't have just asked the user in the first place, then yes, OK I
  agree.
 
 The repetition could be helped by using a cache.

Perhaps. Since neither such a cache nor even the commit comments are
strictly part of the git data, they probably shouldn't be included in
the sha1 hash of the commit object. However, I don't see a fundamental
reason why we couldn't store them in the same file but omit them from
the hash calculations. That also allows us to retrospectively edit
commit comments without completely changing the entire subsequent
history.

Or is that a little too heretical a suggestion?

-- 
dwmw2

-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Add lsremote command.

2005-04-17 Thread Petr Baudis
Dear diary, on Sun, Apr 17, 2005 at 07:36:51AM CEST, I got a letter
where Steven Cole [EMAIL PROTECTED] told me that...
 This is a fairly trivial addition, but if users are adding remote repositories
 with git addremote, then those users should be able to list out the remote
 list without having to know the details of where the remotes file is kept.

Could you please send your patches inline? (Either in the body or with
correct content-disposition header.)

You got the return values the other way around and you are missing a
copyright notice at the top; you should also mention that you take no
parameters.

Please use -s instead of -e, since it is more appropriate in this case.
Also, you should report the no remotes message to stderr. And always
exit when you found that .git/remotes exists, not only if cat succeeds.

Kind regards,

-- 
Petr Pasky Baudis
Stuff: http://pasky.or.cz/
C++: an octopus made by nailing extra legs onto a dog. -- Steve Taylor
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Re-done kernel archive - real one?

2005-04-17 Thread Russell King
On Sat, Apr 16, 2005 at 04:01:45PM -0700, Linus Torvalds wrote:
 So I re-created the dang thing (hey, it takes just a few minutes), and
 pushed it out, and there's now an archive on kernel.org in my public
 personal directory called linux-2.6.git. I'll continue the tradition
 of naming git-archive directories as *.git, since that really ends up
 being the .git directory for the checked-out thing.

We need to work out how we're going to manage to get our git changes to
you.  At the moment, I've very little idea how to do that.  Ideas?

At the bottom is the script itself.  There's probably some aspects of
it which aren't nice, maybe Petr can advise on this (and maybe increase
the functionality of the git shell script to fill in where necessary.)

However, I've made a start to generate the necessary emails.  How about
this format?

I'm not keen on the tree, parent, author and committer objects appearing
in this - they appear to clutter it up.  What're your thoughts?

I'd rather not have the FQDN of the machine where the commit happened
appearing in the logs.  (I've ''d it out for the time being, because
I'd rather not have yet more email-address-like objects get into spammers
databases with which to hammer my 512kbps DSL line.)

Linus,

Please incorporate the latest ARM changes.

This will update the following files:

 arm/kernel/process.c|   15 +++
 arm/kernel/traps.c  |8 ++--
 arm/lib/changebit.S |   11 ++-
 arm/lib/clearbit.S  |   13 ++---
 arm/lib/setbit.S|   11 ++-
 arm/lib/testchangebit.S |   15 ++-
 arm/lib/testclearbit.S  |   15 ++-
 arm/lib/testsetbit.S|   15 ++-
 arm/mach-footbridge/dc21285-timer.c |4 ++--
 arm/mach-sa1100/h3600.c |2 +-
 asm-arm/ptrace.h|5 +
 asm-arm/system.h|3 +++
 12 files changed, 32 insertions(+), 85 deletions(-)

through these ChangeSets:

tree 7c4d75539c29ef7a9dde81acf84a072649f4f394
parent d5922e9c35d21f0b6b82d1fd8b1444cfce57ca34
author Russell King [EMAIL PROTECTED] 1113749462 +0100
committer Russell King [EMAIL PROTECTED] 1113749462 +0100

[PATCH] ARM: bitops

Convert ARM bitop assembly to a macro.  All bitops follow the same
format, so it's silly duplicating the code when only one or two
instructions are different.

Signed-off-by: Russell King [EMAIL PROTECTED]
tree fc10d3ffa6062cda10a10cb8262d8df238aea4fb
parent 5d9a545981893629c8f95e2b8b50d15d18c6ddbc
author Russell King [EMAIL PROTECTED] 1113749436 +0100
committer Russell King [EMAIL PROTECTED] 1113749436 +0100

[PATCH] ARM: showregs

Fix show_regs() to provide a backtrace.  Provide a new __show_regs()
function which implements the common subset of show_regs() and die().
Add prototypes to asm-arm/system.h

Signed-off-by: Russell King [EMAIL PROTECTED]
tree 5591fced9a2b5f84c6772dcbe2eb4b24e29161fc
parent 488faba31f59c5960aabbb2a5877a0f2923937a3
author Russell King [EMAIL PROTECTED] 1113748846 +0100
committer Russell King [EMAIL PROTECTED] 1113748846 +0100

[PATCH] ARM: h3600_irda_set_speed arguments

h3600_irda_set_speed() had the wrong type for the speed argument.
Fix this.

Signed-off-by: Russell King [EMAIL PROTECTED]
tree 2493491da6e446e48d5443f0a549a10ed3d35b62
parent e7905b2f22eb5d5308c9122b9c06c2d02473dd4f
author Russell King [EMAIL PROTECTED] 1113748615 +0100
committer Russell King [EMAIL PROTECTED] 1113748615 +0100

[PATCH] ARM: footbridge rtc init

The footbridge ISA RTC was being initialised before we had setup the
kernel timer.  This caused a divide by zero error when the current
time of day is set.  Resolve this by initialising the RTC after
the kernel timer has been initialised.

Signed-off-by: Russell King [EMAIL PROTECTED]

---

#!/bin/sh
prev=$(cat .git/heads/origin)
to=$(cat .git/HEAD)
who=Linus
what=ARM
 
cat  EOT
${who},
 
Please incorporate the latest ${what} changes.
 
This will update the following files:
 
EOT
 
git diff $prev $to | diffstat -p1
 
cat  EOT
 
through these ChangeSets:
 
EOT
 
this=$to
while [ $this != $prev ]; do
  cat-file commit $this | sed 's,.*,\t,'
  this=$(cat-file commit $this | grep ^parent | cut -d ' ' -f 2)
done


-- 
Russell King

-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[2/5] Add merge-base

2005-04-17 Thread Daniel Barkalow
merge-base finds one of the best common ancestors of a pair of commits. In
particular, it finds one of the ones which is fewest commits away from the
further of the heads.

Signed-Off-By: Daniel Barkalow [EMAIL PROTECTED]
Index: Makefile
===
--- 37a0b01b85c2999243674d48bfc71cdba0e5518e/Makefile  (mode:100644 
sha1:346e3850de026485802e41e16a1180be2df85e4a)
+++ d662b707e11391f6cfe597fd4d0bf9c41d34d01a/Makefile  (mode:100644 
sha1:b2ce7c5b63fffca59653b980d98379909f893d44)
@@ -14,7 +14,7 @@
 
 PROG=   update-cache show-diff init-db write-tree read-tree commit-tree \
cat-file fsck-cache checkout-cache diff-tree rev-tree show-files \
-   check-files ls-tree
+   check-files ls-tree merge-base
 
 SCRIPT=parent-id tree-id git gitXnormid.sh gitadd.sh gitaddremote.sh \
gitcommit.sh gitdiff-do gitdiff.sh gitlog.sh gitls.sh gitlsobj.sh \
Index: merge-base.c
===
--- /dev/null  (tree:37a0b01b85c2999243674d48bfc71cdba0e5518e)
+++ d662b707e11391f6cfe597fd4d0bf9c41d34d01a/merge-base.c  (mode:100644 
sha1:0f85e7d9e9a896d1142a54170ddf1159f11f9cdd)
@@ -0,0 +1,108 @@
+#include stdlib.h
+#include cache.h
+#include revision.h
+
+struct revision *common_ancestor(struct revision *rev1, struct revision *rev2)
+{
+   struct parent *parent;
+
+   struct parent *rev1list = malloc(sizeof(struct parent));
+   struct parent *rev2list = malloc(sizeof(struct parent));
+
+   struct parent *posn, *temp;
+
+   rev1list-parent = rev1;
+   rev1list-next = NULL;
+
+   rev2list-parent = rev2;
+   rev2list-next = NULL;
+
+   while (rev1list || rev2list) {
+   posn = rev1list;
+   rev1list = NULL;
+   while (posn) {
+   parse_commit_object(posn-parent);
+   if (posn-parent-flags  0x0001) {
+   /*
+   printf(1 already seen %s %x\n,
+  sha1_to_hex(posn-parent-sha1),
+  posn-parent-flags);
+   */
+// do nothing
+   } else if (posn-parent-flags  0x0002) {
+//  free lists
+   return posn-parent;
+   } else {
+   /*
+   printf(1 based on %s\n,
+  sha1_to_hex(posn-parent-sha1));
+   */
+   posn-parent-flags |= 0x0001;
+
+   parent = posn-parent-parent;
+   while (parent) {
+   temp = malloc(sizeof(struct parent));
+   temp-next = rev1list;
+   temp-parent = parent-parent;
+   rev1list = temp;
+   parent = parent-next;
+   }
+   }
+   posn = posn-next;
+   }
+   posn = rev2list;
+   rev2list = NULL;
+   while (posn) {
+   parse_commit_object(posn-parent);
+   if (posn-parent-flags  0x0002) {
+   /*
+   printf(2 already seen %s\n,
+  sha1_to_hex(posn-parent-sha1));
+   */
+// do nothing
+   } else if (posn-parent-flags  0x0001) {
+//  free lists
+   return posn-parent;
+   } else {
+   /*
+   printf(2 based on %s\n,
+  sha1_to_hex(posn-parent-sha1));
+   */
+   posn-parent-flags |= 0x0002;
+
+   parent = posn-parent-parent;
+   while (parent) {
+   temp = malloc(sizeof(struct parent));
+   temp-next = rev2list;
+   temp-parent = parent-parent;
+   rev2list = temp;
+   parent = parent-next;
+   }
+   }
+   posn = posn-next;
+   }
+   }
+   return NULL;
+}
+
+int main(int argc, char **argv)
+{
+   struct revision *rev1, *rev2, *ret;
+   unsigned char rev1key[20], rev2key[20];
+   if (argc != 3 ||
+   get_sha1_hex(argv[1], rev1key) ||
+ 

Re: Re: Merge with git-pasky II.

2005-04-17 Thread Ingo Molnar

* Ingo Molnar [EMAIL PROTECTED] wrote:

 The compromise relies on you having reviewed something harmless, while 
 in reality what happened within the DB was far less harmless. And the 
 DB remains self-consistent: neither fsck, nor others importing your 
 tree will be able to detect the compromise. This attack can only be 
 detected when you apply the patch, after that point all the 
 information (except Malice's message in your inbox) is gone.

in fact, this attack cannot even be proven to be malicious, purely via 
the email from Malice: it could be incredibly bad luck that caused that 
good-looking patch to be mistakenly matching a dangerous object.

In fact this could happen even today, _accidentally_. (but i'm willing 
to bet that hell will be freezing over first, and i'll have some really 
good odds ;) There's probably a much higher likelihood of Linus' tree 
getting corrupted in some old fashioned way and introducing a security 
hole by accident)

Ingo
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[3/5] Add http-pull

2005-04-17 Thread Daniel Barkalow
http-pull is a program that downloads from a (normal) HTTP server a commit
and all of the tree and blob objects it refers to (but not other commits,
etc.). Options could be used to make it download a larger or different
selection of objects. It depends on libcurl, which I forgot to mention in
the README again.

Signed-Off-By: Daniel Barkalow [EMAIL PROTECTED]
Index: Makefile
===
--- d662b707e11391f6cfe597fd4d0bf9c41d34d01a/Makefile  (mode:100644 
sha1:b2ce7c5b63fffca59653b980d98379909f893d44)
+++ 157b46ce1d82b3579e2e1258927b0d9bdbc033ab/Makefile  (mode:100644 
sha1:940ef8578cf469354002cd8feaec25d907015267)
@@ -14,7 +14,7 @@
 
 PROG=   update-cache show-diff init-db write-tree read-tree commit-tree \
cat-file fsck-cache checkout-cache diff-tree rev-tree show-files \
-   check-files ls-tree merge-base
+   check-files ls-tree http-pull merge-base
 
 SCRIPT=parent-id tree-id git gitXnormid.sh gitadd.sh gitaddremote.sh \
gitcommit.sh gitdiff-do gitdiff.sh gitlog.sh gitls.sh gitlsobj.sh \
@@ -35,6 +35,7 @@
 
 LIBS= -lssl -lz
 
+http-pull: LIBS += -lcurl
 
 $(PROG):%: %.o $(COMMON)
$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
Index: http-pull.c
===
--- /dev/null  (tree:d662b707e11391f6cfe597fd4d0bf9c41d34d01a)
+++ 157b46ce1d82b3579e2e1258927b0d9bdbc033ab/http-pull.c  (mode:100644 
sha1:106ca31239e6afe6784e7c592234406f5c149e44)
@@ -0,0 +1,126 @@
+#include fcntl.h
+#include unistd.h
+#include string.h
+#include stdlib.h
+#include cache.h
+#include revision.h
+#include errno.h
+#include stdio.h
+
+#include curl/curl.h
+#include curl/easy.h
+
+static CURL *curl;
+
+static char *base;
+
+static int fetch(unsigned char *sha1)
+{
+   char *hex = sha1_to_hex(sha1);
+   char *filename = sha1_file_name(sha1);
+
+   char *url;
+   char *posn;
+   FILE *local;
+   struct stat st;
+
+   if (!stat(filename, st)) {
+   return 0;
+   }
+
+   local = fopen(filename, w);
+
+   if (!local) {
+   fprintf(stderr, Couldn't open %s\n, filename);
+   return -1;
+   }
+
+   curl_easy_setopt(curl, CURLOPT_FILE, local);
+
+   url = malloc(strlen(base) + 50);
+   strcpy(url, base);
+   posn = url + strlen(base);
+   strcpy(posn, objects/);
+   posn += 8;
+   memcpy(posn, hex, 2);
+   posn += 2;
+   *(posn++) = '/';
+   strcpy(posn, hex + 2);
+
+   curl_easy_setopt(curl, CURLOPT_URL, url);
+
+   curl_easy_perform(curl);
+
+   fclose(local);
+   
+   return 0;
+}
+
+static int process_tree(unsigned char *sha1)
+{
+   void *buffer;
+unsigned long size;
+char type[20];
+
+buffer = read_sha1_file(sha1, type, size);
+   if (!buffer)
+   return -1;
+   if (strcmp(type, tree))
+   return -1;
+   while (size) {
+   int len = strlen(buffer) + 1;
+   unsigned char *sha1 = buffer + len;
+   unsigned int mode;
+   int retval;
+
+   if (size  len + 20 || sscanf(buffer, %o, mode) != 1)
+   return -1;
+
+   buffer = sha1 + 20;
+   size -= len + 20;
+
+   retval = fetch(sha1);
+   if (retval)
+   return -1;
+
+   if (S_ISDIR(mode)) {
+   retval = process_tree(sha1);
+   if (retval)
+   return -1;
+   }
+   }
+   return 0;
+}
+
+static int process_commit(unsigned char *sha1)
+{
+   struct revision *rev = lookup_rev(sha1);
+   if (parse_commit_object(rev))
+   return -1;
+   
+   fetch(rev-tree);
+   process_tree(rev-tree);
+   return 0;
+}
+
+int main(int argc, char **argv)
+{
+   char *commit_id = argv[1];
+   char *url = argv[2];
+
+   unsigned char sha1[20];
+
+   get_sha1_hex(commit_id, sha1);
+
+   curl_global_init(CURL_GLOBAL_ALL);
+
+   curl = curl_easy_init();
+
+   base = url;
+
+   fetch(sha1);
+   process_commit(sha1);
+
+   curl_global_cleanup();
+   return 0;
+}

-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Parseable commit header

2005-04-17 Thread Linus Torvalds


On Sun, 17 Apr 2005, Stefan-W. Hahn wrote:
 
 after playing a while with git-pasky it is a crap to interpret the date of
 commit logs. Though it was a good idea to put the date in a parseable format
 (seconds since), but the format of the commit itself is not good parseable.

Actually, it is. The commit stuff removes all special characters from the 
strings, so '<' and '>' around the email do indeed act as delimiters, and 
cannot exist anywhere else.

Linus
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Storing permissions

2005-04-17 Thread Linus Torvalds


On Sun, 17 Apr 2005, David A. Wheeler wrote:
 
 There's a minor reason to write out ALL the perm bit data, but
 only care about a few bits coming back in: Some people use
 SCM systems as a generalized backup system

Yes. I was actually thinking about having system config files in a git 
repository when I started it, since I noticed how nicely it would do 
exactly that.

However, since the mode bits also end up being part of the name of the 
tree object (ie they are most certainly part of the hash), it's really 
basically impossible to only care about one bit but writing out many bits: 
it's the same issue of having multiple identical blocks with different 
names.

It's ok if it happens occasionally (it _will_ happen at the point of a
tree conversion to the new format, for example), but it's not ok if it
happens all the time - which it would, since some people have umask 002
(and individual groups) and others have umask 022 (and shared groups), and
I can imagine that some anal people have umask 0077 (I don't want to play
with others).

The trees would constantly bounce between a million different combinations 
(since _some_ files would be checked out with the other mode).

At least if you always honor umask or always totally ignore umask, you get 
a nice repeatable thing. We tried the always ignore umask thing, and the 
problem with that is that while _git_ ended up always doing a fchmod() 
to reset the whole permission mask, anybody who created files any other 
way and then checked them in would end up using umask.

One solution is to tell git with a command line flag and/or config file 
entry that for this repo, I want you to honor all bits. That should be 
easy enough to add at some point, and then you really get what you want.

That said, git won't be really good at doing system backup. I actually 
_do_ save a full 32-bit of mode (hey, you could have immutable bits 
etc set), but anybody who does anything fancy at all with mtime would be 
screwed, for example.

Also, right now we don't actually save any other type of file than
regular/directory, so you'd have to come up with a good save-format for
symlinks (easy, I guess - just make a link blob) and device nodes (that
one probably should be saved in the cache_entry  itself, possibly
encoded where the sha1 hash normally is).

Also, I made a design decision that git only cares about non-dotfiles. Git 
literally never sees or looks at _anything_ that starts with a ".". I 
think that's absolutely the right thing to do for an SCM (if you hide your 
files, I really don't think you should expect the SCM to see it), but it's 
obviously not the right thing for a backup thing.

(It _might_ be the right thing for a system config file, though, eg 
tracking something like /etc with git might be ok, modulo the other 
issues).

Linus
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Parsing code in revision.h

2005-04-17 Thread Petr Baudis
Dear diary, on Sun, Apr 17, 2005 at 05:24:20PM CEST, I got a letter
where Daniel Barkalow [EMAIL PROTECTED] told me that...
 This adds support to revision.h for parsing commit records (but not going
 any further than parsing a single record). Something like this is needed
 by anything that uses revision.h, but older programs open-code it.
 
 Signed-Off-By: Daniel Barkalow [EMAIL PROTECTED]

Could you please convert the current users (rev-tree.c and fsck-cache.c)
to use this in the same patch?

 Index: revision.h
 ===
 --- 45f926575d2c44072bfcf2317dbf3f0fbb513a4e/revision.h  (mode:100644 
 sha1:28d0de3261a61f68e4e0948a25a416a515cd2e83)
 +++ 37a0b01b85c2999243674d48bfc71cdba0e5518e/revision.h  (mode:100644 
 sha1:523bde6e14e18bb0ecbded8f83ad4df93fc467ab)
 @@ -24,6 +24,7 @@
   unsigned int flags;
   unsigned char sha1[20];
   unsigned long date;
 + unsigned char tree[20];
   struct parent *parent;
  };
  
 @@ -111,4 +112,29 @@
   }
  }
  
 +static int parse_commit_object(struct revision *rev)
 +{
 + if (!(rev-flags  SEEN)) {
 + void *buffer, *bufptr;
 + unsigned long size;
 + char type[20];
 + unsigned char parent[20];
 +
 + rev-flags |= SEEN;
 + buffer = bufptr = read_sha1_file(rev-sha1, type, size);
 + if (!buffer || strcmp(type, commit))
 + return -1;
 + get_sha1_hex(bufptr + 5, rev-tree);
 + bufptr += 46; /* tree  + hex sha1 + \n */
 + while (!memcmp(bufptr, parent , 7)  
 +!get_sha1_hex(bufptr+7, parent)) {
 + add_relationship(rev, parent);
 + bufptr += 48;   /* parent  + hex sha1 + \n */
 + }
 + //rev-date = parse_commit_date(bufptr);

I don't like this.

 + free(buffer);
 + }
 + return 0;
 +}
 +
  #endif /* REVISION_H */

BTW, I think that in longer term having this stuffed in revision.h is a
bad idea, we should have revision.c. I will accept patches putting the
stuff to revision.h for now, though (unless it gets outrageous).

-- 
Petr Pasky Baudis
Stuff: http://pasky.or.cz/
C++: an octopus made by nailing extra legs onto a dog. -- Steve Taylor
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Storing permissions

2005-04-17 Thread David A. Wheeler
Linus Torvalds wrote:
On Sun, 17 Apr 2005, David A. Wheeler wrote:
There's a minor reason to write out ALL the perm bit data, but
only care about a few bits coming back in: Some people use
SCM systems as a generalized backup system
Yes. I was actually thinking about having system config files in a git 
repository when I started it, since I noticed how nicely it would do 
exactly that.

However, since the mode bits also end up being part of the name of the 
tree object (ie they are most certainly part of the hash), it's really 
basically impossible to only care about one bit but writing out many bits: 
it's the same issue of having multiple identical blocks with different 
names.
...
One solution is to tell git with a command line flag and/or config file 
entry that for this repo, I want you to honor all bits. That should be 
easy enough to add at some point, and then you really get what you want.
Yes, I thought of that too.  And I agree, that should do the job.
My real concern is I'm looking at the early design of the
storage format so that it's POSSIBLE to extend git in obvious ways.
As long as it's possible later, then that's a great thing.
...
Also, I made a design decision that git only cares about non-dotfiles. Git 
literally never sees or looks at _anything_ that starts with a ".". I 
think that's absolutely the right thing to do for an SCM (if you hide your 
files, I really don't think you should expect the SCM to see it), but it's 
obviously not the right thing for a backup thing.
Again, a command line flag or config file entry could change that
in the future, if desired.  So this is a decision that could be
changed later... the best kind of decision :-).
--- David A. Wheeler
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: using git directory cache code in darcs?

2005-04-17 Thread Linus Torvalds


On Sun, 17 Apr 2005, David Roundy wrote:
 
 That's all right.  Darcs would only access the cached data through a
 git-caching layer, and we've already got an abstraction layer over the
 pristine cache.  As long as the git layer can quickly retrieve the contents
 of a given file, we should be fine.

Yes.

In fact, one of my hopes was that other SCM's could just use the git
plumbing. But then I'd really suggest that you use git itself, not any
libgit. Ie you take _all_ the plumbing as real programs, and instead of
trying to link against individual routines, you'd _script_ it.

In other words, git would be an independent cache of the real SCM,
and/or the old history (ie an SCM that uses git could decide that the
git stuff is fine for archival, and really use git as the base: and then
the SCM could entirely concentrate on _only_ the interesting parts, ie
the actual merging etc).

That was really what I always personally saw git as, just the plumbing
beneath the surface. For example, something like arch, which is based on
patches and tar-balls (I think darcs is similar in that respect), could
use git as a _hell_ of a better history of tar-balls.

The thing is, unless you take the git object database approach, using 
_just_ the index part doesn't really mean all that much. Sure, you could 
just keep the current objects in the object database, but quite 
frankly, there would probably not be a whole lot of point to that. You'd 
waste so much time pruning and synchronizing with your real database 
that I suspect you'd be better off not using it.

(Or you could prune nightly or something, I guess).

Linus
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Re: Re-done kernel archive - real one?

2005-04-17 Thread Petr Baudis
Dear diary, on Sun, Apr 17, 2005 at 05:24:48PM CEST, I got a letter
where Russell King [EMAIL PROTECTED] told me that...
 However, I've made a start to generate the necessary emails.  How about
 this format?
 
 I'm not keen on the tree, parent, author and committer objects appearing
 in this - they appear to clutter it up.  What're your thoughts?

I think it would look nicer if you used git log format. Actually...

 
 #!/bin/sh
 prev=$(cat .git/heads/origin)
 to=$(cat .git/HEAD)

It is preferred to use the commit-id command instead; when git decides
to totally dig over its metametadata storage, you won't need to care.

 this=$to
 while [ $this != $prev ]; do
   cat-file commit $this | sed 's,.*,\t,'
   this=$(cat-file commit $this | grep ^parent | cut -d ' ' -f 2)
 done

What about adding support to git log for not caring about side branches
(if there are multiple parents, go only over the first one; you might
add that as a switch to rev-tree, which would turn it to rev-line ;-)?

Then, you could also easily implement support for passing additional
commit ID to git log, and it would then print out the range between
these two.

That would do exactly what you want, and would be probably quite usable
for other uses too.

-- 
Petr Pasky Baudis
Stuff: http://pasky.or.cz/
C++: an octopus made by nailing extra legs onto a dog. -- Steve Taylor
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Re-done kernel archive - real one?

2005-04-17 Thread Linus Torvalds


On Sun, 17 Apr 2005, Russell King wrote:

 On Sat, Apr 16, 2005 at 04:01:45PM -0700, Linus Torvalds wrote:
  So I re-created the dang thing (hey, it takes just a few minutes), and
  pushed it out, and there's now an archive on kernel.org in my public
  personal directory called linux-2.6.git. I'll continue the tradition
  of naming git-archive directories as *.git, since that really ends up
  being the .git directory for the checked-out thing.
 
 We need to work out how we're going to manage to get our git changes to
 you.  At the moment, I've very little idea how to do that.  Ideas?

To me, merging is my highest priority. I suspect that once I have a tree 
from you (or anybody else) that I actually _test_ merging with, I'll be 
motivated as hell to make sure that my plumbing actually works. 

After all, it's not just you who want to have to avoid the pain of 
merging: it's definitely in my own best interests to make merging as 
easy as possible. You're _the_ most obvious initial candidate, because 
your merges almost never have any conflicts at all, even on a file level 
(much less within a file).

 However, I've made a start to generate the necessary emails.  How about
 this format?
 
 I'm not keen on the tree, parent, author and committer objects appearing
 in this - they appear to clutter it up.  What're your thoughts?

Indeed. I'd almost drop the whole header except for the author line. 

Oh, and you need a separator between commits, right now your 
Signed-off-by: line ends up butting up with the header of the next 
commit ;)

 I'd rather not have the FQDN of the machine where the commit happened
 appearing in the logs.

That's fine. Our short-logs have always tried to have just the real name 
in them, and I do want an email-like thing for tracking the developer, but 
yes, if you remove the email, that's fine. It should be easy enough to do 
with a simple

sed 's/<.*//'

or similar.

And if you replace author with From: and do the date conversion, it
might look more natural.

Linus
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Re-done kernel archive - real one?

2005-04-17 Thread Linus Torvalds


On Sun, 17 Apr 2005, Russell King wrote:
 
 BTW, there appears to be errors in the history committed thus far.
 I'm not sure where this came from though.  Some of them could be
 UTF8 vs ASCII issues, but there's a number which seem to have extra
 random crap in them (^M) and lots of blank lines).

Ah, yes. That is actually from the original emails from Andrew. I do not 
know why, but I see them there. It's his script that does something 
strange.

(Andrew: in case you care, the first one is

[patch 003/198] arm: fix SIGBUS handling

which has the email looking like

...
From: [EMAIL PROTECTED]
Date: Tue, 12 Apr 2005 03:30:35 -0700
Status: 
X-Status: 
X-Keywords:   

^M)


From: Russell King [EMAIL PROTECTED]

ARM wasn't raising a SIGBUS with a siginfo structure.  Fix
__do_user_fault() to allow us to use it for SIGBUS conditions, and 
arrange
for the sigbus path to use this.
...

 One thing which definitely needs to be considered is - what character
 encoding are the comments to be stored as?

To git, it's just a byte stream, and you can have binary comments if you
want to. I personally would prefer to move towards UTF eventually, but I
really don't think it matters a whole lot as long as 99.9% of everything
we'd see there is still 7-bit ascii.

 ID: 75f86bac962b7609b0f3c21d25e10647ff8ed280
 [PATCH] intel8x0: AC'97 audio patch for Intel ESB2
  
 This patch adds the Intel ESB2 DID's to the intel8x0.c file for AC'97 
 audio
 support.
  
 Signed-off-by: A0Jason Gaston [EMAIL PROTECTED]

That A0 is also there in Andrew's original email. It's space with the
high bit set, and I have no idea why.

Linus
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Parseable commit header

2005-04-17 Thread David A. Wheeler
Stefan-W. Hahn wrote:
Hi,
after playing a while with git-pasky it is a crap to interpret the date of
commit logs. Though it was a good idea to put the date in a parseable format
(seconds since), but the format of the commit itself is not good parseable.

Should be:
...
Committer-Dater: 1113684324 +0200
I'm probably coming in late to the game, but exactly
why is seconds-since-epoch format used instead of a format
more easily understood by humans?  Yes, I know tools
can easily convert that, but you're already using an ASCII format;
why not just record it in a format that's easily eyeballed like ISO's
yyyymmddThhmmss [timezone]? E.G.:
 20050417T171520 +0200
or some such?  I'm SURE that people will mention things
like the patch I posted on April 17, 2005, and having the
patch format record times that way, directly, would be convenient
to the poor slobs^H^H^H^H^H developers who come later.
Yes, a tool can handle the conversion, but choosing formats
so a tool is unneeded for simple stuff is often better!
--- David A. Wheeler
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: using git directory cache code in darcs?

2005-04-17 Thread Mike Taht
Linus Torvalds wrote:
On Sun, 17 Apr 2005, David Roundy wrote:
That's all right.  Darcs would only access the cached data through a
git-caching layer, and we've already got an abstraction layer over the
pristine cache.  As long as the git layer can quickly retrieve the contents
of a given file, we should be fine.

Yes.
In fact, one of my hopes was that other SCM's could just use the git
plumbing. But then I'd really suggest that you use git itself, not any
libgit. Ie you take _all_ the plumbing as real programs, and instead of
trying to link against individual routines, you'd _script_ it.
If you don't want it, I won't do it. Still makes sense to separate the 
plumbing from the porcelain, though.

--
Mike Taht
  You can tell how far we have to go, when FORTRAN is the language of
supercomputers.
-- Steven Feiner
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[2.1/5] Add merge-base

2005-04-17 Thread Daniel Barkalow
merge-base finds one of the best common ancestors of a pair of commits. In
particular, it finds one of the ones which is fewest commits away from the
further of the heads.

Signed-Off-By: Daniel Barkalow [EMAIL PROTECTED]
Index: Makefile
===
--- 45f926575d2c44072bfcf2317dbf3f0fbb513a4e/Makefile  (mode:100644 
sha1:346e3850de026485802e41e16a1180be2df85e4a)
+++ 7d806c2d3be8f87d3d4d87e5254500d7fc24476b/Makefile  (mode:100644 
sha1:0e84e3cd12f836602b420c197e08fabefe975493)
@@ -14,7 +17,7 @@
 
 PROG=   update-cache show-diff init-db write-tree read-tree commit-tree \
cat-file fsck-cache checkout-cache diff-tree rev-tree show-files \
-   check-files ls-tree
+   check-files ls-tree merge-base
 
 SCRIPT=parent-id tree-id git gitXnormid.sh gitadd.sh gitaddremote.sh \
gitcommit.sh gitdiff-do gitdiff.sh gitlog.sh gitls.sh gitlsobj.sh \
Index: merge-base.c
===
--- /dev/null  (tree:45f926575d2c44072bfcf2317dbf3f0fbb513a4e)
+++ 7d806c2d3be8f87d3d4d87e5254500d7fc24476b/merge-base.c  (mode:100644 
sha1:ee979c7532cbdf823e9930993b0dd8f97aadb21f)
@@ -0,0 +1,95 @@
+#include stdlib.h
+#include cache.h
+#include revision.h
+
+static struct revision *process_list(struct parent **list_p, int this_mark,
+int other_mark)
+{
+   struct parent *parent, *temp;
+   struct parent *posn = *list_p;
+   *list_p = NULL;
+   while (posn) {
+   parse_commit_object(posn-parent);
+   if (posn-parent-flags  this_mark) {
+   /*
+ printf(%d already seen %s %x\n,
+ this_mark
+ sha1_to_hex(posn-parent-sha1),
+ posn-parent-flags);
+   */
+   /* do nothing; this indicates that this side
+* split and reformed, and we only need to
+* mark it once.
+*/
+   } else if (posn-parent-flags  other_mark) {
+   return posn-parent;
+   } else {
+   /*
+ printf(%d based on %s\n,
+ this_mark,
+ sha1_to_hex(posn-parent-sha1));
+   */
+   posn-parent-flags |= this_mark;
+   
+   parent = posn-parent-parent;
+   while (parent) {
+   temp = malloc(sizeof(struct parent));
+   temp-next = *list_p;
+   temp-parent = parent-parent;
+   *list_p = temp;
+   parent = parent-next;
+   }
+   }
+   posn = posn-next;
+   }
+   return NULL;
+}
+
+struct revision *common_ancestor(struct revision *rev1, struct revision *rev2)
+{
+   struct parent *rev1list = malloc(sizeof(struct parent));
+   struct parent *rev2list = malloc(sizeof(struct parent));
+
+   rev1list-parent = rev1;
+   rev1list-next = NULL;
+
+   rev2list-parent = rev2;
+   rev2list-next = NULL;
+
+   while (rev1list || rev2list) {
+   struct revision *ret;
+   ret = process_list(rev1list, 0x1, 0x2);
+   if (ret) {
+   /*  free lists */
+   return ret;
+   }
+   ret = process_list(rev2list, 0x2, 0x1);
+   if (ret) {
+   /*  free lists */
+   return ret;
+   }
+   }
+   return NULL;
+}
+
+int main(int argc, char **argv)
+{
+   struct revision *rev1, *rev2, *ret;
+   unsigned char rev1key[20], rev2key[20];
+
+   if (argc != 3 ||
+   get_sha1_hex(argv[1], rev1key) ||
+   get_sha1_hex(argv[2], rev2key)) {
+   usage(merge-base commit-id commit-id);
+   }
+   rev1 = lookup_rev(rev1key);
+   rev2 = lookup_rev(rev2key);
+   ret = common_ancestor(rev1, rev2);
+   if (ret) {
+   printf(%s\n, sha1_to_hex(ret-sha1));
+   return 0;
+   } else {
+   return 1;
+   }
+   
+}

-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch] fork optional branch point normazilation

2005-04-17 Thread Linus Torvalds


On Sun, 17 Apr 2005, Brad Roberts wrote:

 (ok, author looks better, but committer doesn't obey the AUTHOR_ vars yet)

They shouldn't, but maybe I should add COMMITTER_xxx overrides. I just do 
_not_ want people to think that they should claim to be somebody else: 
it's not a security issue (you could compile your own commit-tree.c 
after all), it's more of a social rule thing. I prefer seeing bad email 
addresses that at least match the system setup to seeing good email 
addresses that people made up just to make them look clean.

Mind showing what your /etc/passwd file looks like (just your own entry, 
and please just remove your password entry if you don't use shadow 
passwords).

Maybe I should just remove _all_ strange characters when I do the name 
cleanup in commit. Right now I just remove the ones that matter to 
parsing it unambiguously: '\n' '<' and '>'.

(The ',' character really is special: some people have

Torvalds, Linus

and maybe I should not just remove the commas, I should convert it to 
always be Linus Torvalds. But your gecos entry is just _strange_. Why 
the extra commas, I wonder?)

Linus
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch] fork optional branch point normazilation

2005-04-17 Thread Daniel Barkalow
On Sun, 17 Apr 2005, Linus Torvalds wrote:

 On Sun, 17 Apr 2005, Brad Roberts wrote:
 
  (ok, author looks better, but committer doesn't obey the AUTHOR_ vars yet)
 
 They should't, but maybe I should add COMMITTER_xxx overrides. I just do 
 _not_ want people to think that they should claim to be somebody else: 
 it's not a security issue (you could compile your own commit-tree.c 
 after all), it's more of a social rule thing. I prefer seeing bad email 
 addresses that at least match the system setup to seeing good email 
 addresses that people made up just to make them look clean.

It seems to me like there should be a set of variables for the user in
general, and the various git scripts should arrange them appropriately
(e.g., git apply could look for a first Signed-Off-By, and make the
AUTHOR_ variables match that (for the next commit), while making the
COMMITTER match the user, etc). It seems to me like the current situation
is likely to lead to people claiming to be other people when applying
their patches, just due to having set up their correct info for handling
their own patches.

Actually, if the scripts are reorganizing them, they might as well send
them on the command line.

-Daniel
*This .sig left intentionally blank*

-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [4/5] Add option for hardlinkable cache of extracted blobs

2005-04-17 Thread Petr Baudis
Dear diary, on Sun, Apr 17, 2005 at 05:35:19PM CEST, I got a letter
where Daniel Barkalow [EMAIL PROTECTED] told me that...
 Index: checkout-cache.c
 ===
 --- 157b46ce1d82b3579e2e1258927b0d9bdbc033ab/checkout-cache.c  (mode:100644 
 sha1:5d3028df0a45329e45fff2006719c9267adeb946)
 +++ 08f7700831e056ad710af69f91e3a8a705b6b2b1/checkout-cache.c  (mode:100644 
 sha1:338588259e17dd235fdc7db759d770004a760e15)
 @@ -67,6 +71,80 @@
   return fd;
  }
  
 +#ifdef HARDLINK_CACHE
 +
 +/*
 + * NOTE! This returns a statically allocated buffer, so you have to be
 + * careful about using it. Do a strdup() if you need to save the
 + * filename.
 + */
 +char *sha1_blob_cache_file_name(const unsigned char *sha1)
 +{
..code basically identical with sha1_file_name()..
 +}

You can guess what would I like you to do. ;-)

 +
 +static int write_entry(struct cache_entry *ce)
 +{
 + int fd;
 + void *new;
 + unsigned long size;
 + long wrote;
 + char type[20];
 + char *cache_name;
 + struct stat st;
 +
 + cache_name = sha1_blob_cache_file_name(ce-sha1);
 +
 + if (stat(cache_name, st)) {
..basically cut'n'paste of non-hardlinking write_entry()..

BTW, I'd just use access(F_OK) instead of stat() if I don't care about
the file's stat at all anyway.

 + }
 + if (link(cache_name, ce-name)) {
 + if (errno == ENOENT) {
 + create_directories(ce-name);
 + link(cache_name, ce-name);
 + }
 + }
 + return 0;
 +}

I think it would be better to have this as hardlink_entry() and
write_entry() to take the file name to write the entry to. Then you
should explicitly multiplex in checkout_cache() between what you do.

-- 
Petr Pasky Baudis
Stuff: http://pasky.or.cz/
C++: an octopus made by nailing extra legs onto a dog. -- Steve Taylor
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Yet another base64 patch

2005-04-17 Thread David A. Wheeler
Paul Jackson wrote:
David wrote:
My list would be:
ext2, ext3, NFS, and Windows' NTFS (stupid short filenames,
case-insensitive/case-preserving).

I'm no mind reader, but I'd bet a pretty penny that what you have in
mind and what Linus has in mind have no overlaps in their solution sets.
Sadly, I lack the mind reading ability as well.
Our goals are, I suspect, somewhat different.
Linus wants to build a tool that meets his specific needs
(managing kernel development), and he has particular requirements
(such as fast simple merging when working at large scales).
In contrast, I'm hoping for a more
general OSS/FS SCM tool that many others can use as well.
But I think there's heavy overlap in the solution space.
The Linux kernel project is, to my knowledge, the largest
project using a truly distributed SCM process.
Anyone else who is considering a distributed SCM process
would at _least_ want to think about how the Linux kernel
project works, and if they're doing so, they
might also want to reuse the development tools.
I'm just taking a peek, and
looking for situations where a design decision is irrelevant
for his purposes, but a particular direction would be of
particular help to other projects.  I'm more worried about the
storage format; if the code doesn't support some particular
feature but it could be added later without great pain, no big deal.
If something would imply a complete rewrite, that's undesirable.
--- David A. Wheeler
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Re-done kernel archive - real one?

2005-04-17 Thread Petr Baudis
Dear diary, on Sun, Apr 17, 2005 at 08:13:59PM CEST, I got a letter
where David A. Wheeler [EMAIL PROTECTED] told me that...
 On Sun, 17 Apr 2005, Russell King wrote:
 BTW, there appears to be errors in the history committed thus far.
 I'm not sure where this came from though.  Some of them could be
 UTF8 vs ASCII issues,  
 ...
 One thing which definitely needs to be considered is - what character
 encoding are the comments to be stored as?
 
 Linus Torvalds replied:
 To git, it's just a byte stream, and you can have binary comments if you
 want to. I personally would prefer to move towards UTF eventually, but I
 really don't think it matters a whole lot as long as 99.9% of everything
 we'd see there is still 7-bit ascii.
 
 I would _heartily_ recommend moving towards UTF-8 as the
 internal charset for all comments.  Alternatives are possible
 (e.g., recording the charset in the header), but they're
 incredibly messy.  Even if you don't normally work in UTF-8,
 it's pretty easy to set most editors up to read & write UTF-8.
 Having the data stored as a constant charset eliminates
 a raft of error-prone code.

I tend to agree here. My toilet stuff is what can handle various
locale-based conversions at the commit-tree / cat-file tree sides etc,
but UTF-8 should be certainly the way to go internally.

Not that the plumbing should actually _care_ at all; anyone who uses it
should take the care, so this is more of a social thing.

-- 
Petr Pasky Baudis
Stuff: http://pasky.or.cz/
C++: an octopus made by nailing extra legs onto a dog. -- Steve Taylor
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [1/5] Parsing code in revision.h

2005-04-17 Thread Linus Torvalds


On Sun, 17 Apr 2005, Daniel Barkalow wrote:

 --- 45f926575d2c44072bfcf2317dbf3f0fbb513a4e/revision.h  (mode:100644 
 sha1:28d0de3261a61f68e4e0948a25a416a515cd2e83)
 +++ 37a0b01b85c2999243674d48bfc71cdba0e5518e/revision.h  (mode:100644 
 sha1:523bde6e14e18bb0ecbded8f83ad4df93fc467ab)
 @@ -24,6 +24,7 @@
   unsigned int flags;
   unsigned char sha1[20];
   unsigned long date;
 + unsigned char tree[20];
   struct parent *parent;
  };
  

I think this is really wrong.

The whole point of revision.h is that it's a generic framework for 
keeping track of relationships between different objects. And those 
objects are in no way just commit objects.

For example, fsck uses this struct revision to create a full tree of 
_all_ the object dependencies, which means that a struct revision can be 
any object at all - it's not in any way limited to commit objects, and 
there is no tree object that is associated with these things at all.

Besides, why do you want the tree? There's really nothing you can do with 
the tree to a first approximation - you need to _first_ do the 
reachability analysis entirely on the commit dependencies, and then when 
you've selected a set of commits, you can just output those.

Later phases will indeed look up what the tree is, but that's only after
you've decided on the commit object. There's no point in looking up (or
even trying to just remember) _all_ the tree objects.

Hmm?

Linus
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Yet another base64 patch

2005-04-17 Thread Petr Baudis
Dear diary, on Sun, Apr 17, 2005 at 08:38:10AM CEST, I got a letter
where David A. Wheeler [EMAIL PROTECTED] told me that...
 I'd look at some of the more constraining, yet still
 common cases, and make sure it worked reasonably
 well without requiring magic. My list would be:
 ext2, ext3, NFS, and Windows' NTFS (stupid short filenames,
 case-insensitive/case-preserving).  Samba shouldn't be
 more constraining than NTFS, and I would expect ReiserFS
 wouldn't be a constraining case.  Bonus points if the
 names lengths are inside POSIX guarantees, but I bet the
 POSIX limits are so tiny as to be laughable.  Bonus points for
 CD-ROM format with the Rock Ridge extensions (I _think_ DVDs
 and later use that format too, yes?), though if that
 didn't work tar files are an easy workaround. Imagine a full
 Linux kernel source repository, for 30+ (pick a number) years..
 can the filesystems handle the number of objects in those cases?
 If it works, your infrastructure should be sufficiently
 portable to just work on others too.

I personally don't mind getting it to work on more places, if it doesn't
make git work (measurably) worse on modern Linux systems, the code will
not go to hell, you tell me what needs to be done and preferably give me
the patches. ;-)

-- 
Petr Pasky Baudis
Stuff: http://pasky.or.cz/
C++: an octopus made by nailing extra legs onto a dog. -- Steve Taylor
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Re-done kernel archive - real one?

2005-04-17 Thread David A. Wheeler
On Sun, 17 Apr 2005, Russell King wrote:
One thing which definitely needs to be considered is - what character
encoding are the comments to be stored as?
...
I replied:
I would _heartily_ recommend moving towards UTF-8 as the
internal charset for all comments.
Petr said:
Not that the plumbing should actually _care_ at all; anyone who uses it
should take the care, so this is more of a social thing.
The _plumbing_ shouldn't care, but the stuff above needs to know
how to interpret the stuff that the plumbing produces.
Russell King said:
Except, I believe, MicroEMACS, which both Linus and myself use.  As
far as I know, there aren't any patches to make it UTF-8 compliant.
Since plain ASCII is a subset of UTF-8,
as long as MicroEMACS users only create ASCII comments,
then the comments you create in MicroEMACS will still be UTF-8.
No big deal.
For reading comments, if the text is almost entirely
plain ASCII, you could just ignore the problem and have the
occasional character scramble.  If you need more, you'll
need a tool that's more internationalized or a working iconv,
but if that's important you'd be motivated.
Again, I'm looking for more generalized solutions, where
non-English comments are more common than in Linux kernel code.
--- David A. Wheeler
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [3/5] Add http-pull

2005-04-17 Thread Daniel Barkalow
On Sun, 17 Apr 2005, Petr Baudis wrote:

  Index: Makefile
  ===
  --- d662b707e11391f6cfe597fd4d0bf9c41d34d01a/Makefile  (mode:100644 
  sha1:b2ce7c5b63fffca59653b980d98379909f893d44)
  +++ 157b46ce1d82b3579e2e1258927b0d9bdbc033ab/Makefile  (mode:100644 
  sha1:940ef8578cf469354002cd8feaec25d907015267)
  @@ -35,6 +35,7 @@
   
   LIBS= -lssl -lz
   
  +http-pull: LIBS += -lcurl
   
   $(PROG):%: %.o $(COMMON)
  $(CC) $(CFLAGS) -o $@ $^ $(LIBS)
 
 Whew. Looks like an awful trick, you say this works?! :-)
 
 At times, I wouldn't want to be a GNU make parser.

Yup. GNU make is big on the features which do the obvious thing, even when
you can't believe they work. This is probably why nobody's managed to
replace it.

  Index: http-pull.c
  ===
  --- /dev/null  (tree:d662b707e11391f6cfe597fd4d0bf9c41d34d01a)
  +++ 157b46ce1d82b3579e2e1258927b0d9bdbc033ab/http-pull.c  (mode:100644 
  sha1:106ca31239e6afe6784e7c592234406f5c149e44)
  +   url = malloc(strlen(base) + 50);
 
 Off-by-one. What about the trailing NUL?

I get length(base) + "objects/"=8 + 40 SHA1 + 1 for '/' and 1 for NUL = 50.

 I think you should have at least two disjunct modes - either you are
 downloading everything related to the given commit, or you are
 downloading all commit records for commit predecessors.
 
 Even if you might not want all the intermediate trees, you definitely
 want the intermediate commits, to keep the history graph contiguous.
 
 So in git pull, I'd imagine to do
 
   http-pull -c $new_head
   http-pull -t $(tree-id $new_head)
 
 So, -c would fetch a given commit and all its predecessors until it hits
 what you already have on your side. -t would fetch a given tree with all
 files and subtrees and everything. http-pull shouldn't default on
 either, since they are mutually exclusive.
 
 What do you think?

I think I'd rather keep the current behavior and add a -c for getting the
history of commits, and maybe a -a for getting the history of commits and
their trees.

There's some trickiness for the history of commits thing for stopping at
the point where you have everything, but also behaving appropriately if
you try once, fail partway through, and then try again. It's on my queue
of things to think about.

-Daniel
*This .sig left intentionally blank*

-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[3.1/5] Add http-pull

2005-04-17 Thread Daniel Barkalow
http-pull is a program that downloads from a (normal) HTTP server a commit
and all of the tree and blob objects it refers to (but not other commits,
etc.). Options could be used to make it download a larger or different
selection of objects.

Signed-Off-By: Daniel Barkalow [EMAIL PROTECTED]
Index: Makefile
===
--- 45f926575d2c44072bfcf2317dbf3f0fbb513a4e/Makefile  (mode:100644 
sha1:346e3850de026485802e41e16a1180be2df85e4a)
+++ 3eae85f66143160a26f5545d197862c89e2a8fb8/Makefile  (mode:100644 
sha1:0e84e3cd12f836602b420c197e08fabefe975493)
@@ -14,7 +17,7 @@
 
 PROG=   update-cache show-diff init-db write-tree read-tree commit-tree \
cat-file fsck-cache checkout-cache diff-tree rev-tree show-files \
-   check-files ls-tree merge-base
+   check-files ls-tree http-pull merge-base
 
 SCRIPT=parent-id tree-id git gitXnormid.sh gitadd.sh gitaddremote.sh \
gitcommit.sh gitdiff-do gitdiff.sh gitlog.sh gitls.sh gitlsobj.sh \
@@ -35,6 +38,7 @@
 
 LIBS= -lssl -lz
 
+http-pull: LIBS += -lcurl
 
 $(PROG):%: %.o $(COMMON)
$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
Index: README
===
--- 45f926575d2c44072bfcf2317dbf3f0fbb513a4e/README  (mode:100664 
sha1:0170eafb60ad9009ca41c6536cecd6d1fdee5b86)
+++ 3eae85f66143160a26f5545d197862c89e2a8fb8/README  (mode:100664 
sha1:921d552d810394e665323ec82b4826914918689c)
@@ -120,7 +120,7 @@
diff, patch
libssl
rsync
-
+   curl (later than 7.7, according to the docs)
 
 
The core GIT
Index: http-pull.c
===
--- /dev/null  (tree:45f926575d2c44072bfcf2317dbf3f0fbb513a4e)
+++ 3eae85f66143160a26f5545d197862c89e2a8fb8/http-pull.c  (mode:100644 
sha1:7ba4ad67f6dac34addb537ee147ae3de0550a484)
@@ -0,0 +1,139 @@
+#include fcntl.h
+#include unistd.h
+#include string.h
+#include stdlib.h
+#include cache.h
+#include revision.h
+#include errno.h
+#include stdio.h
+
+#include curl/curl.h
+#include curl/easy.h
+
+static CURL *curl;
+
+static char *base;
+
+static int fetch(unsigned char *sha1)
+{
+   char *hex = sha1_to_hex(sha1);
+   char *filename = sha1_file_name(sha1);
+
+   char *url;
+   char *posn;
+   FILE *local;
+
+   if (!access(filename, R_OK)) {
+   return 0;
+   }
+
+   local = fopen(filename, w);
+
+   if (!local) {
+   return error(Couldn't open %s, filename);
+   }
+
+   curl_easy_setopt(curl, CURLOPT_FILE, local);
+
+   url = malloc(strlen(base) + 50);
+   strcpy(url, base);
+   posn = url + strlen(base);
+   strcpy(posn, objects/);
+   posn += 8;
+   memcpy(posn, hex, 2);
+   posn += 2;
+   *(posn++) = '/';
+   strcpy(posn, hex + 2);
+
+   curl_easy_setopt(curl, CURLOPT_URL, url);
+
+   if (curl_easy_perform(curl)) {
+   fclose(local);
+   unlink(filename);
+   return error(Error downloading %s from %s,
+sha1_to_hex(sha1), url);
+   }
+
+   fclose(local);
+   
+   return 0;
+}
+
+static int process_tree(unsigned char *sha1)
+{
+   void *buffer;
+   unsigned long size;
+   char type[20];
+
+   buffer = read_sha1_file(sha1, type, size);
+   if (!buffer)
+   return error(Couldn't read %s.,
+sha1_to_hex(sha1));
+   if (strcmp(type, tree))
+   return error(Expected %s to be a tree, but was a %s.,
+sha1_to_hex(sha1), type);
+   while (size) {
+   int len = strlen(buffer) + 1;
+   unsigned char *sha1 = buffer + len;
+   unsigned int mode;
+   int retval;
+
+   if (size  len + 20 || sscanf(buffer, %o, mode) != 1)
+   return error(Invalid tree object);
+
+   buffer = sha1 + 20;
+   size -= len + 20;
+
+   retval = fetch(sha1);
+   if (retval)
+   return retval;
+
+   if (S_ISDIR(mode)) {
+   retval = process_tree(sha1);
+   if (retval)
+   return retval;
+   }
+   }
+   return 0;
+}
+
+static int process_commit(unsigned char *sha1)
+{
+   int retval;
+   struct revision *rev = lookup_rev(sha1);
+   if (parse_commit_object(rev))
+   return error(Couldn't parse commit %s\n, sha1_to_hex(sha1));
+
+   retval = fetch(rev-tree);
+   if (retval)
+   return retval;
+   retval = process_tree(rev-tree);
+   return retval;
+}
+
+int main(int argc, char **argv)
+{
+   char *commit_id = argv[1];
+   char *url = argv[2];
+   int retval;
+
+   unsigned char sha1[20];
+
+   get_sha1_hex(commit_id, sha1);
+
+   curl_global_init(CURL_GLOBAL_ALL);

Re: Re-done kernel archive - real one?

2005-04-17 Thread Russell King
On Sun, Apr 17, 2005 at 09:36:09AM -0700, Linus Torvalds wrote:
 On Sun, 17 Apr 2005, Russell King wrote:
  On Sat, Apr 16, 2005 at 04:01:45PM -0700, Linus Torvalds wrote:
   So I re-created the dang thing (hey, it takes just a few minutes), and
   pushed it out, and there's now an archive on kernel.org in my public
   personal directory called linux-2.6.git. I'll continue the tradition
   of naming git-archive directories as *.git, since that really ends up
   being the .git directory for the checked-out thing.
  
  We need to work out how we're going to manage to get our git changes to
  you.  At the moment, I've very little idea how to do that.  Ideas?
 
 To me, merging is my highest priority. I suspect that once I have a tree 
 from you (or anybody else) that I actually _test_ merging with, I'll be 
 motivated as hell to make sure that my plumbing actually works. 

Ok, I'll throw this tree onto master.kernel.org - how about
master.kernel.org:/home/rmk/linux-2.6-rmk.git ?  I think it's in the
same format as your trees:

linux-2.6-rmk.git
|-- HEAD
`-- objects

where HEAD was copied from my .git/heads/master, and objects from
.git/objects.

  However, I've made a start to generate the necessary emails.  How about
  this format?
  
  I'm not keen on the tree, parent, author and committer objects appearing
  in this - they appear to clutter it up.  What're your thoughts?
 
 Indeed. I'd almost drop the whole header except for the author line. 

Done.

 Oh, and you need a separator between commits, right now your 
 Signed-off-by: line ends up butting up with the header of the next 
 commit ;)

Done.

  I'd rather not have the FQDN of the machine where the commit happened
  appearing in the logs.
 
 That's fine. Our short-logs have always tried to have just the real name 
 in them, and I do want an email-like thing for tracking the developer, but 
 yes, if you remove the email, that's fine. It should be easy enough to do 
 with a simple
 
   sed 's/.*//'
 
 or similar.

Done.

 And if you replace author with From: and do the date conversion, it
 might look more natural.

Also done. 8)

I still need to work out how to make my noddy script follow different
branches which may be present though.  However, for my common work
flow, it fits what I require.

Ok, how about this format:

Linus,

Please incorporate the latest ARM changes.

This will update the following files:

 arch/arm/kernel/process.c  |   15 +++
 arch/arm/kernel/traps.c|8 ++--
 arch/arm/lib/changebit.S   |   11 ++-
 arch/arm/lib/clearbit.S|   13 ++---
 arch/arm/lib/setbit.S  |   11 ++-
 arch/arm/lib/testchangebit.S   |   15 ++-
 arch/arm/lib/testclearbit.S|   15 ++-
 arch/arm/lib/testsetbit.S  |   15 ++-
 arch/arm/mach-footbridge/dc21285-timer.c   |4 ++--
 arch/arm/mach-sa1100/h3600.c   |2 +-
 include/asm-arm/arch-ebsa285/debug-macro.S |7 +--
 include/asm-arm/arch-rpc/debug-macro.S |5 -
 include/asm-arm/ptrace.h   |5 +
 include/asm-arm/system.h   |3 +++
 14 files changed, 41 insertions(+), 88 deletions(-)

through these ChangeSets:

Author: Russell King: Sun Apr 17 16:28:31 BST 2005

[PATCH] ARM: fix debug macros

Fix debug EBSA285 and RiscPC debugging macros to detect whether the
MMU is enabled.

Signed-off-by: Russell King

Author: Russell King: Sun Apr 17 15:51:02 BST 2005

[PATCH] ARM: bitops

Convert ARM bitop assembly to a macro.  All bitops follow the same
format, so it's silly duplicating the code when only one or two
instructions are different.

Signed-off-by: Russell King

Author: Russell King: Sun Apr 17 15:50:36 BST 2005

[PATCH] ARM: showregs

Fix show_regs() to provide a backtrace.  Provide a new __show_regs()
function which implements the common subset of show_regs() and die().
Add prototypes to asm-arm/system.h

Signed-off-by: Russell King

Author: Russell King: Sun Apr 17 15:40:46 BST 2005

[PATCH] ARM: h3600_irda_set_speed arguments

h3600_irda_set_speed() had the wrong type for the speed argument.
Fix this.

Signed-off-by: Russell King

Author: Russell King: Sun Apr 17 15:36:55 BST 2005

[PATCH] ARM: footbridge rtc init

The footbridge ISA RTC was being initialised before we had setup the
kernel timer.  This caused a divide by zero error when the current
time of day is set.  Resolve this by initialising the RTC after
the kernel timer has been initialised.

Signed-off-by: Russell King



-- 
Russell King

-
To unsubscribe from this list: send the line unsubscribe git 

Re: [3/5] Add http-pull

2005-04-17 Thread Petr Baudis
Dear diary, on Sun, Apr 17, 2005 at 08:49:11PM CEST, I got a letter
where Daniel Barkalow [EMAIL PROTECTED] told me that...
 On Sun, 17 Apr 2005, Petr Baudis wrote:
   Index: http-pull.c
   ===
   --- /dev/null  (tree:d662b707e11391f6cfe597fd4d0bf9c41d34d01a)
   +++ 157b46ce1d82b3579e2e1258927b0d9bdbc033ab/http-pull.c  (mode:100644 
   sha1:106ca31239e6afe6784e7c592234406f5c149e44)
   + url = malloc(strlen(base) + 50);
  
  Off-by-one. What about the trailing NUL?
 
 I get length(base) + "objects/"=8 + 40 SHA1 + 1 for '/' and 1 for NUL = 50.

Sorry, counted one '/' more. :-)

  I think you should have at least two disjunct modes - either you are
  downloading everything related to the given commit, or you are
  downloading all commit records for commit predecessors.
  
  Even if you might not want all the intermediate trees, you definitely
  want the intermediate commits, to keep the history graph contiguous.
  
  So in git pull, I'd imagine to do
  
  http-pull -c $new_head
  http-pull -t $(tree-id $new_head)
  
  So, -c would fetch a given commit and all its predecessors until it hits
  what you already have on your side. -t would fetch a given tree with all
  files and subtrees and everything. http-pull shouldn't default on
  either, since they are mutually exclusive.
  
  What do you think?
 
 I think I'd rather keep the current behavior and add a -c for getting the
 history of commits, and maybe a -a for getting the history of commits and
 their trees.

I'm not too keen on this. Either make it totally separate commands, or
make a required switch specifying what to do. Otherwise it implies the
switches would just modify what it does, but they make it do something
completely different.

-a would be fine too - basically a combination of -c and -t. I'd imagine
that is what Linus would want to use, e.g.

 There's some trickiness for the history of commits thing for stopping at
 the point where you have everything, but also behaving appropriately if
 you try once, fail partway through, and then try again. It's on my queue
 of things to think about.

Can't you just stop the recursion when you hit a commit you already
have?

-- 
Petr Pasky Baudis
Stuff: http://pasky.or.cz/
C++: an octopus made by nailing extra legs onto a dog. -- Steve Taylor
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [1/5] Parsing code in revision.h

2005-04-17 Thread Daniel Barkalow
On Sun, 17 Apr 2005, Linus Torvalds wrote:

 On Sun, 17 Apr 2005, Daniel Barkalow wrote:
 
  --- 45f926575d2c44072bfcf2317dbf3f0fbb513a4e/revision.h  (mode:100644 
  sha1:28d0de3261a61f68e4e0948a25a416a515cd2e83)
  +++ 37a0b01b85c2999243674d48bfc71cdba0e5518e/revision.h  (mode:100644 
  sha1:523bde6e14e18bb0ecbded8f83ad4df93fc467ab)
  @@ -24,6 +24,7 @@
  unsigned int flags;
  unsigned char sha1[20];
  unsigned long date;
  +   unsigned char tree[20];
  struct parent *parent;
   };
   
 
 I think this is really wrong.
 
 The whole point of revision.h is that it's a generic framework for 
 keeping track of relationships between different objects. And those 
 objects are in no way just commit objects.

 For example, fsck uses this struct revision to create a full tree of 
 _all_ the object dependencies, which means that a struct revision can be 
 any object at all - it's not in any way limited to commit objects, and 
 there is no tree object that is associated with these things at all.

I entirely missed this. No wonder my fsck-cache conversion wasn't going
so well...

 Besides, why do you want the tree? There's really nothing you can do with 
 the tree to a first approximation - you need to _first_ do the 
 reachability analysis entirely on the commit dependencies, and then when 
 you've selected a set of commits, you can just output those.

I actually want the tree for http-pull, not merging stuff. I was trying to
get a commit parser, not reachability at that point.

I think the right thing is to make a separate struct commit that has the
stuff I want in it, and probably do a struct tree at the same time.

-Daniel
*This .sig left intentionally blank*

-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [3/5] Add http-pull

2005-04-17 Thread Daniel Barkalow
On Sun, 17 Apr 2005, Petr Baudis wrote:

 Dear diary, on Sun, Apr 17, 2005 at 08:49:11PM CEST, I got a letter
 where Daniel Barkalow [EMAIL PROTECTED] told me that...
 
 I'm not too keen on this. Either make it totally separate commands, or
 make a required switch specifying what to do. Otherwise it implies the
 switches would just modify what it does, but they make it do something
 completely different.

That's a good point. I'll require a -t for now, and add more later.

 -a would be fine too - basically a combination of -c and -t. I'd imagine
 that is what Linus would want to use, e.g.

Well, -c -t would give you the current tree and the whole commit log, but
not old trees. -a would additionally give you old trees.

  There's some trickiness for the history of commits thing for stopping at
  the point where you have everything, but also behaving appropriately if
  you try once, fail partway through, and then try again. It's on my queue
  of things to think about.
 
 Can't you just stop the recursion when you hit a commit you already
 have?

The problem is that, if you've fetched the final commit already, and then
the server dies, and you try again later, you already have the last one,
and so you think you've got everything.

At this point, I also want to put off doing much further with recursion
and commits until revision.h and such are sorted out.

-Daniel
*This .sig left intentionally blank*

-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [3/5] Add http-pull

2005-04-17 Thread Petr Baudis
Dear diary, on Sun, Apr 17, 2005 at 09:24:27PM CEST, I got a letter
where Daniel Barkalow [EMAIL PROTECTED] told me that...
 On Sun, 17 Apr 2005, Petr Baudis wrote:
 
  Dear diary, on Sun, Apr 17, 2005 at 08:49:11PM CEST, I got a letter
  where Daniel Barkalow [EMAIL PROTECTED] told me that...
   There's some trickiness for the history of commits thing for stopping at
   the point where you have everything, but also behaving appropriately if
   you try once, fail partway through, and then try again. It's on my queue
   of things to think about.
  
  Can't you just stop the recursion when you hit a commit you already
  have?
 
 The problem is that, if you've fetched the final commit already, and then
 the server dies, and you try again later, you already have the last one,
 and so you think you've got everything.

Hmm, some kind of journaling? ;-)

 At this point, I also want to put off doing much further with recursion
 and commits until revision.h and such are sorted out.

Agreed.

-- 
Petr Pasky Baudis
Stuff: http://pasky.or.cz/
C++: an octopus made by nailing extra legs onto a dog. -- Steve Taylor
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Re-done kernel archive - real one?

2005-04-17 Thread Linus Torvalds


On Sun, 17 Apr 2005, Russell King wrote:
 
 This will (and does) do exactly what I want.  I'll also read into the
 above a request that you want it in forward date order. 8)

No, I actually don't _think_ I care. In many ways I'm more used to
reverse date order, because that's usually how you view a changelog
(with a pager, and most recent changes at the top).

Which one makes sense when asking me to merge? I don't know, and I don't
think it really even matters, but maybe we can add a "for now" to whatever 
decision you end up coming to?

Linus
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [4/5] Add option for hardlinkable cache of extracted blobs

2005-04-17 Thread Russell King
On Sun, Apr 17, 2005 at 04:03:46PM -0400, Daniel Barkalow wrote:
 Actually, the documentation I've got says:
 
 F_OK requests checking whether merely testing for the existence of the
  file would be allowed (this depends on the permissions of the directories
  in the path to the file, as given in path-name.)
 
 So it shouldn't complain about a filename which you're allowed to try to
 stat, even if there's nothing there. And it would depend on the privs of
 the wrong user in looking at the path.

Isn't it the case that with selinux, various objects may be hidden
depending on their accessibility?  I wonder if this has an effect
here.

(or what about any other security model?)

-- 
Russell King

-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [2.1/5] Add merge-base

2005-04-17 Thread Daniel Barkalow
On Sun, 17 Apr 2005, Petr Baudis wrote:

 Dear diary, on Sun, Apr 17, 2005 at 06:51:59PM CEST, I got a letter
 where Daniel Barkalow [EMAIL PROTECTED] told me that...
  merge-base finds one of the best common ancestors of a pair of commits. In
  particular, it finds one of the ones which is fewest commits away from the
  further of the heads.
  
  Signed-Off-By: Daniel Barkalow [EMAIL PROTECTED]
 
 Note that during merge with Linus (probably the most complicated I've
 got so far, but still thankfully not too painful thanks to the rej
 tool) I've decided to revert your merge-base in favour of Linus'
 version. I did this mainly to make me merging Linus less awful; we
 should probably clean it up first and decide which solution to go for in
 the first place before possibly replacing it again, I think.

Sure. I'm working on the rearrangement now.

-Daniel
*This .sig left intentionally blank*

-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Re-done kernel archive - real one?

2005-04-17 Thread Jochen Roemling
Linus Torvalds wrote:
Ie we have two phases to the merge: first get the objects, with something
like
	repo=kernel.org:/pub/kernel/people/torvalds/linux-2.6.git
	rsync --ignore-existing -acv $(repo)/ .git/
 

Could you place a tarball there for people like me who are no real 
kernel hackers and don't have a kernel.org account? Or is there an 
anonymous account that I'm just too ignorant to know of?

I'm just somebody who is very interested in the new things happening 
here, but I guess there will be some months to go and a lot of things to 
learn until you see the first kernel patch that is signed off by me. :-)

By the way: Does the git repository include all 60.000 changes or just 
the latest version of the 17.000 files in the kernel?

Best regards
Jochen Römling
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


(Resend) [PATCH] checkout-cache -a should not extract unmerged stages

2005-04-17 Thread Junio C Hamano
Linus,

do you have any particular reason you did not want the patch
to skip unmerged ones when checkout-cache -a is done, and if
so could you let me know?

Here is what happens before the patch:

$ ls -al
total 16
drwxrwsr-x  3 junio src 4096 Apr 17 14:30 ./
drwxrwsr-x  8 junio src 4096 Apr 17 14:17 ../
drwxr-sr-x  3 junio src 4096 Apr 17 14:30 .git/
-rw-rw-r--  1 junio src   29 Apr 17 14:30 SS
$ show-files --stage
100644 9e26851b98ab7dd3a3b9653a2efb9b4de0465310 0 SS
100644 e14bafaadce6c34768ba2ff8b3c6419e8839e7d2 1 TT
100644 99ef1b30fc6d6ea186d6eac62619e1afd65ad64e 2 TT
100644 033b9385f7a29882a6b4b34f67b20e2304d3489d 3 TT
$ ../++linus/checkout-cache -a
checkout-cache: SS already exists
checkout-cache: TT already exists
checkout-cache: TT already exists
$ ls -al
total 20
drwxrwsr-x  3 junio src 4096 Apr 17 14:31 ./
drwxrwsr-x  8 junio src 4096 Apr 17 14:17 ../
drwxr-sr-x  3 junio src 4096 Apr 17 14:30 .git/
-rw-rw-r--  1 junio src   29 Apr 17 14:30 SS
-rw-rw-r--  1 junio src  363 Apr 17 14:31 TT

See those two warnings for TT?  It has extracted stage 1 and
complaining about what it has done when it goes on to extract
stage 2 and 3.

At this point what is in TT is from the stage 1.

This behaviour is somewhat defensible, in that you are giving
the user a ready access to the original (from stage 1), and he
can continue cat-file blob other stages to decide what to do.
But I think that the way the user wants to resolve the unmerged
state is not our business and it is not particularly useful for
the plumbing layer to assume that he would always need stage 1
contents to arrive at the merged result (e.g. sdiff between stage 2
and stage 3 would not require stage 1).

With the patch, you get this:

$ checkout-cache -a
checkout-cache: SS already exists
checkout-cache: needs merge TT
$ ls -al
total 16
drwxrwsr-x  3 junio src 4096 Apr 17 14:32 ./
drwxrwsr-x  8 junio src 4096 Apr 17 14:17 ../
drwxr-sr-x  3 junio src 4096 Apr 17 14:30 .git/
-rw-rw-r--  1 junio src   29 Apr 17 14:30 SS

I think it is consistent with this behaviour you already have
merged:

$ ../++linus/checkout-cache SS TT
checkout-cache: SS already exists
checkout-cache: TT is unmerged.
$ ls -al
total 16
drwxrwsr-x  3 junio src 4096 Apr 17 14:32 ./
drwxrwsr-x  8 junio src 4096 Apr 17 14:17 ../
drwxr-sr-x  3 junio src 4096 Apr 17 14:30 .git/
-rw-rw-r--  1 junio src   29 Apr 17 14:30 SS

Attached is a re-diff with an updated message.  

I could also send you a patch that implements an alternative
strategy.  With or without -a, checkout of unmerged files can
result in something like this:

$ checkout-cache -a
checkout-cache: SS already exists
checkout-cache: storing stage 1 for TT in TT~1~
checkout-cache: storing stage 2 for TT in TT~2~
checkout-cache: storing stage 3 for TT in TT~3~
$ ls -al
total 28
drwxrwsr-x  3 junio src 4096 Apr 17 14:55 ./
drwxrwsr-x  8 junio src 4096 Apr 17 14:17 ../
drwxr-sr-x  3 junio src 4096 Apr 17 14:30 .git/
-rw-rw-r--  1 junio src   29 Apr 17 14:30 SS
-rw-rw-r--  1 junio src  363 Apr 17 14:55 TT~1
-rw-rw-r--  1 junio src  363 Apr 17 14:55 TT~2
-rw-rw-r--  1 junio src  363 Apr 17 14:55 TT~3

Maybe these two behaviours can be controlled with another
option (say, -m).

Petr, do you think this alternative behaviour would be useful
for Cogito when it starts using read-tree -m?


When checkout-cache -a is run, currently it attempts to extract
all existing unmerged stages to the same destination and
complains about what it itself has done for the first stage when it
tries to extract the later stages.  This is nonsensical.  Just
report the unmerged state and let the user sort the mess out
using show-files --unmerged and cat-file blob.

Signed-off-by: Junio C Hamano [EMAIL PROTECTED]
---

 checkout-cache.c |   11 +++
 1 files changed, 11 insertions(+)

--- ++linus/checkout-cache.c2005-04-17 13:57:04.0 -0700
+++ ++junio/checkout-cache.c2005-04-17 14:35:11.0 -0700
@@ -137,10 +137,21 @@
 
 static int checkout_all(void)
 {
+   struct cache_entry *unmerge_skipping = NULL;
int i;
 
for (i = 0; i < active_nr ; i++) {
struct cache_entry *ce = active_cache[i];
+   if (ce_stage(ce)) {
+   if (!unmerge_skipping ||
+   strcmp(unmerge_skipping->name, ce->name))
+   fprintf(stderr,
+   "checkout-cache: needs merge %s\n",
+   ce->name);
+   unmerge_skipping = ce;
+   continue;
+   }
+   unmerge_skipping = NULL;
if (checkout_entry(ce) < 0)
return -1;
}



First ever real kernel git merge!

2005-04-17 Thread Linus Torvalds

It may not be pretty, but it seems to have worked fine!

Here's my history log (with intermediate checking removed - I was being
pretty anal ;):

rsync -avz --ignore-existing 
master.kernel.org:/home/rmk/linux-2.6-rmk.git/ .git/
rsync -avz --ignore-existing 
master.kernel.org:/home/rmk/linux-2.6-rmk.git/HEAD .git/MERGE-HEAD
merge-base $(cat .git/HEAD) $(cat .git/MERGE-HEAD)
for i in e7905b2f22eb5d5308c9122b9c06c2d02473dd4f $(cat .git/HEAD) 
$(cat .git/MERGE-HEAD); do cat-file commit $i | head -1; done
read-tree -m cf9fd295d3048cd84c65d5e1a5a6b606bf4fddc6 
9c78e08d12ae8189f3bd5e03accc39e3f08e45c9 
a43c4447b2edc9fb01a6369f10c1165de4494c88
write-tree 
commit-tree 7792a93eddb3f9b8e3115daab8adb3030f258ce6 -p $(cat 
.git/HEAD) -p $(cat .git/MERGE-HEAD)
echo 5fa17ec1c56589476c7c6a2712b10c81b3d5f85a > .git/HEAD
fsck-cache --unreachable 5fa17ec1c56589476c7c6a2712b10c81b3d5f85a

which looks really messy, because I really wanted to do each step slowly 
by hand, so those magic revision numbers are just cut-and-pasted from the 
results that all the previous stages had printed out.

NOTE! As expected, this merge had absolutely zero file-level clashes,
which is why I could just do the read-tree -m followed by a write-tree. 
But it's a real merge: I had some extra commits in my tree that were not
in Russell's tree, and obviously vice versa.

Also note! The end result is not actually written back to the current 
working directory, so to see what the merge result actually is, there's 
another final phase:

read-tree 7792a93eddb3f9b8e3115daab8adb3030f258ce6
update-cache --refresh
checkout-cache -f -a

which just updates the current working directory to the results. I'm _not_
caring about old dirty state for now - the theory was to get this thing
working first, and worry about making it nice to use later.

A second note: a real merge thing should notice that if the merge-base  
output ends up being one of the inputs (if one side is a strict subset of
the other side), then the merge itself should never be done, and the
script should just update directly to which-ever is non-common HEAD.

But as far as I can tell, this really did work out correctly and 100% 
according to plan. As a result, if you update to my current tree, the 
top-of-tree commit should be:

cat-file commit $(cat .git/HEAD)

tree 7792a93eddb3f9b8e3115daab8adb3030f258ce6
parent 8173055926cdb8534fbaed517a792bd45aed8377
parent df4449813c900973841d0fa5a9e9bc7186956e1e
author Linus Torvalds [EMAIL PROTECTED] 111377 -0700
committer Linus Torvalds [EMAIL PROTECTED] 111377 -0700

Merge with master.kernel.org:/home/rmk/linux-2.6-rmk.git - ARM changes

First ever true git merge. Let's see if it actually works.

Yehaa! It did take basically zero time, btw. Except for my bumbling about,
and the first rsync the objects from rmk's directory part (which wasn't
horrible, it just wasn't instantaneous like the other phases).

Btw, to see the output, you really want to have a git log that sorts by 
date. I had an old gitlog.sh that did the old recursive thing, and while 
it shows the right thing, the ordering ended up making it be very 
non-obvious that rmk's changes had been added recently, since they ended 
up being at the very bottom.

Linus
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Add lsremote command.

2005-04-17 Thread Steven Cole
On Sunday 17 April 2005 08:03 am, Petr Baudis wrote:
 Dear diary, on Sun, Apr 17, 2005 at 07:36:51AM CEST, I got a letter
 where Steven Cole [EMAIL PROTECTED] told me that...
  This is a fairly trivial addition, but if users are adding remote 
  repositories
  with git addremote, then those users should be able to list out the remote
  list without having to know the details of where the remotes file is kept.
 
 Could you please send your patches inline? (Either in the body or with
 correct content-disposition header.)
 
 You got the return values other way around and you are missing a
 copyright notice at the top; you should also mention that you take no
 parameters.
 
 Please use -s instead of -e, since it is more appropriate in this case.
 Also, you should report the no remotes message to stderr. And always
 exit when you found that .git/remotes exists, not only if cat succeeds.
 
 Kind regards,
 
Thanks for the fixes.  Here's a better version.
Steven

 This is a fairly trivial addition, but if users are adding remote repositories
 with git addremote, then those users should be able to list out the remote
 list without having to know the details of where the remotes file is kept.

Signed-off-by: Steven Cole [EMAIL PROTECTED]

diff -urN git-pasky.orig/git git-pasky/git
--- git-pasky.orig/git  2005-04-17 15:02:37.0 -0600
+++ git-pasky/git   2005-04-17 15:04:23.0 -0600
@@ -41,6 +41,7 @@
log
ls  [TREE_ID]
lsobj   [OBJTYPE]
+   lsremote
merge   -b BASE_ID FROM_ID
pull[RNAME]
rm  FILE...
@@ -105,6 +106,7 @@
 log)gitlog.sh $@;;
 ls) gitls.sh $@;;
 lsobj)  gitlsobj.sh $@;;
+lsremote)   gitlsremote.sh $@;;
 merge)  gitmerge.sh $@;;
 pull)   gitpull.sh $@;;
 rm) gitrm.sh $@;;
diff -urN git-pasky.orig/gitlsremote.sh git-pasky/gitlsremote.sh
--- git-pasky.orig/gitlsremote.sh   1969-12-31 17:00:00.0 -0700
+++ git-pasky/gitlsremote.sh2005-04-17 16:20:48.0 -0600
@@ -0,0 +1,11 @@
+#!/bin/sh
+#
+# Lists remote GIT repositories
+# Copyright (c) Steven Cole 2005
+#
+# Takes no parameters
+#
+[ -s .git/remotes ] && cat .git/remotes
+[ -s .git/remotes ] && exit
+
+echo "List of remotes is empty. See git addremote." >&2

-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Re-done kernel archive - real one?

2005-04-17 Thread Linus Torvalds


On Sun, 17 Apr 2005, Russell King wrote:
 
 I pulled it tonight into a pristine tree (which of course worked.)

Goodie.

 In doing so, I noticed that I'd messed up one of the commits - there's
 a missing new file.  Grr.  I'll put that down to being a newbie git.

Actually, you should put that down to horribly bad interface tools.  With
BK, we had these nice tools that pointed out that there were files that
you might want to commit (ie bk citool), and made this very obvious.

Tools absolutely matter. And it will take time for us to build up that 
kind of helper infrastructure. So being newbie might be part of it, but 
it's the smaller part, I say. Rough interfaces is a big issue.

Linus
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Building git on Fedora

2005-04-17 Thread jeff millar
Here's a tidbit to enable git to compile on Fedora.  Add the following 
line to /etc/rc.d/rc.local...

   ln -sf /lib/modules/`uname -r`/build/include/linux 
/usr/local/include/linux

This fix creates a symlink, on each boot up, in the local include 
directory that points to the kernel header files. If there's a better 
way to do this, I'm all ears.

jeff
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch] fork optional branch point normazilation

2005-04-17 Thread Brad Roberts
On Sun, 17 Apr 2005, Linus Torvalds wrote:

 On Sun, 17 Apr 2005, Brad Roberts wrote:
 
  (ok, author looks better, but committer doesn't obey the AUTHOR_ vars yet)

 They shouldn't, but maybe I should add COMMITTER_xxx overrides. I just do
 _not_ want people to think that they should claim to be somebody else:
 it's not a security issue (you could compile your own commit-tree.c
 after all), it's more of a social rule thing. I prefer seeing bad email
 addresses that at least match the system setup to seeing good email
 addresses that people made up just to make them look clean.

 Mind showing what your /etc/passwd file looks like (just your own entry,
 and please just remove your password entry if you don't use shadow
 passwords).

 Maybe I should just remove _all_ strange characters when I do the name
 cleanup in commit. Right now I just remove the ones that matter to
 parsing it unambiguously: '\n' '<' and '>'.

 (The ',' character really is special: some people have

   Torvalds, Linus

 and maybe I should not just remove the commas, I should convert it to
 always be Linus Torvalds. But your gecos entry is just _strange_. Why
 the extra commas, I wonder?)

   Linus
 -

I fully agree with the intent of the field separation, they're two very
different activities.

braddr:x:1000:1000:Brad Roberts,,,:/home/braddr:/bin/bash

All gecos entries on all my debian boxes are of the form:

   fullname, office number, office extension, and home number

This is taken from the chfn man page on debian.  Looking on my nearest
redhat box, the chfn man page is roughly the same.  Debian's man page also
has one snippet that's not in redhat's, suggested delimiter is a ','.  A
bit of searching for other platforms, aix suggests a ';' as a delimiter.
HPUX seems to want a ','.

Later,
Brad

-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Merge with git-pasky II.

2005-04-17 Thread Petr Baudis
Dear diary, on Mon, Apr 18, 2005 at 01:29:05AM CEST, I got a letter
where Herbert Xu [EMAIL PROTECTED] told me that...
 I get the feeling that it isn't that bad.  For example, if we did it
 at the points where the blobs actually entered the tree, then the cost
 is always proportional to the change size (the number of new blobs).

No. The collision check is done in the opposite cache - when you want to
write a blob and there is already a file of the same hash in the tree.
So either the blob is already in the database, or you have a collision.

Therefore, the cost is proportional to the size of what stays unchanged.

-- 
Petr Pasky Baudis
Stuff: http://pasky.or.cz/
C++: an octopus made by nailing extra legs onto a dog. -- Steve Taylor
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Building git on Fedora

2005-04-17 Thread Chris Wedgwood
On Sun, Apr 17, 2005 at 07:25:49PM -0400, jeff millar wrote:

 Here's a tidbit to enable git to compile on Fedora.  Add the
 following line to /etc/rc.d/rc.local...

ln -sf /lib/modules/`uname -r`/build/include/linux /usr/local/include/linux

I can't see why this should be needed.  What breaks without this?
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch] fork optional branch point normazilation

2005-04-17 Thread Linus Torvalds


On Sun, 17 Apr 2005, Brad Roberts wrote:

 braddr:x:1000:1000:Brad Roberts,,,:/home/braddr:/bin/bash
 
 All gecos entries on all my debian boxes are of the form:
 
fullname, office number, office extension, and home number

Ahh, ok.

I'll make the cleanup thing just remove strange characters from the end, 
that should fix this kind of thing for now.

I'd just remove everything after the first strange number, but I can also 
see people using the lastname, firstname format, and I'd hate to just 
ignore firstname in that case.

Linus
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Building git on Fedora

2005-04-17 Thread David Woodhouse
On Sun, 2005-04-17 at 19:25 -0400, jeff millar wrote:
 ln -sf /lib/modules/`uname -r`/build/include/linux 
 /usr/local/include/linux
 
 This fix creates a symlink, on each boot up, in the local include 
 directory that points to the kernel header files. If there's a better 
 way to do this, I'm all ears.

What's wrong with the contents of the glibc-kernheaders package? Can you
file specific bugs if you're having problems?

In the long run, the answer is to convince Linus that we _really_ need
the kernel to have a set of header files defining the ABI which are fit
for public consumption, rather than having a horrid mix of private and
exportable bits throughout the contents of the include/ directory. 

In the meantime, some poor mug has to clean the crap up and try to make
something suitable to live in /usr/include/linux -- and unfortunately at
the moment for Fedora that someone is me :)

Unless git is doing something with kernel-private headers that it
shouldn't, this probably wants to be discussed elsewhere -- most likely
in Bugzilla.

-- 
dwmw2

-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: full kernel history, in patchset format

2005-04-17 Thread Petr Baudis
Dear diary, on Mon, Apr 18, 2005 at 01:31:36AM CEST, I got a letter
where David Woodhouse [EMAIL PROTECTED] told me that...
 Note that any given copy of a tree doesn't _need_ to keep all the
 history back to the beginning of time. It's OK if the oldest commit object
 in your tree actually refers back to a parent which doesn't exist
 locally. I can well imagine that some people will want to keep their
 trees pruned to keep only a few weeks of history, while other copies of
 the tree will keep everything.

I think this is bad, bad, bad. If you don't keep around all the
_commits_, you get into all sorts of troubles - when merging, when doing
git log, etc. And the commits themselves are probably actually a pretty
small portion of the thing. I didn't do any actual measurement but I
would be pretty surprised if it would be much more than a few megabytes of
data for the kernel history.

Of course an entirely different thing are _trees_ associated with those
commits. As long as you stay with a simple three-way merge, you
basically never want to look at trees which aren't heads and which you
don't specifically request to look at. And the trees and what they carry
inside is the main bulk of data.

-- 
Petr Pasky Baudis
Stuff: http://pasky.or.cz/
C++: an octopus made by nailing extra legs onto a dog. -- Steve Taylor
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Merge with git-pasky II.

2005-04-17 Thread Linus Torvalds


On Mon, 18 Apr 2005, Herbert Xu wrote:
 
 I wasn't disputing that of course.  However, the same effect can be
 achieved in using a single hash with a bigger length, e.g., sha256
 or sha512.

No it cannot.

If somebody actually literally totally breaks that hash, length won't 
matter. There are (bad) hashes where you can literally edit the content of 
the file, and make sure that the end result has the same hash.

In that case, when the hash algorithm has actually been broken, the length 
of the hash ends up being not very relevant. 

For example, you might hash your file by blocking it up in 16-byte
blocks, and xoring all blocks together - the result is a 16-byte hash.  
It's a terrible hash, and obviously trivially breakable, and once broken
it does _not_ help to make it use its 32-byte cousin. Not at all. You can 
just modify the breaking thing to equally cheaply make modifications to a 
file and get the 32-byte hash right again.

Is that kind of breakage likely for sha1? Hell no. Is it possible? In your 
"in theory" world where practice doesn't matter, yes.

Linus
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Merge with git-pasky II.

2005-04-17 Thread Kenneth Johansson
Petr Baudis wrote:
Dear diary, on Mon, Apr 18, 2005 at 01:29:05AM CEST, I got a letter
where Herbert Xu [EMAIL PROTECTED] told me that...
I get the feeling that it isn't that bad.  For example, if we did it
at the points where the blobs actually entered the tree, then the cost
is always proportional to the change size (the number of new blobs).

No. The collision check is done in the opposite cache - when you want to
write a blob and there is already a file of the same hash in the tree.
So either the blob is already in the database, or you have a collision.
Therefore, the cost is proportional to the size of what stays unchanged.
?? now I'm confused. Surely the only cost involved is to never write over 
a file that already exists in the cache and that is already done NOW as 
far as I read the code. So there is NO extra cost in detecting a collision.



-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch] git: fix 1-byte overflow in show-files.c

2005-04-17 Thread Petr Baudis
Dear diary, on Thu, Apr 14, 2005 at 02:53:54PM CEST, I got a letter
where Ingo Molnar [EMAIL PROTECTED] told me that...
 
 this patch fixes a 1-byte overflow in show-files.c (looks narrow and is 
 probably not exploitable). A specially crafted db object (tree) might 
 trigger this overflow.
 
 'fullname' is an array of 4096+1 bytes, and we do readdir(), which 
 produces entries that have strings with a length of 0-255 bytes. With a 
 long enough 'base', it's possible to construct a tree with a name in it 
 that has directory whose name ends precisely at offset 4095. At that 
 point this code:
 
 case DT_DIR:
 memcpy(fullname + baselen + len, "/", 2);
 
 will attempt to append a "/" string to the directory name - resulting in 
 a 1-byte overflow (a zero byte is written to offset 4097, which is 
 outside the array).

The name ends precisely at offset 4095 with its NUL character:

 {PATH_MAX}
 Maximum number of bytes in a pathname, including the terminating
null character.
[ http://www.opengroup.org/onlinepubs/009695399/basedefs/limits.h.html ]

So, if I'm not mistaken, '/' will be written at offset 4095 instead of
the NUL and the NUL will be written at 4096. Everything's fine, right?

-- 
Petr Pasky Baudis
Stuff: http://pasky.or.cz/
C++: an octopus made by nailing extra legs onto a dog. -- Steve Taylor
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: full kernel history, in patchset format

2005-04-17 Thread David Woodhouse
On Mon, 2005-04-18 at 02:50 +0200, Petr Baudis wrote:
 I think I will make git-pasky's default behaviour (when we get
 http-pull, that is) to keep the complete commit history but only trees
 you need/want; togglable to both sides.

I think the default behaviour should probably be to fetch everything.

-- 
dwmw2

-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Merge with git-pasky II.

2005-04-17 Thread Petr Baudis
Dear diary, on Mon, Apr 18, 2005 at 02:49:06AM CEST, I got a letter
where Herbert Xu [EMAIL PROTECTED] told me that...
 Therefore the only conclusion I can draw is that we're only calling
 update-cache on the set of changed files, or at most a small superset
 of them.  In that case, the cost of the collision check *is* proportional
 to the size of the change.

Yes, of course, sorry for the confusion.  We only consider files you
either specify manually or which have their stat metadata changed
relative to the directory cache. (That is from the git-pasky
perspective; from the plumbing perspective, the user just does
update-cache on whatever he picks.)

-- 
Petr Pasky Baudis
Stuff: http://pasky.or.cz/
C++: an octopus made by nailing extra legs onto a dog. -- Steve Taylor
-
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[2/5] Implementations of parsing functions

2005-04-17 Thread Daniel Barkalow
This implements the parsing functions.

Signed-Off-By: Daniel Barkalow [EMAIL PROTECTED]
Index: blob.c
===
--- /dev/null  (tree:5ca133e1b74aee39b2124c0ec9fd51539babb5e0)
+++ 1172a9b8f45b2fd640985595cc5258db3b027828/blob.c  (mode:100644 
sha1:04e0c1da9b1f4cdb1d1c5881b785babd3b0ceb09)
@@ -0,0 +1,24 @@
+#include "blob.h"
+#include "cache.h"
+#include <stdlib.h>
+
+const char *blob_type = "blob";
+
+struct blob *lookup_blob(unsigned char *sha1)
+{
+   struct object *obj = lookup_object(sha1);
+   if (!obj) {
+   struct blob *ret = malloc(sizeof(struct blob));
+   bzero(ret, sizeof(struct blob));
+   created_object(sha1, &ret->object);
+   ret->object.type = blob_type;
+   ret->object.parsed = 1;
+   return ret;
+   }
+   if (obj->parsed && obj->type != blob_type) {
+   error("Object %s is a %s, not a blob", 
+ sha1_to_hex(sha1), obj->type);
+   return NULL;
+   }
+   return (struct blob *) obj;
+}
Index: commit.c
===
--- /dev/null  (tree:5ca133e1b74aee39b2124c0ec9fd51539babb5e0)
+++ 1172a9b8f45b2fd640985595cc5258db3b027828/commit.c  (mode:100644 
sha1:0099baa63971d86ee30ef2a7da25057f0f45a964)
@@ -0,0 +1,85 @@
+#include "commit.h"
+#include "cache.h"
+#include <string.h>
+
+const char *commit_type = "commit";
+
+struct commit *lookup_commit(unsigned char *sha1)
+{
+   struct object *obj = lookup_object(sha1);
+   if (!obj) {
+   struct commit *ret = malloc(sizeof(struct commit));
+   bzero(ret, sizeof(struct commit));
+   created_object(sha1, &ret->object);
+   return ret;
+   }
+   if (obj->parsed && obj->type != commit_type) {
+   error("Object %s is a %s, not a commit", 
+ sha1_to_hex(sha1), obj->type);
+   return NULL;
+   }
+   return (struct commit *) obj;
+}
+
+static unsigned long parse_commit_date(const char *buf)
+{
+   unsigned long date;
+
+   if (memcmp(buf, "author", 6))
+   return 0;
+   while (*buf++ != '\n')
+   /* nada */;
+   if (memcmp(buf, "committer", 9))
+   return 0;
+   while (*buf++ != '>')
+   /* nada */;
+   date = strtoul(buf, NULL, 10);
+   if (date == ULONG_MAX)
+   date = 0;
+   return date;
+}
+
+int parse_commit(struct commit *item)
+{
+   char type[20];
+   void * buffer, *bufptr;
+   unsigned long size;
+   unsigned char parent[20];
+   if (item->object.parsed)
+   return 0;
+   item->object.parsed = 1;
+   buffer = bufptr = read_sha1_file(item->object.sha1, type, &size);
+   if (!buffer)
+   return error("Could not read %s",
+sha1_to_hex(item->object.sha1));
+   if (strcmp(type, commit_type))
+   return error("Object %s not a commit",
+sha1_to_hex(item->object.sha1));
+   item->object.type = commit_type;
+   get_sha1_hex(bufptr + 5, parent);
+   item->tree = lookup_tree(parent);
+   add_ref(&item->object, &item->tree->object);
+   bufptr += 46; /* "tree " + "hex sha1" + "\n" */
+   while (!memcmp(bufptr, "parent ", 7) &&
+  !get_sha1_hex(bufptr + 7, parent)) {
+   struct commit_list *new_parent = 
+   malloc(sizeof(struct commit_list));
+   new_parent->next = item->parents;
+   new_parent->item = lookup_commit(parent);
+   add_ref(&item->object, &new_parent->item->object);
+   item->parents = new_parent;
+   bufptr += 48;
+   }
+   item->date = parse_commit_date(bufptr);
+   free(buffer);
+   return 0;
+}
+
+void free_commit_list(struct commit_list *list)
+{
+   while (list) {
+   struct commit_list *temp = list;
+   list = temp->next;
+   free(temp);
+   }
+}
Index: object.c
===
--- /dev/null  (tree:5ca133e1b74aee39b2124c0ec9fd51539babb5e0)
+++ 1172a9b8f45b2fd640985595cc5258db3b027828/object.c  (mode:100644 
sha1:986624ac7a7fd9229e05e1f181fd500640298d9e)
@@ -0,0 +1,96 @@
+#include "object.h"
+#include "cache.h"
+#include <stdlib.h>
+#include <string.h>
+
+struct object **objs;
+int nr_objs;
+static int obj_allocs;
+
+static int find_object(unsigned char *sha1)
+{
+   int first = 0, last = nr_objs;
+
+while (first  last) {
+int next = (first + last) / 2;
+struct object *obj = objs[next];
+int cmp;
+
+cmp = memcmp(sha1, obj-sha1, 20);
+if (!cmp)
+return next;
+if (cmp  0) {
+last = next;
+continue;
+}
+first =