[PATCH v2 04/12] refs: implement simple transactions for the packed-refs file

2013-06-19 Thread Michael Haggerty
Handle simple transactions for the packed-refs file at the
packed_ref_cache level via new functions lock_packed_refs(),
commit_packed_refs(), and rollback_packed_refs().

Only allow the packed ref cache to be modified (via add_packed_ref())
while the packed refs file is locked.

Change clone to add the new references within a transaction.

Signed-off-by: Michael Haggerty 
---
 builtin/clone.c |  7 -
 refs.c  | 83 ++---
 refs.h  | 27 +--
 3 files changed, 98 insertions(+), 19 deletions(-)

diff --git a/builtin/clone.c b/builtin/clone.c
index 66bff57..b0c000a 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -489,17 +489,22 @@ static struct ref *wanted_peer_refs(const struct ref 
*refs,
return local_refs;
 }
 
+static struct lock_file packed_refs_lock;
+
 static void write_remote_refs(const struct ref *local_refs)
 {
const struct ref *r;
 
+   lock_packed_refs(&packed_refs_lock, LOCK_DIE_ON_ERROR);
+
for (r = local_refs; r; r = r->next) {
if (!r->peer_ref)
continue;
add_packed_ref(r->peer_ref->name, r->old_sha1);
}
 
-   pack_refs(PACK_REFS_ALL);
+   if (commit_packed_refs())
+   die_errno("unable to overwrite old ref-pack file");
 }
 
 static void write_followtags(const struct ref *refs, const char *msg)
diff --git a/refs.c b/refs.c
index 373d95b..ad73251 100644
--- a/refs.c
+++ b/refs.c
@@ -808,6 +808,13 @@ static int is_refname_available(const char *refname, const 
char *oldrefname,
 
 struct packed_ref_cache {
struct ref_entry *root;
+
+   /*
+* Iff the packed-refs file associated with this instance is
+* currently locked for writing, this points at the associated
+* lock (which is owned by somebody else).
+*/
+   struct lock_file *lock;
 };
 
 /*
@@ -829,6 +836,8 @@ static struct ref_cache {
 static void clear_packed_ref_cache(struct ref_cache *refs)
 {
if (refs->packed) {
+   if (refs->packed->lock)
+   die("internal error: packed-ref cache cleared while 
locked");
free_ref_entry(refs->packed->root);
free(refs->packed);
refs->packed = NULL;
@@ -1038,7 +1047,12 @@ static struct ref_dir *get_packed_refs(struct ref_cache 
*refs)
 
 void add_packed_ref(const char *refname, const unsigned char *sha1)
 {
-   add_ref(get_packed_refs(&ref_cache),
+   struct packed_ref_cache *packed_ref_cache =
+   get_packed_ref_cache(&ref_cache);
+
+   if (!packed_ref_cache->lock)
+   die("internal error: packed refs not locked");
+   add_ref(get_packed_ref_dir(packed_ref_cache),
create_ref_entry(refname, sha1, REF_ISPACKED, 1));
 }
 
@@ -2035,6 +2049,52 @@ static int write_packed_entry_fn(struct ref_entry 
*entry, void *cb_data)
return 0;
 }
 
+int lock_packed_refs(struct lock_file *lock, int flags)
+{
+   struct packed_ref_cache *packed_ref_cache;
+
+   /* Discard the old cache because it might be invalid: */
+   clear_packed_ref_cache(&ref_cache);
+   if (hold_lock_file_for_update(lock, git_path("packed-refs"), flags) < 0)
+   return -1;
+   /* Read the current packed-refs while holding the lock: */
+   packed_ref_cache = get_packed_ref_cache(&ref_cache);
+   packed_ref_cache->lock = lock;
+   return 0;
+}
+
+int commit_packed_refs(void)
+{
+   struct packed_ref_cache *packed_ref_cache =
+   get_packed_ref_cache(&ref_cache);
+   int error = 0;
+
+   if (!packed_ref_cache->lock)
+   die("internal error: packed-refs not locked");
+   write_or_die(packed_ref_cache->lock->fd,
+PACKED_REFS_HEADER, strlen(PACKED_REFS_HEADER));
+
+   do_for_each_entry_in_dir(get_packed_ref_dir(packed_ref_cache),
+0, write_packed_entry_fn,
+&packed_ref_cache->lock->fd);
+   if (commit_lock_file(packed_ref_cache->lock))
+   error = -1;
+   packed_ref_cache->lock = NULL;
+   return error;
+}
+
+void rollback_packed_refs(void)
+{
+   struct packed_ref_cache *packed_ref_cache =
+   get_packed_ref_cache(&ref_cache);
+
+   if (!packed_ref_cache->lock)
+   die("internal error: packed-refs not locked");
+   rollback_lock_file(packed_ref_cache->lock);
+   packed_ref_cache->lock = NULL;
+   clear_packed_ref_cache(&ref_cache);
+}
+
 struct ref_to_prune {
struct ref_to_prune *next;
unsigned char sha1[20];
@@ -2153,23 +2213,19 @@ static struct lock_file packlock;
 int pack_refs(unsigned int flags)
 {
struct pack_refs_cb_data cbdata;
-   int fd;
 
memset(&cbdata, 0, sizeof(cbdata));
cbdata.flags = flags;
 
-   fd = hold_lock_file_for_update(&packlock, git_path("packed-refs"),
- 

Re: [PATCH v2 04/12] refs: implement simple transactions for the packed-refs file

2013-06-19 Thread Junio C Hamano
Michael Haggerty  writes:

> Handle simple transactions for the packed-refs file at the
> packed_ref_cache level via new functions lock_packed_refs(),
> commit_packed_refs(), and rollback_packed_refs().
>
> Only allow the packed ref cache to be modified (via add_packed_ref())
> while the packed refs file is locked.
>
> Change clone to add the new references within a transaction.
>
> Signed-off-by: Michael Haggerty 
> ---
>  builtin/clone.c |  7 -
>  refs.c  | 83 
> ++---
>  refs.h  | 27 +--
>  3 files changed, 98 insertions(+), 19 deletions(-)
>
> diff --git a/builtin/clone.c b/builtin/clone.c
> index 66bff57..b0c000a 100644
> --- a/builtin/clone.c
> +++ b/builtin/clone.c
> @@ -489,17 +489,22 @@ static struct ref *wanted_peer_refs(const struct ref 
> *refs,
>   return local_refs;
>  }
>  
> +static struct lock_file packed_refs_lock;
> +
>  static void write_remote_refs(const struct ref *local_refs)
>  {
>   const struct ref *r;
>  
> + lock_packed_refs(&packed_refs_lock, LOCK_DIE_ON_ERROR);
> +
>   for (r = local_refs; r; r = r->next) {
>   if (!r->peer_ref)
>   continue;
>   add_packed_ref(r->peer_ref->name, r->old_sha1);
>   }
>  
> - pack_refs(PACK_REFS_ALL);
> + if (commit_packed_refs())
> + die_errno("unable to overwrite old ref-pack file");
>  }

The calling convention used here looks somewhat strange.  You allow
callers to specify which lock-file structure is used when locking,
but when you are done, commit_packed_refs() does not take any
parameter.

lock_packed_refs() make the singleton in-core packed-ref-cache be
aware of which lock it is under, so commit_packed_refs() does not
need to be told (the singleton already knows what lockfile is in
effect), so I am not saying the code is broken, though.

Does the caller need to even have an access to this lock_file
instance?

>  static void write_followtags(const struct ref *refs, const char *msg)
> diff --git a/refs.c b/refs.c
> index 373d95b..ad73251 100644
> --- a/refs.c
> +++ b/refs.c
> @@ -808,6 +808,13 @@ static int is_refname_available(const char *refname, 
> const char *oldrefname,
>  
>  struct packed_ref_cache {
>   struct ref_entry *root;
> +
> + /*
> +  * Iff the packed-refs file associated with this instance is
> +  * currently locked for writing, this points at the associated
> +  * lock (which is owned by somebody else).
> +  */
> + struct lock_file *lock;
>  };
>  
>  /*
> @@ -829,6 +836,8 @@ static struct ref_cache {
>  static void clear_packed_ref_cache(struct ref_cache *refs)
>  {
>   if (refs->packed) {
> + if (refs->packed->lock)
> + die("internal error: packed-ref cache cleared while 
> locked");
>   free_ref_entry(refs->packed->root);
>   free(refs->packed);
>   refs->packed = NULL;
> @@ -1038,7 +1047,12 @@ static struct ref_dir *get_packed_refs(struct 
> ref_cache *refs)
>  
>  void add_packed_ref(const char *refname, const unsigned char *sha1)
>  {
> - add_ref(get_packed_refs(&ref_cache),
> + struct packed_ref_cache *packed_ref_cache =
> + get_packed_ref_cache(&ref_cache);
> +
> + if (!packed_ref_cache->lock)
> + die("internal error: packed refs not locked");
> + add_ref(get_packed_ref_dir(packed_ref_cache),
>   create_ref_entry(refname, sha1, REF_ISPACKED, 1));
>  }
>  
> @@ -2035,6 +2049,52 @@ static int write_packed_entry_fn(struct ref_entry 
> *entry, void *cb_data)
>   return 0;
>  }
>  
> +int lock_packed_refs(struct lock_file *lock, int flags)
> +{
> + struct packed_ref_cache *packed_ref_cache;
> +
> + /* Discard the old cache because it might be invalid: */
> + clear_packed_ref_cache(&ref_cache);
> + if (hold_lock_file_for_update(lock, git_path("packed-refs"), flags) < 0)
> + return -1;
> + /* Read the current packed-refs while holding the lock: */
> + packed_ref_cache = get_packed_ref_cache(&ref_cache);
> + packed_ref_cache->lock = lock;
> + return 0;
> +}
> +
> +int commit_packed_refs(void)
> +{
> + struct packed_ref_cache *packed_ref_cache =
> + get_packed_ref_cache(&ref_cache);
> + int error = 0;
> +
> + if (!packed_ref_cache->lock)
> + die("internal error: packed-refs not locked");
> + write_or_die(packed_ref_cache->lock->fd,
> +  PACKED_REFS_HEADER, strlen(PACKED_REFS_HEADER));
> +
> + do_for_each_entry_in_dir(get_packed_ref_dir(packed_ref_cache),
> +  0, write_packed_entry_fn,
> +  &packed_ref_cache->lock->fd);
> + if (commit_lock_file(packed_ref_cache->lock))
> + error = -1;
> + packed_ref_cache->lock = NULL;
> + return error;
> +}
> +
> +void rollback_packed_refs(void)
> +{
> + struct packed_ref_cache *pack

Re: [PATCH v2 04/12] refs: implement simple transactions for the packed-refs file

2013-06-20 Thread Michael Haggerty
On 06/19/2013 09:18 PM, Junio C Hamano wrote:
> Michael Haggerty  writes:
> 
>> Handle simple transactions for the packed-refs file at the
>> packed_ref_cache level via new functions lock_packed_refs(),
>> commit_packed_refs(), and rollback_packed_refs().
>>
>> Only allow the packed ref cache to be modified (via add_packed_ref())
>> while the packed refs file is locked.
>>
>> Change clone to add the new references within a transaction.
>>
>> Signed-off-by: Michael Haggerty 
>> ---
>>  builtin/clone.c |  7 -
>>  refs.c  | 83 
>> ++---
>>  refs.h  | 27 +--
>>  3 files changed, 98 insertions(+), 19 deletions(-)
>>
>> diff --git a/builtin/clone.c b/builtin/clone.c
>> index 66bff57..b0c000a 100644
>> --- a/builtin/clone.c
>> +++ b/builtin/clone.c
>> @@ -489,17 +489,22 @@ static struct ref *wanted_peer_refs(const struct ref 
>> *refs,
>>  return local_refs;
>>  }
>>  
>> +static struct lock_file packed_refs_lock;
>> +
>>  static void write_remote_refs(const struct ref *local_refs)
>>  {
>>  const struct ref *r;
>>  
>> +lock_packed_refs(&packed_refs_lock, LOCK_DIE_ON_ERROR);
>> +
>>  for (r = local_refs; r; r = r->next) {
>>  if (!r->peer_ref)
>>  continue;
>>  add_packed_ref(r->peer_ref->name, r->old_sha1);
>>  }
>>  
>> -pack_refs(PACK_REFS_ALL);
>> +if (commit_packed_refs())
>> +die_errno("unable to overwrite old ref-pack file");
>>  }
> 
> The calling convention used here looks somewhat strange.  You allow
> callers to specify which lock-file structure is used when locking,
> but when you are done, commit_packed_refs() does not take any
> parameter.
> 
> lock_packed_refs() make the singleton in-core packed-ref-cache be
> aware of which lock it is under, so commit_packed_refs() does not
> need to be told (the singleton already knows what lockfile is in
> effect), so I am not saying the code is broken, though.
> 
> Does the caller need to even have an access to this lock_file
> instance?

No it doesn't.  My reasoning was as follows:

lock_file instances can never be freed, because they are added to a
linked list in the atexit handler but never removed.  Therefore they
have to be allocated statically (or allocated dynamically then leaked).

[I just noticed that lock_ref_sha1_basic() leaks a struct lock_file
every time that it is called.]

I wanted to leave the way open to allow other packed refs caches to be
locked.  But since all packed refs caches are allocated dynamically, the
lock_file instance cannot be part of struct packed_ref_cache.  So I
thought that the easiest approach is to rely on the caller, who
presumably can know that only a finite number of locks are in use at
once, to supply a usable lock_file instance.

But currently only the main packed ref cache can be locked, so it would
be possible for lock_packed_refs() to use the static packlock instance
for locking.  I will change it to do so.

If/when we add support for locking other packed ref caches, we can
change the API to use a caller-supplied lock_file.  Or even better,
implement a way to remove a lock_file instance from the atexit list;
then lock_packed_refs() could use dynamically-allocated lock_file
instances without having to leak them.

v3 to come.

Michael

-- 
Michael Haggerty
mhag...@alum.mit.edu
http://softwareswirl.blogspot.com/
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 04/12] refs: implement simple transactions for the packed-refs file

2013-06-20 Thread Jeff King
On Thu, Jun 20, 2013 at 09:49:51AM +0200, Michael Haggerty wrote:

> [I just noticed that lock_ref_sha1_basic() leaks a struct lock_file
> every time that it is called.]

I noticed that recently, too. I have a patch series about 90% complete
that abstracts the tempfile handling (the ultimate goal of which is to
optionally clean up tmp_* files in the objects/ directory). It refactors
the lockfile cleanup, and it would not be too hard to have a committed
or rolled-back lockfile actually remove itself from the "to clean at
exit" list.

Which would make it perfectly safe to have a lockfile as an automatic
variable as long as you commit or rollback before leaving the function.

-Peff
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 04/12] refs: implement simple transactions for the packed-refs file

2013-06-20 Thread Junio C Hamano
Michael Haggerty  writes:

> But currently only the main packed ref cache can be locked, so it would
> be possible for lock_packed_refs() to use the static packlock instance
> for locking.

Perhaps I am missing something from the previous discussions, but I
am having trouble understanding the "main packed ref cache" part of
the above.  "main" as opposed to...?  Is it envisioned that later
somebody can lock one subpart while another can lock a different and
non-overlapping subpart, to make changes independently, and somehow
their non-overlapping changes will be consolidated into a single
consistent result?
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 04/12] refs: implement simple transactions for the packed-refs file

2013-06-20 Thread Michael Haggerty
On 06/20/2013 07:11 PM, Junio C Hamano wrote:
> Michael Haggerty  writes:
> 
>> But currently only the main packed ref cache can be locked, so it would
>> be possible for lock_packed_refs() to use the static packlock instance
>> for locking.
> 
> Perhaps I am missing something from the previous discussions, but I
> am having trouble understanding the "main packed ref cache" part of
> the above.  "main" as opposed to...?

"main" as opposed to "submodule".

> Is it envisioned that later
> somebody can lock one subpart while another can lock a different and
> non-overlapping subpart, to make changes independently, and somehow
> their non-overlapping changes will be consolidated into a single
> consistent result?

No, the scenario would be that a git process wants to change a reference
in a submodule directly, as opposed to starting another git process
within the submodule, as I believe is done now.  Maybe it's too
far-fetched even to consider...

Michael

-- 
Michael Haggerty
mhag...@alum.mit.edu
http://softwareswirl.blogspot.com/
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 04/12] refs: implement simple transactions for the packed-refs file

2013-06-20 Thread Michael Haggerty
On 06/20/2013 01:55 PM, Jeff King wrote:
> On Thu, Jun 20, 2013 at 09:49:51AM +0200, Michael Haggerty wrote:
> 
>> [I just noticed that lock_ref_sha1_basic() leaks a struct lock_file
>> every time that it is called.]
> 
> I noticed that recently, too. I have a patch series about 90% complete
> that abstracts the tempfile handling (the ultimate goal of which is to
> optionally clean up tmp_* files in the objects/ directory). It refactors
> the lockfile cleanup, and it would not be too hard to have a committed
> or rolled-back lockfile actually remove itself from the "to clean at
> exit" list.
> 
> Which would make it perfectly safe to have a lockfile as an automatic
> variable as long as you commit or rollback before leaving the function.

Cool, then I won't work on that.  You might also have to make the
lockfile list into a doubly-linked-list to avoid having to do a linear
scan to find the entry to delete, unless the total number of entries is
known to remain small.

Please CC me on the patch series when it is done.

Michael

-- 
Michael Haggerty
mhag...@alum.mit.edu
http://softwareswirl.blogspot.com/
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 04/12] refs: implement simple transactions for the packed-refs file

2013-06-20 Thread Junio C Hamano
Michael Haggerty  writes:

> On 06/20/2013 07:11 PM, Junio C Hamano wrote:
>
>> Perhaps I am missing something from the previous discussions, but I
>> am having trouble understanding the "main packed ref cache" part of
>> the above.  "main" as opposed to...?
>
> "main" as opposed to "submodule".

I see.

> No, the scenario would be that a git process wants to change a reference
> in a submodule directly, as opposed to starting another git process
> within the submodule, as I believe is done now.  Maybe it's too
> far-fetched even to consider...

Perhaps.  But the "singleton lock because we only handle main packed
ref cache" does not prevent us from doing so later, so I think v3 is
OK.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 04/12] refs: implement simple transactions for the packed-refs file

2013-06-20 Thread Jeff King
On Thu, Jun 20, 2013 at 08:03:43PM +0200, Michael Haggerty wrote:

> > I noticed that recently, too. I have a patch series about 90% complete
> > that abstracts the tempfile handling (the ultimate goal of which is to
> > optionally clean up tmp_* files in the objects/ directory). It refactors
> > the lockfile cleanup, and it would not be too hard to have a committed
> > or rolled-back lockfile actually remove itself from the "to clean at
> > exit" list.
> > 
> > Which would make it perfectly safe to have a lockfile as an automatic
> > variable as long as you commit or rollback before leaving the function.
> 
> Cool, then I won't work on that.  You might also have to make the
> lockfile list into a doubly-linked-list to avoid having to do a linear
> scan to find the entry to delete, unless the total number of entries is
> known to remain small.

Yes, I noticed that potential issue, but I don't think it is worth
worrying about. We typically only take one lock at a time, or a handful
of tempfiles (e.g., one object at a time, or two files for diff).

And once it's abstracted out, it would be easy to handle later.

The part I am a little stuck on is plugging it into
pack-objects/index-pack. Their output handling is a little convoluted
because they may be writing to stdout, to a tempfile, to a named file,
or even appending to an existing file in the case of index-pack
--fix-thin. I don't think it's unmanageable, but I need to spend some
more time on the refactoring.

> Please CC me on the patch series when it is done.

Will do.

-Peff
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html