Here's a rather hackish implementation of the write side.  Any
thoughts on the format?  (Obviously the implementation needs work.
For example, it needs to be optional.)

Thoughts so far:
 - I want to put the value of "prefix" into an extended header.
 - Should blobs have their sha1 hashes in an extended header?  Pros:
it makes figuring out substitutions easier.  Cons: it adds 512 bytes
per file.
 - I want to support tags as roots.
 - I (or someone) need to write a verifier / verified unpacker.  Does
git accept Python code?

This thing is tested in the sense that GNU tar unpacks its output
without any warnings or other fanfare.

--Andy
diff --git a/archive-tar.c b/archive-tar.c
index 719b629..c6bf7e4 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -2,6 +2,8 @@
  * Copyright (c) 2005, 2006 Rene Scharfe
  */
 #include "cache.h"
+#include "tree.h"
+#include "object.h"
 #include "tar.h"
 #include "archive.h"
 #include "streaming.h"
@@ -200,6 +202,74 @@ static int write_extended_header(struct archiver_args *args,
 	return 0;
 }
 
+/*
+ * A GIT-SCM object header is a global extended header that embeds a single
+ * git object.  This object serves a purpose described by the "purpose"
+ * field.  Valid purposes include:
+ *
+ *  - "root" -- an object that, by itself, in conjunction with other roots,
+ *    or in conjunction with external data, identifies a root to use to
+ *    verify this archive.
+ *  - "vrfy" -- an object that can be used to prove that the contents
+ *    of this archive are as described.
+ *
+ * There's one basic rule to observe: every "vrfy" object must hash to
+ * a SHA-1 that matches something described in a "root", another "vrfy" object,
+ * or something typed in by a user decoding the archive.
+ *
+ * (Of course, if you want the archive to be usefully verifiable, all of the
+ *  non-GIT-SCM contents should also be attributable to an appropriate
+ *  "vrfy" object.)
+ *
+ * The fields are:
+ *  GIT-SCM.obj.purpose: the purpose of the embedded object
+ *  GIT-SCM.obj.sha1: the sha1 of the embedded object
+ *  GIT-SCM.obj.type: the type of the embedded object
+ *  GIT-SCM.obj.data: the data in the embedded object
+ *
+ * The block header is intentionally unspecified, except that it must
+ * have typeflag 'g'.  (This is to allow some flexibility in trying to
+ * preserve compatibility with old tar implementations.)
+ */
+/* Embed one object as a global header; 0 on success, error() on failure. */
+static int write_gitscm_obj_header(struct archiver_args *args,
+				   const char *purpose,
+				   const unsigned char *sha1)
+{
+	struct strbuf ext_header = STRBUF_INIT;
+	struct ustar_header header;
+	enum object_type type;
+	unsigned long size;
+	const char *typestr;
+	void *buffer;
+
+	if (!sha1)	/* callers may pass a NULL commit_sha1: nothing to embed */
+		return 0;
+	/* Read the object first so a failure emits no partial header. */
+	buffer = read_sha1_file(sha1, &type, &size);
+	if (!buffer)
+		return error("cannot read %s", sha1_to_hex(sha1));
+	typestr = typename(type);
+	strbuf_append_ext_header(&ext_header, "GIT-SCM.obj.purpose",
+				 purpose, strlen(purpose));
+	strbuf_append_ext_header(&ext_header, "GIT-SCM.obj.sha1",
+				 sha1_to_hex(sha1), 40);
+	strbuf_append_ext_header(&ext_header, "GIT-SCM.obj.type",
+				 typestr, strlen(typestr));
+	strbuf_append_ext_header(&ext_header, "GIT-SCM.obj.data",
+				 buffer, size);
+	free(buffer);
+
+	memset(&header, 0, sizeof(header));
+	*header.typeflag = TYPEFLAG_GLOBAL_HEADER;
+	strcpy(header.name, "pax_global_header");
+	prepare_header(args, &header, 0100666, ext_header.len);
+	write_blocked(&header, sizeof(header));
+	write_blocked(ext_header.buf, ext_header.len);
+	strbuf_release(&ext_header);
+	return 0;
+}
+
 static int write_tar_entry(struct archiver_args *args,
 			   const unsigned char *sha1,
 			   const char *path, size_t pathlen,
@@ -212,6 +282,10 @@ static int write_tar_entry(struct archiver_args *args,
 	void *buffer;
 	int err = 0;
 
+	if (S_ISDIR(mode)) {
+		write_gitscm_obj_header(args, "vrfy", sha1);
+	}
+
 	memset(&header, 0, sizeof(header));
 
 	if (S_ISDIR(mode) || S_ISGITLINK(mode)) {
@@ -384,8 +458,11 @@ static int write_tar_archive(const struct archiver *ar,
 
 	if (args->commit_sha1)
 		err = write_global_extended_header(args);
-	if (!err)
+	if (!err) {
+		write_gitscm_obj_header(args, "root", args->commit_sha1);
+		write_gitscm_obj_header(args, "vrfy", args->tree->object.sha1);
 		err = write_archive_entries(args, write_tar_entry);
+	}
 	if (!err)
 		write_trailer();
 	return err;

Reply via email to