Teach clone to support excluding large blobs through a blob-max-bytes
parameter.

Signed-off-by: Jonathan Tan <jonathanta...@google.com>
---
 builtin/clone.c  | 23 +++++++++++++++++++++--
 t/t5601-clone.sh | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/builtin/clone.c b/builtin/clone.c
index dbddd98f8..4c2193dc4 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -60,6 +60,7 @@ static struct string_list option_optional_reference = STRING_LIST_INIT_NODUP;
 static int option_dissociate;
 static int max_jobs = -1;
 static struct string_list option_recurse_submodules = STRING_LIST_INIT_NODUP;
+static char *blob_max_bytes;
 
 static int recurse_submodules_cb(const struct option *opt,
                                 const char *arg, int unset)
@@ -135,6 +136,8 @@ static struct option builtin_clone_options[] = {
                        TRANSPORT_FAMILY_IPV4),
        OPT_SET_INT('6', "ipv6", &family, N_("use IPv6 addresses only"),
                        TRANSPORT_FAMILY_IPV6),
+       OPT_STRING(0, "blob-max-bytes", &blob_max_bytes, N_("bytes"),
+                  N_("do not fetch blobs above this size")),
        OPT_END()
 };
 
@@ -886,6 +889,8 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
        struct refspec *refspec;
        const char *fetch_pattern;
 
+       fetch_if_missing = 0;
+
        packet_trace_identity("clone");
        argc = parse_options(argc, argv, prefix, builtin_clone_options,
                             builtin_clone_usage, 0);
@@ -1104,7 +1109,13 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
                transport_set_option(transport, TRANS_OPT_UPLOADPACK,
                                     option_upload_pack);
 
-       if (transport->smart_options && !deepen)
+       if (blob_max_bytes) {
+               transport_set_option(transport, TRANS_OPT_BLOB_MAX_BYTES,
+                                    blob_max_bytes);
+               transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
+       }
+
+       if (transport->smart_options && !deepen && !blob_max_bytes)
                transport->smart_options->check_self_contained_and_connected = 1;
 
        refs = transport_get_remote_refs(transport);
@@ -1164,13 +1175,20 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
        write_refspec_config(src_ref_prefix, our_head_points_at,
                        remote_head_points_at, &branch_top);
 
+       if (blob_max_bytes) {
+               git_config_set("core.repositoryformatversion", "1");
+               git_config_set("extensions.partialclone", "origin");
+               repository_format_partial_clone = "origin";
+       }
+
        if (is_local)
                clone_local(path, git_dir);
        else if (refs && complete_refs_before_fetch)
                transport_fetch_refs(transport, mapped_refs);
 
        update_remote_refs(refs, mapped_refs, remote_head_points_at,
-                          branch_top.buf, reflog_msg.buf, transport, !is_local);
+                          branch_top.buf, reflog_msg.buf, transport,
+                          !is_local && !blob_max_bytes);
 
        update_head(our_head_points_at, remote_head, reflog_msg.buf);
 
@@ -1191,6 +1209,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
        }
 
        junk_mode = JUNK_LEAVE_REPO;
+       fetch_if_missing = 1;
        err = checkout(submodule_progress);
 
        strbuf_release(&reflog_msg);
diff --git a/t/t5601-clone.sh b/t/t5601-clone.sh
index 9c56f771b..951b1ffa8 100755
--- a/t/t5601-clone.sh
+++ b/t/t5601-clone.sh
@@ -571,4 +571,53 @@ test_expect_success 'GIT_TRACE_PACKFILE produces a usable pack' '
        git -C replay.git index-pack -v --stdin <tmp.pack
 '
 
+partial_clone () {
+       SERVER="$1" &&
+       URL="$2" &&
+
+       rm -rf "$SERVER" client &&
+       test_create_repo "$SERVER" &&
+       test_commit -C "$SERVER" one &&
+       HASH1=$(git hash-object "$SERVER/one.t") &&
+       git -C "$SERVER" revert HEAD &&
+       test_commit -C "$SERVER" two &&
+       HASH2=$(git hash-object "$SERVER/two.t") &&
+       test_config -C "$SERVER" uploadpack.advertiseblobmaxbytes 1 &&
+       test_config -C "$SERVER" uploadpack.allowanysha1inwant 1 &&
+
+       git clone --blob-max-bytes=0 "$URL" client &&
+
+       git -C client fsck &&
+
+       # Ensure that unneeded blobs are not inadvertently fetched.
+       test_config -C client extensions.partialclone "not a remote" &&
+       test_must_fail git -C client cat-file -e "$HASH1" &&
+
+       # But this blob was fetched, because clone performs an initial checkout
+       git -C client cat-file -e "$HASH2"
+}
+
+test_expect_success 'partial clone' '
+       partial_clone server "file://$(pwd)/server"
+'
+
+test_expect_success 'partial clone: warn if server does not support blob-max-bytes' '
+       rm -rf server client &&
+       test_create_repo server &&
+       test_commit -C server one &&
+
+       git clone --blob-max-bytes=0 "file://$(pwd)/server" client 2> err &&
+
+       test_i18ngrep "blob-max-bytes not recognized by server" err
+'
+
+. "$TEST_DIRECTORY"/lib-httpd.sh
+start_httpd
+
+test_expect_success 'partial clone using HTTP' '
+       partial_clone "$HTTPD_DOCUMENT_ROOT_PATH/server" "$HTTPD_URL/smart/server"
+'
+
+stop_httpd
+
 test_done
-- 
2.14.2.822.g60be5d43e6-goog

Reply via email to