AdrianVovk commented on code in PR #62:
URL: https://github.com/apache/buildstream-plugins/pull/62#discussion_r1525705300


##########
src/buildstream_plugins/sources/cargo.py:
##########
@@ -294,6 +310,268 @@ def _get_mirror_file(self, sha=None):
         return os.path.join(self._get_mirror_dir(), sha or self.sha)
 
 
+# Locks on repositories for write access
+REPO_LOCKS = {}  # type: dict[str, threading.Lock]
+
+
+# CrateGit()
+#
+# Use a SourceFetcher class to be the per crate helper.
+#
+# This one is for crates fetched from git repositories.
+#
+# Args:
+#    cargo (Cargo): The main Source implementation
+#    name (str): The name of the crate to depend on
+#    version (str): The version of the crate to depend on
+#    repo (str): Repository URL
+#    commit (str): Sha of the git commit
+class CrateGit(SourceFetcher):
+    def __init__(self, cargo, name, version, repo, commit):
+        super().__init__()
+
+        self.cargo = cargo
+        self.name = name
+        self.version = str(version)
+        self.repo = repo
+        self.commit = commit
+        # TODO: Is this right?
+        self.mark_download_url(self.repo)
+
+    ########################################################
+    #     SourceFetcher API method implementations         #
+    ########################################################
+
+    def fetch(self, alias_override=None, **kwargs):
+        lock = REPO_LOCKS.setdefault(self._get_mirror_dir(), threading.Lock())
+
+        with lock, self._mirror_repo() as repo, self.cargo.timed_activity(f"Fetching from {self.repo}"):
+            # TODO: Auth not supported
+            client, path = dulwich.client.get_transport_and_path(self.repo)
+            remote_refs = client.fetch(
+                path,
+                repo,
+                determine_wants=lambda refs, depth=None: [self.commit.encode()],
+                depth=1,
+            )
+
+    ########################################################
+    #        Helper APIs for the Cargo Source to use       #
+    ########################################################
+
+    def ref_node(self):
+        return {"kind": "git", "name": self.name, "version": self.version, "repo": self.repo, "commit": self.commit}
+
+    # stage()
+    #
+    # A delegate method to do the work for a single git repo
+    # in Source.stage().
+    #
+    # Args:
+    #    (directory): The vendor subdirectory to stage to
+    #
+    def stage(self, directory):
+        self.cargo.status(f"Checking out {self.commit}")
+
+        crate_target_dir = os.path.join(directory, f"{self.name}-{self.version}")
+        tmp_dir = os.path.join(directory, f"{self.name}-{self.version}-tmp")
+
+        try:
+            os.mkdir(tmp_dir)
+        except FileExistsError:
+            raise SourceError(
+                f"Cargo doens't support vendoring for identical crate version 
from different sources {self.name} {self.version}"
+            )
+
+        with Repo(self._get_mirror_dir(), bare=True) as mirror:
+            with Repo.init(tmp_dir) as dest:
+                dest.object_store.add_object(mirror[self.commit.encode()])
+                dest.refs[b"HEAD"] = self.commit.encode()
+                dest.update_shallow([self.commit.encode()], [])
+
+            with Repo(tmp_dir, object_store=mirror.object_store) as dest:
+                dest.reset_index()
+
+        # Workspace handling
+        #
+        # When new workspace features are added it is worth checking if
+        # <https://github.com/flatpak/flatpak-builder-tools/blob/HEAD/cargo/flatpak-cargo-generator.py>
+        # has implemented them already. This implementation is inspired by the mentioned source.
+
+        with open(os.path.join(tmp_dir, "Cargo.toml"), "rb") as f:
+            root_toml = tomllib.load(f)
+
+        crates = {}
+
+        if "workspace" in root_toml and "memers":

Review Comment:
   typo
   
   but also the condition doesn't make sense to me
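   
   I'm guessing the intent was to check for a "members" key inside the workspace table, i.e. something like this (untested sketch):
   
   ```python3
   if "workspace" in root_toml and "members" in root_toml["workspace"]:
       ...
   ```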



##########
src/buildstream_plugins/sources/cargo.py:
##########
@@ -367,17 +644,31 @@ def track(self, *, previous_sources_dir):
         for package in lock["package"]:
             if "source" not in package:
                 continue
-            new_ref += [{"name": package["name"], "version": str(package["version"]), "sha": package.get("checksum")}]
+
+            ref = {}
+
+            if package["source"][:4] == "git+":

Review Comment:
   perhaps it's worth splitting on the plus, then checking if it's git, missing 
(=> "registry"), or something else (=> error)
   
   Does cargo support any other transports? Will it ever?
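   
   Roughly what I mean (sketch only, the exact registry handling may differ from what the PR ends up doing):
   
   ```python3
   kind, sep, _ = package["source"].partition("+")
   if kind == "git":
       ...  # build a git ref for this crate
   elif not sep or kind == "registry":
       ...  # build a registry ref for this crate
   else:
       raise SourceError(f"Unsupported source kind '{kind}' for crate {package['name']}")
   ```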
   
   



##########
src/buildstream_plugins/sources/cargo.py:
##########
@@ -294,6 +310,268 @@ def _get_mirror_file(self, sha=None):
         return os.path.join(self._get_mirror_dir(), sha or self.sha)
 
 
+# Locks on repositories for write access
+REPO_LOCKS = {}  # type: dict[str, threading.Lock]
+
+
+# CrateGit()
+#
+# Use a SourceFetcher class to be the per crate helper.
+#
+# This one is for crates fetched from git repositories.
+#
+# Args:
+#    cargo (Cargo): The main Source implementation
+#    name (str): The name of the crate to depend on
+#    version (str): The version of the crate to depend on
+#    repo (str): Repository URL
+#    commit (str): Sha of the git commit
+class CrateGit(SourceFetcher):
+    def __init__(self, cargo, name, version, repo, commit):
+        super().__init__()
+
+        self.cargo = cargo
+        self.name = name
+        self.version = str(version)
+        self.repo = repo
+        self.commit = commit
+        # TODO: Is this right?
+        self.mark_download_url(self.repo)
+
+    ########################################################
+    #     SourceFetcher API method implementations         #
+    ########################################################
+
+    def fetch(self, alias_override=None, **kwargs):
+        lock = REPO_LOCKS.setdefault(self._get_mirror_dir(), threading.Lock())
+
+        with lock, self._mirror_repo() as repo, self.cargo.timed_activity(f"Fetching from {self.repo}"):
+            # TODO: Auth not supported
+            client, path = dulwich.client.get_transport_and_path(self.repo)
+            remote_refs = client.fetch(
+                path,
+                repo,
+                determine_wants=lambda refs, depth=None: [self.commit.encode()],
+                depth=1,
+            )
+
+    ########################################################
+    #        Helper APIs for the Cargo Source to use       #
+    ########################################################
+
+    def ref_node(self):
+        return {"kind": "git", "name": self.name, "version": self.version, "repo": self.repo, "commit": self.commit}
+
+    # stage()
+    #
+    # A delegate method to do the work for a single git repo
+    # in Source.stage().
+    #
+    # Args:
+    #    (directory): The vendor subdirectory to stage to
+    #
+    def stage(self, directory):
+        self.cargo.status(f"Checking out {self.commit}")
+
+        crate_target_dir = os.path.join(directory, f"{self.name}-{self.version}")
+        tmp_dir = os.path.join(directory, f"{self.name}-{self.version}-tmp")
+
+        try:
+            os.mkdir(tmp_dir)
+        except FileExistsError:
+            raise SourceError(
+                f"Cargo doens't support vendoring for identical crate version 
from different sources {self.name} {self.version}"

Review Comment:
   This error message isn't particularly clear, IMO, but it's hard to come up 
with a one-liner that covers it in a non-confusing way...
   
   - "Crate {self.name} {self.version} is vendored from multiple sources, which 
is unsupported"
   - "Cannot stage {self.name} {self.version} because it was already staged by 
a different fetcher"
   - "This project requests crate {self.name} {self.version} from multiple 
sources, which is incompatible with vendoring"
   
   :shrug: 



##########
src/buildstream_plugins/sources/cargo.py:
##########
@@ -294,6 +310,268 @@ def _get_mirror_file(self, sha=None):
         return os.path.join(self._get_mirror_dir(), sha or self.sha)
 
 
+# Locks on repositories for write access
+REPO_LOCKS = {}  # type: dict[str, threading.Lock]
+
+
+# CrateGit()
+#
+# Use a SourceFetcher class to be the per crate helper.
+#
+# This one is for crates fetched from git repositories.
+#
+# Args:
+#    cargo (Cargo): The main Source implementation
+#    name (str): The name of the crate to depend on
+#    version (str): The version of the crate to depend on
+#    repo (str): Repository URL
+#    commit (str): Sha of the git commit
+class CrateGit(SourceFetcher):
+    def __init__(self, cargo, name, version, repo, commit):
+        super().__init__()
+
+        self.cargo = cargo
+        self.name = name
+        self.version = str(version)
+        self.repo = repo
+        self.commit = commit
+        # TODO: Is this right?
+        self.mark_download_url(self.repo)
+
+    ########################################################
+    #     SourceFetcher API method implementations         #
+    ########################################################
+
+    def fetch(self, alias_override=None, **kwargs):
+        lock = REPO_LOCKS.setdefault(self._get_mirror_dir(), threading.Lock())
+
+        with lock, self._mirror_repo() as repo, self.cargo.timed_activity(f"Fetching from {self.repo}"):
+            # TODO: Auth not supported
+            client, path = dulwich.client.get_transport_and_path(self.repo)
+            remote_refs = client.fetch(
+                path,
+                repo,
+                determine_wants=lambda refs, depth=None: [self.commit.encode()],
+                depth=1,
+            )
+
+    ########################################################
+    #        Helper APIs for the Cargo Source to use       #
+    ########################################################
+
+    def ref_node(self):
+        return {"kind": "git", "name": self.name, "version": self.version, "repo": self.repo, "commit": self.commit}
+
+    # stage()
+    #
+    # A delegate method to do the work for a single git repo
+    # in Source.stage().
+    #
+    # Args:
+    #    (directory): The vendor subdirectory to stage to
+    #
+    def stage(self, directory):
+        self.cargo.status(f"Checking out {self.commit}")
+
+        crate_target_dir = os.path.join(directory, f"{self.name}-{self.version}")
+        tmp_dir = os.path.join(directory, f"{self.name}-{self.version}-tmp")
+
+        try:
+            os.mkdir(tmp_dir)
+        except FileExistsError:
+            raise SourceError(
+                f"Cargo doens't support vendoring for identical crate version 
from different sources {self.name} {self.version}"
+            )
+
+        with Repo(self._get_mirror_dir(), bare=True) as mirror:
+            with Repo.init(tmp_dir) as dest:
+                dest.object_store.add_object(mirror[self.commit.encode()])
+                dest.refs[b"HEAD"] = self.commit.encode()
+                dest.update_shallow([self.commit.encode()], [])
+
+            with Repo(tmp_dir, object_store=mirror.object_store) as dest:
+                dest.reset_index()
+
+        # Workspace handling
+        #
+        # When new workspace features are added it is worth checking if
+        # <https://github.com/flatpak/flatpak-builder-tools/blob/HEAD/cargo/flatpak-cargo-generator.py>
+        # has implemented them already. This implementation is inspired by the mentioned source.
+
+        with open(os.path.join(tmp_dir, "Cargo.toml"), "rb") as f:
+            root_toml = tomllib.load(f)
+
+        crates = {}
+
+        if "workspace" in root_toml and "memers":
+            # Find wanted crate inside workspace
+            for member in root_toml["workspace"].get("members", []):
+                for crate_toml_path in glob.glob(os.path.join(tmp_dir, member, "Cargo.toml")):
+                    crate_path = os.path.normpath(os.path.dirname(crate_toml_path))
+
+                    with open(crate_toml_path, "rb") as f:
+                        crate_toml = tomllib.load(f)
+                        crates[crate_toml["package"]["name"]] = {
+                            "config": crate_toml,
+                            "path": crate_path,
+                        }
+
+            crate = crates[self.name]
+            # Apply information inherited from workspace Cargo.toml
+            config_inherit_workspace(crate["config"], root_toml["workspace"])
+
+            with open(os.path.join(crates[self.name]["path"], "Cargo.toml"), "bw") as f:
+                tomli_w.dump(crate["config"], f)
+
+            shutil.move(crate["path"], crate_target_dir)
+        else:
+            # No workspaces involed, just reploy complete dir as is

Review Comment:
   typo



##########
src/buildstream_plugins/sources/cargo.py:
##########
@@ -294,6 +310,268 @@ def _get_mirror_file(self, sha=None):
         return os.path.join(self._get_mirror_dir(), sha or self.sha)
 
 
+# Locks on repositories for write access
+REPO_LOCKS = {}  # type: dict[str, threading.Lock]
+
+
+# CrateGit()
+#
+# Use a SourceFetcher class to be the per crate helper.
+#
+# This one is for crates fetched from git repositories.
+#
+# Args:
+#    cargo (Cargo): The main Source implementation
+#    name (str): The name of the crate to depend on
+#    version (str): The version of the crate to depend on
+#    repo (str): Repository URL
+#    commit (str): Sha of the git commit
+class CrateGit(SourceFetcher):
+    def __init__(self, cargo, name, version, repo, commit):
+        super().__init__()
+
+        self.cargo = cargo
+        self.name = name
+        self.version = str(version)
+        self.repo = repo
+        self.commit = commit
+        # TODO: Is this right?
+        self.mark_download_url(self.repo)
+
+    ########################################################
+    #     SourceFetcher API method implementations         #
+    ########################################################
+
+    def fetch(self, alias_override=None, **kwargs):
+        lock = REPO_LOCKS.setdefault(self._get_mirror_dir(), threading.Lock())
+
+        with lock, self._mirror_repo() as repo, self.cargo.timed_activity(f"Fetching from {self.repo}"):
+            # TODO: Auth not supported
+            client, path = dulwich.client.get_transport_and_path(self.repo)
+            remote_refs = client.fetch(
+                path,
+                repo,
+                determine_wants=lambda refs, depth=None: [self.commit.encode()],
+                depth=1,
+            )
+
+    ########################################################
+    #        Helper APIs for the Cargo Source to use       #
+    ########################################################
+
+    def ref_node(self):
+        return {"kind": "git", "name": self.name, "version": self.version, "repo": self.repo, "commit": self.commit}
+
+    # stage()
+    #
+    # A delegate method to do the work for a single git repo
+    # in Source.stage().
+    #
+    # Args:
+    #    (directory): The vendor subdirectory to stage to
+    #
+    def stage(self, directory):
+        self.cargo.status(f"Checking out {self.commit}")
+
+        crate_target_dir = os.path.join(directory, f"{self.name}-{self.version}")
+        tmp_dir = os.path.join(directory, f"{self.name}-{self.version}-tmp")
+
+        try:
+            os.mkdir(tmp_dir)
+        except FileExistsError:
+            raise SourceError(
+                f"Cargo doens't support vendoring for identical crate version 
from different sources {self.name} {self.version}"

Review Comment:
   This error message & check should probably be duplicated in the other 
fetcher too, because right now depending on the ordering of the `Cargo.lock` 
file you'll get inconsistent behavior: either an error message, or the git 
source will silently get overwritten by the tarball
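   
   For example (hypothetical helper, not in this PR), both fetchers could go through something like:
   
   ```python3
   def check_duplicate_crate(directory, name, version):
       # Guard shared by both fetchers: refuse to stage a crate that
       # another fetcher already staged under the same name/version
       if os.path.exists(os.path.join(directory, f"{name}-{version}")):
           raise SourceError(f"Crate {name} {version} is requested from multiple sources, which is incompatible with vendoring")
   ```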



##########
src/buildstream_plugins/sources/cargo.py:
##########
@@ -294,6 +310,268 @@ def _get_mirror_file(self, sha=None):
         return os.path.join(self._get_mirror_dir(), sha or self.sha)
 
 
+# Locks on repositories for write access
+REPO_LOCKS = {}  # type: dict[str, threading.Lock]
+
+
+# CrateGit()
+#
+# Use a SourceFetcher class to be the per crate helper.
+#
+# This one is for crates fetched from git repositories.
+#
+# Args:
+#    cargo (Cargo): The main Source implementation
+#    name (str): The name of the crate to depend on
+#    version (str): The version of the crate to depend on
+#    repo (str): Repository URL
+#    commit (str): Sha of the git commit
+class CrateGit(SourceFetcher):
+    def __init__(self, cargo, name, version, repo, commit):
+        super().__init__()
+
+        self.cargo = cargo
+        self.name = name
+        self.version = str(version)
+        self.repo = repo
+        self.commit = commit
+        # TODO: Is this right?
+        self.mark_download_url(self.repo)
+
+    ########################################################
+    #     SourceFetcher API method implementations         #
+    ########################################################
+
+    def fetch(self, alias_override=None, **kwargs):
+        lock = REPO_LOCKS.setdefault(self._get_mirror_dir(), threading.Lock())
+
+        with lock, self._mirror_repo() as repo, self.cargo.timed_activity(f"Fetching from {self.repo}"):
+            # TODO: Auth not supported
+            client, path = dulwich.client.get_transport_and_path(self.repo)
+            remote_refs = client.fetch(
+                path,
+                repo,
+                determine_wants=lambda refs, depth=None: [self.commit.encode()],
+                depth=1,
+            )
+
+    ########################################################
+    #        Helper APIs for the Cargo Source to use       #
+    ########################################################
+
+    def ref_node(self):
+        return {"kind": "git", "name": self.name, "version": self.version, "repo": self.repo, "commit": self.commit}
+
+    # stage()
+    #
+    # A delegate method to do the work for a single git repo
+    # in Source.stage().
+    #
+    # Args:
+    #    (directory): The vendor subdirectory to stage to
+    #
+    def stage(self, directory):
+        self.cargo.status(f"Checking out {self.commit}")
+
+        crate_target_dir = os.path.join(directory, f"{self.name}-{self.version}")
+        tmp_dir = os.path.join(directory, f"{self.name}-{self.version}-tmp")
+
+        try:
+            os.mkdir(tmp_dir)
+        except FileExistsError:
+            raise SourceError(
+                f"Cargo doens't support vendoring for identical crate version 
from different sources {self.name} {self.version}"
+            )
+
+        with Repo(self._get_mirror_dir(), bare=True) as mirror:
+            with Repo.init(tmp_dir) as dest:
+                dest.object_store.add_object(mirror[self.commit.encode()])
+                dest.refs[b"HEAD"] = self.commit.encode()
+                dest.update_shallow([self.commit.encode()], [])
+
+            with Repo(tmp_dir, object_store=mirror.object_store) as dest:
+                dest.reset_index()
+
+        # Workspace handling
+        #
+        # When new workspace features are added it is worth checking if
+        # <https://github.com/flatpak/flatpak-builder-tools/blob/HEAD/cargo/flatpak-cargo-generator.py>
+        # has implemented them already. This implementation is inspired by the mentioned source.
+
+        with open(os.path.join(tmp_dir, "Cargo.toml"), "rb") as f:
+            root_toml = tomllib.load(f)
+
+        crates = {}
+
+        if "workspace" in root_toml and "memers":
+            # Find wanted crate inside workspace
+            for member in root_toml["workspace"].get("members", []):
+                for crate_toml_path in glob.glob(os.path.join(tmp_dir, member, "Cargo.toml")):
+                    crate_path = os.path.normpath(os.path.dirname(crate_toml_path))
+
+                    with open(crate_toml_path, "rb") as f:
+                        crate_toml = tomllib.load(f)
+                        crates[crate_toml["package"]["name"]] = {
+                            "config": crate_toml,
+                            "path": crate_path,
+                        }
+
+            crate = crates[self.name]
+            # Apply information inherited from workspace Cargo.toml
+            config_inherit_workspace(crate["config"], root_toml["workspace"])
+
+            with open(os.path.join(crates[self.name]["path"], "Cargo.toml"), "bw") as f:
+                tomli_w.dump(crate["config"], f)
+
+            shutil.move(crate["path"], crate_target_dir)
+        else:
+            # No workspaces involed, just reploy complete dir as is
+            shutil.move(tmp_dir, crate_target_dir)
+
+        # Write .cargo-checksum.json required by cargo vendoring
+        with open(os.path.join(crate_target_dir, ".cargo-checksum.json"), "w") as f:
+            json.dump({"files": {}, "package": None}, f)
+
+        shutil.rmtree(tmp_dir, ignore_errors=True)
+
+    # is_cached()
+    #
+    # Get whether we have a local cached version of the git commit
+    #
+    # Returns:
+    #   (bool): Whether we are cached or not
+    #
+    def is_cached(self):
+        with Repo(self._get_mirror_dir(), bare=True) as repo:
+            return self.commit.encode() in repo
+
+    # is_resolved()
+    #
+    # Get whether the current git repo is resolved
+    #
+    # Returns:
+    #   (bool): Always true since we always have a commit
+    #
+    def is_resolved(self):
+        return True
+
+    ########################################################
+    #                   Private helpers                    #
+    ########################################################
+
+    # _get_mirror_dir()
+    #
+    # Gets the local mirror directory for this upstream git repository
+    #
+    def _get_mirror_dir(self):
+        if self.repo.endswith(".git"):
+            norm_url = self.repo[:-4]
+        else:
+            norm_url = self.repo
+
+        return os.path.join(
+            self.cargo.get_mirror_directory(),
+            utils.url_directory_name(norm_url) + ".git",
+        )
+
+    # _get_url()
+    #
+    # Gets the git URL to download this crate from
+    #
+    # Args:
+    #    alias (str|None): The URL alias to apply, if any
+    #
+    # Returns:
+    #    (str): The URL for this crate
+    #
+    def _get_url(self, alias=None):
+        return self.cargo.translate_url(self.cargo.repo, alias_override=alias)

Review Comment:
   self.cargo.repo seems incorrect. Maybe you meant self.repo?
   
   You also don't use this function anyway... You should be using it when 
calling `dulwich.client.get_transport_and_path` above, and possibly elsewhere. 
Make sure to pass through the `alias_override`.
   
   Once you start doing this, buildstream should start yelling at you about 
"unaliased sources". I described the fix in the comment about marking the URL



##########
src/buildstream_plugins/sources/cargo.py:
##########
@@ -294,6 +308,166 @@ def _get_mirror_file(self, sha=None):
         return os.path.join(self._get_mirror_dir(), sha or self.sha)
 
 
+# Locks on repositories for write access
+REPO_LOCKS = {}
+
+
+# CrateGit()
+#
+# Use a SourceFetcher class to be the per crate helper.
+#
+# This one is for crates fetched from git repositories.
+#
+# Args:
+#    cargo (Cargo): The main Source implementation
+#    name (str): The name of the crate to depend on
+#    version (str): The version of the crate to depend on
+#    repo (str): Repository URL
+#    commit (str): Sha of the git commit
+class CrateGit(SourceFetcher):
+    def __init__(self, cargo, name, version, repo, commit):
+        super().__init__()
+
+        self.cargo = cargo
+        self.name = name
+        self.version = str(version)
+        self.repo = repo
+        self.commit = commit
+        # TODO: Is this right?
+        self.mark_download_url(self.repo)

Review Comment:
   The whole alias witchcraft is very poorly documented (from the plugin side, 
anyway), and I have a tenuous grasp on it at best, mostly from my experience of 
guessing and checking when writing my own modules. That said, I'll take a stab at 
explaining, because I think I know what's going on here. Someone will have to 
fact-check me.
   
   Basically you need to warn BuildStream @ element load/parse time about any 
URLs you will be translating for source aliases later on (i.e. at track time, 
or at fetch time, etc). So you mark the untranslated URL, and BuildStream won't 
be surprised by it later, when asked to resolve aliases.
   
   Why bst needs this: I have no idea, sorry. I just know that it does.
   
   The wrinkle here is that you don't actually translate the URL, which means 
that this is wholly incompatible with buildstream aliases and mirroring. You 
would be getting warnings about this ("Unaliased source!") but you completely 
bypass these warnings by never trying to translate the URL.
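   
   For comparison, the pattern I'd expect (sketch; mirrors what other source plugins do) is to mark at load time and translate at use time:
   
   ```python3
   def __init__(self, cargo, name, version, repo, commit):
       ...
       self.mark_download_url(self.repo)  # declare the URL up front, at load/parse time
   
   def fetch(self, alias_override=None, **kwargs):
       # actually resolve aliases/mirrors when the URL gets used
       url = self.cargo.translate_url(self.repo, alias_override=alias_override)
       ...
   ```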



##########
src/buildstream_plugins/sources/cargo.py:
##########
@@ -294,6 +310,268 @@ def _get_mirror_file(self, sha=None):
         return os.path.join(self._get_mirror_dir(), sha or self.sha)
 
 
+# Locks on repositories for write access
+REPO_LOCKS = {}  # type: dict[str, threading.Lock]
+
+
+# CrateGit()
+#
+# Use a SourceFetcher class to be the per crate helper.
+#
+# This one is for crates fetched from git repositories.
+#
+# Args:
+#    cargo (Cargo): The main Source implementation
+#    name (str): The name of the crate to depend on
+#    version (str): The version of the crate to depend on
+#    repo (str): Repository URL
+#    commit (str): Sha of the git commit
+class CrateGit(SourceFetcher):
+    def __init__(self, cargo, name, version, repo, commit):
+        super().__init__()
+
+        self.cargo = cargo
+        self.name = name
+        self.version = str(version)
+        self.repo = repo
+        self.commit = commit
+        # TODO: Is this right?
+        self.mark_download_url(self.repo)
+
+    ########################################################
+    #     SourceFetcher API method implementations         #
+    ########################################################
+
+    def fetch(self, alias_override=None, **kwargs):
+        lock = REPO_LOCKS.setdefault(self._get_mirror_dir(), threading.Lock())
+
+        with lock, self._mirror_repo() as repo, self.cargo.timed_activity(f"Fetching from {self.repo}"):
+            # TODO: Auth not supported
+            client, path = dulwich.client.get_transport_and_path(self.repo)
+            remote_refs = client.fetch(
+                path,
+                repo,
+                determine_wants=lambda refs, depth=None: [self.commit.encode()],
+                depth=1,
+            )
+
+    ########################################################
+    #        Helper APIs for the Cargo Source to use       #
+    ########################################################
+
+    def ref_node(self):
+        return {"kind": "git", "name": self.name, "version": self.version, "repo": self.repo, "commit": self.commit}
+
+    # stage()
+    #
+    # A delegate method to do the work for a single git repo
+    # in Source.stage().
+    #
+    # Args:
+    #    (directory): The vendor subdirectory to stage to
+    #
+    def stage(self, directory):
+        self.cargo.status(f"Checking out {self.commit}")
+
+        crate_target_dir = os.path.join(directory, f"{self.name}-{self.version}")
+        tmp_dir = os.path.join(directory, f"{self.name}-{self.version}-tmp")
+
+        try:
+            os.mkdir(tmp_dir)
+        except FileExistsError:
+            raise SourceError(
+                f"Cargo doens't support vendoring for identical crate version 
from different sources {self.name} {self.version}"
+            )
+
+        with Repo(self._get_mirror_dir(), bare=True) as mirror:
+            with Repo.init(tmp_dir) as dest:
+                dest.object_store.add_object(mirror[self.commit.encode()])
+                dest.refs[b"HEAD"] = self.commit.encode()
+                dest.update_shallow([self.commit.encode()], [])
+
+            with Repo(tmp_dir, object_store=mirror.object_store) as dest:
+                dest.reset_index()
+
+        # Workspace handling
+        #
+        # When new workspace features are added it is worth checking if
+        # <https://github.com/flatpak/flatpak-builder-tools/blob/HEAD/cargo/flatpak-cargo-generator.py>
+        # has implemented them already. This implementation is inspired by the mentioned source.
+
+        with open(os.path.join(tmp_dir, "Cargo.toml"), "rb") as f:
+            root_toml = tomllib.load(f)
+
+        crates = {}
+
+        if "workspace" in root_toml and "memers":
+            # Find wanted crate inside workspace
+            for member in root_toml["workspace"].get("members", []):
+                for crate_toml_path in glob.glob(os.path.join(tmp_dir, member, "Cargo.toml")):
+                    crate_path = os.path.normpath(os.path.dirname(crate_toml_path))
+
+                    with open(crate_toml_path, "rb") as f:
+                        crate_toml = tomllib.load(f)
+                        crates[crate_toml["package"]["name"]] = {
+                            "config": crate_toml,
+                            "path": crate_path,
+                        }
+
+            crate = crates[self.name]
+            # Apply information inherited from workspace Cargo.toml
+            config_inherit_workspace(crate["config"], root_toml["workspace"])
+
+            with open(os.path.join(crates[self.name]["path"], "Cargo.toml"), "bw") as f:
+                tomli_w.dump(crate["config"], f)
+
+            shutil.move(crate["path"], crate_target_dir)
+        else:
+            # No workspaces involed, just reploy complete dir as is
+            shutil.move(tmp_dir, crate_target_dir)
+
+        # Write .cargo-checksum.json required by cargo vendoring
+        with open(os.path.join(crate_target_dir, ".cargo-checksum.json"), "w") as f:
+            json.dump({"files": {}, "package": None}, f)
+
+        shutil.rmtree(tmp_dir, ignore_errors=True)
+
+    # is_cached()
+    #
+    # Get whether we have a local cached version of the git commit
+    #
+    # Returns:
+    #   (bool): Whether we are cached or not
+    #
+    def is_cached(self):
+        with Repo(self._get_mirror_dir(), bare=True) as repo:
+            return self.commit.encode() in repo
+
+    # is_resolved()
+    #
+    # Get whether the current git repo is resolved
+    #
+    # Returns:
+    #   (bool): Always true since we always have a commit
+    #
+    def is_resolved(self):
+        return True
+
+    ########################################################
+    #                   Private helpers                    #
+    ########################################################
+
+    # _get_mirror_dir()
+    #
+    # Gets the local mirror directory for this upstream git repository
+    #
+    def _get_mirror_dir(self):
+        if self.repo.endswith(".git"):
+            norm_url = self.repo[:-4]
+        else:
+            norm_url = self.repo
+
+        return os.path.join(
+            self.cargo.get_mirror_directory(),
+            utils.url_directory_name(norm_url) + ".git",
+        )
+
+    # _get_url()
+    #
+    # Gets the git URL to download this crate from
+    #
+    # Args:
+    #    alias (str|None): The URL alias to apply, if any
+    #
+    # Returns:
+    #    (str): The URL for this crate
+    #
+    def _get_url(self, alias=None):
+        return self.cargo.translate_url(self.cargo.repo, alias_override=alias)
+
+    # _mirror_repo()
+    #
+    # Returns the mirror repo, initialized if it doesn not exist yet
+    #
+    # Returns:
+    #    (Repo): The mirror repo crate
+    #
+    def _mirror_repo(self):
+        try:
+            return Repo.init_bare(self._get_mirror_dir(), mkdir=True)
+        except FileExistsError:
+            return Repo(self._get_mirror_dir(), bare=True)
+
+
+# config_inherit_workspace()
+#
+# Adds inherited values to config
+#
+# Args:
+#    config (dict): Crate config
+#    workspace_config (dict): Workspace config
+def config_inherit_workspace(config, workspace_config):
+    workspace_deps = workspace_config.get("dependencies")
+    if workspace_deps is not None:
+        dependencies = []
+        for key in ["dependencies", "dev-dependencies", "build-dependencies"]:
+            if key in config:
+                dependencies.append(config[key])
+
+        if "traget" in config:
+            for target in config["target"].values():
+                if "dependencies" in target:
+                    dependencies.append(target["dependencies"])
+
+        for deps in dependencies:
+            inherit_deps(deps, workspace_deps)
+
+    workspace_package = workspace_config.get("package")
+    if workspace_package is not None:
+        inherit_package(config["package"], workspace_package)
+
+
+# inherit_package()
+#
+# Adds inherited [package] entries to config
+#
+# Args:
+#    items (dict): Package items
+#    workspace_items (dict): Workspace items
+def inherit_package(items, workspace_items):
+    for key, value in items.items():
+        if isinstance(value, dict) and "workspace" in value:
+            workspace_value = workspace_items.get(key)
+            if workspace_value is None:
+                raise SourceError("Can't inherit package information from 
workspace: Value missing from workspace")

Review Comment:
   As someone not familiar with the intricacies of Cargo, this error message 
makes no sense to me, sorry.



##########
src/buildstream_plugins/sources/cargo.py:
##########
@@ -294,6 +310,268 @@ def _get_mirror_file(self, sha=None):
         return os.path.join(self._get_mirror_dir(), sha or self.sha)
 
 
+# Locks on repositories for write access
+REPO_LOCKS = {}  # type: dict[str, threading.Lock]
+
+
+# CrateGit()
+#
+# Use a SourceFetcher class to be the per crate helper.
+#
+# This one is for crates fetched from git repositories.
+#
+# Args:
+#    cargo (Cargo): The main Source implementation
+#    name (str): The name of the crate to depend on
+#    version (str): The version of the crate to depend on
+#    repo (str): Repository URL
+#    commit (str): Sha of the git commit
+class CrateGit(SourceFetcher):
+    def __init__(self, cargo, name, version, repo, commit):
+        super().__init__()
+
+        self.cargo = cargo
+        self.name = name
+        self.version = str(version)
+        self.repo = repo
+        self.commit = commit
+        # TODO: Is this right?
+        self.mark_download_url(self.repo)
+
+    ########################################################
+    #     SourceFetcher API method implementations         #
+    ########################################################
+
+    def fetch(self, alias_override=None, **kwargs):
+        lock = REPO_LOCKS.setdefault(self._get_mirror_dir(), threading.Lock())
+
+        with lock, self._mirror_repo() as repo, self.cargo.timed_activity(f"Fetching from {self.repo}"):
+            # TODO: Auth not supported
+            client, path = dulwich.client.get_transport_and_path(self.repo)
+            remote_refs = client.fetch(
+                path,
+                repo,
+                determine_wants=lambda refs, depth=None: [self.commit.encode()],
+                depth=1,
+            )
+
+    ########################################################
+    #        Helper APIs for the Cargo Source to use       #
+    ########################################################
+
+    def ref_node(self):
+        return {"kind": "git", "name": self.name, "version": self.version, "repo": self.repo, "commit": self.commit}
+
+    # stage()
+    #
+    # A delegate method to do the work for a single git repo
+    # in Source.stage().
+    #
+    # Args:
+    #    (directory): The vendor subdirectory to stage to
+    #
+    def stage(self, directory):
+        self.cargo.status(f"Checking out {self.commit}")
+
+        crate_target_dir = os.path.join(directory, f"{self.name}-{self.version}")
+        tmp_dir = os.path.join(directory, f"{self.name}-{self.version}-tmp")
+
+        try:
+            os.mkdir(tmp_dir)
+        except FileExistsError:
+            raise SourceError(
+                f"Cargo doens't support vendoring for identical crate version 
from different sources {self.name} {self.version}"
+            )
+
+        with Repo(self._get_mirror_dir(), bare=True) as mirror:
+            with Repo.init(tmp_dir) as dest:
+                dest.object_store.add_object(mirror[self.commit.encode()])
+                dest.refs[b"HEAD"] = self.commit.encode()
+                dest.update_shallow([self.commit.encode()], [])
+
+            with Repo(tmp_dir, object_store=mirror.object_store) as dest:
+                dest.reset_index()
+
+        # Workspace handling
+        #
+        # When new workspace features are added it is worth checking if
+        # <https://github.com/flatpak/flatpak-builder-tools/blob/HEAD/cargo/flatpak-cargo-generator.py>
+        # has implemented them already. This implementation is inspired by the mentioned source.
+
+        with open(os.path.join(tmp_dir, "Cargo.toml"), "rb") as f:
+            root_toml = tomllib.load(f)
+
+        crates = {}
+
+        if "workspace" in root_toml and "memers":
+            # Find wanted crate inside workspace
+            for member in root_toml["workspace"].get("members", []):
+                for crate_toml_path in glob.glob(os.path.join(tmp_dir, member, "Cargo.toml")):
+                    crate_path = os.path.normpath(os.path.dirname(crate_toml_path))
+
+                    with open(crate_toml_path, "rb") as f:
+                        crate_toml = tomllib.load(f)
+                        crates[crate_toml["package"]["name"]] = {
+                            "config": crate_toml,
+                            "path": crate_path,
+                        }
+
+            crate = crates[self.name]
+            # Apply information inherited from workspace Cargo.toml
+            config_inherit_workspace(crate["config"], root_toml["workspace"])
+
+            with open(os.path.join(crates[self.name]["path"], "Cargo.toml"), "bw") as f:
+                tomli_w.dump(crate["config"], f)
+
+            shutil.move(crate["path"], crate_target_dir)
+        else:
+            # No workspaces involed, just reploy complete dir as is
+            shutil.move(tmp_dir, crate_target_dir)
+
+        # Write .cargo-checksum.json required by cargo vendoring
+        with open(os.path.join(crate_target_dir, ".cargo-checksum.json"), "w") as f:
+            json.dump({"files": {}, "package": None}, f)
+
+        shutil.rmtree(tmp_dir, ignore_errors=True)
+
+    # is_cached()
+    #
+    # Get whether we have a local cached version of the git commit
+    #
+    # Returns:
+    #   (bool): Whether we are cached or not
+    #
+    def is_cached(self):
+        with Repo(self._get_mirror_dir(), bare=True) as repo:
+            return self.commit.encode() in repo
+
+    # is_resolved()
+    #
+    # Get whether the current git repo is resolved
+    #
+    # Returns:
+    #   (bool): Always true since we always have a commit
+    #
+    def is_resolved(self):
+        return True
+
+    ########################################################
+    #                   Private helpers                    #
+    ########################################################
+
+    # _get_mirror_dir()
+    #
+    # Gets the local mirror directory for this upstream git repository
+    #
+    def _get_mirror_dir(self):
+        if self.repo.endswith(".git"):
+            norm_url = self.repo[:-4]
+        else:
+            norm_url = self.repo
+
+        return os.path.join(
+            self.cargo.get_mirror_directory(),
+            utils.url_directory_name(norm_url) + ".git",
+        )
+
+    # _get_url()
+    #
+    # Gets the git URL to download this crate from
+    #
+    # Args:
+    #    alias (str|None): The URL alias to apply, if any
+    #
+    # Returns:
+    #    (str): The URL for this crate
+    #
+    def _get_url(self, alias=None):
+        return self.cargo.translate_url(self.cargo.repo, alias_override=alias)
+
+    # _mirror_repo()
+    #
+    # Returns the mirror repo, initialized if it doesn not exist yet
+    #
+    # Returns:
+    #    (Repo): The mirror repo crate
+    #
+    def _mirror_repo(self):
+        try:
+            return Repo.init_bare(self._get_mirror_dir(), mkdir=True)
+        except FileExistsError:
+            return Repo(self._get_mirror_dir(), bare=True)
+
+
+# config_inherit_workspace()
+#
+# Adds inherited values to config
+#
+# Args:
+#    config (dict): Crate config
+#    workspace_config (dict): Workspace config
+def config_inherit_workspace(config, workspace_config):
+    workspace_deps = workspace_config.get("dependencies")
+    if workspace_deps is not None:
+        dependencies = []
+        for key in ["dependencies", "dev-dependencies", "build-dependencies"]:
+            if key in config:
+                dependencies.append(config[key])
+
+        if "traget" in config:

Review Comment:
   typo



##########
src/buildstream_plugins/sources/cargo.py:
##########
@@ -294,6 +310,268 @@ def _get_mirror_file(self, sha=None):
         return os.path.join(self._get_mirror_dir(), sha or self.sha)
 
 
+# Locks on repositories for write access
+REPO_LOCKS = {}  # type: dict[str, threading.Lock]
+
+
+# CrateGit()
+#
+# Use a SourceFetcher class to be the per crate helper.
+#
+# This one is for crates fetched from git repositories.
+#
+# Args:
+#    cargo (Cargo): The main Source implementation
+#    name (str): The name of the crate to depend on
+#    version (str): The version of the crate to depend on
+#    repo (str): Repository URL
+#    commit (str): Sha of the git commit
+class CrateGit(SourceFetcher):
+    def __init__(self, cargo, name, version, repo, commit):
+        super().__init__()
+
+        self.cargo = cargo
+        self.name = name
+        self.version = str(version)
+        self.repo = repo
+        self.commit = commit
+        # TODO: Is this right?
+        self.mark_download_url(self.repo)
+
+    ########################################################
+    #     SourceFetcher API method implementations         #
+    ########################################################
+
+    def fetch(self, alias_override=None, **kwargs):
+        lock = REPO_LOCKS.setdefault(self._get_mirror_dir(), threading.Lock())
+
+        with lock, self._mirror_repo() as repo, self.cargo.timed_activity(f"Fetching from {self.repo}"):
+            # TODO: Auth not supported
+            client, path = dulwich.client.get_transport_and_path(self.repo)
+            remote_refs = client.fetch(
+                path,
+                repo,
+                determine_wants=lambda refs, depth=None: [self.commit.encode()],
+                depth=1,
+            )
+
+    ########################################################
+    #        Helper APIs for the Cargo Source to use       #
+    ########################################################
+
+    def ref_node(self):
+        return {"kind": "git", "name": self.name, "version": self.version, "repo": self.repo, "commit": self.commit}
+
+    # stage()
+    #
+    # A delegate method to do the work for a single git repo
+    # in Source.stage().
+    #
+    # Args:
+    #    (directory): The vendor subdirectory to stage to
+    #
+    def stage(self, directory):
+        self.cargo.status(f"Checking out {self.commit}")
+
+        crate_target_dir = os.path.join(directory, f"{self.name}-{self.version}")
+        tmp_dir = os.path.join(directory, f"{self.name}-{self.version}-tmp")
+
+        try:
+            os.mkdir(tmp_dir)
+        except FileExistsError:
+            raise SourceError(
+                f"Cargo doens't support vendoring for identical crate version 
from different sources {self.name} {self.version}"
+            )
+
+        with Repo(self._get_mirror_dir(), bare=True) as mirror:
+            with Repo.init(tmp_dir) as dest:
+                dest.object_store.add_object(mirror[self.commit.encode()])
+                dest.refs[b"HEAD"] = self.commit.encode()
+                dest.update_shallow([self.commit.encode()], [])
+
+            with Repo(tmp_dir, object_store=mirror.object_store) as dest:
+                dest.reset_index()
+
+        # Workspace handling
+        #
+        # When new workspace features are added it is worth checking if
+        # <https://github.com/flatpak/flatpak-builder-tools/blob/HEAD/cargo/flatpak-cargo-generator.py>
+        # has implemented them already. This implementation is inspired by the mentioned source.
+
+        with open(os.path.join(tmp_dir, "Cargo.toml"), "rb") as f:
+            root_toml = tomllib.load(f)
+
+        crates = {}
+
+        if "workspace" in root_toml and "memers":
+            # Find wanted crate inside workspace
+            for member in root_toml["workspace"].get("members", []):
+                for crate_toml_path in glob.glob(os.path.join(tmp_dir, member, "Cargo.toml")):
+                    crate_path = os.path.normpath(os.path.dirname(crate_toml_path))
+
+                    with open(crate_toml_path, "rb") as f:
+                        crate_toml = tomllib.load(f)
+                        crates[crate_toml["package"]["name"]] = {
+                            "config": crate_toml,
+                            "path": crate_path,
+                        }
+
+            crate = crates[self.name]
+            # Apply information inherited from workspace Cargo.toml
+            config_inherit_workspace(crate["config"], root_toml["workspace"])
+
+            with open(os.path.join(crates[self.name]["path"], "Cargo.toml"), "bw") as f:
+                tomli_w.dump(crate["config"], f)
+
+            shutil.move(crate["path"], crate_target_dir)
+        else:
+            # No workspaces involed, just reploy complete dir as is
+            shutil.move(tmp_dir, crate_target_dir)
+
+        # Write .cargo-checksum.json required by cargo vendoring
+        with open(os.path.join(crate_target_dir, ".cargo-checksum.json"), "w") as f:
+            json.dump({"files": {}, "package": None}, f)
+
+        shutil.rmtree(tmp_dir, ignore_errors=True)
+
+    # is_cached()
+    #
+    # Get whether we have a local cached version of the git commit
+    #
+    # Returns:
+    #   (bool): Whether we are cached or not
+    #
+    def is_cached(self):
+        with Repo(self._get_mirror_dir(), bare=True) as repo:
+            return self.commit.encode() in repo
+
+    # is_resolved()
+    #
+    # Get whether the current git repo is resolved
+    #
+    # Returns:
+    #   (bool): Always true since we always have a commit
+    #
+    def is_resolved(self):
+        return True
+
+    ########################################################
+    #                   Private helpers                    #
+    ########################################################
+
+    # _get_mirror_dir()
+    #
+    # Gets the local mirror directory for this upstream git repository
+    #
+    def _get_mirror_dir(self):
+        if self.repo.endswith(".git"):
+            norm_url = self.repo[:-4]
+        else:
+            norm_url = self.repo
+
+        return os.path.join(
+            self.cargo.get_mirror_directory(),
+            utils.url_directory_name(norm_url) + ".git",
+        )
+
+    # _get_url()
+    #
+    # Gets the git URL to download this crate from
+    #
+    # Args:
+    #    alias (str|None): The URL alias to apply, if any
+    #
+    # Returns:
+    #    (str): The URL for this crate
+    #
+    def _get_url(self, alias=None):
+        return self.cargo.translate_url(self.cargo.repo, alias_override=alias)
+
+    # _mirror_repo()
+    #
+    # Returns the mirror repo, initialized if it doesn not exist yet
+    #
+    # Returns:
+    #    (Repo): The mirror repo crate
+    #
+    def _mirror_repo(self):
+        try:
+            return Repo.init_bare(self._get_mirror_dir(), mkdir=True)
+        except FileExistsError:
+            return Repo(self._get_mirror_dir(), bare=True)
+
+
+# config_inherit_workspace()
+#
+# Adds inherited values to config
+#
+# Args:
+#    config (dict): Crate config
+#    workspace_config (dict): Workspace config
+def config_inherit_workspace(config, workspace_config):
+    workspace_deps = workspace_config.get("dependencies")
+    if workspace_deps is not None:
+        dependencies = []
+        for key in ["dependencies", "dev-dependencies", "build-dependencies"]:
+            if key in config:
+                dependencies.append(config[key])
+
+        if "traget" in config:
+            for target in config["target"].values():
+                if "dependencies" in target:
+                    dependencies.append(target["dependencies"])
+
+        for deps in dependencies:
+            inherit_deps(deps, workspace_deps)
+
+    workspace_package = workspace_config.get("package")
+    if workspace_package is not None:
+        inherit_package(config["package"], workspace_package)
+
+
+# inherit_package()
+#
+# Adds inherited [package] entries to config

Review Comment:
   Function docs for this function, and similar ones 
(`config_inherit_workspace`, `inherit_deps`, etc), aren't very helpful.
   
   > Adds inherited [package] entries to config
   
   Inherited from where? Adds to what config?
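   
   As an example of the level of detail I mean (my wording, may need correcting):
   
   ```python3
   # inherit_package()
   #
   # Resolves `key = { workspace = true }` entries in a crate's [package] table
   # by copying the corresponding values from the root Cargo.toml's
   # [workspace.package] table (cargo's "workspace inheritance").
   #
   # Args:
   #    items (dict): The crate's [package] table
   #    workspace_items (dict): The [workspace.package] table of the workspace root
   ```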



##########
src/buildstream_plugins/sources/cargo.py:
##########
@@ -294,6 +308,166 @@ def _get_mirror_file(self, sha=None):
         return os.path.join(self._get_mirror_dir(), sha or self.sha)
 
 
+# Locks on repositories for write access
+REPO_LOCKS = {}
+
+
+# CrateGit()
+#
+# Use a SourceFetcher class to be the per crate helper.
+#
+# This one is for crates fetched from git repositories.
+#
+# Args:
+#    cargo (Cargo): The main Source implementation
+#    name (str): The name of the crate to depend on
+#    version (str): The version of the crate to depend on
+#    repo (str): Repository URL
+#    commit (str): Sha of the git commit
+class CrateGit(SourceFetcher):
+    def __init__(self, cargo, name, version, repo, commit):
+        super().__init__()
+
+        self.cargo = cargo
+        self.name = name
+        self.version = str(version)
+        self.repo = repo
+        self.commit = commit
+        # TODO: Is this right?
+        self.mark_download_url(self.repo)

Review Comment:
   The only real use-case I can think of for an alias here is just to outright 
mirror the entire dependency tree of the project. Here's the simplest way to 
support this, IMO:
   
   The user will set this configuration in their project:
   
   ```yaml
   - kind: cargo
     git_mirror: somealias:
   ```
   
   In your code you'd do something like this (pseudocode):
   ```python3
   def __init__(...):
     ...
     self.mark_download_url(cargo.git_mirror)
     ...
   ```
   
   ```python3
   def _get_url(...):
      if self.cargo.git_mirror is not None:
         mirror = self.cargo.translate_url(self.cargo.git_mirror, ...) # Resolves aliases
         if mirror == "": # Reasoning: this lets generic projects like freedesktop-sdk provide an alias for builders to override, without actually specifying any mirror of its own to use by default
             return self.repo
         protocol, url = split_protocol(self.repo) # ex: "https://github.com/foobar" => "https", "github.com/foobar"
         return urljoin(mirror, "/", protocol, "/", url)
      else:
         return self.repo
   ```
   
   Then make sure to use _get_url when giving a URL to dulwich to handle
   
   So for example, if `git_mirror` is set to 
`our-private-gitlab:cargo-mirrors/` and `our-private-gitlab:` is set to 
`https://gitlab.example.com/`, then: https://github.com/someorg/somecrate will 
be translated into 
https://gitlab.example.com/cargo-mirrors/https/github.com/someorg/somecrate, 
and that's what we'll ultimately fetch.
   
   freedesktop-sdk will set `git_mirror` to `cargo_git_mirror:` by default, and 
`cargo_git_mirror` will be set to "" (blank). Then 
https://github.com/someorg/somecrate will be left untouched: 
`self.cargo.git_mirror` is not none, but once translated it'll resolve to an 
empty string so it'll be ignored.
   
   Some company that builds freedesktop-sdk can override `cargo_git_mirror` to 
be `https://gitlab.example.com/cargo-mirrors/` in their global buildstream.conf 
file, because they are in an environment where they need mirroring. This way 
https://github.com/someorg/somecrate will again be translated into 
https://gitlab.example.com/cargo-mirrors/https/github.com/someorg/somecrate, 
because this time `self.cargo.git_mirror` (again, set to `cargo_git_mirror:` by 
freedesktop-sdk) will get translated into 
`https://gitlab.example.com/cargo-mirrors/`



##########
src/buildstream_plugins/sources/cargo.py:
##########
@@ -294,6 +310,268 @@ def _get_mirror_file(self, sha=None):
         return os.path.join(self._get_mirror_dir(), sha or self.sha)
 
 
+# Locks on repositories for write access
+REPO_LOCKS = {}  # type: dict[str, threading.Lock]
+
+
+# CrateGit()
+#
+# Use a SourceFetcher class to be the per crate helper.
+#
+# This one is for crates fetched from git repositories.
+#
+# Args:
+#    cargo (Cargo): The main Source implementation
+#    name (str): The name of the crate to depend on
+#    version (str): The version of the crate to depend on
+#    repo (str): Repository URL
+#    commit (str): Sha of the git commit
+class CrateGit(SourceFetcher):
+    def __init__(self, cargo, name, version, repo, commit):
+        super().__init__()
+
+        self.cargo = cargo
+        self.name = name
+        self.version = str(version)
+        self.repo = repo
+        self.commit = commit
+        # TODO: Is this right?
+        self.mark_download_url(self.repo)
+
+    ########################################################
+    #     SourceFetcher API method implementations         #
+    ########################################################
+
+    def fetch(self, alias_override=None, **kwargs):
+        lock = REPO_LOCKS.setdefault(self._get_mirror_dir(), threading.Lock())
+
+        with lock, self._mirror_repo() as repo, self.cargo.timed_activity(f"Fetching from {self.repo}"):
+            # TODO: Auth not supported
+            client, path = dulwich.client.get_transport_and_path(self.repo)
+            remote_refs = client.fetch(
+                path,
+                repo,
+                determine_wants=lambda refs, depth=None: [self.commit.encode()],
+                depth=1,
+            )
+
+    ########################################################
+    #        Helper APIs for the Cargo Source to use       #
+    ########################################################
+
+    def ref_node(self):
+        return {"kind": "git", "name": self.name, "version": self.version, "repo": self.repo, "commit": self.commit}
+
+    # stage()
+    #
+    # A delegate method to do the work for a single git repo
+    # in Source.stage().
+    #
+    # Args:
+    #    (directory): The vendor subdirectory to stage to
+    #
+    def stage(self, directory):
+        self.cargo.status(f"Checking out {self.commit}")
+
+        crate_target_dir = os.path.join(directory, f"{self.name}-{self.version}")
+        tmp_dir = os.path.join(directory, f"{self.name}-{self.version}-tmp")
+
+        try:
+            os.mkdir(tmp_dir)
+        except FileExistsError:
+            raise SourceError(
+                f"Cargo doens't support vendoring for identical crate version 
from different sources {self.name} {self.version}"
+            )
+
+        with Repo(self._get_mirror_dir(), bare=True) as mirror:
+            with Repo.init(tmp_dir) as dest:
+                dest.object_store.add_object(mirror[self.commit.encode()])
+                dest.refs[b"HEAD"] = self.commit.encode()
+                dest.update_shallow([self.commit.encode()], [])
+
+            with Repo(tmp_dir, object_store=mirror.object_store) as dest:
+                dest.reset_index()
+
+        # Workspace handling
+        #
+        # When new workspace features are added it is worth checking if
+        # <https://github.com/flatpak/flatpak-builder-tools/blob/HEAD/cargo/flatpak-cargo-generator.py>
+        # has implemented them already. This implementation is inspired by the mentioned source.
+
+        with open(os.path.join(tmp_dir, "Cargo.toml"), "rb") as f:
+            root_toml = tomllib.load(f)
+
+        crates = {}
+
+        if "workspace" in root_toml and "memers":
+            # Find wanted crate inside workspace
+            for member in root_toml["workspace"].get("members", []):
+                for crate_toml_path in glob.glob(os.path.join(tmp_dir, member, "Cargo.toml")):
+                    crate_path = os.path.normpath(os.path.dirname(crate_toml_path))
+
+                    with open(crate_toml_path, "rb") as f:
+                        crate_toml = tomllib.load(f)
+                        crates[crate_toml["package"]["name"]] = {
+                            "config": crate_toml,
+                            "path": crate_path,
+                        }
+
+            crate = crates[self.name]
+            # Apply information inherited from workspace Cargo.toml
+            config_inherit_workspace(crate["config"], root_toml["workspace"])
+
+            with open(os.path.join(crates[self.name]["path"], "Cargo.toml"), "bw") as f:
+                tomli_w.dump(crate["config"], f)
+
+            shutil.move(crate["path"], crate_target_dir)
+        else:
+            # No workspaces involed, just reploy complete dir as is
+            shutil.move(tmp_dir, crate_target_dir)
+
+        # Write .cargo-checksum.json required by cargo vendoring
+        with open(os.path.join(crate_target_dir, ".cargo-checksum.json"), "w") as f:
+            json.dump({"files": {}, "package": None}, f)
+
+        shutil.rmtree(tmp_dir, ignore_errors=True)
+
+    # is_cached()
+    #
+    # Get whether we have a local cached version of the git commit
+    #
+    # Returns:
+    #   (bool): Whether we are cached or not
+    #
+    def is_cached(self):
+        with Repo(self._get_mirror_dir(), bare=True) as repo:
+            return self.commit.encode() in repo
+
+    # is_resolved()
+    #
+    # Get whether the current git repo is resolved
+    #
+    # Returns:
+    #   (bool): Always true since we always have a commit
+    #
+    def is_resolved(self):
+        return True
+
+    ########################################################
+    #                   Private helpers                    #
+    ########################################################
+
+    # _get_mirror_dir()
+    #
+    # Gets the local mirror directory for this upstream git repository
+    #
+    def _get_mirror_dir(self):
+        if self.repo.endswith(".git"):
+            norm_url = self.repo[:-4]
+        else:
+            norm_url = self.repo
+
+        return os.path.join(
+            self.cargo.get_mirror_directory(),
+            utils.url_directory_name(norm_url) + ".git",
+        )
+
+    # _get_url()
+    #
+    # Gets the git URL to download this crate from
+    #
+    # Args:
+    #    alias (str|None): The URL alias to apply, if any
+    #
+    # Returns:
+    #    (str): The URL for this crate
+    #
+    def _get_url(self, alias=None):
+        return self.cargo.translate_url(self.cargo.repo, alias_override=alias)
+
+    # _mirror_repo()
+    #
+    # Returns the mirror repo, initialized if it doesn not exist yet
+    #
+    # Returns:
+    #    (Repo): The mirror repo crate
+    #
+    def _mirror_repo(self):
+        try:
+            return Repo.init_bare(self._get_mirror_dir(), mkdir=True)
+        except FileExistsError:
+            return Repo(self._get_mirror_dir(), bare=True)
+
+
+# config_inherit_workspace()
+#
+# Adds inherited values to config
+#
+# Args:
+#    config (dict): Crate config
+#    workspace_config (dict): Workspace config
+def config_inherit_workspace(config, workspace_config):
+    workspace_deps = workspace_config.get("dependencies")
+    if workspace_deps is not None:
+        dependencies = []
+        for key in ["dependencies", "dev-dependencies", "build-dependencies"]:
+            if key in config:
+                dependencies.append(config[key])
+
+        if "traget" in config:
+            for target in config["target"].values():
+                if "dependencies" in target:
+                    dependencies.append(target["dependencies"])
+
+        for deps in dependencies:
+            inherit_deps(deps, workspace_deps)
+
+    workspace_package = workspace_config.get("package")
+    if workspace_package is not None:
+        inherit_package(config["package"], workspace_package)
+
+
+# inherit_package()
+#
+# Adds inherited [package] entries to config

Review Comment:
   Maybe there's some documentation of the Cargo.toml file that you can 
reference in these function comments, so it's clear what exactly you're doing 
and why? Right now the code is just twiddling some entries in some 
dictionaries, and unless I go looking for docs I have no way to know whether 
it's correct (which by the way I'm not doing for this review; I'll assume that 
you're manipulating the toml files correctly :smile:)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]
