On 2025/11/21 17:00, Gao Xiang wrote:

Hi Yifan,

On 2025/11/20 17:22, Yifan Zhao wrote:
This patch introduces support for AWS Signature Version 4 for s3erofs
remote backend.

Now users can specify the following options:
  - passwd_file=Y, S3 credentials file in the format $ak:$sk (optional);
  - urlstyle=<vhost, path>, S3 API calling style (optional);
  - sig=<2,4>, S3 API signature version (optional);
  - region=W, region code for S3 endpoint (required for sig=4).

e.g.:
mkfs.erofs \
     --s3=s3.us-east-1.amazonaws.com,sig=4,region=us-east-1 \
     output.img some_bucket/path/to/object

Thanks for the effort!

Could we find a public s3 bucket and post here as an example?

Hi Xiang,

I have found *noaa-goes19.s3.amazonaws.com* and updated the commit message.



Signed-off-by: Yifan Zhao <[email protected]>
---
  lib/liberofs_s3.h |   1 +
  lib/remotes/s3.c  | 567 +++++++++++++++++++++++++++++++++++++---------
  mkfs/main.c       |  14 +-
  3 files changed, 471 insertions(+), 111 deletions(-)

diff --git a/lib/liberofs_s3.h b/lib/liberofs_s3.h
index f2ec822..f4886cd 100644
--- a/lib/liberofs_s3.h
+++ b/lib/liberofs_s3.h
@@ -27,6 +27,7 @@ enum s3erofs_signature_version {
  struct erofs_s3 {
      void *easy_curl;
      const char *endpoint;
+    const char *region;
      char access_key[S3_ACCESS_KEY_LEN + 1];
      char secret_key[S3_SECRET_KEY_LEN + 1];
  diff --git a/lib/remotes/s3.c b/lib/remotes/s3.c
index 0f7e1a9..3263dd7 100644
--- a/lib/remotes/s3.c
+++ b/lib/remotes/s3.c
@@ -23,7 +23,8 @@
  #define S3EROFS_PATH_MAX        1024
  #define S3EROFS_MAX_QUERY_PARAMS    16
  #define S3EROFS_URL_LEN            8192
-#define S3EROFS_CANONICAL_QUERY_LEN    2048
+#define S3EROFS_CANONICAL_URI_LEN    1024

Is there a spec to document that?

Sorry, I made a mistake. The AWS documentation [1] explicitly specifies

that the maximum key length is 1024 bytes; therefore, setting the length

here to 1024 is unreasonable. I've reverted it back to 2048.

[1] https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html


+#define S3EROFS_CANONICAL_QUERY_LEN S3EROFS_URL_LEN
    #define BASE64_ENCODE_LEN(len)    (((len + 2) / 3) * 4)
  @@ -34,52 +35,142 @@ struct s3erofs_query_params {
  };
    struct s3erofs_curl_request {
-    const char *method;

It seems it's removed... S3 only allows `GET` method?

In our foreseeable usage scenarios, we will not modify the OBS bucket;

therefore, I believe we can use only the GET method to reduce the

number of unnecessary parameters.


      char url[S3EROFS_URL_LEN];
+    char canonical_uri[S3EROFS_CANONICAL_URI_LEN];
      char canonical_query[S3EROFS_CANONICAL_QUERY_LEN];
  };
  +static const char *s3erofs_parse_host(const char *endpoint, const char **schema) {

K&R style is:

static const char *s3erofs_parse_host()
{
    if (!tmp) {
        ...
    } else {
        ...
    }

}

Fixed.
+    const char *tmp = strstr(endpoint, "://");
+    const char *host;
+
+    if (!tmp) {
+        host = endpoint;
+        if (schema)
+            *schema = NULL;
+    } else {
+        host = tmp + sizeof("://") - 1;
+        if (schema) {
+            *schema = strndup(endpoint, host - endpoint);
+            if (!*schema)
+                return ERR_PTR(-ENOMEM);
+        }
+    }
+
+    return host;
+}
+
+static int s3erofs_urlencode(const char *input, char **output)
+{

static void *s3erofs_urlencode(const char *input)
{
    char *output;

    output = malloc(strlen(input) * 3 + 1);
    if (!output)
        return ERR_PTR(-ENOMEM);

    ...

    return output;

}

Fixed.
+    static const char hex[] = "0123456789ABCDEF";
+    int i;
+    char c, *p;
+
+    *output = malloc(strlen(input) * 3 + 1);
+    if (!*output)
+        return -ENOMEM;
+
+    p = *output;
+    for (i = 0; i < strlen(input); ++i) {
+        c = (unsigned char)input[i];
+
+        // Unreserved characters: A-Z a-z 0-9 - . _ ~
+        if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
+            (c >= '0' && c <= '9') || c == '-' || c == '.' || c == '_' ||
+            c == '~') {
+            *p++ = c;
+        } else {
+            *p++ = '%';
+            *p++ = hex[c >> 4];
+            *p++ = hex[c & 0x0F];
+        }
+    }
+    *p = '\0';
+
+    return 0;
+}
+
+struct kv_pair {
+    char *key;
+    char *value;
+};
+
+static int compare_kv_pair(const void *a, const void *b)
+{
+    return strcmp(((const struct kv_pair *)a)->key, ((const struct kv_pair *)b)->key);
+}
+
+static int s3erofs_prepare_canonical_query(struct s3erofs_curl_request *req,
+                       struct s3erofs_query_params *params)
+{
+    struct kv_pair *pairs;
+    int i, pos = 0, ret = 0;
+
+    if (params->num == 0)

    if (!params->num) {
    }

Fixed.
+        return 0;
+
+    pairs = malloc(sizeof(struct kv_pair) * params->num);
+    for (i = 0; i < params->num; i++) {
+        ret = s3erofs_urlencode(params->key[i], &pairs[i].key);
+        if (ret < 0)
+            goto out;
+        ret = s3erofs_urlencode(params->value[i], &pairs[i].value);

Why we use urlencoding now?

According to AWS docs [2], urlencoding is needed when calculating the *CanonicalQueryString*,

and a custom UriEncode function is recommended to cover the (potential) semantic difference.

[2] https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html


Thanks,

Yifan Zhao

Thanks,
Gao Xiang

Reply via email to