Hi Yifan,
On 2025/11/20 17:22, Yifan Zhao wrote:
This patch introduces support for AWS Signature Version 4 for s3erofs
remote backend.
Now users can specify the following options:
- passwd_file=Y, S3 credentials file in the format $ak:$sk (optional);
- urlstyle=<vhost, path>, S3 API calling style (optional);
- sig=<2,4>, S3 API signature version (optional);
- region=W, region code for S3 endpoint (required for sig=4).
e.g.:
mkfs.erofs \
--s3=s3.us-east-1.amazonaws.com,sig=4,region=us-east-1 \
output.img some_bucket/path/to/object
Thanks for the effort!
Could we find a public S3 bucket and post it here as an example?
Signed-off-by: Yifan Zhao <[email protected]>
---
lib/liberofs_s3.h | 1 +
lib/remotes/s3.c | 567 +++++++++++++++++++++++++++++++++++++---------
mkfs/main.c | 14 +-
3 files changed, 471 insertions(+), 111 deletions(-)
diff --git a/lib/liberofs_s3.h b/lib/liberofs_s3.h
index f2ec822..f4886cd 100644
--- a/lib/liberofs_s3.h
+++ b/lib/liberofs_s3.h
@@ -27,6 +27,7 @@ enum s3erofs_signature_version {
struct erofs_s3 {
void *easy_curl;
const char *endpoint;
+ const char *region;
char access_key[S3_ACCESS_KEY_LEN + 1];
char secret_key[S3_SECRET_KEY_LEN + 1];
diff --git a/lib/remotes/s3.c b/lib/remotes/s3.c
index 0f7e1a9..3263dd7 100644
--- a/lib/remotes/s3.c
+++ b/lib/remotes/s3.c
@@ -23,7 +23,8 @@
#define S3EROFS_PATH_MAX 1024
#define S3EROFS_MAX_QUERY_PARAMS 16
#define S3EROFS_URL_LEN 8192
-#define S3EROFS_CANONICAL_QUERY_LEN 2048
+#define S3EROFS_CANONICAL_URI_LEN 1024
Is there a spec that documents this limit?
+#define S3EROFS_CANONICAL_QUERY_LEN S3EROFS_URL_LEN
#define BASE64_ENCODE_LEN(len) (((len + 2) / 3) * 4)
@@ -34,52 +35,142 @@ struct s3erofs_query_params {
};
struct s3erofs_curl_request {
- const char *method;
It seems `method` is removed here... Does S3 only allow the `GET` method?
char url[S3EROFS_URL_LEN];
+ char canonical_uri[S3EROFS_CANONICAL_URI_LEN];
char canonical_query[S3EROFS_CANONICAL_QUERY_LEN];
};
+static const char *s3erofs_parse_host(const char *endpoint, const char **schema) {
K&R style is:
static const char *s3erofs_parse_host()
{
if (!tmp) {
...
} else {
...
}
}
+ const char *tmp = strstr(endpoint, "://");
+ const char *host;
+
+ if (!tmp) {
+ host = endpoint;
+ if (schema)
+ *schema = NULL;
+ } else {
+ host = tmp + sizeof("://") - 1;
+ if (schema) {
+ *schema = strndup(endpoint, host - endpoint);
+ if (!*schema)
+ return ERR_PTR(-ENOMEM);
+ }
+ }
+
+ return host;
+}
+
+static int s3erofs_urlencode(const char *input, char **output)
+{
static void *s3erofs_urlencode(const char *input)
{
char *output;
output = malloc(strlen(input) * 3 + 1);
if (!output)
return ERR_PTR(-ENOMEM);
...
return output;
}
+ static const char hex[] = "0123456789ABCDEF";
+ int i;
+ char c, *p;
+
+ *output = malloc(strlen(input) * 3 + 1);
+ if (!*output)
+ return -ENOMEM;
+
+ p = *output;
+ for (i = 0; i < strlen(input); ++i) {
+ c = (unsigned char)input[i];
+
+ // Unreserved characters: A-Z a-z 0-9 - . _ ~
+ if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
+ (c >= '0' && c <= '9') || c == '-' || c == '.' || c == '_'
||
+ c == '~') {
+ *p++ = c;
+ } else {
+ *p++ = '%';
+ *p++ = hex[c >> 4];
+ *p++ = hex[c & 0x0F];
+ }
+ }
+ *p = '\0';
+
+ return 0;
+}
+
+struct kv_pair {
+ char *key;
+ char *value;
+};
+
+static int compare_kv_pair(const void *a, const void *b)
+{
+ return strcmp(((const struct kv_pair *)a)->key, ((const struct kv_pair
*)b)->key);
+}
+
+static int s3erofs_prepare_canonical_query(struct s3erofs_curl_request *req,
+ struct s3erofs_query_params *params)
+{
+ struct kv_pair *pairs;
+ int i, pos = 0, ret = 0;
+
+ if (params->num == 0)
if (!params->num) {
}
+ return 0;
+
+ pairs = malloc(sizeof(struct kv_pair) * params->num);
+ for (i = 0; i < params->num; i++) {
+ ret = s3erofs_urlencode(params->key[i], &pairs[i].key);
+ if (ret < 0)
+ goto out;
+ ret = s3erofs_urlencode(params->value[i], &pairs[i].value);
Why do we use URL encoding now?
Thanks,
Gao Xiang