The current implementation does not correctly handle the escaping of object key names. This patch ensures compliance with the AWS S3 documentation [1] for proper key name encoding and character handling.
[1] https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html Signed-off-by: Yifan Zhao <[email protected]> --- lib/remotes/s3.c | 115 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 105 insertions(+), 10 deletions(-) diff --git a/lib/remotes/s3.c b/lib/remotes/s3.c index 8351674..a5b27eb 100644 --- a/lib/remotes/s3.c +++ b/lib/remotes/s3.c @@ -61,11 +61,17 @@ static const char *s3erofs_parse_host(const char *endpoint, const char **schema) return host; } -static void *s3erofs_urlencode(const char *input) +enum s3erofs_urlencode_mode { + S3EROFS_URLENCODE_QUERY_PARAM, + S3EROFS_URLENCODE_S3_KEY, +}; + +static void *s3erofs_urlencode(const char *input, enum s3erofs_urlencode_mode mode) { static const char hex[] = "0123456789ABCDEF"; char *p, *url; int i, c; + bool safe; url = malloc(strlen(input) * 3 + 1); if (!url) @@ -73,13 +79,31 @@ static void *s3erofs_urlencode(const char *input) p = url; for (i = 0; i < strlen(input); ++i) { - c = input[i]; - - // Unreserved characters: A-Z a-z 0-9 - . _ ~ - if (isalpha(c) || isdigit(c) || c == '-' || c == '.' || - c == '_' || c == '~') { + c = (unsigned char)input[i]; + + if (mode == S3EROFS_URLENCODE_S3_KEY) + /* + * AWS S3 safe characters for object key names: + * - Alphanumeric: 0-9 a-z A-Z + * - Special: ! - _ . * ' ( ) + * - Forward slash (/) for hierarchy + * See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html + */ + safe = isalpha(c) || isdigit(c) || c == '!' || c == '-' || + c == '_' || c == '.' || c == '*' || c == '(' || c == ')' || + c == '\'' || c == '/'; + else + /* + * URL encode query parameters + * See: https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html#create-signature-presign-entire-payload + */ + safe = isalpha(c) || isdigit(c) || c == '-' || c == '.' || + c == '_' || c == '~'; + + if (safe) { *p++ = c; } else { + /* URL encode this character */ *p++ = '%'; *p++ = hex[c >> 4]; *p++ = hex[c & 0x0F]; @@ -111,13 +135,13 @@ static int s3erofs_prepare_canonical_query(struct s3erofs_curl_request *req, pairs = calloc(1, sizeof(struct s3erofs_qsort_kv) * params->num); for (i = 0; i < params->num; i++) { - pairs[i].key = s3erofs_urlencode(params->key[i]); + pairs[i].key = s3erofs_urlencode(params->key[i], S3EROFS_URLENCODE_QUERY_PARAM); if (IS_ERR(pairs[i].key)) { ret = PTR_ERR(pairs[i].key); pairs[i].key = NULL; goto out; } - pairs[i].value = s3erofs_urlencode(params->value[i]); + pairs[i].value = s3erofs_urlencode(params->value[i], S3EROFS_URLENCODE_QUERY_PARAM); if (IS_ERR(pairs[i].value)) { ret = PTR_ERR(pairs[i].value); pairs[i].value = NULL; @@ -154,6 +178,7 @@ static int s3erofs_prepare_url(struct s3erofs_curl_request *req, bool slash = false; bool bucket_domain = false; char *url = req->url; + char *encoded_key = NULL; int pos, canonical_uri_pos, i, ret = 0; if (!endpoint) @@ -198,11 +223,18 @@ static int s3erofs_prepare_url(struct s3erofs_curl_request *req, } } if (key) { + encoded_key = s3erofs_urlencode(key, S3EROFS_URLENCODE_S3_KEY); + if (IS_ERR(encoded_key)) { + ret = PTR_ERR(encoded_key); + encoded_key = NULL; + goto err; + } + if (url[pos - 1] == '/') --pos; else slash = true; - pos += snprintf(url + pos, S3EROFS_URL_LEN - pos, "/%s", key); + pos += snprintf(url + pos, S3EROFS_URL_LEN - pos, "/%s", encoded_key); } if (sig == S3EROFS_SIGNATURE_VERSION_2) { @@ -220,7 +252,8 @@ static int s3erofs_prepare_url(struct s3erofs_curl_request *req, i = 1; } i += snprintf(req->canonical_uri + i, S3EROFS_CANONICAL_URI_LEN - i, - "%s%s%s", path, slash ? "/" : "", key ? key : ""); + "%s%s%s", path, slash ? "/" : "", + encoded_key ? encoded_key : ""); } else { i = snprintf(req->canonical_uri, S3EROFS_CANONICAL_URI_LEN, "%s", url + canonical_uri_pos); @@ -241,6 +274,8 @@ static int s3erofs_prepare_url(struct s3erofs_curl_request *req, erofs_dbg("Request canonical_uri %s", req->canonical_uri); err: + if (encoded_key) + free(encoded_key); if (schema != https) free((void *)schema); return ret; @@ -1410,6 +1445,66 @@ static bool test_s3erofs_prepare_url(void) .expected_canonical_v2 = "/bucket/object.txt", .expected_canonical_v4 = "/object.txt", .expected_ret = 0, + }, + { + .name = "Key with spaces", + .endpoint = "s3.amazonaws.com", + .path = "bucket", + .key = "my folder/my file.txt", + .url_style = S3EROFS_URL_STYLE_VIRTUAL_HOST, + .expected_url = + "https://bucket.s3.amazonaws.com/my%20folder/my%20file.txt", + .expected_canonical_v2 = "/bucket/my%20folder/my%20file.txt", + .expected_canonical_v4 = "/my%20folder/my%20file.txt", + .expected_ret = 0, + }, + { + .name = "Key with special characters (&, $, @, =)", + .endpoint = "s3.amazonaws.com", + .path = "bucket", + .key = "file&name$test@sign=value.txt", + .url_style = S3EROFS_URL_STYLE_PATH, + .expected_url = + "https://s3.amazonaws.com/bucket/file%26name%24test%40sign%3Dvalue.txt", + .expected_canonical_v2 = "/bucket/file%26name%24test%40sign%3Dvalue.txt", + .expected_canonical_v4 = "/bucket/file%26name%24test%40sign%3Dvalue.txt", + .expected_ret = 0, + }, + { + .name = "Key with semicolon, colon, and plus", + .endpoint = "s3.amazonaws.com", + .path = "bucket", + .key = "file;name:test+data.txt", + .url_style = S3EROFS_URL_STYLE_VIRTUAL_HOST, + .expected_url = + "https://bucket.s3.amazonaws.com/file%3Bname%3Atest%2Bdata.txt", + .expected_canonical_v2 = "/bucket/file%3Bname%3Atest%2Bdata.txt", + .expected_canonical_v4 = "/file%3Bname%3Atest%2Bdata.txt", + .expected_ret = 0, + }, + { + .name = "Key with comma and question mark", + .endpoint = "s3.amazonaws.com", + .path = "bucket", + .key = "file,name?query.txt", + .url_style = S3EROFS_URL_STYLE_PATH, + .expected_url = + "https://s3.amazonaws.com/bucket/file%2Cname%3Fquery.txt", + .expected_canonical_v2 = "/bucket/file%2Cname%3Fquery.txt", + .expected_canonical_v4 = "/bucket/file%2Cname%3Fquery.txt", + .expected_ret = 0, + }, + { + .name = "Key with multiple special characters", + .endpoint = "s3.amazonaws.com", + .path = "bucket", + .key = "path/to/file name & [email protected]", + .url_style = S3EROFS_URL_STYLE_VIRTUAL_HOST, + .expected_url = + "https://bucket.s3.amazonaws.com/path/to/file%20name%20%26%20data%402024.txt", + .expected_canonical_v2 = "/bucket/path/to/file%20name%20%26%20data%402024.txt", + .expected_canonical_v4 = "/path/to/file%20name%20%26%20data%402024.txt", + .expected_ret = 0, } }; -- 2.47.3
