Kurtiscwright commented on code in PR #2311: URL: https://github.com/apache/iceberg-rust/pull/2311#discussion_r3156687671
########## crates/catalog/rest/src/catalog.rs: ########## @@ -52,6 +53,12 @@ pub const REST_CATALOG_PROP_URI: &str = "uri"; pub const REST_CATALOG_PROP_WAREHOUSE: &str = "warehouse"; /// Disable header redaction in error logs (defaults to false for security) pub const REST_CATALOG_PROP_DISABLE_HEADER_REDACTION: &str = "disable-header-redaction"; +/// Enable AWS SigV4 signing for REST catalog requests +pub const REST_CATALOG_PROP_SIGV4_ENABLED: &str = "rest.sigv4-enabled"; +/// The AWS service name to use for SigV4 signing (e.g. "s3", "execute-api") +pub const REST_CATALOG_PROP_SIGNING_NAME: &str = "rest.signing-name"; +/// The AWS region to use for SigV4 signing (e.g. "us-east-1") +pub const REST_CATALOG_PROP_SIGNING_REGION: &str = "rest.signing-region"; Review Comment: Nitpick: Should the three SigV4 const variables, above this comment, live in `signing.rs`? They are AWS specific, and keeping them next to the signer would make the signing module self-contained as more auth schemes are added later. ########## crates/catalog/rest/src/signing.rs: ########## @@ -0,0 +1,263 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! HTTP request signing for the REST catalog. + +use std::fmt::Debug; +use std::sync::Arc; + +use async_trait::async_trait; +use iceberg::{Error, ErrorKind, Result}; +use reqsign_aws_v4::{Credential, DefaultCredentialProvider, RequestSigner}; +use reqsign_core::{Context, HttpSend, OsEnv, ProvideCredential, Signer}; +use reqsign_file_read_tokio::TokioFileRead; +use reqwest::{Client, Request}; + +use crate::RestCatalogConfig; + +/// A trait for signing HTTP requests. +#[async_trait] +pub trait HttpRequestSigner: Send + Sync + Debug { + /// Sign the request by modifying its headers (and potentially other parts). + async fn sign(&self, parts: &mut http::request::Parts) -> Result<()>; + + /// Sign a full [`reqwest::Request`] by converting it to [`http::request::Parts`] and back. + async fn sign_request(&self, mut request: Request) -> Result<Request> { + let (mut parts, _) = http::Request::builder() + .method(request.method().clone()) + .uri(request.url().as_str()) + .body(()) + .expect("request parts derived from a valid request") + .into_parts(); + // Compute the SHA256 hash of the request body for the content hash header. + // Some AWS services (e.g. Glue) reject UNSIGNED-PAYLOAD. + let body_bytes = request.body().and_then(|b| b.as_bytes()).unwrap_or(&[]); + parts.headers.insert( + http::HeaderName::from_static("x-amz-content-sha256"), + http::HeaderValue::from_str(&reqsign_core::hash::hex_sha256(body_bytes)) + .expect("hex string is valid header value"), + ); + self.sign(&mut parts).await?; + // Merge signing headers into the original request, preserving + // application headers (content-type, user-agent, etc.) that + // should not participate in signing. + request.headers_mut().extend(parts.headers); + Ok(request) + } +} + +/// Try to build a request signer from the config, or return `None` if signing is not enabled. +impl TryFrom<&RestCatalogConfig> for Option<Arc<dyn HttpRequestSigner>> { + type Error = Error; + + fn try_from(cfg: &RestCatalogConfig) -> Result<Self> { + if !cfg.sigv4_enabled() { + return Ok(None); + } + let signing_region = cfg.signing_region().ok_or_else(|| { + Error::new( + ErrorKind::DataInvalid, + format!( + "'{}' is required when '{}' is true", + crate::REST_CATALOG_PROP_SIGNING_REGION, + crate::REST_CATALOG_PROP_SIGV4_ENABLED, + ), + ) + })?; + let signing_name = cfg.signing_name().ok_or_else(|| { + Error::new( + ErrorKind::DataInvalid, + format!( + "'{}' is required when '{}' is true", + crate::REST_CATALOG_PROP_SIGNING_NAME, + crate::REST_CATALOG_PROP_SIGV4_ENABLED, + ), + ) + })?; + Ok(Some(Arc::new(SigV4Signer::new( + cfg.client().unwrap_or_default(), + signing_name, + signing_region, + )))) + } +} + +/// The HttpRequestSigner implementation for AWS SigV4 +#[derive(Debug)] +pub struct SigV4Signer { + /// The inner reqwest signer with an AWS credential loader. + inner: Signer<Credential>, +} + +impl SigV4Signer { + /// Create a new SigV4 signer using the default AWS credential chain + /// (env vars, profiles, IMDS, etc.). + pub fn new(client: Client, service: &str, region: &str) -> Self { + Self::with_credential_provider(client, service, region, DefaultCredentialProvider::new()) + } + + /// Create a new SigV4 signer with a custom credential provider. + pub fn with_credential_provider( + client: Client, + service: &str, + region: &str, + credential_provider: impl ProvideCredential<Credential = Credential>, + ) -> Self { + let ctx = Context::new() + .with_file_read(TokioFileRead) + .with_http_send(ReqwestHttpSend(client)) + .with_env(OsEnv); + let signer = RequestSigner::new(service, region); + Self { + inner: Signer::new(ctx, credential_provider, signer), + } + } +} + +#[async_trait] +impl HttpRequestSigner for SigV4Signer { + async fn sign(&self, parts: &mut http::request::Parts) -> Result<()> { + // Patch the URI for signing; reqsign-aws-v4 workaround. + let uri = parts.uri.clone(); + parts.uri = patch_uri_for_signing(&uri)?; + // Sign with the patched URI. + self.inner.sign(parts, None).await.map_err(|e| { + Error::new(ErrorKind::Unexpected, "Failed to sign request with SigV4").with_source(e) + })?; + // Restore the original URI in the request; signing should only modify headers. + parts.uri = uri; + Ok(()) + } +} + +/// Pre-encode percent signs in the URI path for correct SigV4 canonical URI computation. +/// +/// Workaround for a bug in `reqsign-aws-v4` where `canonical_request_string` decodes +/// percent-encoded path segments then re-encodes them, losing the double-encoding that +/// AWS SigV4 requires. By replacing `%` with `%25` before signing, reqsign's +/// decode→reencode cycle produces the correct double-encoded form. +/// +/// TODO: remove once fixed upstream in apache/opendal (reqsign-aws-v4). Review Comment: Is there an upstream issue tracking this in apache/opendal? ########## crates/catalog/rest/src/signing.rs: ########## @@ -0,0 +1,263 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! HTTP request signing for the REST catalog. + +use std::fmt::Debug; +use std::sync::Arc; + +use async_trait::async_trait; +use iceberg::{Error, ErrorKind, Result}; +use reqsign_aws_v4::{Credential, DefaultCredentialProvider, RequestSigner}; +use reqsign_core::{Context, HttpSend, OsEnv, ProvideCredential, Signer}; +use reqsign_file_read_tokio::TokioFileRead; +use reqwest::{Client, Request}; + +use crate::RestCatalogConfig; + +/// A trait for signing HTTP requests. +#[async_trait] +pub trait HttpRequestSigner: Send + Sync + Debug { Review Comment: The HttpRequestSigner trait is named generically, which suggests future signer implementations may be added. If that is the intent, the config-to-signer logic in RestCatalogConfig::signer (and the TryFrom impl it delegates to) currently only recognizes rest.sigv4-enabled and builds a SigV4 signer. Adding a second signer later would then force if-else chaining (if cfg.sigv4_enabled() { ... } else if cfg.azure_enabled() { ... }) which isn't ergonomic. Alternatively a second signer could refactor to a single rest.signer-type discriminant property plus per-type namespaces, but would be a breaking change for anyone already using the SigV4 properties. The current shape works fine for SigV4 but may surprise a future contributor who adds a second signer and finds their requests silently unsigned because the SigV4 check fell through first. If SigV4 is the only signer this crate will ship, and other signers are expected to come in through the with_signer(...) method on RestCatalogBuilder, that is a reasonable position too. In that case the trait name could be tightened to something like SigV4RequestSigner to signal that intent. ########## crates/catalog/rest/src/signing.rs: ########## @@ -0,0 +1,263 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! HTTP request signing for the REST catalog. + +use std::fmt::Debug; +use std::sync::Arc; + +use async_trait::async_trait; +use iceberg::{Error, ErrorKind, Result}; +use reqsign_aws_v4::{Credential, DefaultCredentialProvider, RequestSigner}; +use reqsign_core::{Context, HttpSend, OsEnv, ProvideCredential, Signer}; +use reqsign_file_read_tokio::TokioFileRead; +use reqwest::{Client, Request}; + +use crate::RestCatalogConfig; + +/// A trait for signing HTTP requests. +#[async_trait] +pub trait HttpRequestSigner: Send + Sync + Debug { + /// Sign the request by modifying its headers (and potentially other parts). + async fn sign(&self, parts: &mut http::request::Parts) -> Result<()>; + + /// Sign a full [`reqwest::Request`] by converting it to [`http::request::Parts`] and back. + async fn sign_request(&self, mut request: Request) -> Result<Request> { + let (mut parts, _) = http::Request::builder() + .method(request.method().clone()) + .uri(request.url().as_str()) + .body(()) + .expect("request parts derived from a valid request") + .into_parts(); + // Compute the SHA256 hash of the request body for the content hash header. + // Some AWS services (e.g. Glue) reject UNSIGNED-PAYLOAD. + let body_bytes = request.body().and_then(|b| b.as_bytes()).unwrap_or(&[]); + parts.headers.insert( + http::HeaderName::from_static("x-amz-content-sha256"), + http::HeaderValue::from_str(&reqsign_core::hash::hex_sha256(body_bytes)) + .expect("hex string is valid header value"), + ); + self.sign(&mut parts).await?; + // Merge signing headers into the original request, preserving + // application headers (content-type, user-agent, etc.) that + // should not participate in signing. + request.headers_mut().extend(parts.headers); + Ok(request) + } +} + +/// Try to build a request signer from the config, or return `None` if signing is not enabled. +impl TryFrom<&RestCatalogConfig> for Option<Arc<dyn HttpRequestSigner>> { + type Error = Error; + + fn try_from(cfg: &RestCatalogConfig) -> Result<Self> { + if !cfg.sigv4_enabled() { + return Ok(None); + } + let signing_region = cfg.signing_region().ok_or_else(|| { + Error::new( + ErrorKind::DataInvalid, + format!( + "'{}' is required when '{}' is true", + crate::REST_CATALOG_PROP_SIGNING_REGION, + crate::REST_CATALOG_PROP_SIGV4_ENABLED, + ), + ) + })?; + let signing_name = cfg.signing_name().ok_or_else(|| { + Error::new( + ErrorKind::DataInvalid, + format!( + "'{}' is required when '{}' is true", + crate::REST_CATALOG_PROP_SIGNING_NAME, + crate::REST_CATALOG_PROP_SIGV4_ENABLED, + ), + ) + })?; + Ok(Some(Arc::new(SigV4Signer::new( + cfg.client().unwrap_or_default(), + signing_name, + signing_region, + )))) + } +} + +/// The HttpRequestSigner implementation for AWS SigV4 +#[derive(Debug)] +pub struct SigV4Signer { + /// The inner reqwest signer with an AWS credential loader. + inner: Signer<Credential>, +} + +impl SigV4Signer { + /// Create a new SigV4 signer using the default AWS credential chain + /// (env vars, profiles, IMDS, etc.). + pub fn new(client: Client, service: &str, region: &str) -> Self { + Self::with_credential_provider(client, service, region, DefaultCredentialProvider::new()) + } + + /// Create a new SigV4 signer with a custom credential provider. + pub fn with_credential_provider( + client: Client, + service: &str, + region: &str, + credential_provider: impl ProvideCredential<Credential = Credential>, + ) -> Self { + let ctx = Context::new() + .with_file_read(TokioFileRead) + .with_http_send(ReqwestHttpSend(client)) + .with_env(OsEnv); + let signer = RequestSigner::new(service, region); + Self { + inner: Signer::new(ctx, credential_provider, signer), + } + } +} + +#[async_trait] +impl HttpRequestSigner for SigV4Signer { + async fn sign(&self, parts: &mut http::request::Parts) -> Result<()> { + // Patch the URI for signing; reqsign-aws-v4 workaround. + let uri = parts.uri.clone(); + parts.uri = patch_uri_for_signing(&uri)?; + // Sign with the patched URI. + self.inner.sign(parts, None).await.map_err(|e| { + Error::new(ErrorKind::Unexpected, "Failed to sign request with SigV4").with_source(e) + })?; + // Restore the original URI in the request; signing should only modify headers. + parts.uri = uri; + Ok(()) + } +} + +/// Pre-encode percent signs in the URI path for correct SigV4 canonical URI computation. +/// +/// Workaround for a bug in `reqsign-aws-v4` where `canonical_request_string` decodes +/// percent-encoded path segments then re-encodes them, losing the double-encoding that +/// AWS SigV4 requires. By replacing `%` with `%25` before signing, reqsign's +/// decode→reencode cycle produces the correct double-encoded form. +/// +/// TODO: remove once fixed upstream in apache/opendal (reqsign-aws-v4). +fn patch_uri_for_signing(uri: &http::Uri) -> Result<http::Uri> { + let path = uri.path().replace('%', "%25"); + let paq = if let Some(query) = uri.query() { + format!("{path}?{query}") + } else { + path + }; + let mut parts = uri.clone().into_parts(); + parts.path_and_query = Some(paq.parse().map_err(|e| { + Error::new(ErrorKind::Unexpected, "failed to rebuild URI for signing").with_source(e) + })?); + http::Uri::from_parts(parts).map_err(|e| { + Error::new(ErrorKind::Unexpected, "failed to rebuild URI for signing").with_source(e) + }) +} + +/// Bridges reqwest 0.12 with `reqsign_core::HttpSend`. +/// +/// The published `reqsign-http-send-reqwest` crate requires reqwest >=0.13, +/// which is incompatible with the workspace, so we provide a minimal adapter. +#[derive(Debug)] +struct ReqwestHttpSend(Client); + +/// Implements `HttpSend` for a reqwest 0.12 client. +impl HttpSend for ReqwestHttpSend { + async fn http_send( + &self, + req: http::Request<bytes::Bytes>, + ) -> reqsign_core::Result<http::Response<bytes::Bytes>> { + let req = Request::try_from(req).map_err(|e| { + reqsign_core::Error::unexpected("failed to convert request").with_source(e) + })?; + let resp = self.0.execute(req).await.map_err(|e| { + reqsign_core::Error::unexpected("failed to send request").with_source(e) + })?; + let status = resp.status(); + let headers = resp.headers().clone(); + let body = resp.bytes().await.map_err(|e| { + reqsign_core::Error::unexpected("failed to read response body").with_source(e) + })?; + let mut response = http::Response::builder() + .status(status) + .body(body) + .map_err(|e| { + reqsign_core::Error::unexpected("failed to build response").with_source(e) + })?; + *response.headers_mut() = headers; + Ok(response) + } +} + +#[cfg(test)] Review Comment: Suggestion (not blocking): would it be worth adding a small test that uses a mock `HttpRequestSigner` to confirm `HttpClient::execute` calls `sign_request` on the signer before executing? A recording mock with a counter would make this cheap and would protect the extension point from silent regression. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
