adamreeve commented on code in PR #9203:
URL: https://github.com/apache/arrow-rs/pull/9203#discussion_r2893682286
##########
parquet/tests/encryption/encryption_async.rs:
##########
@@ -183,113 +323,285 @@ async fn
test_plaintext_footer_read_without_decryption() {
#[tokio::test]
async fn test_non_uniform_encryption() {
- let test_data = arrow::util::test_util::parquet_test_data();
- let path =
format!("{test_data}/encrypt_columns_and_footer.parquet.encrypted");
- let mut file = File::open(&path).await.unwrap();
+ async fn non_uniform_encryption(footer_key: &[u8], column_keys: &[(&str,
&[u8])]) {
+ let path = encryption_util::encrypted_data_path(
+ footer_key,
+ "encrypt_columns_and_footer.parquet.encrypted",
+ );
+ let mut file = File::open(&path).await.unwrap();
- let footer_key = "0123456789012345".as_bytes().to_vec(); // 128bit/16
- let column_1_key = "1234567890123450".as_bytes().to_vec();
- let column_2_key = "1234567890123451".as_bytes().to_vec();
+ let mut builder =
FileDecryptionProperties::builder(footer_key.to_vec());
+ for (column_name, key) in column_keys {
+ builder = builder.with_column_key(column_name, key.to_vec());
+ }
+ let decryption_properties = builder.build().unwrap();
- let decryption_properties =
FileDecryptionProperties::builder(footer_key.to_vec())
- .with_column_key("double_field", column_1_key)
- .with_column_key("float_field", column_2_key)
- .build()
- .unwrap();
+ verify_encryption_test_file_read_async(&mut file,
decryption_properties)
+ .await
+ .unwrap();
+ }
- verify_encryption_test_file_read_async(&mut file, decryption_properties)
- .await
- .unwrap();
+ // AES-128
+ non_uniform_encryption(
+ b"0123456789012345",
+ &[
+ ("double_field", b"1234567890123450".as_slice()),
+ ("float_field", b"1234567890123451".as_slice()),
+ ],
+ )
+ .await;
+
+ // AES-256
+ non_uniform_encryption(
+ b"01234567890123456789012345678901", // 256bit/32
+ &[
+ (
+ "double_field",
+ b"12345678901234567890123456789012".as_slice(),
+ ),
+ (
+ "float_field",
+ b"12345678901234567890123456789013".as_slice(),
+ ),
+ (
+ "boolean_field",
+ b"12345678901234567890123456789014".as_slice(),
+ ),
+ (
+ "int32_field",
+ b"12345678901234567890123456789015".as_slice(),
+ ),
+ ("ba_field", b"12345678901234567890123456789016".as_slice()),
+ ("flba_field", b"12345678901234567890123456789017".as_slice()),
+ (
+ "int64_field",
+ b"12345678901234567890123456789018".as_slice(),
+ ),
+ (
+ "int96_field",
+ b"12345678901234567890123456789019".as_slice(),
+ ),
+ ],
+ )
+ .await;
}
#[tokio::test]
async fn test_uniform_encryption() {
- let test_data = arrow::util::test_util::parquet_test_data();
- let path = format!("{test_data}/uniform_encryption.parquet.encrypted");
- let mut file = File::open(&path).await.unwrap();
+ async fn uniform_encryption(footer_key: &[u8], column_keys: &[(&str,
&[u8])]) {
+ let path = encryption_util::encrypted_data_path(
+ footer_key,
+ "uniform_encryption.parquet.encrypted",
+ );
+ let mut file = File::open(&path).await.unwrap();
- let key_code: &[u8] = "0123456789012345".as_bytes();
- let decryption_properties =
FileDecryptionProperties::builder(key_code.to_vec())
- .build()
- .unwrap();
+ let mut builder =
FileDecryptionProperties::builder(footer_key.to_vec());
+ for (column_name, key) in column_keys {
+ builder = builder.with_column_key(column_name, key.to_vec());
+ }
+ let decryption_properties = builder.build().unwrap();
- verify_encryption_test_file_read_async(&mut file, decryption_properties)
- .await
- .unwrap();
+ verify_encryption_test_file_read_async(&mut file,
decryption_properties)
+ .await
+ .unwrap();
+ }
+
+ // AES-128: there is always a footer key even with a plaintext footer,
+ // but this is used for signing the footer.
+ uniform_encryption(
+ b"0123456789012345", // 128bit/16
+ &[],
+ )
+ .await;
+
+ // AES-256
+ uniform_encryption(
+ b"01234567890123456789012345678901", // 256bit/32
+ &[],
+ )
+ .await;
}
#[tokio::test]
async fn test_aes_ctr_encryption() {
- let test_data = arrow::util::test_util::parquet_test_data();
- let path =
format!("{test_data}/encrypt_columns_and_footer_ctr.parquet.encrypted");
- let mut file = File::open(&path).await.unwrap();
+ async fn aes_ctr_encryption(footer_key: &[u8], column_keys: &[(&str,
&[u8])]) {
+ let path = encryption_util::encrypted_data_path(
+ footer_key,
+ "encrypt_columns_and_footer_ctr.parquet.encrypted",
+ );
+ let mut file = File::open(&path).await.unwrap();
- let footer_key = "0123456789012345".as_bytes().to_vec();
- let column_1_key = "1234567890123450".as_bytes().to_vec();
- //let column_2_key = "1234567890123451".as_bytes().to_vec();
+ let mut builder =
FileDecryptionProperties::builder(footer_key.to_vec());
+ for (column_name, key) in column_keys {
+ builder = builder.with_column_key(column_name, key.to_vec());
+ }
+ let decryption_properties = builder.build().unwrap();
- let decryption_properties = FileDecryptionProperties::builder(footer_key)
- .with_column_key("double_field", column_1_key.clone())
- .with_column_key("float_field", column_1_key)
- .build()
- .unwrap();
+ let options =
+
ArrowReaderOptions::new().with_file_decryption_properties(decryption_properties);
+ let metadata = ArrowReaderMetadata::load_async(&mut file,
options).await;
- let options =
ArrowReaderOptions::new().with_file_decryption_properties(decryption_properties);
- let metadata = ArrowReaderMetadata::load_async(&mut file, options).await;
+ match metadata {
+ Err(ParquetError::NYI(s)) => {
+ assert!(s.contains("AES_GCM_CTR_V1"));
+ }
+ _ => {
+ panic!("Expected ParquetError::NYI");
+ }
+ };
+ }
- match metadata {
- Err(ParquetError::NYI(s)) => {
- assert!(s.contains("AES_GCM_CTR_V1"));
- }
- _ => {
- panic!("Expected ParquetError::NYI");
- }
- };
+ // AES-128
+ aes_ctr_encryption(
+ b"0123456789012345", // 128bit/16
+ &[
+ ("double_field", b"1234567890123450".as_slice()),
+ ("float_field", b"1234567890123451".as_slice()),
+ ],
+ )
+ .await;
+
+ // AES-256
+ aes_ctr_encryption(
+ b"01234567890123456789012345678901", // 256bit/32
+ &[
+ (
+ "double_field",
+ b"12345678901234567890123456789012".as_slice(),
+ ),
+ (
+ "float_field",
+ b"12345678901234567890123456789013".as_slice(),
+ ),
+ ],
+ )
+ .await;
}
#[tokio::test]
async fn test_decrypting_without_decryption_properties_fails() {
let test_data = arrow::util::test_util::parquet_test_data();
- let path = format!("{test_data}/uniform_encryption.parquet.encrypted");
- let mut file = File::open(&path).await.unwrap();
+ let paths = [
+ format!("{test_data}/uniform_encryption.parquet.encrypted"),
+ format!("{test_data}/aes256/uniform_encryption.parquet.encrypted"),
+ ];
- let options = ArrowReaderOptions::new();
- let result = ArrowReaderMetadata::load_async(&mut file, options).await;
- assert!(result.is_err());
- assert_eq!(
- result.unwrap_err().to_string(),
- "Parquet error: Parquet file has an encrypted footer but decryption
properties were not provided"
- );
+ for path in &paths {
+ let mut file = File::open(&path).await.unwrap();
+
+ let options = ArrowReaderOptions::new();
+ let result = ArrowReaderMetadata::load_async(&mut file, options).await;
+ assert!(result.is_err());
+ assert_eq!(
+ result.unwrap_err().to_string(),
+ "Parquet error: Parquet file has an encrypted footer but
decryption properties were not provided"
+ );
+ }
}
#[tokio::test]
async fn test_write_non_uniform_encryption() {
- let testdata = arrow::util::test_util::parquet_test_data();
- let path =
format!("{testdata}/encrypt_columns_and_footer.parquet.encrypted");
+ async fn write_non_uniform_encryption(
+ footer_key: &[u8],
+ column_names: Vec<&str>,
+ column_keys: Vec<Vec<u8>>,
+ encryption_column_keys: &[(&str, &[u8])],
+ ) {
+ let path = encryption_util::encrypted_data_path(
+ footer_key,
+ "encrypt_columns_and_footer.parquet.encrypted",
+ );
+
+ let decryption_properties =
FileDecryptionProperties::builder(footer_key.to_vec())
+ .with_column_keys(column_names.to_vec(), column_keys.clone())
+ .unwrap()
+ .build()
+ .unwrap();
- let footer_key = b"0123456789012345".to_vec(); // 128bit/16
- let column_names = vec!["double_field", "float_field"];
- let column_keys = vec![b"1234567890123450".to_vec(),
b"1234567890123451".to_vec()];
+ let mut builder =
FileEncryptionProperties::builder(footer_key.to_vec());
+ for (column_name, key) in encryption_column_keys {
+ builder = builder.with_column_key(column_name, key.to_vec());
+ }
+ let file_encryption_properties = builder.build().unwrap();
- let decryption_properties =
FileDecryptionProperties::builder(footer_key.clone())
- .with_column_keys(column_names.clone(), column_keys.clone())
- .unwrap()
- .build()
+ read_and_roundtrip_to_encrypted_file_async(
+ &path,
+ decryption_properties,
+ file_encryption_properties,
+ )
+ .await
.unwrap();
+ }
- let file_encryption_properties =
FileEncryptionProperties::builder(footer_key)
- .with_column_keys(column_names, column_keys)
- .unwrap()
- .build()
- .unwrap();
+ write_non_uniform_encryption(
+ b"0123456789012345", // 128bit/16,
+ vec!["double_field", "float_field"],
+ vec![b"1234567890123450".to_vec(), b"1234567890123451".to_vec()],
+ &[
+ ("double_field", b"1234567890123450".as_slice()),
+ ("float_field", b"1234567890123451".as_slice()),
+ ],
+ )
+ .await;
- read_and_roundtrip_to_encrypted_file_async(
- &path,
- decryption_properties,
- file_encryption_properties,
+ // AES-256
+ // The asymmetric column names is because we check column paths in the
validate_encrypted_column_names function of [encryption::encrypt]
Review Comment:
Hi @hsiang-c. This doesn't look like expected behaviour to me, you shouldn't
have to specify the same encryption key for both `int64_field` and
`int64_field.list.int64_field`. I would expect that the full path should be
used everywhere.
I'm also surprised that the path is `int64_field.list.int64_field`, the path
should be `int64_field.list.element` according to the [spec for the list
logical
type](https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists).
Is this another bug related to encryption, or does arrow-rs differ from the
spec here?
We need to support the use case where different struct fields can be
encrypted with different keys, so I don't think your suggested fix to change to
using `c.name()` instead of `c.path()` is correct, although I'm not sure where
you're suggesting to change that.
But we may want to allow simpler encryption configuration using the top
level field names as a new feature. C++ Arrow has recently added that for
example: https://github.com/apache/arrow/pull/45462
I agree that this isn't related to the current PR so can be fixed separately.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]