This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 11158ba8cf Support writing encrypted Parquet files with plaintext 
footers (#7439)
11158ba8cf is described below

commit 11158ba8cfc471354ed14d04c88c9c631143459a
Author: Rok Mihevc <r...@mihevc.org>
AuthorDate: Thu May 8 12:59:00 2025 +0200

    Support writing encrypted Parquet files with plaintext footers (#7439)
    
    * Initial commit
    
    * Lint and clippy
    
    * Plaintext layout is different to encrypted one
    
    * Lint and expected memory size at decryption
    
    * Apply suggestions from code review
    
    Co-authored-by: Adam Reeve <adre...@gmail.com>
    
    * Review feedback
    
    * Lint
    
    * Update parquet/tests/encryption/encryption.rs
    
    Co-authored-by: Adam Reeve <adre...@gmail.com>
    
    * Review feedback
    
    * Initial commit
    
    * Encrypt plaintext to extract nonce and footer
    
    * fix
    
    * Add encryption algorithm to file_metadata before writing
    
    * fix
    
    * Apply suggestions from code review
    
    Co-authored-by: Adam Reeve <adre...@gmail.com>
    
    * Fix
    
    * lint
    
    * Move get_footer_encryption_algorithm into MetadataObjectWriter
    
    * Fix
    
    * Review feedback
    
    * Avoid unwraps in file_crypto_metada method
    
    * Minor tidy
    
    * Test wrting and reading with a different footer key
    
    ---------
    
    Co-authored-by: Adam Reeve <adre...@gmail.com>
---
 parquet/src/encryption/encrypt.rs      | 27 ++++++++++++++-
 parquet/src/file/metadata/writer.rs    | 62 +++++++++++++++++++++++-----------
 parquet/src/file/writer.rs             |  6 ----
 parquet/tests/encryption/encryption.rs | 52 ++++++++++++++++++++++------
 4 files changed, 111 insertions(+), 36 deletions(-)

diff --git a/parquet/src/encryption/encrypt.rs 
b/parquet/src/encryption/encrypt.rs
index 9a801434c0..c8d3ffc0ee 100644
--- a/parquet/src/encryption/encrypt.rs
+++ b/parquet/src/encryption/encrypt.rs
@@ -17,7 +17,9 @@
 
 //! Configuration and utilities for Parquet Modular Encryption
 
-use crate::encryption::ciphers::{BlockEncryptor, RingGcmBlockEncryptor};
+use crate::encryption::ciphers::{
+    BlockEncryptor, RingGcmBlockEncryptor, NONCE_LEN, SIZE_LEN, TAG_LEN,
+};
 use crate::errors::{ParquetError, Result};
 use crate::file::column_crypto_metadata::{ColumnCryptoMetaData, 
EncryptionWithColumnKey};
 use crate::schema::types::{ColumnDescPtr, SchemaDescriptor};
@@ -374,6 +376,29 @@ pub(crate) fn encrypt_object<T: TSerializable, W: Write>(
     Ok(())
 }
 
+pub(crate) fn write_signed_plaintext_object<T: TSerializable, W: Write>(
+    object: &T,
+    encryptor: &mut Box<dyn BlockEncryptor>,
+    sink: &mut W,
+    module_aad: &[u8],
+) -> Result<()> {
+    let mut buffer: Vec<u8> = vec![];
+    {
+        let mut protocol = TCompactOutputProtocol::new(&mut buffer);
+        object.write_to_out_protocol(&mut protocol)?;
+    }
+    sink.write_all(&buffer)?;
+    buffer = encryptor.encrypt(buffer.as_ref(), module_aad)?;
+
+    // Format of encrypted buffer is: [ciphertext size, nonce, ciphertext, 
authentication tag]
+    let nonce = &buffer[SIZE_LEN..SIZE_LEN + NONCE_LEN];
+    let tag = &buffer[buffer.len() - TAG_LEN..];
+    sink.write_all(nonce)?;
+    sink.write_all(tag)?;
+
+    Ok(())
+}
+
 /// Encrypt a Thrift serializable object to a byte vector
 pub(crate) fn encrypt_object_to_vec<T: TSerializable>(
     object: &T,
diff --git a/parquet/src/file/metadata/writer.rs 
b/parquet/src/file/metadata/writer.rs
index c1fc413144..a01ad5d881 100644
--- a/parquet/src/file/metadata/writer.rs
+++ b/parquet/src/file/metadata/writer.rs
@@ -16,17 +16,21 @@
 // under the License.
 
 #[cfg(feature = "encryption")]
-use crate::encryption::encrypt::{encrypt_object, encrypt_object_to_vec, 
FileEncryptor};
-#[cfg(feature = "encryption")]
-use crate::encryption::modules::{create_footer_aad, create_module_aad, 
ModuleType};
+use crate::encryption::{
+    encrypt::{
+        encrypt_object, encrypt_object_to_vec, write_signed_plaintext_object, 
FileEncryptor,
+    },
+    modules::{create_footer_aad, create_module_aad, ModuleType},
+};
 #[cfg(feature = "encryption")]
 use crate::errors::ParquetError;
 use crate::errors::Result;
 use crate::file::metadata::{KeyValue, ParquetMetaData};
 use crate::file::page_index::index::Index;
 use crate::file::writer::{get_file_magic, TrackedWrite};
+use crate::format::EncryptionAlgorithm;
 #[cfg(feature = "encryption")]
-use crate::format::{AesGcmV1, ColumnCryptoMetaData, EncryptionAlgorithm};
+use crate::format::{AesGcmV1, ColumnCryptoMetaData};
 use crate::format::{ColumnChunk, ColumnIndex, FileMetaData, OffsetIndex, 
RowGroup};
 use crate::schema::types;
 use crate::schema::types::{SchemaDescPtr, SchemaDescriptor, TypePtr};
@@ -149,7 +153,7 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> {
             schema: types::to_thrift(self.schema.as_ref())?,
             created_by: self.created_by.clone(),
             column_orders,
-            encryption_algorithm: None,
+            encryption_algorithm: 
self.object_writer.get_footer_encryption_algorithm(),
             footer_signing_key_metadata: None,
         };
 
@@ -474,6 +478,10 @@ impl MetadataObjectWriter {
     pub fn get_file_magic(&self) -> &[u8; 4] {
         get_file_magic()
     }
+
+    fn get_footer_encryption_algorithm(&self) -> Option<EncryptionAlgorithm> {
+        None
+    }
 }
 
 /// Implementations of [`MetadataObjectWriter`] methods that rely on 
encryption being enabled
@@ -503,6 +511,11 @@ impl MetadataObjectWriter {
                 let mut encryptor = file_encryptor.get_footer_encryptor()?;
                 encrypt_object(file_metadata, &mut encryptor, &mut sink, &aad)
             }
+            Some(file_encryptor) if 
file_metadata.encryption_algorithm.is_some() => {
+                let aad = create_footer_aad(file_encryptor.file_aad())?;
+                let mut encryptor = file_encryptor.get_footer_encryptor()?;
+                write_signed_plaintext_object(file_metadata, &mut encryptor, 
&mut sink, &aad)
+            }
             _ => Self::write_object(file_metadata, &mut sink),
         }
     }
@@ -622,25 +635,36 @@ impl MetadataObjectWriter {
         }
     }
 
-    fn file_crypto_metadata(
-        file_encryptor: &FileEncryptor,
-    ) -> Result<crate::format::FileCryptoMetaData> {
-        let properties = file_encryptor.properties();
-        let supply_aad_prefix = properties
+    fn get_footer_encryption_algorithm(&self) -> Option<EncryptionAlgorithm> {
+        if let Some(file_encryptor) = &self.file_encryptor {
+            return 
Some(Self::encryption_algorithm_from_encryptor(file_encryptor));
+        }
+        None
+    }
+
+    fn encryption_algorithm_from_encryptor(file_encryptor: &FileEncryptor) -> 
EncryptionAlgorithm {
+        let supply_aad_prefix = file_encryptor
+            .properties()
             .aad_prefix()
-            .map(|_| !properties.store_aad_prefix());
-        let encryption_algorithm = AesGcmV1 {
-            aad_prefix: if properties.store_aad_prefix() {
-                properties.aad_prefix().cloned()
-            } else {
-                None
-            },
+            .map(|_| !file_encryptor.properties().store_aad_prefix());
+        let aad_prefix = if file_encryptor.properties().store_aad_prefix() {
+            file_encryptor.properties().aad_prefix().cloned()
+        } else {
+            None
+        };
+        EncryptionAlgorithm::AESGCMV1(AesGcmV1 {
+            aad_prefix,
             aad_file_unique: Some(file_encryptor.aad_file_unique().clone()),
             supply_aad_prefix,
-        };
+        })
+    }
 
+    fn file_crypto_metadata(
+        file_encryptor: &FileEncryptor,
+    ) -> Result<crate::format::FileCryptoMetaData> {
+        let properties = file_encryptor.properties();
         Ok(crate::format::FileCryptoMetaData {
-            encryption_algorithm: 
EncryptionAlgorithm::AESGCMV1(encryption_algorithm),
+            encryption_algorithm: 
Self::encryption_algorithm_from_encryptor(file_encryptor),
             key_metadata: properties.footer_key_metadata().cloned(),
         })
     }
diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs
index 18e357ebc2..0298d8a51d 100644
--- a/parquet/src/file/writer.rs
+++ b/parquet/src/file/writer.rs
@@ -212,12 +212,6 @@ impl<W: Write + Send> SerializedFileWriter<W> {
         if let Some(file_encryption_properties) = 
&properties.file_encryption_properties {
             
file_encryption_properties.validate_encrypted_column_names(schema_descriptor)?;
 
-            if !file_encryption_properties.encrypt_footer() {
-                return Err(general_err!(
-                    "Writing encrypted files with plaintext footers is not 
supported yet"
-                ));
-            }
-
             Ok(Some(Arc::new(FileEncryptor::new(
                 file_encryption_properties.clone(),
             )?)))
diff --git a/parquet/tests/encryption/encryption.rs 
b/parquet/tests/encryption/encryption.rs
index da263a5df6..134e3383b3 100644
--- a/parquet/tests/encryption/encryption.rs
+++ b/parquet/tests/encryption/encryption.rs
@@ -433,18 +433,23 @@ fn test_write_non_uniform_encryption() {
     read_and_roundtrip_to_encrypted_file(&path, decryption_properties, 
file_encryption_properties);
 }
 
-// todo: currently we raise if writing with plaintext footer, but we should 
support it
-// for uniform and non-uniform encryption (see 
https://github.com/apache/arrow-rs/issues/7320)
 #[test]
 fn test_write_uniform_encryption_plaintext_footer() {
     let testdata = arrow::util::test_util::parquet_test_data();
-    let path = 
format!("{testdata}/encrypt_columns_and_footer.parquet.encrypted");
+    let path = 
format!("{testdata}/encrypt_columns_plaintext_footer.parquet.encrypted");
 
     let footer_key = b"0123456789012345".to_vec(); // 128bit/16
+    let wrong_footer_key = b"0000000000000000".to_vec(); // 128bit/16
     let column_1_key = b"1234567890123450".to_vec();
     let column_2_key = b"1234567890123451".to_vec();
 
     let decryption_properties = 
FileDecryptionProperties::builder(footer_key.clone())
+        .with_column_key("double_field", column_1_key.clone())
+        .with_column_key("float_field", column_2_key.clone())
+        .build()
+        .unwrap();
+
+    let wrong_decryption_properties = 
FileDecryptionProperties::builder(wrong_footer_key)
         .with_column_key("double_field", column_1_key)
         .with_column_key("float_field", column_2_key)
         .build()
@@ -455,26 +460,53 @@ fn test_write_uniform_encryption_plaintext_footer() {
         .build()
         .unwrap();
 
+    // Try writing plaintext footer and then reading it with the correct 
footer key
+    read_and_roundtrip_to_encrypted_file(
+        &path,
+        decryption_properties.clone(),
+        file_encryption_properties.clone(),
+    );
+
+    // Try writing plaintext footer and then reading it with the wrong footer 
key
+    let temp_file = tempfile::tempfile().unwrap();
+
+    // read example data
     let file = File::open(path).unwrap();
     let options = ArrowReaderOptions::default()
         .with_file_decryption_properties(decryption_properties.clone());
     let metadata = ArrowReaderMetadata::load(&file, options.clone()).unwrap();
 
+    let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, 
options).unwrap();
+    let batch_reader = builder.build().unwrap();
+    let batches = batch_reader
+        .collect::<parquet::errors::Result<Vec<RecordBatch>, _>>()
+        .unwrap();
+
+    // write example data
     let props = WriterProperties::builder()
         .with_file_encryption_properties(file_encryption_properties)
         .build();
-    let temp_file = tempfile::tempfile().unwrap();
 
-    let writer = ArrowWriter::try_new(
+    let mut writer = ArrowWriter::try_new(
         temp_file.try_clone().unwrap(),
         metadata.schema().clone(),
         Some(props),
-    );
-    assert!(writer.is_err());
-    assert_eq!(
-        writer.unwrap_err().to_string(),
-        "Parquet error: Writing encrypted files with plaintext footers is not 
supported yet"
     )
+    .unwrap();
+    for batch in batches {
+        writer.write(&batch).unwrap();
+    }
+    writer.close().unwrap();
+
+    // Try reading plaintext footer and with the wrong footer key
+    let options =
+        
ArrowReaderOptions::default().with_file_decryption_properties(wrong_decryption_properties);
+    let result = ArrowReaderMetadata::load(&temp_file, options.clone());
+    assert!(result.is_err());
+    assert!(result
+        .unwrap_err()
+        .to_string()
+        .starts_with("Parquet error: Footer signature verification failed. 
Computed: ["));
 }
 
 #[test]

Reply via email to