emkornfield commented on a change in pull request #11538:
URL: https://github.com/apache/arrow/pull/11538#discussion_r741406958



##########
File path: go/parquet/encryption_read_config_test.go
##########
@@ -0,0 +1,443 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package parquet_test
+
+import (
+       "encoding/binary"
+       "fmt"
+       "os"
+       "path"
+       "testing"
+
+       "github.com/apache/arrow/go/arrow/memory"
+       "github.com/apache/arrow/go/parquet"
+       "github.com/apache/arrow/go/parquet/file"
+       "github.com/apache/arrow/go/parquet/internal/encryption"
+       "github.com/stretchr/testify/suite"
+)
+
+/*
+ * This file contains a unit-test for reading encrypted Parquet files with
+ * different decryption configurations.
+ *
+ * The unit-test is called multiple times, each time to decrypt parquet files 
using
+ * different decryption configuration as described below.
+ * In each call two encrypted files are read: one temporary file that was 
generated using
+ * encryption_write_config_test.go test and will be deleted upon
+ * reading it, while the second resides in
+ * parquet-testing/data repository. Those two encrypted files were encrypted 
using the
+ * same encryption configuration.
+ * The encrypted parquet file names are passed as parameter to the unit-test.
+ *
+ * A detailed description of the Parquet Modular Encryption specification can 
be found
+ * here:
+ * https://github.com/apache/parquet-format/blob/encryption/Encryption.md
+ *
+ * The following decryption configurations are used to decrypt each parquet 
file:
+ *
+ *  - Decryption configuration 1:   Decrypt using key retriever that holds the 
keys of
+ *                                  two encrypted columns and the footer key.
+ *  - Decryption configuration 2:   Decrypt using key retriever that holds the 
keys of
+ *                                  two encrypted columns and the footer key. 
Supplies
+ *                                  aad_prefix to verify file identity.
+ *  - Decryption configuration 3:   Decrypt using explicit column and footer 
keys
+ *                                  (instead of key retrieval callback).
+ *  - Decryption Configuration 4:   PlainText Footer mode - test legacy reads,
+ *                                  read the footer + all non-encrypted 
columns.
+ *                                  (pairs with encryption configuration 3)
+ *
+ * The encrypted parquet files that is read was encrypted using one of the 
configurations
+ * below:
+ *
+ *  - Encryption configuration 1:   Encrypt all columns and the footer with 
the same key.
+ *                                  (uniform encryption)
+ *  - Encryption configuration 2:   Encrypt two columns and the footer, with 
different
+ *                                  keys.
+ *  - Encryption configuration 3:   Encrypt two columns, with different keys.
+ *                                  Don’t encrypt footer (to enable legacy 
readers)
+ *                                  - plaintext footer mode.
+ *  - Encryption configuration 4:   Encrypt two columns and the footer, with 
different
+ *                                  keys. Supply aad_prefix for file identity
+ *                                  verification.
+ *  - Encryption configuration 5:   Encrypt two columns and the footer, with 
different
+ *                                  keys. Supply aad_prefix, and call
+ *                                  disable_aad_prefix_storage to prevent file
+ *                                  identity storage in file metadata.
+ *  - Encryption configuration 6:   Encrypt two columns and the footer, with 
different
+ *                                  keys. Use the alternative (AES_GCM_CTR_V1) 
algorithm.
+ */
+
+func getDataDir() string {
+       datadir := os.Getenv("PARQUET_TEST_DATA")
+       if datadir == "" {
+               panic("please point the PARQUET_TEST_DATA environment variable 
to the test data dir")
+       }
+       return datadir
+}
+
+type TestDecryptionSuite struct {
+       suite.Suite
+
+       pathToDouble        string
+       pathToFloat         string
+       decryptionConfigs   []*parquet.FileDecryptionProperties
+       footerEncryptionKey string
+       colEncryptionKey1   string
+       colEncryptionKey2   string
+       fileName            string
+}
+
+func (d *TestDecryptionSuite) TearDownSuite() {
+       os.Remove(tempdir)
+}
+
+func TestFileEncryptionDecryption(t *testing.T) {
+       suite.Run(t, new(EncryptionConfigTestSuite))
+       suite.Run(t, new(TestDecryptionSuite))
+}
+
+func (d *TestDecryptionSuite) SetupSuite() {
+       d.pathToDouble = "double_field"
+       d.pathToFloat = "float_field"
+       d.footerEncryptionKey = FooterEncryptionKey
+       d.colEncryptionKey1 = ColumnEncryptionKey1
+       d.colEncryptionKey2 = ColumnEncryptionKey2
+       d.fileName = FileName
+
+       d.createDecryptionConfigs()
+}
+
+func (d *TestDecryptionSuite) createDecryptionConfigs() {
+       // Decryption configuration 1: Decrypt using key retriever callback 
that holds the
+       // keys of two encrypted columns and the footer key.
+       stringKr1 := make(encryption.StringKeyIDRetriever)
+       stringKr1.PutKey("kf", d.footerEncryptionKey)
+       stringKr1.PutKey("kc1", d.colEncryptionKey1)
+       stringKr1.PutKey("kc2", d.colEncryptionKey2)
+
+       d.decryptionConfigs = append(d.decryptionConfigs,
+               
parquet.NewFileDecryptionProperties(parquet.WithKeyRetriever(stringKr1)))
+
+       // Decryption configuration 2: Decrypt using key retriever callback 
that holds the
+       // keys of two encrypted columns and the footer key. Supply aad_prefix.
+       stringKr2 := make(encryption.StringKeyIDRetriever)
+       stringKr2.PutKey("kf", d.footerEncryptionKey)
+       stringKr2.PutKey("kc1", d.colEncryptionKey1)
+       stringKr2.PutKey("kc2", d.colEncryptionKey2)
+       d.decryptionConfigs = append(d.decryptionConfigs,
+               
parquet.NewFileDecryptionProperties(parquet.WithKeyRetriever(stringKr2), 
parquet.WithDecryptAadPrefix(d.fileName)))
+
+       // Decryption configuration 3: Decrypt using explicit column and footer 
keys. Supply
+       // aad_prefix.
+       decryptCols := make(parquet.ColumnPathToDecryptionPropsMap)
+       decryptCols[d.pathToFloat] = 
parquet.NewColumnDecryptionProperties(d.pathToFloat, 
parquet.WithDecryptKey(d.colEncryptionKey2))
+       decryptCols[d.pathToDouble] = 
parquet.NewColumnDecryptionProperties(d.pathToDouble, 
parquet.WithDecryptKey(d.colEncryptionKey1))
+       d.decryptionConfigs = append(d.decryptionConfigs,
+               
parquet.NewFileDecryptionProperties(parquet.WithFooterKey(d.footerEncryptionKey),
 parquet.WithColumnKeys(decryptCols)))
+
+       // Decryption Configuration 4: use plaintext footer mode, read only 
footer + plaintext
+       // columns.
+       d.decryptionConfigs = append(d.decryptionConfigs, nil)
+}
+
+func (d *TestDecryptionSuite) decryptFile(filename string, decryptConfigNum 
int) {
+       // if we get decryption_config_num = x then it means the actual number 
is x+1
+       // and since we want decryption_config_num=4 we set the condition to 3
+       props := parquet.NewReaderProperties(memory.DefaultAllocator)
+       if decryptConfigNum != 3 {

Review comment:
       It might be cleaner to encapsulate the test case in its own 
class/struct instead of using the if/then here.  At the end of the day, though, 
that might just be a slightly different form of obfuscation.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to