pitrou commented on code in PR #44043:
URL: https://github.com/apache/arrow/pull/44043#discussion_r1913225647


##########
cpp/src/parquet/file_writer.cc:
##########
@@ -359,14 +359,28 @@ class FileSerializer : public ParquetFileWriter::Contents 
{
     if (row_group_writer_) {
       row_group_writer_->Close();
     }
+    int16_t row_group_ordinal = 0;
+    if (file_encryptor_ == nullptr) {
+      // Using -1 to indicate that the row group ordinal is not set.
+      row_group_ordinal = -1;
+    } else {
+      // Parquet thrifts using int16 for row group ordinal, so we can't have 
more than
+      // 32767 row groups in a file.
+      if (num_row_groups_ < std::numeric_limits<int16_t>::max()) {

Review Comment:
   We can make this a non-strict inequality
   ```suggestion
         if (num_row_groups_ <= std::numeric_limits<int16_t>::max()) {
   ```
   



##########
cpp/src/parquet/metadata.cc:
##########
@@ -219,30 +219,29 @@ const std::string& ColumnCryptoMetaData::key_metadata() 
const {
 // ColumnChunk metadata
 class ColumnChunkMetaData::ColumnChunkMetaDataImpl {
  public:
-  explicit ColumnChunkMetaDataImpl(const format::ColumnChunk* column,
-                                   const ColumnDescriptor* descr,
-                                   int16_t row_group_ordinal, int16_t 
column_ordinal,
-                                   const ReaderProperties& properties,
-                                   const ApplicationVersion* writer_version,
-                                   std::shared_ptr<InternalFileDecryptor> 
file_decryptor)
+  explicit ColumnChunkMetaDataImpl(
+      const format::ColumnChunk* column, const ColumnDescriptor* descr,
+      int16_t row_group_ordinal, int16_t column_ordinal,
+      const ReaderProperties& properties, const ApplicationVersion* 
writer_version,
+      const std::shared_ptr<InternalFileDecryptor>& file_decryptor)
       : column_(column),
         descr_(descr),
         properties_(properties),
         writer_version_(writer_version) {
     column_metadata_ = &column->meta_data;
     if (column->__isset.crypto_metadata) {  // column metadata is encrypted
-      format::ColumnCryptoMetaData ccmd = column->crypto_metadata;
+      const format::ColumnCryptoMetaData& ccmd = column->crypto_metadata;
 
       if (ccmd.__isset.ENCRYPTION_WITH_COLUMN_KEY) {
         if (file_decryptor != nullptr && file_decryptor->properties() != 
nullptr) {
           // should decrypt metadata
           std::shared_ptr<schema::ColumnPath> path = 
std::make_shared<schema::ColumnPath>(
               ccmd.ENCRYPTION_WITH_COLUMN_KEY.path_in_schema);
-          std::string key_metadata = 
ccmd.ENCRYPTION_WITH_COLUMN_KEY.key_metadata;
+          const std::string& key_metadata = 
ccmd.ENCRYPTION_WITH_COLUMN_KEY.key_metadata;
 
-          std::string aad_column_metadata = encryption::CreateModuleAad(
+          const std::string& aad_column_metadata = encryption::CreateModuleAad(

Review Comment:
   `CreateModuleAad` returns a value, not a reference
   ```suggestion
             std::string aad_column_metadata = encryption::CreateModuleAad(
   ```



##########
cpp/src/parquet/metadata.cc:
##########
@@ -219,30 +219,29 @@ const std::string& ColumnCryptoMetaData::key_metadata() 
const {
 // ColumnChunk metadata
 class ColumnChunkMetaData::ColumnChunkMetaDataImpl {
  public:
-  explicit ColumnChunkMetaDataImpl(const format::ColumnChunk* column,
-                                   const ColumnDescriptor* descr,
-                                   int16_t row_group_ordinal, int16_t 
column_ordinal,
-                                   const ReaderProperties& properties,
-                                   const ApplicationVersion* writer_version,
-                                   std::shared_ptr<InternalFileDecryptor> 
file_decryptor)
+  explicit ColumnChunkMetaDataImpl(
+      const format::ColumnChunk* column, const ColumnDescriptor* descr,
+      int16_t row_group_ordinal, int16_t column_ordinal,
+      const ReaderProperties& properties, const ApplicationVersion* 
writer_version,
+      const std::shared_ptr<InternalFileDecryptor>& file_decryptor)
       : column_(column),
         descr_(descr),
         properties_(properties),
         writer_version_(writer_version) {
     column_metadata_ = &column->meta_data;
     if (column->__isset.crypto_metadata) {  // column metadata is encrypted
-      format::ColumnCryptoMetaData ccmd = column->crypto_metadata;
+      const format::ColumnCryptoMetaData& ccmd = column->crypto_metadata;
 
       if (ccmd.__isset.ENCRYPTION_WITH_COLUMN_KEY) {
         if (file_decryptor != nullptr && file_decryptor->properties() != 
nullptr) {
           // should decrypt metadata
           std::shared_ptr<schema::ColumnPath> path = 
std::make_shared<schema::ColumnPath>(
               ccmd.ENCRYPTION_WITH_COLUMN_KEY.path_in_schema);
-          std::string key_metadata = 
ccmd.ENCRYPTION_WITH_COLUMN_KEY.key_metadata;
+          const std::string& key_metadata = 
ccmd.ENCRYPTION_WITH_COLUMN_KEY.key_metadata;

Review Comment:
   I don't think it's necessary, any invalid row group ordinal will fail the 
AAD check when reading.



##########
cpp/src/parquet/file_writer.cc:
##########
@@ -359,14 +359,28 @@ class FileSerializer : public ParquetFileWriter::Contents 
{
     if (row_group_writer_) {
       row_group_writer_->Close();
     }
+    int16_t row_group_ordinal = 0;
+    if (file_encryptor_ == nullptr) {
+      // Using -1 to indicate that the row group ordinal is not set.
+      row_group_ordinal = -1;
+    } else {

Review Comment:
   This can be simplified to:
   ```suggestion
       int16_t row_group_ordinal = -1;  // row group ordinal not set
       if (file_encryptor_ != nullptr) {
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to