This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc-format.git


The following commit(s) were added to refs/heads/main by this push:
     new ca0112c  ORC-FORMAT-2: Enforce `BASIC` rule (#5)
ca0112c is described below

commit ca0112c821c40649a5d7c61975b5b9c5e48cd99d
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Thu Dec 7 22:03:33 2023 -0800

    ORC-FORMAT-2: Enforce `BASIC` rule (#5)
    
    ### What changes were proposed in this pull request?
    
    This closes #2 .
    
    ### Why are the changes needed?
    
    `BASIC` has many rules in addition to `MINIMAL`.
    
    ### How was this patch tested?
    
    Pass the CIs.
---
 src/main/proto/buf.yaml                  |   2 +-
 src/main/proto/orc/proto/orc_proto.proto | 112 +++++++++++++++----------------
 2 files changed, 57 insertions(+), 57 deletions(-)

diff --git a/src/main/proto/buf.yaml b/src/main/proto/buf.yaml
index 31b7729..47f6919 100644
--- a/src/main/proto/buf.yaml
+++ b/src/main/proto/buf.yaml
@@ -20,4 +20,4 @@ breaking:
     - FILE
 lint:
   use:
-    - MINIMAL
+    - BASIC
diff --git a/src/main/proto/orc/proto/orc_proto.proto b/src/main/proto/orc/proto/orc_proto.proto
index 45d7d2a..5ca432d 100644
--- a/src/main/proto/orc/proto/orc_proto.proto
+++ b/src/main/proto/orc/proto/orc_proto.proto
@@ -41,8 +41,8 @@ message StringStatistics {
   optional sint64 sum = 3;
   // If the minimum or maximum value was longer than 1024 bytes, store a lower or upper
   // bound instead of the minimum or maximum values above.
-  optional string lowerBound = 4;
-  optional string upperBound = 5;
+  optional string lower_bound = 4;
+  optional string upper_bound = 5;
 }
 
 message BucketStatistics {
@@ -65,11 +65,11 @@ message TimestampStatistics {
   // min,max values saved as milliseconds since epoch
   optional sint64 minimum = 1;
   optional sint64 maximum = 2;
-  optional sint64 minimumUtc = 3;
-  optional sint64 maximumUtc = 4;
+  optional sint64 minimum_utc = 3;
+  optional sint64 maximum_utc = 4;
   // store the lower 6 TS digits for min/max to achieve nanosecond precision
-  optional int32 minimumNanos = 5;
-  optional int32 maximumNanos = 6;
+  optional int32 minimum_nanos = 5;
+  optional int32 maximum_nanos = 6;
 }
 
 message BinaryStatistics {
@@ -79,24 +79,24 @@ message BinaryStatistics {
 
 // Statistics for list and map
 message CollectionStatistics {
-  optional uint64 minChildren = 1;
-  optional uint64 maxChildren = 2;
-  optional uint64 totalChildren = 3;
+  optional uint64 min_children = 1;
+  optional uint64 max_children = 2;
+  optional uint64 total_children = 3;
 }
 
 message ColumnStatistics {
-  optional uint64 numberOfValues = 1;
-  optional IntegerStatistics intStatistics = 2;
-  optional DoubleStatistics doubleStatistics = 3;
-  optional StringStatistics stringStatistics = 4;
-  optional BucketStatistics bucketStatistics = 5;
-  optional DecimalStatistics decimalStatistics = 6;
-  optional DateStatistics dateStatistics = 7;
-  optional BinaryStatistics binaryStatistics = 8;
-  optional TimestampStatistics timestampStatistics = 9;
-  optional bool hasNull = 10;
-  optional uint64 bytesOnDisk = 11;
-  optional CollectionStatistics collectionStatistics = 12;
+  optional uint64 number_of_values = 1;
+  optional IntegerStatistics int_statistics = 2;
+  optional DoubleStatistics double_statistics = 3;
+  optional StringStatistics string_statistics = 4;
+  optional BucketStatistics bucket_statistics = 5;
+  optional DecimalStatistics decimal_statistics = 6;
+  optional DateStatistics date_statistics = 7;
+  optional BinaryStatistics binary_statistics = 8;
+  optional TimestampStatistics timestamp_statistics = 9;
+  optional bool has_null = 10;
+  optional uint64 bytes_on_disk = 11;
+  optional CollectionStatistics collection_statistics = 12;
 }
 
 message RowIndexEntry {
@@ -109,13 +109,13 @@ message RowIndex {
 }
 
 message BloomFilter {
-  optional uint32 numHashFunctions = 1;
+  optional uint32 num_hash_functions = 1;
   repeated fixed64 bitset = 2;
   optional bytes utf8bitset = 3;
 }
 
 message BloomFilterIndex {
-  repeated BloomFilter bloomFilter = 1;
+  repeated BloomFilter bloom_filter = 1;
 }
 
 message Stream {
@@ -153,12 +153,12 @@ message ColumnEncoding {
     DICTIONARY_V2 = 3;
   }
   optional Kind kind = 1;
-  optional uint32 dictionarySize = 2;
+  optional uint32 dictionary_size = 2;
 
   // The encoding of the bloom filters for this column:
   //   0 or missing = none or original
   //   1            = ORC-135 (utc for timestamps)
-  optional uint32 bloomEncoding = 3;
+  optional uint32 bloom_encoding = 3;
 }
 
 message StripeEncryptionVariant {
@@ -178,7 +178,7 @@ message StripeEncryptionVariant {
 message StripeFooter {
   repeated Stream streams = 1;
   repeated ColumnEncoding columns = 2;
-  optional string writerTimezone = 3;
+  optional string writer_timezone = 3;
   // one for each column encryption variant
   repeated StripeEncryptionVariant encryption = 4;
 }
@@ -219,8 +219,8 @@ message Type {
   }
   optional Kind kind = 1;
   repeated uint32 subtypes = 2 [packed=true];
-  repeated string fieldNames = 3;
-  optional uint32 maximumLength = 4;
+  repeated string field_names = 3;
+  optional uint32 maximum_length = 4;
   optional uint32 precision = 5;
   optional uint32 scale = 6;
   repeated StringPair attributes = 7;
@@ -230,13 +230,13 @@ message StripeInformation {
   // the global file offset of the start of the stripe
   optional uint64 offset = 1;
   // the number of bytes of index
-  optional uint64 indexLength = 2;
+  optional uint64 index_length = 2;
   // the number of bytes of data
-  optional uint64 dataLength = 3;
+  optional uint64 data_length = 3;
   // the number of bytes in the stripe footer
-  optional uint64 footerLength = 4;
+  optional uint64 footer_length = 4;
   // the number of rows in this stripe
-  optional uint64 numberOfRows = 5;
+  optional uint64 number_of_rows = 5;
   // If this is present, the reader should use this value for the encryption
   // stripe id for setting the encryption IV. Otherwise, the reader should
   // use one larger than the previous stripe's encryptStripeId.
@@ -246,10 +246,10 @@ message StripeInformation {
   // each of the input files will reset it to 1.
   // Note that 1 was choosen, because protobuf v3 doesn't serialize
   // primitive types that are the default (eg. 0).
-  optional uint64 encryptStripeId = 6;
+  optional uint64 encrypt_stripe_id = 6;
   // For each encryption variant, the new encrypted local key to use
   // until we find a replacement.
-  repeated bytes encryptedLocalKeys = 7;
+  repeated bytes encrypted_local_keys = 7;
 }
 
 message UserMetadataItem {
@@ -261,19 +261,19 @@ message UserMetadataItem {
 // ColumnStatistics for each column.
 // This message type is only used in ORC v0 and v1.
 message StripeStatistics {
-  repeated ColumnStatistics colStats = 1;
+  repeated ColumnStatistics col_stats = 1;
 }
 
 // This message type is only used in ORC v0 and v1.
 message Metadata {
-  repeated StripeStatistics stripeStats = 1;
+  repeated StripeStatistics stripe_stats = 1;
 }
 
 // In ORC v2 (and for encrypted columns in v1), each column has
 // their column statistics written separately.
 message ColumnarStripeStatistics {
   // one value for each stripe in the file
-  repeated ColumnStatistics colStats = 1;
+  repeated ColumnStatistics col_stats = 1;
 }
 
 enum EncryptionAlgorithm {
@@ -292,15 +292,15 @@ message DataMask {
   // the kind of masking, which may include third party masks
   optional string name = 1;
   // parameters for the mask
-  repeated string maskParameters = 2;
+  repeated string mask_parameters = 2;
   // the unencrypted column roots this mask was applied to
   repeated uint32 columns = 3 [packed = true];
 }
 
 // Information about the encryption keys.
 message EncryptionKey {
-  optional string keyName = 1;
-  optional uint32 keyVersion = 2;
+  optional string key_name = 1;
+  optional uint32 key_version = 2;
   optional EncryptionAlgorithm algorithm = 3;
 }
 
@@ -313,11 +313,11 @@ message EncryptionVariant {
   // an index into the Encryption.key list.
   optional uint32 key = 2;
   // the encrypted key for the file footer
-  optional bytes encryptedKey = 3;
+  optional bytes encrypted_key = 3;
   // the stripe statistics for this variant
-  repeated Stream stripeStatistics = 4;
+  repeated Stream stripe_statistics = 4;
   // encrypted file statistics as a FileStatistics
-  optional bytes fileStatistics = 5;
+  optional bytes file_statistics = 5;
 }
 
 // Which KeyProvider encrypted the local keys.
@@ -340,7 +340,7 @@ message Encryption {
   // they should get the unencrypted masked data.
   repeated EncryptionVariant variants = 3;
   // How are the local keys encrypted?
-  optional KeyProviderKind keyProvider = 4;
+  optional KeyProviderKind key_provider = 4;
 }
 
 enum CalendarKind {
@@ -352,14 +352,14 @@ enum CalendarKind {
 }
 
 message Footer {
-  optional uint64 headerLength = 1;
-  optional uint64 contentLength = 2;
+  optional uint64 header_length = 1;
+  optional uint64 content_length = 2;
   repeated StripeInformation stripes = 3;
   repeated Type types = 4;
   repeated UserMetadataItem metadata = 5;
-  optional uint64 numberOfRows = 6;
+  optional uint64 number_of_rows = 6;
   repeated ColumnStatistics statistics = 7;
-  optional uint32 rowIndexStride = 8;
+  optional uint32 row_index_stride = 8;
 
   // Each implementation that writes ORC files should register for a code
   // 0 = ORC Java
@@ -377,7 +377,7 @@ message Footer {
   // informative description about the version of the software that wrote
   // the file. It is assumed to be within a given writer, so for example
   // ORC 1.7.2 = "1.7.2". It may include suffixes, such as "-SNAPSHOT".
-  optional string softwareVersion = 12;
+  optional string software_version = 12;
 }
 
 enum CompressionKind {
@@ -391,14 +391,14 @@ enum CompressionKind {
 
 // Serialized length must be less that 255 bytes
 message PostScript {
-  optional uint64 footerLength = 1;
+  optional uint64 footer_length = 1;
   optional CompressionKind compression = 2;
-  optional uint64 compressionBlockSize = 3;
+  optional uint64 compression_block_size = 3;
   // the version of the file format
   //   [0, 11] = Hive 0.11
   //   [0, 12] = Hive 0.12
   repeated uint32 version = 4 [packed = true];
-  optional uint64 metadataLength = 5;
+  optional uint64 metadata_length = 5;
 
   // The version of the writer that wrote the file. This number is
   // updated when we make fixes or large changes to the writer so that
@@ -436,10 +436,10 @@ message PostScript {
   // Version of the CUDF writer:
   //   6 = original
   //
-  optional uint32 writerVersion = 6;
+  optional uint32 writer_version = 6;
 
   // the number of bytes in the encrypted stripe statistics
-  optional uint64 stripeStatisticsLength = 7;
+  optional uint64 stripe_statistics_length = 7;
 
   // Leave this last in the record
   optional string magic = 8000;
@@ -450,6 +450,6 @@ message PostScript {
 message FileTail {
   optional PostScript postscript = 1;
   optional Footer footer = 2;
-  optional uint64 fileLength = 3;
-  optional uint64 postscriptLength = 4;
+  optional uint64 file_length = 3;
+  optional uint64 postscript_length = 4;
 }

Reply via email to