tustvold commented on code in PR #4938:
URL: https://github.com/apache/arrow-rs/pull/4938#discussion_r1362846325
##########
parquet/src/basic.rs:
##########
@@ -2130,4 +2235,93 @@ mod tests {
);
assert_eq!(ColumnOrder::UNDEFINED.sort_order(), SortOrder::SIGNED);
}
+
+ #[test]
+ fn test_parse_encoding() {
+ let mut encoding: Encoding = "PLAIN".parse().unwrap();
+ assert_eq!(encoding, Encoding::PLAIN);
+ encoding = "PLAIN_DICTIONARY".parse().unwrap();
+ assert_eq!(encoding, Encoding::PLAIN_DICTIONARY);
+ encoding = "RLE".parse().unwrap();
+ assert_eq!(encoding, Encoding::RLE);
+ encoding = "BIT_PACKED".parse().unwrap();
+ assert_eq!(encoding, Encoding::BIT_PACKED);
+ encoding = "DELTA_BINARY_PACKED".parse().unwrap();
+ assert_eq!(encoding, Encoding::DELTA_BINARY_PACKED);
+ encoding = "DELTA_LENGTH_BYTE_ARRAY".parse().unwrap();
+ assert_eq!(encoding, Encoding::DELTA_LENGTH_BYTE_ARRAY);
+ encoding = "DELTA_BYTE_ARRAY".parse().unwrap();
+ assert_eq!(encoding, Encoding::DELTA_BYTE_ARRAY);
+ encoding = "RLE_DICTIONARY".parse().unwrap();
+ assert_eq!(encoding, Encoding::RLE_DICTIONARY);
+ encoding = "BYTE_STREAM_SPLIT".parse().unwrap();
+ assert_eq!(encoding, Encoding::BYTE_STREAM_SPLIT);
+
+ // test lowercase
+ encoding = "Byte_Stream_Split".parse().unwrap();
+ assert_eq!(encoding, Encoding::BYTE_STREAM_SPLIT);
+
+ // test unknown string
+ match "plain_xxx".parse::<Encoding>() {
+ Ok(e) => {
+ panic!("Should not be able to parse {:?}", e);
+ }
+ Err(e) => {
+ assert_eq!(e.to_string(), "Parquet error: unknown encoding: plain_xxx");
+ }
+ }
+ }
+
+ #[test]
+ fn test_parse_compression() {
+ let mut compress: Compression = "snappy".parse().unwrap();
+ assert_eq!(compress, Compression::SNAPPY);
+ compress = "lzo".parse().unwrap();
+ assert_eq!(compress, Compression::LZO);
+ compress = "zstd(3)".parse().unwrap();
+ assert_eq!(compress, Compression::ZSTD(ZstdLevel::try_new(3).unwrap()));
+ compress = "LZ4_raw".parse().unwrap();
+ assert_eq!(compress, Compression::LZ4_RAW);
+ compress = "uncompressed".parse().unwrap();
+ assert_eq!(compress, Compression::UNCOMPRESSED);
+ compress = "snappy".parse().unwrap();
+ assert_eq!(compress, Compression::SNAPPY);
+ compress = "gzip(9)".parse().unwrap();
+ assert_eq!(compress, Compression::GZIP(GzipLevel::try_new(9).unwrap()));
+ compress = "lzo".parse().unwrap();
+ assert_eq!(compress, Compression::LZO);
+ compress = "brotli(3)".parse().unwrap();
+ assert_eq!(
+ compress,
+ Compression::BROTLI(BrotliLevel::try_new(3).unwrap())
+ );
+ compress = "lz4".parse().unwrap();
+ assert_eq!(compress, Compression::LZ4);
+
+ // test unknown compression
+ match "unknown".parse::<Compression>() {
Review Comment:
These could also make use of `unwrap_err()`.
##########
parquet/src/basic.rs:
##########
@@ -278,6 +279,25 @@ pub enum Encoding {
BYTE_STREAM_SPLIT,
}
+impl FromStr for Encoding {
+ type Err = ParquetError;
+
+ fn from_str(s: &str) -> Result<Self, Self::Err> {
+ match s.to_owned().to_uppercase().as_str() {
Review Comment:
I wonder if we should be case-sensitive and leave it to the caller to convert
the input to uppercase if that is what they want?
It seems strange that we would support parsing things like `PlAiN`.
##########
parquet/src/basic.rs:
##########
@@ -2130,4 +2235,93 @@ mod tests {
);
assert_eq!(ColumnOrder::UNDEFINED.sort_order(), SortOrder::SIGNED);
}
+
+ #[test]
+ fn test_parse_encoding() {
+ let mut encoding: Encoding = "PLAIN".parse().unwrap();
+ assert_eq!(encoding, Encoding::PLAIN);
+ encoding = "PLAIN_DICTIONARY".parse().unwrap();
+ assert_eq!(encoding, Encoding::PLAIN_DICTIONARY);
+ encoding = "RLE".parse().unwrap();
+ assert_eq!(encoding, Encoding::RLE);
+ encoding = "BIT_PACKED".parse().unwrap();
+ assert_eq!(encoding, Encoding::BIT_PACKED);
+ encoding = "DELTA_BINARY_PACKED".parse().unwrap();
+ assert_eq!(encoding, Encoding::DELTA_BINARY_PACKED);
+ encoding = "DELTA_LENGTH_BYTE_ARRAY".parse().unwrap();
+ assert_eq!(encoding, Encoding::DELTA_LENGTH_BYTE_ARRAY);
+ encoding = "DELTA_BYTE_ARRAY".parse().unwrap();
+ assert_eq!(encoding, Encoding::DELTA_BYTE_ARRAY);
+ encoding = "RLE_DICTIONARY".parse().unwrap();
+ assert_eq!(encoding, Encoding::RLE_DICTIONARY);
+ encoding = "BYTE_STREAM_SPLIT".parse().unwrap();
+ assert_eq!(encoding, Encoding::BYTE_STREAM_SPLIT);
+
+ // test lowercase
+ encoding = "Byte_Stream_Split".parse().unwrap();
+ assert_eq!(encoding, Encoding::BYTE_STREAM_SPLIT);
+
+ // test unknown string
+ match "plain_xxx".parse::<Encoding>() {
+ Ok(e) => {
+ panic!("Should not be able to parse {:?}", e);
+ }
+ Err(e) => {
+ assert_eq!(e.to_string(), "Parquet error: unknown encoding: plain_xxx");
+ }
+ }
Review Comment:
```suggestion
let err = "plain_xxx".parse::<Encoding>().unwrap_err();
assert_eq!(err.to_string(), "Parquet error: unknown encoding: plain_xxx");
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]