[
https://issues.apache.org/jira/browse/PARQUET-2380?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17787041#comment-17787041
]
ASF GitHub Bot commented on PARQUET-2380:
-----------------------------------------
ConeyLiu commented on code in PR #1195:
URL: https://github.com/apache/parquet-mr/pull/1195#discussion_r1396744262
##########
parquet-hadoop/src/test/java/org/apache/parquet/hadoop/rewrite/ParquetRewriterTest.java:
##########
@@ -99,28 +103,47 @@ public class ParquetRewriterTest {
private final int numRecord = 100000;
private final Configuration conf = new Configuration();
+ private final ParquetConfiguration parquetConf = new
PlainParquetConfiguration();
private final ParquetProperties.WriterVersion writerVersion;
private final IndexCache.CacheStrategy indexCacheStrategy;
+ private final boolean usingHadoop;
private List<EncryptionTestFile> inputFiles = null;
private String outputFile = null;
private ParquetRewriter rewriter = null;
- @Parameterized.Parameters(name = "WriterVersion = {0}, IndexCacheStrategy =
{1}")
+ @Parameterized.Parameters(name = "WriterVersion = {0}, IndexCacheStrategy =
{1}, UsingHadoop = {2}")
public static Object[][] parameters() {
- return new Object[][] {{"v1", "NONE"}, {"v1", "PREFETCH_BLOCK"}, {"v2",
"NONE"}, {"v2", "PREFETCH_BLOCK"}};
- }
-
- public ParquetRewriterTest(String writerVersion, String indexCacheStrategy) {
+ return new Object[][] {
+ {"v1", "NONE", true},
+ {"v1", "PREFETCH_BLOCK", true},
+ {"v2", "NONE", true},
+ {"v2", "PREFETCH_BLOCK", true},
+ {"v1", "NONE", false},
+ {"v1", "PREFETCH_BLOCK", false},
+ {"v2", "NONE", false},
+ {"v2", "PREFETCH_BLOCK", false}
+ };
+ }
+
+ public ParquetRewriterTest(String writerVersion, String indexCacheStrategy,
boolean usingHadoop) {
this.writerVersion =
ParquetProperties.WriterVersion.fromString(writerVersion);
this.indexCacheStrategy =
IndexCache.CacheStrategy.valueOf(indexCacheStrategy);
+ this.usingHadoop = usingHadoop;
}
private void testPruneSingleColumnTranslateCodec(List<Path> inputPaths)
throws Exception {
- Path outputPath = new Path(outputFile);
- List<String> pruneColumns = Arrays.asList("Gender");
+ RewriteOptions.Builder builder;
Review Comment:
This code should be extracted into a private helper method for reuse, since it
appears in multiple places.
> Decouple RewriteOptions from Hadoop classes
> -------------------------------------------
>
> Key: PARQUET-2380
> URL: https://issues.apache.org/jira/browse/PARQUET-2380
> Project: Parquet
> Issue Type: Improvement
> Components: parquet-mr
> Reporter: Atour Mousavi Gourabi
> Priority: Major
>
> ParquetRewriter's RewriteOptions makes use of Hadoop's Path and Configuration,
> where it could instead also allow users to specify these using the Parquet
> interface methods. This would allow rewriting to be properly decoupled from
> Hadoop at a later stage.
> This is part of a larger effort to decouple Parquet from Hadoop libraries.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)