[ 
https://issues.apache.org/jira/browse/DRILL-5660?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16101532#comment-16101532
 ] 

ASF GitHub Bot commented on DRILL-5660:
---------------------------------------

Github user arina-ielchiieva commented on a diff in the pull request:

    https://github.com/apache/drill/pull/877#discussion_r129539105
  
    --- Diff: 
exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java
 ---
    @@ -424,32 +432,121 @@ public void testMoveCache() throws Exception {
     
       @Test
       public void testMetadataCacheAbsolutePaths() throws Exception {
    +    final String absolutePathsMetadata = "absolute_paths_metadata";
         try {
           test("use dfs_test.tmp");
    -      final String relative_path_metadata_t1 = RELATIVE_PATHS_METADATA + 
"/t1";
    -      final String relative_path_metadata_t2 = RELATIVE_PATHS_METADATA + 
"/t2";
    -      test("create table `%s` as select * from cp.`tpch/nation.parquet`", 
relative_path_metadata_t1);
    -      test("create table `%s` as select * from cp.`tpch/nation.parquet`", 
relative_path_metadata_t2);
    +      // creating two inner directories to leverage 
METADATA_DIRECTORIES_FILENAME metadata file as well
    +      final String absolutePathsMetadataT1 = absolutePathsMetadata + "/t1";
    +      final String absolutePathsMetadataT2 = absolutePathsMetadata + "/t2";
    +      test("create table `%s` as select * from cp.`tpch/nation.parquet`", 
absolutePathsMetadataT1);
    +      test("create table `%s` as select * from cp.`tpch/nation.parquet`", 
absolutePathsMetadataT2);
           
copyMetaDataCacheToTempReplacingInternalPaths("parquet/metadata_with_absolute_path/"
 +
    -          "metadata_directories_with_absolute_paths.requires_replace.txt", 
RELATIVE_PATHS_METADATA, Metadata.METADATA_DIRECTORIES_FILENAME);
    +          "metadata_directories_with_absolute_paths.requires_replace.txt", 
absolutePathsMetadata, Metadata.METADATA_DIRECTORIES_FILENAME);
           
copyMetaDataCacheToTempReplacingInternalPaths("parquet/metadata_with_absolute_path/"
 +
    -          "metadata_table_with_absolute_paths.requires_replace.txt", 
RELATIVE_PATHS_METADATA, Metadata.METADATA_FILENAME);
    +          "metadata_table_with_absolute_paths.requires_replace.txt", 
absolutePathsMetadata, Metadata.METADATA_FILENAME);
           
copyMetaDataCacheToTempReplacingInternalPaths("parquet/metadata_with_absolute_path/"
 +
    -          "metadata_table_with_absolute_paths_t1.requires_replace.txt", 
relative_path_metadata_t1, Metadata.METADATA_FILENAME);
    +          "metadata_table_with_absolute_paths_t1.requires_replace.txt", 
absolutePathsMetadataT1, Metadata.METADATA_FILENAME);
           
copyMetaDataCacheToTempReplacingInternalPaths("parquet/metadata_with_absolute_path/"
 +
    -          "metadata_table_with_absolute_paths_t2.requires_replace.txt", 
relative_path_metadata_t2, Metadata.METADATA_FILENAME);
    +          "metadata_table_with_absolute_paths_t2.requires_replace.txt", 
absolutePathsMetadataT2, Metadata.METADATA_FILENAME);
    +      String query = String.format("select * from %s", 
absolutePathsMetadata);
    +      int expectedRowCount = 50;
    +      int expectedNumFiles = 1; // point to selectionRoot since no pruning 
is done in this query
    +      int actualRowCount = testSql(query);
    +      assertEquals("An incorrect result was obtained while querying a 
table with metadata cache files",
    +          expectedRowCount, actualRowCount);
    +      String numFilesPattern = "numFiles=" + expectedNumFiles;
    +      String usedMetaPattern = "usedMetadataFile=true";
    +      String cacheFileRootPattern = String.format("cacheFileRoot=%s/%s", 
getDfsTestTmpSchemaLocation(), absolutePathsMetadata);
    +      PlanTestBase.testPlanMatchingPatterns(query, new 
String[]{numFilesPattern, usedMetaPattern, cacheFileRootPattern},
    +          new String[] {"Filter"});
    +    } finally {
    +      test("drop table if exists %s", absolutePathsMetadata);
    +    }
    +  }
     
    -      int rowCount = testSql(String.format("select * from %s", 
RELATIVE_PATHS_METADATA));
    -      assertEquals("An incorrect result was obtained while querying a 
table with metadata cache files", 50, rowCount);
    +  @Test
    +  public void testSpacesInMetadataCachePath() throws Exception {
    +    final String pathWithSpaces = "path with spaces";
    +    try {
    +      // creating multilevel table to store path with spaces in both 
metadata files (METADATA and METADATA_DIRECTORIES)
    +      test("create table dfs_test.tmp.`%s` as select * from 
cp.`tpch/nation.parquet`", pathWithSpaces);
    +      test("create table dfs_test.tmp.`%1$s/%1$s` as select * from 
cp.`tpch/nation.parquet`", pathWithSpaces);
    +      test("refresh table metadata dfs_test.tmp.`%s`", pathWithSpaces);
    +      checkForMetadataFile(pathWithSpaces);
    +      String query = String.format("select * from dfs_test.tmp.`%s`", 
pathWithSpaces);
    +      int expectedRowCount = 50;
    +      int expectedNumFiles = 1; // point to selectionRoot since no pruning 
is done in this query
    +      int actualRowCount = testSql(query);
    +      assertEquals("An incorrect result was obtained while querying a 
table with metadata cache files",
    +          expectedRowCount, actualRowCount);
    +      String numFilesPattern = "numFiles=" + expectedNumFiles;
    +      String usedMetaPattern = "usedMetadataFile=true";
    +      String cacheFileRootPattern = String.format("cacheFileRoot=%s/%s", 
getDfsTestTmpSchemaLocation(), pathWithSpaces);
    +      PlanTestBase.testPlanMatchingPatterns(query, new 
String[]{numFilesPattern, usedMetaPattern, cacheFileRootPattern},
    +          new String[] {"Filter"});
    +    } finally {
    +      test("drop table if exists dfs_test.tmp.`%s`", pathWithSpaces);
    +    }
    +  }
    +
    +  @Test
    +  public void testFutureUnsupportedMetadataVersion() throws Exception {
    +    final String unsupportedMetadataVersion = 
"unsupported_metadata_version";
    +    try {
    +      test("use dfs_test.tmp");
    +      test("create table `%s` as select * from cp.`tpch/nation.parquet`", 
unsupportedMetadataVersion);
    +      String lastVersion = 
Iterables.getLast(MetadataVersions.SUPPORTED_VERSIONS);
    +      for (String supportedVersion : MetadataVersions.SUPPORTED_VERSIONS) {
    +        if (MetadataVersions.compare(lastVersion, supportedVersion) < 0) {
    +          lastVersion = supportedVersion;
    +        }
    +      }
    +      // Get the future version, which is absent in 
MetadataVersions.SUPPORTED_VERSIONS list
    +      String futureVersion = "v" + (Integer.parseInt("" + 
lastVersion.charAt(1)) + 1);
    --- End diff --
    
    Please user `String.valueOf` instead of concatenating with empty string.


> Drill 1.10 queries fail due to Parquet Metadata "corruption" from DRILL-3867
> ----------------------------------------------------------------------------
>
>                 Key: DRILL-5660
>                 URL: https://issues.apache.org/jira/browse/DRILL-5660
>             Project: Apache Drill
>          Issue Type: Bug
>    Affects Versions: 1.11.0
>            Reporter: Paul Rogers
>            Assignee: Vitalii Diravka
>              Labels: doc-impacting
>
> Drill recently accepted a PR for the following bug:
> DRILL-3867: Store relative paths in metadata file
> This PR turned out to have a flaw which affects version compatibility.
> The DRILL-3867 PR changed the format of Parquet metadata files to store 
> relative paths. All Drill servers after that PR create files with relative 
> paths. But, the version number of the file is unchanged, so that older 
> Drillbits don't know that they can't understand the file.
> Instead, if an older Drill tries to read the file, queries fail left and 
> right. Drill will resolve the paths, but does so relative to the user's HDFS 
> home directory, which is wrong.
> What should have happened is that we should have bumped the parquet metadata 
> file version number so that older Drillbits can’t read the file. This ticket 
> requests that we do that.
> Now, one could argue that the lack of version number change is fine. Once a 
> user upgrades Drill, they won't use an old Drillbit. But, things are not that 
> simple:
> * A developer tests a branch based on a pre-DRILL-3867 build on a cluster in 
> which metadata files have been created by a post-DRILL-3867 build. (This has 
> already occurred multiple times in our shop.)
> * A user tries to upgrade to Drill 1.11, finds an issue, and needs to roll 
> back to Drill 1.10. Doing so will cause queries to fail due to 
> seemingly-corrupt metadata files.
> * A user tries to do a rolling upgrade: running 1.11 on some servers, 1.10 on 
> others. Once a 1.11 server is installed, the metadata is updated ("corrupted" 
> from the perspective of 1.10) and queries fail.
> Standard practice in this scenario is to:
> * Bump the file version number when the file format changes, and
> * Software refuses to read files with a version newer than the software was 
> designed for.
> Of course, it is highly desirable that newer servers read old files, but that 
> is not the issue here.



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to