[ https://issues.apache.org/jira/browse/DRILL-4831?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15673119#comment-15673119 ]
ASF GitHub Bot commented on DRILL-4831: --------------------------------------- Github user amansinha100 commented on a diff in the pull request: https://github.com/apache/drill/pull/653#discussion_r88399438 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/Metadata.java --- @@ -495,31 +499,75 @@ private ParquetFileMetadata_v3 getParquetFileMetadata_v3(ParquetTableMetadata_v3 * @param p * @throws IOException */ - private void writeFile(ParquetTableMetadata_v3 parquetTableMetadata, Path p) throws IOException { + private void writeFile(ParquetTableMetadata_v3 parquetTableMetadata, String path) throws IOException { JsonFactory jsonFactory = new JsonFactory(); jsonFactory.configure(Feature.AUTO_CLOSE_TARGET, false); jsonFactory.configure(JsonParser.Feature.AUTO_CLOSE_SOURCE, false); ObjectMapper mapper = new ObjectMapper(jsonFactory); SimpleModule module = new SimpleModule(); module.addSerializer(ColumnMetadata_v3.class, new ColumnMetadata_v3.Serializer()); mapper.registerModule(module); - FSDataOutputStream os = fs.create(p); + + // If multiple clients are updating metadata cache file concurrently, the cache file + // can get corrupted. To prevent this, write to a unique temporary file and then do + // atomic rename. + UUID randomUUID = UUID.randomUUID(); + Path tmpPath = new Path(path, METADATA_FILENAME + "." + randomUUID); + + FSDataOutputStream os = fs.create(tmpPath); mapper.writerWithDefaultPrettyPrinter().writeValue(os, parquetTableMetadata); os.flush(); os.close(); + + // Use fileContext API as FileSystem rename is deprecated. 
+ FileContext fileContext = FileContext.getFileContext(tmpPath.toUri()); + Path finalPath = new Path(path, METADATA_FILENAME); + + try { + fileContext.rename(tmpPath, finalPath, Options.Rename.OVERWRITE); + } catch (Exception e) { + logger.info("Metadata cache file rename from {} to {} failed", tmpPath.toString(), finalPath.toString(), e); + throw new IOException("metadata cache file rename failed"); + } finally { + if (fs.exists(tmpPath)) { + fs.delete(tmpPath, false); + } + } } - private void writeFile(ParquetTableMetadataDirs parquetTableMetadataDirs, Path p) throws IOException { + private void writeFile(ParquetTableMetadataDirs parquetTableMetadataDirs, String path) throws IOException { JsonFactory jsonFactory = new JsonFactory(); jsonFactory.configure(Feature.AUTO_CLOSE_TARGET, false); jsonFactory.configure(JsonParser.Feature.AUTO_CLOSE_SOURCE, false); ObjectMapper mapper = new ObjectMapper(jsonFactory); SimpleModule module = new SimpleModule(); mapper.registerModule(module); - FSDataOutputStream os = fs.create(p); + + // If multiple clients are updating metadata cache file concurrently, the cache file + // can get corrupted. To prevent this, write to a unique temporary file and then do + // atomic rename. + UUID randomUUID = UUID.randomUUID(); + Path tmpPath = new Path(path, METADATA_DIRECTORIES_FILENAME + "." + randomUUID); + + FSDataOutputStream os = fs.create(tmpPath); mapper.writerWithDefaultPrettyPrinter().writeValue(os, parquetTableMetadataDirs); os.flush(); os.close(); + + // Use fileContext API as FileSystem rename is deprecated. 
+ FileContext fileContext = FileContext.getFileContext(tmpPath.toUri()); + Path finalPath = new Path(path, METADATA_DIRECTORIES_FILENAME); + + try { + fileContext.rename(tmpPath, finalPath, Options.Rename.OVERWRITE); + } catch (Exception e) { + logger.info("Metadata cache file rename from {} to {} failed", tmpPath.toString(), finalPath.toString(), e); + throw new IOException("metadata cache file rename failed"); --- End diff -- This IOException is masking the original exception e. Better to rethrow using IOException(message, cause) constructor. > Running refresh table metadata concurrently randomly fails with > JsonParseException > ---------------------------------------------------------------------------------- > > Key: DRILL-4831 > URL: https://issues.apache.org/jira/browse/DRILL-4831 > Project: Apache Drill > Issue Type: Bug > Components: Metadata > Affects Versions: 1.8.0 > Reporter: Rahul Challapalli > Assignee: Aman Sinha > Attachments: error.log, l_3level.tgz > > > git.commit.id.abbrev=f476eb5 > Just run the below command concurrently from 10 different JDBC connections. > There is a likelihood that you might encounter the below error > Extracts from the log > {code} > Caused By (java.lang.AssertionError) Internal error: Error while applying > rule DrillPushProjIntoScan, args > [rel#189411:LogicalProject.NONE.ANY([]).[](input=rel#189289:Subset#3.ENUMERABLE.ANY([]).[],l_orderkey=$1,dir0=$2,dir1=$3,dir2=$4,l_shipdate=$5,l_extendedprice=$6,l_discount=$7), > rel#189233:EnumerableTableScan.ENUMERABLE.ANY([]).[](table=[dfs, > metadata_caching_pp, l_3level])] > org.apache.calcite.util.Util.newInternal():792 > org.apache.calcite.plan.volcano.VolcanoRuleCall.onMatch():251 > ..... > ..... 
> java.lang.Thread.run():745 > Caused By (org.apache.drill.common.exceptions.DrillRuntimeException) > com.fasterxml.jackson.core.JsonParseException: Illegal character ((CTRL-CHAR, > code 0)): only regular white space (\r, \n, \t) is allowed between tokens > at [Source: com.mapr.fs.MapRFsDataInputStream@57a574a8; line: 1, column: 2] > org.apache.drill.exec.planner.logical.DrillPushProjIntoScan.onMatch():95 > {code} > Attached are the complete log message and the data set. -- This message was sent by Atlassian JIRA (v6.3.4#6332)