Github user xubo245 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2804#discussion_r231105573 --- Diff: store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java --- @@ -61,14 +65,122 @@ public static Schema readSchemaInSchemaFile(String schemaFilePath) throws IOExce return new Schema(schemaList); } + /** + * get carbondata/carbonindex file in path + * + * @param path carbon file path + * @return CarbonFile array + */ + private static CarbonFile[] getCarbonFile(String path, final String extension) + throws IOException { + String dataFilePath = path; + if (!(dataFilePath.contains(extension))) { + CarbonFile[] carbonFiles = FileFactory + .getCarbonFile(path) + .listFiles(new CarbonFileFilter() { + @Override + public boolean accept(CarbonFile file) { + if (file == null) { + return false; + } + return file.getName().endsWith(extension); + } + }); + if (carbonFiles == null || carbonFiles.length < 1) { + throw new IOException("Carbon file not exists."); + } + return carbonFiles; + } + return null; + } + + /** + * read schema from path, + * path can be folder path, carbonindex file path, and carbondata file path + * and will not check all files schema + * + * @param path file/folder path + * @return schema + * @throws IOException + */ + public static Schema readSchema(String path) throws IOException { + return readSchema(path, false); + } + + /** + * read schema from path, + * path can be folder path, carbonindex file path, and carbondata file path + * and user can decide whether check all files schema + * + * @param path file/folder path + * @param validateSchema whether check all files schema + * @return schema + * @throws IOException + */ + public static Schema readSchema(String path, boolean validateSchema) throws IOException { + if (path.endsWith(INDEX_FILE_EXT)) { + return readSchemaFromIndexFile(path); + } else if (path.endsWith(CARBON_DATA_EXT)) { + return readSchemaFromDataFile(path); + } else if 
(validateSchema) { + CarbonFile[] carbonIndexFiles = getCarbonFile(path, INDEX_FILE_EXT); + Schema schema; + if (carbonIndexFiles != null && carbonIndexFiles.length != 0) { + schema = readSchemaFromIndexFile(carbonIndexFiles[0].getAbsolutePath()); + for (int i = 1; i < carbonIndexFiles.length; i++) { + Schema schema2 = readSchemaFromIndexFile(carbonIndexFiles[i].getAbsolutePath()); + if (schema != schema2) { + throw new CarbonDataLoadingException("Schema is different between different files."); + } + } + CarbonFile[] carbonDataFiles = getCarbonFile(path, CARBON_DATA_EXT); + for (int i = 0; i < carbonDataFiles.length; i++) { + Schema schema2 = readSchemaFromDataFile(carbonDataFiles[i].getAbsolutePath()); + if (!schema.equals(schema2)) { + throw new CarbonDataLoadingException("Schema is different between different files."); + } + } + return schema; + } else { + throw new CarbonDataLoadingException("No carbonindex file in this path."); + } + } else { + String indexFilePath = getCarbonFile(path, INDEX_FILE_EXT)[0].getAbsolutePath(); + if (indexFilePath != null) { + return readSchemaFromIndexFile(indexFilePath); + } else { + String dataFilePath = getCarbonFile(path, CARBON_DATA_EXT)[0].getAbsolutePath(); --- End diff -- Yeah, removed the `else`.
---