From Hussain Towaileb <hussai...@gmail.com>: Hussain Towaileb has uploaded this change for review. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17004 )
Change subject: Add support to passing delimiter for prefix ...................................................................... Add support to passing delimiter for prefix Change-Id: Id8cadefa813bc2978cfccedb11eedabc14b6a64a --- M asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java M asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/microsoft/AzureBlobStorageExternalDatasetTest.java M asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/over-1000-objects/over-1000-objects.001.adm M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStream.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/parquet/AwsS3ParquetReaderFactory.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java 8 files changed, 18 insertions(+), 27 deletions(-) git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/04/17004/1 diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java index 316d261..cde32b8 100644 --- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java +++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java @@ -59,7 +59,7 @@ // This is used for a test to generate over 1000 number of files public static final String OVER_1000_OBJECTS_PATH = "over-1000-objects"; - public static final int OVER_1000_OBJECTS_COUNT = 
2999; + public static final int OVER_1000_OBJECTS_COUNT = 9999; private static Uploader playgroundDataLoader; private static Uploader fixedDataLoader; diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/microsoft/AzureBlobStorageExternalDatasetTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/microsoft/AzureBlobStorageExternalDatasetTest.java index 7de2d7e..fd3e32d 100644 --- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/microsoft/AzureBlobStorageExternalDatasetTest.java +++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/microsoft/AzureBlobStorageExternalDatasetTest.java @@ -117,7 +117,7 @@ // This is used for a test to generate over 1000 number of files private static final String OVER_1000_OBJECTS_PATH = "over-1000-objects"; - private static final int OVER_1000_OBJECTS_COUNT = 2999; + private static final int OVER_1000_OBJECTS_COUNT = 9999; private static final Set<String> fileNames = new HashSet<>(); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/over-1000-objects/over-1000-objects.001.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/over-1000-objects/over-1000-objects.001.adm index b610b1d..f652703 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/over-1000-objects/over-1000-objects.001.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/over-1000-objects/over-1000-objects.001.adm @@ -1 +1 @@ -{ "count": 2999 } \ No newline at end of file +{ "count": 9999 } \ No newline at end of file diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStream.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStream.java index 
bbcf9cd..b4e0a85 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStream.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStream.java @@ -133,7 +133,7 @@ private S3Client buildAwsS3Client(Map<String, String> configuration) throws HyracksDataException { try { - return S3Utils.buildAwsS3Client(configuration); + return S3Utils.buildClient(configuration); } catch (CompilationException ex) { throw HyracksDataException.create(ex); } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/parquet/AwsS3ParquetReaderFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/parquet/AwsS3ParquetReaderFactory.java index 92b7a95..da7bd05 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/parquet/AwsS3ParquetReaderFactory.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/parquet/AwsS3ParquetReaderFactory.java @@ -63,7 +63,7 @@ try { JobConf conf = createHdfsConf(serviceCtx, configuration); int numberOfPartitions = getPartitionConstraint().getLocations().length; - S3Utils.configureAwsS3HdfsJobConf(conf, configuration, numberOfPartitions); + S3Utils.configureHdfsJobConf(conf, configuration, numberOfPartitions); configureHdfsConf(conf, configuration); } catch (SdkException | SdkBaseException ex) { throw new RuntimeDataException(ErrorCode.EXTERNAL_SOURCE_ERROR, getMessageOrToString(ex)); diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java index 429706e..c07e944 100644 --- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java @@ -297,6 +297,8 @@ public static final String DEFINITION_FIELD_NAME = "definition"; public static final String CONTAINER_NAME_FIELD_NAME = "container"; + public static final String KEY_DELIMITER_DEFAULT = "/"; + public static class ParquetOptions { private ParquetOptions() { diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java index 62dc074..39de6bf 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java @@ -23,6 +23,7 @@ import static org.apache.asterix.common.exceptions.ErrorCode.PARAMETERS_REQUIRED; import static org.apache.asterix.external.util.ExternalDataConstants.KEY_ADAPTER_NAME_GCS; import static org.apache.asterix.external.util.ExternalDataConstants.KEY_DELIMITER; +import static org.apache.asterix.external.util.ExternalDataConstants.KEY_DELIMITER_DEFAULT; import static org.apache.asterix.external.util.ExternalDataConstants.KEY_ESCAPE; import static org.apache.asterix.external.util.ExternalDataConstants.KEY_EXCLUDE; import static org.apache.asterix.external.util.ExternalDataConstants.KEY_EXTERNAL_SCAN_BUFFER_SIZE; @@ -652,10 +653,14 @@ return getPrefix(configuration, true); } - public static String getPrefix(Map<String, String> configuration, boolean appendSlash) { + public static String getPrefix(Map<String, String> configuration, boolean appendDelimiter) { String definition = configuration.get(ExternalDataConstants.DEFINITION_FIELD_NAME); + String delimiter = configuration.get(KEY_DELIMITER); + delimiter = 
delimiter != null ? delimiter : KEY_DELIMITER_DEFAULT; + + // append the delimiter at the end if it is not there if (definition != null && !definition.isEmpty()) { - return appendSlash ? definition + (!definition.endsWith("/") ? "/" : "") : definition; + return appendDelimiter ? definition + (!definition.endsWith(delimiter) ? delimiter : "") : definition; } return ""; } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java index 6775bf12b..50a1299 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java @@ -82,7 +82,7 @@ * @return S3 client * @throws CompilationException CompilationException */ - public static S3Client buildAwsS3Client(Map<String, String> configuration) throws CompilationException { + public static S3Client buildClient(Map<String, String> configuration) throws CompilationException { // TODO(Hussain): Need to ensure that all required parameters are present in a previous step String instanceProfile = configuration.get(INSTANCE_PROFILE_FIELD_NAME); String accessKeyId = configuration.get(ACCESS_KEY_ID_FIELD_NAME); @@ -100,7 +100,6 @@ if (instanceProfile == null && accessKeyId == null && secretAccessKey == null && sessionToken == null) { credentialsProvider = AnonymousCredentialsProvider.create(); } else if (instanceProfile != null) { - // only "true" value is allowed if (!instanceProfile.equalsIgnoreCase("true")) { throw new CompilationException(INVALID_PARAM_VALUE_ALLOWED_VALUE, INSTANCE_PROFILE_FIELD_NAME, "true"); @@ -180,8 +179,7 @@ * @param configuration properties * @param numberOfPartitions number of partitions in the cluster */ - public static void configureAwsS3HdfsJobConf(JobConf conf, Map<String, String> configuration, - int 
numberOfPartitions) { + public static void configureHdfsJobConf(JobConf conf, Map<String, String> configuration, int numberOfPartitions) { String accessKeyId = configuration.get(ACCESS_KEY_ID_FIELD_NAME); String secretAccessKey = configuration.get(SECRET_ACCESS_KEY_FIELD_NAME); String sessionToken = configuration.get(SESSION_TOKEN_FIELD_NAME); @@ -243,24 +241,10 @@ throw new CompilationException(ErrorCode.PARAMETERS_REQUIRED, srcLoc, ExternalDataConstants.KEY_FORMAT); } - // Both parameters should be passed, or neither should be passed (for anonymous/no auth) - String accessKeyId = configuration.get(ACCESS_KEY_ID_FIELD_NAME); - String secretAccessKey = configuration.get(SECRET_ACCESS_KEY_FIELD_NAME); - if (accessKeyId == null || secretAccessKey == null) { - // If one is passed, the other is required - if (accessKeyId != null) { - throw new CompilationException(REQUIRED_PARAM_IF_PARAM_IS_PRESENT, SECRET_ACCESS_KEY_FIELD_NAME, - ACCESS_KEY_ID_FIELD_NAME); - } else if (secretAccessKey != null) { - throw new CompilationException(REQUIRED_PARAM_IF_PARAM_IS_PRESENT, ACCESS_KEY_ID_FIELD_NAME, - SECRET_ACCESS_KEY_FIELD_NAME); - } - } - validateIncludeExclude(configuration); // Check if the bucket is present - S3Client s3Client = buildAwsS3Client(configuration); + S3Client s3Client = buildClient(configuration); S3Response response; boolean useOldApi = false; String container = configuration.get(ExternalDataConstants.CONTAINER_NAME_FIELD_NAME); @@ -338,7 +322,7 @@ // Prepare to retrieve the objects List<S3Object> filesOnly; String container = configuration.get(ExternalDataConstants.CONTAINER_NAME_FIELD_NAME); - S3Client s3Client = buildAwsS3Client(configuration); + S3Client s3Client = buildClient(configuration); String prefix = getPrefix(configuration); try { -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17004 To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-Project: asterixdb Gerrit-Branch: 
master Gerrit-Change-Id: Id8cadefa813bc2978cfccedb11eedabc14b6a64a Gerrit-Change-Number: 17004 Gerrit-PatchSet: 1 Gerrit-Owner: Hussain Towaileb <hussai...@gmail.com> Gerrit-MessageType: newchange