>From Preetham Poluparthi <[email protected]>: Preetham Poluparthi has submitted this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20593?usp=email )
Change subject: [NO ISSUE] Fix parquet azure with dynamic prefix ...................................................................... [NO ISSUE] Fix parquet azure with dynamic prefix Ext-ref: MB-69466 Change-Id: I81c33dae177fb59256eeb82e04aef4db5533ee37 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20593 Tested-by: Hussain Towaileb <[email protected]> Integration-Tests: Hussain Towaileb <[email protected]> Reviewed-by: Hussain Towaileb <[email protected]> --- M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/blob/BlobUtils.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/datalake/DatalakeUtils.java 4 files changed, 50 insertions(+), 7 deletions(-) Approvals: Hussain Towaileb: Looks good to me, approved; Verified; Verified diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java index 2846cf1..0eb935e 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java @@ -153,7 +153,9 @@ public static final String KEY_ADAPTER_NAME_HTTP = "http_adapter"; public static final String KEY_ADAPTER_NAME_AWS_S3 = "S3"; public static final String KEY_ADAPTER_NAME_AZURE_BLOB = "AZUREBLOB"; + public static final String KEY_ADAPTER_NAME_AZURE_BLOB_ALIAS = "AZURE_BLOB"; public static final String KEY_ADAPTER_NAME_AZURE_DATA_LAKE = "AZUREDATALAKE"; + public static final String KEY_ADAPTER_NAME_AZURE_DATA_LAKE_ALIAS = "AZURE_DATALAKE"; public static final String KEY_ADAPTER_NAME_GCS = "GCS"; public static final String KEY_ADAPTER_NAME_HDFS = "HDFS"; diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java index de31531..83c3a8b 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java @@ -87,6 +87,8 @@ import org.apache.asterix.external.util.aws.s3.S3Constants; import org.apache.asterix.external.util.aws.s3.S3Utils; import org.apache.asterix.external.util.azure.AzureConstants; +import org.apache.asterix.external.util.azure.blob.BlobUtils; +import org.apache.asterix.external.util.azure.datalake.DatalakeUtils; import org.apache.asterix.external.util.google.GCSConstants; import org.apache.asterix.external.util.google.GCSUtils; import org.apache.asterix.om.types.ARecordType; @@ -1137,26 +1139,32 @@ return configuration.getOrDefault(DEFINITION_FIELD_NAME, configuration.get(KEY_PATH)); } - public static String getProtocolContainerPair(Map<String, String> configurations) { + public static String getProtocolContainerPair(Map<String, String> configurations) throws CompilationException { String container = configurations.getOrDefault(ExternalDataConstants.CONTAINER_NAME_FIELD_NAME, ""); String type = configurations.getOrDefault(ExternalDataConstants.KEY_EXTERNAL_SOURCE_TYPE, ""); String protocol; switch (type) { case ExternalDataConstants.KEY_ADAPTER_NAME_AWS_S3: protocol = S3Constants.HADOOP_S3_PROTOCOL; - break; + return protocol + "://" + container + "/"; case ExternalDataConstants.KEY_ADAPTER_NAME_AZURE_BLOB: - case ExternalDataConstants.KEY_ADAPTER_NAME_AZURE_DATA_LAKE: + case ExternalDataConstants.KEY_ADAPTER_NAME_AZURE_BLOB_ALIAS: protocol = AzureConstants.HADOOP_AZURE_PROTOCOL; - break; + String blobEndpoint = BlobUtils.getEndpointFromClient(configurations); + return protocol + "://" + container + "@" + blobEndpoint + "/"; + case ExternalDataConstants.KEY_ADAPTER_NAME_AZURE_DATA_LAKE: + case ExternalDataConstants.KEY_ADAPTER_NAME_AZURE_DATA_LAKE_ALIAS: + protocol = AzureConstants.HADOOP_AZURE_PROTOCOL; + String dataLakeEndpoint = DatalakeUtils.getEndpointFromClient(configurations); + return protocol + "://" + container + "@" + dataLakeEndpoint + "/"; case ExternalDataConstants.KEY_ADAPTER_NAME_GCS: protocol = GCSConstants.HADOOP_GCS_PROTOCOL; - break; + return protocol + "://" + container + "/"; case ExternalDataConstants.KEY_ADAPTER_NAME_LOCALFS: String path = getDefinitionOrPath(configurations); String[] nodePathPair = path.trim().split("://"); protocol = nodePathPair[0]; - break; + return protocol + "://" + container + "/"; case ExternalDataConstants.KEY_ADAPTER_NAME_HDFS: // Remove trailing slashes as prefixes/paths in hdfs start with a slash (absolute paths) return configurations.get(ExternalDataConstants.KEY_HDFS_URL).replaceAll("/+$", ""); @@ -1164,7 +1172,6 @@ return ""; } - return protocol + "://" + container + "/"; } public static void validateType(Map<String, String> properties, ARecordType itemType) throws CompilationException { diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/blob/BlobUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/blob/BlobUtils.java index 4860188..a695320 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/blob/BlobUtils.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/blob/BlobUtils.java @@ -52,6 +52,7 @@ import org.apache.asterix.external.util.ExternalDataPrefix; import org.apache.asterix.external.util.ExternalDataUtils; import org.apache.asterix.external.util.azure.AzureConstants; +import org.apache.asterix.external.util.azure.AzureUtils; import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.api.exceptions.IWarningCollector; @@ -344,4 +345,20 @@ throw new CompilationException(ErrorCode.EXTERNAL_SOURCE_ERROR, ex, getMessageOrToString(ex)); } } + + public static String getEndpointFromClient(Map<String, String> configuration) throws CompilationException { + String endpoint = configuration.get(ENDPOINT_FIELD_NAME); + if (endpoint == null) { + throw new CompilationException(PARAMETERS_REQUIRED, ENDPOINT_FIELD_NAME); + } + + BlobServiceClientBuilder builder = new BlobServiceClientBuilder(); + try { + builder.endpoint(endpoint); + } catch (Exception ex) { + throw new CompilationException(ErrorCode.EXTERNAL_SOURCE_ERROR, ex, getMessageOrToString(ex)); + } + return AzureUtils.extractEndPoint(builder.buildClient().getAccountUrl()); + } + } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/datalake/DatalakeUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/datalake/DatalakeUtils.java index 475ede3..395cb0f 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/datalake/DatalakeUtils.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/datalake/DatalakeUtils.java @@ -54,6 +54,7 @@ import org.apache.asterix.external.util.ExternalDataPrefix; import org.apache.asterix.external.util.ExternalDataUtils; import org.apache.asterix.external.util.azure.AzureConstants; +import org.apache.asterix.external.util.azure.AzureUtils; import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.api.exceptions.IWarningCollector; import org.apache.hyracks.api.exceptions.SourceLocation; @@ -315,4 +316,20 @@ throw new CompilationException(ErrorCode.EXTERNAL_SOURCE_ERROR, ex, getMessageOrToString(ex)); } } + + public static String getEndpointFromClient(Map<String, String> configuration) throws CompilationException { + String endpoint = configuration.get(ENDPOINT_FIELD_NAME); + if (endpoint == null) { + throw new CompilationException(PARAMETERS_REQUIRED, ENDPOINT_FIELD_NAME); + } + + DataLakeServiceClientBuilder builder = new DataLakeServiceClientBuilder(); + try { + builder.endpoint(endpoint); + } catch (Exception ex) { + throw new CompilationException(ErrorCode.EXTERNAL_SOURCE_ERROR, ex, getMessageOrToString(ex)); + } + return AzureUtils.extractEndPoint(builder.buildClient().getAccountUrl()); + } + } -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20593?usp=email To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings?usp=email Gerrit-MessageType: merged Gerrit-Project: asterixdb Gerrit-Branch: phoenix Gerrit-Change-Id: I81c33dae177fb59256eeb82e04aef4db5533ee37 Gerrit-Change-Number: 20593 Gerrit-PatchSet: 3 Gerrit-Owner: Preetham Poluparthi <[email protected]> Gerrit-Reviewer: Anon. E. Moose #1000171 Gerrit-Reviewer: Hussain Towaileb <[email protected]> Gerrit-Reviewer: Jenkins <[email protected]> Gerrit-Reviewer: Preetham Poluparthi <[email protected]>
