>From Hussain Towaileb <[email protected]>:
Hussain Towaileb has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17649 )
Change subject: [ASTERIXDB-3228][EXT] Add utility to extract computed fields
from prefix
......................................................................
[ASTERIXDB-3228][EXT] Add utility to extract computed fields from prefix
Change-Id: Iaaa94fbf8f52de743324154a20b9769406badb8e
---
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
2 files changed, 63 insertions(+), 0 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/49/17649/1
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
index 0080e9b..bc2ce63 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@ -24,6 +24,7 @@
import java.util.TimeZone;
import java.util.function.LongSupplier;
import java.util.function.Supplier;
+import java.util.regex.Pattern;
import org.apache.asterix.om.types.ATypeTag;
import org.apache.hyracks.util.StorageUtil;
@@ -303,6 +304,8 @@
public static final String DEFINITION_FIELD_NAME = "definition";
public static final String CONTAINER_NAME_FIELD_NAME = "container";
public static final String SUBPATH = "subpath";
+ public static final String PREFIX_DEFAULT_DELIMITER = "/";
+ public static final Pattern COMPUTED_FIELD_PATTERN =
Pattern.compile("\\{[^{}:]+:[^{}:]+}");
public static class ParquetOptions {
private ParquetOptions() {
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
index 35e68ed..60846dd 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
@@ -18,6 +18,7 @@
*/
package org.apache.asterix.external.util;
+import static
org.apache.asterix.external.util.ExternalDataConstants.COMPUTED_FIELD_PATTERN;
import static
org.apache.asterix.external.util.ExternalDataConstants.KEY_DELIMITER;
import static
org.apache.asterix.external.util.ExternalDataConstants.KEY_ESCAPE;
import static
org.apache.asterix.external.util.ExternalDataConstants.KEY_EXCLUDE;
@@ -26,6 +27,7 @@
import static org.apache.asterix.external.util.ExternalDataConstants.KEY_QUOTE;
import static
org.apache.asterix.external.util.ExternalDataConstants.KEY_RECORD_END;
import static
org.apache.asterix.external.util.ExternalDataConstants.KEY_RECORD_START;
+import static
org.apache.asterix.external.util.ExternalDataConstants.PREFIX_DEFAULT_DELIMITER;
import static
org.apache.asterix.external.util.azure.blob_storage.AzureUtils.validateAzureBlobProperties;
import static
org.apache.asterix.external.util.azure.blob_storage.AzureUtils.validateAzureDataLakeProperties;
import static
org.apache.asterix.external.util.google.gcs.GCSUtils.validateProperties;
@@ -75,6 +77,7 @@
import org.apache.asterix.runtime.evaluators.common.NumberUtils;
import org.apache.asterix.runtime.projection.DataProjectionFiltrationInfo;
import org.apache.asterix.runtime.projection.FunctionCallInformation;
+import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.conf.Configuration;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
import org.apache.hyracks.algebricks.common.exceptions.NotImplementedException;
@@ -965,4 +968,52 @@
argHolder.getDataOutput().writeByte(ARRAY16);
argHolder.getDataOutput().writeShort((short) 0);
}
+
+ /**
+  * Splits a prefix into its segments using {@code PREFIX_DEFAULT_DELIMITER} as the separator.
+  * <p>
+  * An empty prefix yields an empty list. Otherwise the result is the fixed-size list view returned by
+  * {@link Arrays#asList(Object[])} over the output of {@link String#split(String)}.
+  *
+  * @param prefix prefix to split, must not be {@code null}
+  * @return a list of the prefix segments, empty if the prefix is empty
+  */
+ public static List<String> getPrefixSegments(String prefix) {
+     if (prefix.isEmpty()) {
+         return Collections.emptyList();
+     }
+     return Arrays.asList(prefix.split(PREFIX_DEFAULT_DELIMITER));
+ }
+
+ /**
+  * Splits the provided prefix into segments and reports which of them contain a computed field.
+  * <p>
+  * Convenience overload: the prefix is split with {@code getPrefixSegments(String)} and the resulting
+  * segments are handed to {@code getComputedFieldSegments(List)}.
+  *
+  * @param prefix prefix to inspect
+  * @return Pair of the computed field segments and their segment indexes in the prefix
+  */
+ public static Pair<List<String>, List<Integer>> getComputedFieldSegments(String prefix) {
+     return getComputedFieldSegments(getPrefixSegments(prefix));
+ }
+
+ /**
+  * Finds the segments in which {@code COMPUTED_FIELD_PATTERN} occurs and records their positions.
+  * <p>
+  * The two returned lists are parallel: the i-th entry of the left list is the full text of a matching
+  * segment and the i-th entry of the right list is that segment's index in {@code segments}.
+  *
+  * @param segments prefix segments to inspect, may be empty
+  * @return Pair of the matching segments and their indexes; both lists are empty if nothing matches
+  */
+ public static Pair<List<String>, List<Integer>> getComputedFieldSegments(List<String> segments) {
+     List<String> computedFieldSegmentNames = new ArrayList<>();
+     List<Integer> computedFieldSegmentIndexes = new ArrayList<>();
+
+     // a single matcher is reused for all segments; an empty list simply skips the loop
+     Matcher matcher = COMPUTED_FIELD_PATTERN.matcher("");
+     for (int i = 0; i < segments.size(); i++) {
+         String segment = segments.get(i);
+         // find() (not matches()): the pattern only has to occur somewhere inside the segment
+         if (matcher.reset(segment).find()) {
+             computedFieldSegmentNames.add(segment);
+             computedFieldSegmentIndexes.add(i);
+         }
+     }
+
+     return Pair.of(computedFieldSegmentNames, computedFieldSegmentIndexes);
+ }
}
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17649
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: Iaaa94fbf8f52de743324154a20b9769406badb8e
Gerrit-Change-Number: 17649
Gerrit-PatchSet: 1
Gerrit-Owner: Hussain Towaileb <[email protected]>
Gerrit-MessageType: newchange