From Hussain Towaileb <[email protected]>:

Hussain Towaileb has uploaded this change for review. ( 
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17649 )


Change subject: [ASTERIXDB-3228][EXT] Add utility to extract computed fields from prefix
......................................................................

[ASTERIXDB-3228][EXT] Add utility to extract computed fields from prefix

Change-Id: Iaaa94fbf8f52de743324154a20b9769406badb8e
---
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
2 files changed, 63 insertions(+), 0 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/49/17649/1

diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
index 0080e9b..bc2ce63 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@ -24,6 +24,7 @@
 import java.util.TimeZone;
 import java.util.function.LongSupplier;
 import java.util.function.Supplier;
+import java.util.regex.Pattern;

 import org.apache.asterix.om.types.ATypeTag;
 import org.apache.hyracks.util.StorageUtil;
@@ -303,6 +304,8 @@
     public static final String DEFINITION_FIELD_NAME = "definition";
     public static final String CONTAINER_NAME_FIELD_NAME = "container";
     public static final String SUBPATH = "subpath";
+    public static final String PREFIX_DEFAULT_DELIMITER = "/"; // delimiter a prefix is split on
+    public static final Pattern COMPUTED_FIELD_PATTERN = 
Pattern.compile("\\{[^{}:]+:[^{}:]+}"); // "{a:b}" where a and b are non-empty and contain no '{', '}' or ':'

     public static class ParquetOptions {
         private ParquetOptions() {
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
index 35e68ed..60846dd 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
@@ -18,6 +18,7 @@
  */
 package org.apache.asterix.external.util;

+import static 
org.apache.asterix.external.util.ExternalDataConstants.COMPUTED_FIELD_PATTERN;
 import static 
org.apache.asterix.external.util.ExternalDataConstants.KEY_DELIMITER;
 import static 
org.apache.asterix.external.util.ExternalDataConstants.KEY_ESCAPE;
 import static 
org.apache.asterix.external.util.ExternalDataConstants.KEY_EXCLUDE;
@@ -26,6 +27,7 @@
 import static org.apache.asterix.external.util.ExternalDataConstants.KEY_QUOTE;
 import static 
org.apache.asterix.external.util.ExternalDataConstants.KEY_RECORD_END;
 import static 
org.apache.asterix.external.util.ExternalDataConstants.KEY_RECORD_START;
+import static 
org.apache.asterix.external.util.ExternalDataConstants.PREFIX_DEFAULT_DELIMITER;
 import static 
org.apache.asterix.external.util.azure.blob_storage.AzureUtils.validateAzureBlobProperties;
 import static 
org.apache.asterix.external.util.azure.blob_storage.AzureUtils.validateAzureDataLakeProperties;
 import static 
org.apache.asterix.external.util.google.gcs.GCSUtils.validateProperties;
@@ -75,6 +77,7 @@
 import org.apache.asterix.runtime.evaluators.common.NumberUtils;
 import org.apache.asterix.runtime.projection.DataProjectionFiltrationInfo;
 import org.apache.asterix.runtime.projection.FunctionCallInformation;
+import org.apache.commons.lang3.tuple.Pair;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
 import org.apache.hyracks.algebricks.common.exceptions.NotImplementedException;
@@ -965,4 +968,52 @@
         argHolder.getDataOutput().writeByte(ARRAY16);
         argHolder.getDataOutput().writeShort((short) 0);
     }
+
+    /**
+     * Splits a prefix on {@code PREFIX_DEFAULT_DELIMITER} and returns its segments in order.
+     *
+     * @param prefix the prefix to split; must not be null, since {@code isEmpty()} is called on it
+     * @return an empty list for an empty prefix, otherwise a fixed-size list over the split result
+     */
+    public static List<String> getPrefixSegments(String prefix) {
+        return prefix.isEmpty() ? Collections.emptyList() : 
Arrays.asList(prefix.split(PREFIX_DEFAULT_DELIMITER));
+    }
+
+    /**
+     * Splits the prefix into segments and returns the segments that contain a computed field.
+     *
+     * @param prefix the prefix to split and inspect
+     * @return Pair of the matching segments (left) and their segment indexes in the prefix (right)
+     */
+    public static Pair<List<String>, List<Integer>> 
getComputedFieldSegments(String prefix) {
+        List<String> segments = getPrefixSegments(prefix);
+        return getComputedFieldSegments(segments);
+    }
+
+    public static Pair<List<String>, List<Integer>> 
getComputedFieldSegments(List<String> segments) {
+        List<String> computedFieldSegmentNames = new ArrayList<>(); // whole segments, not extracted field names
+        List<Integer> computedFieldSegmentIndexes = new ArrayList<>(); // parallel list: index of each stored segment
+
+        // an empty list yields two empty result lists; segments.get(0) below needs at least one element
+        if (segments.size() != 0) {
+            // test the first segment here; the same Matcher is then reset and reused for the remaining ones
+            Matcher matcher = COMPUTED_FIELD_PATTERN.matcher(segments.get(0));
+            if (matcher.find()) { // find() accepts a match anywhere inside the segment
+                computedFieldSegmentNames.add(segments.get(0));
+                computedFieldSegmentIndexes.add(0);
+            }
+
+            if (segments.size() > 1) { // NOTE(review): redundant, the loop condition below already covers this
+                for (int i = 1; i < segments.size(); i++) {
+                    matcher.reset(segments.get(i));
+                    if (matcher.find()) {
+                        computedFieldSegmentNames.add(segments.get(i));
+                        computedFieldSegmentIndexes.add(i);
+                    }
+                }
+            }
+        }
+
+        return Pair.of(computedFieldSegmentNames, computedFieldSegmentIndexes); // left: segments, right: indexes
+    }
 }

--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17649
To unsubscribe, or for help writing mail filters, visit 
https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: Iaaa94fbf8f52de743324154a20b9769406badb8e
Gerrit-Change-Number: 17649
Gerrit-PatchSet: 1
Gerrit-Owner: Hussain Towaileb <[email protected]>
Gerrit-MessageType: newchange

Reply via email to