This is an automated email from the ASF dual-hosted git repository.
kfaraz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new b12e5f300e Add filter in cloud object input source for backward compatibility (#13437)
b12e5f300e is described below
commit b12e5f300e7c2795ba3d9c7ef17fb64f4925b9c0
Author: Tejaswini Bandlamudi <[email protected]>
AuthorDate: Mon Nov 28 23:04:33 2022 +0530
Add filter in cloud object input source for backward compatibility (#13437)
PR https://github.com/apache/druid/pull/13027 replaced the `filter` parameter
with `objectGlob` in the ingestion input source. However, this causes existing
ingestion jobs to fail if they already use a filter. This PR adds the old
filter functionality alongside `objectGlob` to preserve backward compatibility.
---
.../data/input/impl/CloudObjectInputSource.java | 32 ++++++++++++-
.../input/impl/CloudObjectInputSourceTest.java | 45 +++++++++++++-----
.../druid/data/input/aliyun/OssInputSource.java | 4 +-
.../data/input/aliyun/OssInputSourceTest.java | 17 +++++++
.../druid/data/input/azure/AzureInputSource.java | 4 +-
.../data/input/azure/AzureInputSourceTest.java | 55 ++++++++++++++++++++++
.../google/GoogleCloudStorageInputSource.java | 5 +-
.../google/GoogleCloudStorageInputSourceTest.java | 36 ++++++++++++--
.../apache/druid/data/input/s3/S3InputSource.java | 6 ++-
9 files changed, 181 insertions(+), 23 deletions(-)
diff --git
a/core/src/main/java/org/apache/druid/data/input/impl/CloudObjectInputSource.java
b/core/src/main/java/org/apache/druid/data/input/impl/CloudObjectInputSource.java
index 3c3e3c6b72..716e5b611c 100644
---
a/core/src/main/java/org/apache/druid/data/input/impl/CloudObjectInputSource.java
+++
b/core/src/main/java/org/apache/druid/data/input/impl/CloudObjectInputSource.java
@@ -21,7 +21,9 @@ package org.apache.druid.data.input.impl;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.base.Preconditions;
import com.google.common.primitives.Ints;
+import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.druid.data.input.AbstractInputSource;
import org.apache.druid.data.input.InputEntity;
@@ -50,6 +52,13 @@ public abstract class CloudObjectInputSource extends
AbstractInputSource
private final List<URI> uris;
private final List<URI> prefixes;
private final List<CloudObjectLocation> objects;
+
+ /**
+ * Preserved filter for backward compatibility, should be removed on next
major release;
+ * use objectGlob instead.
+ */
+ @Deprecated
+ private final String filter;
private final String objectGlob;
public CloudObjectInputSource(
@@ -63,6 +72,7 @@ public abstract class CloudObjectInputSource extends
AbstractInputSource
this.uris = uris;
this.prefixes = prefixes;
this.objects = objects;
+ this.filter = null;
this.objectGlob = null;
illegalArgsChecker();
@@ -73,6 +83,7 @@ public abstract class CloudObjectInputSource extends
AbstractInputSource
@Nullable List<URI> uris,
@Nullable List<URI> prefixes,
@Nullable List<CloudObjectLocation> objects,
+ @Deprecated @Nullable String filter,
@Nullable String objectGlob
)
{
@@ -80,8 +91,12 @@ public abstract class CloudObjectInputSource extends
AbstractInputSource
this.uris = uris;
this.prefixes = prefixes;
this.objects = objects;
+ this.filter = filter;
this.objectGlob = objectGlob;
-
+ Preconditions.checkArgument(
+ filter == null || objectGlob == null,
+ "Cannot use filter and objectGlob together. Try using objectGlob
instead of filter."
+ );
illegalArgsChecker();
}
@@ -107,6 +122,14 @@ public abstract class CloudObjectInputSource extends
AbstractInputSource
return objects;
}
+ @Nullable
+ @JsonProperty
+ @JsonInclude(JsonInclude.Include.NON_NULL)
+ public String getFilter()
+ {
+ return filter;
+ }
+
@Nullable
@JsonProperty
@JsonInclude(JsonInclude.Include.NON_NULL)
@@ -144,6 +167,8 @@ public abstract class CloudObjectInputSource extends
AbstractInputSource
if (StringUtils.isNotBlank(objectGlob)) {
PathMatcher m = FileSystems.getDefault().getPathMatcher("glob:" +
getObjectGlob());
objectStream = objectStream.filter(object ->
m.matches(Paths.get(object.getPath())));
+ } else if (StringUtils.isNotBlank(filter)) {
+ objectStream = objectStream.filter(object ->
FilenameUtils.wildcardMatch(object.getPath(), getFilter()));
}
return objectStream.map(object -> new
InputSplit<>(Collections.singletonList(object)));
@@ -155,6 +180,8 @@ public abstract class CloudObjectInputSource extends
AbstractInputSource
if (StringUtils.isNotBlank(objectGlob)) {
PathMatcher m = FileSystems.getDefault().getPathMatcher("glob:" +
getObjectGlob());
uriStream = uriStream.filter(uri ->
m.matches(Paths.get(uri.toString())));
+ } else if (StringUtils.isNotBlank(filter)) {
+ uriStream = uriStream.filter(uri ->
FilenameUtils.wildcardMatch(uri.toString(), filter));
}
return uriStream.map(CloudObjectLocation::new).map(object -> new
InputSplit<>(Collections.singletonList(object)));
@@ -212,13 +239,14 @@ public abstract class CloudObjectInputSource extends
AbstractInputSource
Objects.equals(uris, that.uris) &&
Objects.equals(prefixes, that.prefixes) &&
Objects.equals(objects, that.objects) &&
+ Objects.equals(filter, that.filter) &&
Objects.equals(objectGlob, that.objectGlob);
}
@Override
public int hashCode()
{
- return Objects.hash(scheme, uris, prefixes, objects, objectGlob);
+ return Objects.hash(scheme, uris, prefixes, objects, filter, objectGlob);
}
private void illegalArgsChecker() throws IllegalArgumentException
diff --git
a/core/src/test/java/org/apache/druid/data/input/impl/CloudObjectInputSourceTest.java
b/core/src/test/java/org/apache/druid/data/input/impl/CloudObjectInputSourceTest.java
index 5395814039..da421b1b16 100644
---
a/core/src/test/java/org/apache/druid/data/input/impl/CloudObjectInputSourceTest.java
+++
b/core/src/test/java/org/apache/druid/data/input/impl/CloudObjectInputSourceTest.java
@@ -72,7 +72,7 @@ public class CloudObjectInputSourceTest
public void testGetUris()
{
CloudObjectInputSource inputSource =
Mockito.mock(CloudObjectInputSource.class, Mockito.withSettings()
- .useConstructor(SCHEME, URIS, null, null, null)
+ .useConstructor(SCHEME, URIS, null, null, null, null)
.defaultAnswer(Mockito.CALLS_REAL_METHODS)
);
@@ -86,7 +86,7 @@ public class CloudObjectInputSourceTest
public void testGetPrefixes()
{
CloudObjectInputSource inputSource =
Mockito.mock(CloudObjectInputSource.class, Mockito.withSettings()
- .useConstructor(SCHEME, null, PREFIXES, null, null)
+ .useConstructor(SCHEME, null, PREFIXES, null, null, null)
.defaultAnswer(Mockito.CALLS_REAL_METHODS)
);
@@ -100,7 +100,7 @@ public class CloudObjectInputSourceTest
public void testGetObjectGlob()
{
CloudObjectInputSource inputSource =
Mockito.mock(CloudObjectInputSource.class, Mockito.withSettings()
- .useConstructor(SCHEME, URIS, null, null, "**.parquet")
+ .useConstructor(SCHEME, URIS, null, null, null, "**.parquet")
.defaultAnswer(Mockito.CALLS_REAL_METHODS)
);
@@ -111,12 +111,12 @@ public class CloudObjectInputSourceTest
public void testInequality()
{
CloudObjectInputSource inputSource1 =
Mockito.mock(CloudObjectInputSource.class, Mockito.withSettings()
- .useConstructor(SCHEME, URIS, null, null, "**.parquet")
+ .useConstructor(SCHEME, URIS, null, null, null, "**.parquet")
.defaultAnswer(Mockito.CALLS_REAL_METHODS)
);
CloudObjectInputSource inputSource2 =
Mockito.mock(CloudObjectInputSource.class, Mockito.withSettings()
- .useConstructor(SCHEME, URIS, null, null, "**.csv")
+ .useConstructor(SCHEME, URIS, null, null, null, "**.csv")
.defaultAnswer(Mockito.CALLS_REAL_METHODS)
);
@@ -128,9 +128,32 @@ public class CloudObjectInputSourceTest
@Test
public void testWithUrisFilter()
{
- CloudObjectInputSource inputSource =
Mockito.mock(CloudObjectInputSource.class, Mockito.withSettings()
- .useConstructor(SCHEME, URIS2, null, null, "**.csv")
- .defaultAnswer(Mockito.CALLS_REAL_METHODS)
+ CloudObjectInputSource inputSource =
Mockito.mock(CloudObjectInputSource.class,
+ Mockito.withSettings()
+
.useConstructor(SCHEME, URIS2, null, null, "*.csv", null)
+
.defaultAnswer(Mockito.CALLS_REAL_METHODS)
+ );
+
+ Stream<InputSplit<List<CloudObjectLocation>>> splits =
inputSource.createSplits(
+ new JsonInputFormat(JSONPathSpec.DEFAULT, null, null, null, null),
+ new MaxSizeSplitHintSpec(null, 1)
+ );
+
+ List<CloudObjectLocation> returnedLocations =
splits.map(InputSplit::get).collect(Collectors.toList()).get(0);
+
+ List<URI> returnedLocationUris = returnedLocations.stream().map(object ->
object.toUri(SCHEME)).collect(Collectors.toList());
+
+ Assert.assertEquals("*.csv", inputSource.getFilter());
+ Assert.assertEquals(URIS, returnedLocationUris);
+ }
+
+ @Test
+ public void testWithUrisObjectGlob()
+ {
+ CloudObjectInputSource inputSource =
Mockito.mock(CloudObjectInputSource.class,
+ Mockito.withSettings()
+
.useConstructor(SCHEME, URIS2, null, null, null, "**.csv")
+
.defaultAnswer(Mockito.CALLS_REAL_METHODS)
);
Stream<InputSplit<List<CloudObjectLocation>>> splits =
inputSource.createSplits(
@@ -150,7 +173,7 @@ public class CloudObjectInputSourceTest
public void testWithUris()
{
CloudObjectInputSource inputSource =
Mockito.mock(CloudObjectInputSource.class, Mockito.withSettings()
- .useConstructor(SCHEME, URIS, null, null, null)
+ .useConstructor(SCHEME, URIS, null, null, null, null)
.defaultAnswer(Mockito.CALLS_REAL_METHODS)
);
@@ -171,7 +194,7 @@ public class CloudObjectInputSourceTest
public void testWithObjectsFilter()
{
CloudObjectInputSource inputSource =
Mockito.mock(CloudObjectInputSource.class, Mockito.withSettings()
- .useConstructor(SCHEME, null, null, OBJECTS_BEFORE_GLOB, "**.csv")
+ .useConstructor(SCHEME, null, null, OBJECTS_BEFORE_GLOB, null,
"**.csv")
.defaultAnswer(Mockito.CALLS_REAL_METHODS)
);
@@ -192,7 +215,7 @@ public class CloudObjectInputSourceTest
public void testWithObjects()
{
CloudObjectInputSource inputSource =
Mockito.mock(CloudObjectInputSource.class, Mockito.withSettings()
- .useConstructor(SCHEME, null, null, OBJECTS, null)
+ .useConstructor(SCHEME, null, null, OBJECTS, null, null)
.defaultAnswer(Mockito.CALLS_REAL_METHODS)
);
diff --git
a/extensions-contrib/aliyun-oss-extensions/src/main/java/org/apache/druid/data/input/aliyun/OssInputSource.java
b/extensions-contrib/aliyun-oss-extensions/src/main/java/org/apache/druid/data/input/aliyun/OssInputSource.java
index 71edd26b59..a90e96f047 100644
---
a/extensions-contrib/aliyun-oss-extensions/src/main/java/org/apache/druid/data/input/aliyun/OssInputSource.java
+++
b/extensions-contrib/aliyun-oss-extensions/src/main/java/org/apache/druid/data/input/aliyun/OssInputSource.java
@@ -79,11 +79,12 @@ public class OssInputSource extends CloudObjectInputSource
@JsonProperty("uris") @Nullable List<URI> uris,
@JsonProperty("prefixes") @Nullable List<URI> prefixes,
@JsonProperty("objects") @Nullable List<CloudObjectLocation> objects,
+ @Deprecated @JsonProperty("filter") @Nullable String filter,
@JsonProperty("objectGlob") @Nullable String objectGlob,
@JsonProperty("properties") @Nullable OssClientConfig inputSourceConfig
)
{
- super(OssStorageDruidModule.SCHEME, uris, prefixes, objects, objectGlob);
+ super(OssStorageDruidModule.SCHEME, uris, prefixes, objects, filter,
objectGlob);
this.inputDataConfig = Preconditions.checkNotNull(inputDataConfig,
"inputDataConfig");
Preconditions.checkNotNull(client, "client");
this.inputSourceConfig = inputSourceConfig;
@@ -136,6 +137,7 @@ public class OssInputSource extends CloudObjectInputSource
null,
null,
split.get(),
+ getFilter(),
getObjectGlob(),
getOssInputSourceConfig()
);
diff --git
a/extensions-contrib/aliyun-oss-extensions/src/test/java/org/apache/druid/data/input/aliyun/OssInputSourceTest.java
b/extensions-contrib/aliyun-oss-extensions/src/test/java/org/apache/druid/data/input/aliyun/OssInputSourceTest.java
index 817167df38..e259bcccbd 100644
---
a/extensions-contrib/aliyun-oss-extensions/src/test/java/org/apache/druid/data/input/aliyun/OssInputSourceTest.java
+++
b/extensions-contrib/aliyun-oss-extensions/src/test/java/org/apache/druid/data/input/aliyun/OssInputSourceTest.java
@@ -143,6 +143,7 @@ public class OssInputSourceTest extends
InitializedNullHandlingTest
null,
null,
null,
+ null,
null
);
final OssInputSource serdeWithUris =
MAPPER.readValue(MAPPER.writeValueAsString(withUris), OssInputSource.class);
@@ -159,6 +160,7 @@ public class OssInputSourceTest extends
InitializedNullHandlingTest
PREFIXES,
null,
null,
+ null,
null
);
final OssInputSource serdeWithPrefixes =
@@ -176,6 +178,7 @@ public class OssInputSourceTest extends
InitializedNullHandlingTest
null,
EXPECTED_LOCATION,
null,
+ null,
null
);
final OssInputSource serdeWithPrefixes =
@@ -200,6 +203,7 @@ public class OssInputSourceTest extends
InitializedNullHandlingTest
null,
EXPECTED_LOCATION,
null,
+ null,
mockConfigPropertiesWithoutKeyAndSecret
);
Assert.assertNotNull(withPrefixes);
@@ -220,6 +224,7 @@ public class OssInputSourceTest extends
InitializedNullHandlingTest
null,
EXPECTED_LOCATION,
null,
+ null,
CLOUD_CONFIG_PROPERTIES
);
final OssInputSource serdeWithPrefixes =
@@ -240,6 +245,7 @@ public class OssInputSourceTest extends
InitializedNullHandlingTest
null,
EXPECTED_LOCATION,
null,
+ null,
null
);
final OssInputSource serdeWithPrefixes =
@@ -258,6 +264,7 @@ public class OssInputSourceTest extends
InitializedNullHandlingTest
ImmutableList.of(),
EXPECTED_LOCATION,
null,
+ null,
null
);
final OssInputSource serdeWithPrefixes =
@@ -277,6 +284,7 @@ public class OssInputSourceTest extends
InitializedNullHandlingTest
PREFIXES,
EXPECTED_LOCATION,
null,
+ null,
null
);
}
@@ -293,6 +301,7 @@ public class OssInputSourceTest extends
InitializedNullHandlingTest
PREFIXES,
ImmutableList.of(),
null,
+ null,
null
);
}
@@ -309,6 +318,7 @@ public class OssInputSourceTest extends
InitializedNullHandlingTest
PREFIXES,
EXPECTED_LOCATION,
null,
+ null,
null
);
}
@@ -323,6 +333,7 @@ public class OssInputSourceTest extends
InitializedNullHandlingTest
null,
null,
null,
+ null,
null
);
@@ -349,6 +360,7 @@ public class OssInputSourceTest extends
InitializedNullHandlingTest
PREFIXES,
null,
null,
+ null,
null
);
@@ -376,6 +388,7 @@ public class OssInputSourceTest extends
InitializedNullHandlingTest
PREFIXES,
null,
null,
+ null,
null
);
@@ -406,6 +419,7 @@ public class OssInputSourceTest extends
InitializedNullHandlingTest
PREFIXES,
null,
null,
+ null,
null
);
@@ -435,6 +449,7 @@ public class OssInputSourceTest extends
InitializedNullHandlingTest
ImmutableList.of(PREFIXES.get(0), EXPECTED_URIS.get(1)),
null,
null,
+ null,
null
);
@@ -466,6 +481,7 @@ public class OssInputSourceTest extends
InitializedNullHandlingTest
ImmutableList.of(PREFIXES.get(0), EXPECTED_URIS.get(1)),
null,
null,
+ null,
null
);
@@ -510,6 +526,7 @@ public class OssInputSourceTest extends
InitializedNullHandlingTest
ImmutableList.of(PREFIXES.get(0), EXPECTED_COMPRESSED_URIS.get(1)),
null,
null,
+ null,
null
);
diff --git
a/extensions-core/azure-extensions/src/main/java/org/apache/druid/data/input/azure/AzureInputSource.java
b/extensions-core/azure-extensions/src/main/java/org/apache/druid/data/input/azure/AzureInputSource.java
index 673f3c3fc2..2c4ff37695 100644
---
a/extensions-core/azure-extensions/src/main/java/org/apache/druid/data/input/azure/AzureInputSource.java
+++
b/extensions-core/azure-extensions/src/main/java/org/apache/druid/data/input/azure/AzureInputSource.java
@@ -74,10 +74,11 @@ public class AzureInputSource extends CloudObjectInputSource
@JsonProperty("uris") @Nullable List<URI> uris,
@JsonProperty("prefixes") @Nullable List<URI> prefixes,
@JsonProperty("objects") @Nullable List<CloudObjectLocation> objects,
+ @Deprecated @JsonProperty("filter") @Nullable String filter,
@JsonProperty("objectGlob") @Nullable String objectGlob
)
{
- super(SCHEME, uris, prefixes, objects, objectGlob);
+ super(SCHEME, uris, prefixes, objects, filter, objectGlob);
this.storage = Preconditions.checkNotNull(storage, "AzureStorage");
this.entityFactory = Preconditions.checkNotNull(entityFactory,
"AzureEntityFactory");
this.azureCloudBlobIterableFactory = Preconditions.checkNotNull(
@@ -103,6 +104,7 @@ public class AzureInputSource extends CloudObjectInputSource
null,
null,
split.get(),
+ getFilter(),
getObjectGlob()
);
}
diff --git
a/extensions-core/azure-extensions/src/test/java/org/apache/druid/data/input/azure/AzureInputSourceTest.java
b/extensions-core/azure-extensions/src/test/java/org/apache/druid/data/input/azure/AzureInputSourceTest.java
index 8009f9e22e..cc25d480a8 100644
---
a/extensions-core/azure-extensions/src/test/java/org/apache/druid/data/input/azure/AzureInputSourceTest.java
+++
b/extensions-core/azure-extensions/src/test/java/org/apache/druid/data/input/azure/AzureInputSourceTest.java
@@ -22,6 +22,7 @@ package org.apache.druid.data.input.azure;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterators;
import nl.jqno.equalsverifier.EqualsVerifier;
+import org.apache.commons.io.FilenameUtils;
import org.apache.druid.data.input.InputSplit;
import org.apache.druid.data.input.MaxSizeSplitHintSpec;
import org.apache.druid.data.input.impl.CloudObjectLocation;
@@ -111,6 +112,7 @@ public class AzureInputSourceTest extends EasyMockSupport
EMPTY_URIS,
EMPTY_PREFIXES,
EMPTY_OBJECTS,
+ null,
null
);
}
@@ -132,6 +134,7 @@ public class AzureInputSourceTest extends EasyMockSupport
EMPTY_URIS,
EMPTY_PREFIXES,
objects,
+ null,
null
);
@@ -166,6 +169,55 @@ public class AzureInputSourceTest extends EasyMockSupport
EMPTY_URIS,
prefixes,
EMPTY_OBJECTS,
+ null,
+ null
+ );
+
+ Stream<InputSplit<List<CloudObjectLocation>>> cloudObjectStream =
azureInputSource.getPrefixesSplitStream(
+ new MaxSizeSplitHintSpec(null, 1)
+ );
+
+ List<List<CloudObjectLocation>> actualCloudLocationList =
cloudObjectStream.map(InputSplit::get)
+
.collect(Collectors.toList());
+ verifyAll();
+ Assert.assertEquals(expectedCloudLocations, actualCloudLocationList);
+ }
+
+ @Test
+ public void
test_getPrefixesSplitStream_withFilter_successfullyCreatesCloudLocation_returnsExpectedLocations()
+ {
+ List<URI> prefixes = ImmutableList.of(PREFIX_URI);
+ List<List<CloudObjectLocation>> expectedCloudLocations =
ImmutableList.of(ImmutableList.of(CLOUD_OBJECT_LOCATION_1));
+ List<CloudBlobHolder> expectedCloudBlobs =
ImmutableList.of(cloudBlobDruid1);
+ Iterator<CloudBlobHolder> expectedCloudBlobsIterator =
expectedCloudBlobs.iterator();
+ String filter = "*.csv";
+
+ expectedCloudBlobsIterator = Iterators.filter(
+ expectedCloudBlobsIterator,
+ object -> FilenameUtils.wildcardMatch(object.getName(), filter)
+ );
+
+
EasyMock.expect(inputDataConfig.getMaxListingLength()).andReturn(MAX_LISTING_LENGTH);
+ EasyMock.expect(azureCloudBlobIterableFactory.create(prefixes,
MAX_LISTING_LENGTH)).andReturn(
+ azureCloudBlobIterable);
+
EasyMock.expect(azureCloudBlobIterable.iterator()).andReturn(expectedCloudBlobsIterator);
+
EasyMock.expect(azureCloudBlobToLocationConverter.createCloudObjectLocation(cloudBlobDruid1))
+ .andReturn(CLOUD_OBJECT_LOCATION_1);
+
EasyMock.expect(cloudBlobDruid1.getBlobLength()).andReturn(100L).anyTimes();
+ EasyMock.expect(cloudBlobDruid1.getName()).andReturn(BLOB_PATH).anyTimes();
+
+ replayAll();
+
+ azureInputSource = new AzureInputSource(
+ storage,
+ entityFactory,
+ azureCloudBlobIterableFactory,
+ azureCloudBlobToLocationConverter,
+ inputDataConfig,
+ EMPTY_URIS,
+ prefixes,
+ EMPTY_OBJECTS,
+ filter,
null
);
@@ -215,6 +267,7 @@ public class AzureInputSourceTest extends EasyMockSupport
EMPTY_URIS,
prefixes,
EMPTY_OBJECTS,
+ null,
objectGlob
);
@@ -244,6 +297,7 @@ public class AzureInputSourceTest extends EasyMockSupport
EMPTY_URIS,
prefixes,
EMPTY_OBJECTS,
+ null,
null
);
@@ -265,6 +319,7 @@ public class AzureInputSourceTest extends EasyMockSupport
EMPTY_URIS,
prefixes,
EMPTY_OBJECTS,
+ null,
null
);
diff --git
a/extensions-core/google-extensions/src/main/java/org/apache/druid/data/input/google/GoogleCloudStorageInputSource.java
b/extensions-core/google-extensions/src/main/java/org/apache/druid/data/input/google/GoogleCloudStorageInputSource.java
index a4d175aab2..009bae7dbe 100644
---
a/extensions-core/google-extensions/src/main/java/org/apache/druid/data/input/google/GoogleCloudStorageInputSource.java
+++
b/extensions-core/google-extensions/src/main/java/org/apache/druid/data/input/google/GoogleCloudStorageInputSource.java
@@ -65,10 +65,11 @@ public class GoogleCloudStorageInputSource extends
CloudObjectInputSource
@JsonProperty("uris") @Nullable List<URI> uris,
@JsonProperty("prefixes") @Nullable List<URI> prefixes,
@JsonProperty("objects") @Nullable List<CloudObjectLocation> objects,
+ @Deprecated @JsonProperty("filter") @Nullable String filter,
@JsonProperty("objectGlob") @Nullable String objectGlob
)
{
- super(GoogleStorageDruidModule.SCHEME_GS, uris, prefixes, objects,
objectGlob);
+ super(GoogleStorageDruidModule.SCHEME_GS, uris, prefixes, objects, filter,
objectGlob);
this.storage = storage;
this.inputDataConfig = inputDataConfig;
}
@@ -117,7 +118,7 @@ public class GoogleCloudStorageInputSource extends
CloudObjectInputSource
@Override
public SplittableInputSource<List<CloudObjectLocation>>
withSplit(InputSplit<List<CloudObjectLocation>> split)
{
- return new GoogleCloudStorageInputSource(storage, inputDataConfig, null,
null, split.get(), getObjectGlob());
+ return new GoogleCloudStorageInputSource(storage, inputDataConfig, null,
null, split.get(), getFilter(), getObjectGlob());
}
private CloudObjectLocation byteSourceFromStorageObject(final StorageObject
storageObject)
diff --git
a/extensions-core/google-extensions/src/test/java/org/apache/druid/data/input/google/GoogleCloudStorageInputSourceTest.java
b/extensions-core/google-extensions/src/test/java/org/apache/druid/data/input/google/GoogleCloudStorageInputSourceTest.java
index c0648e7f4c..f15b6280cc 100644
---
a/extensions-core/google-extensions/src/test/java/org/apache/druid/data/input/google/GoogleCloudStorageInputSourceTest.java
+++
b/extensions-core/google-extensions/src/test/java/org/apache/druid/data/input/google/GoogleCloudStorageInputSourceTest.java
@@ -116,7 +116,7 @@ public class GoogleCloudStorageInputSourceTest extends
InitializedNullHandlingTe
{
final ObjectMapper mapper = createGoogleObjectMapper();
final GoogleCloudStorageInputSource withUris =
- new GoogleCloudStorageInputSource(STORAGE, INPUT_DATA_CONFIG,
EXPECTED_URIS, ImmutableList.of(), null, null);
+ new GoogleCloudStorageInputSource(STORAGE, INPUT_DATA_CONFIG,
EXPECTED_URIS, ImmutableList.of(), null, null, null);
final GoogleCloudStorageInputSource serdeWithUris =
mapper.readValue(mapper.writeValueAsString(withUris),
GoogleCloudStorageInputSource.class);
Assert.assertEquals(withUris, serdeWithUris);
@@ -127,7 +127,7 @@ public class GoogleCloudStorageInputSourceTest extends
InitializedNullHandlingTe
{
final ObjectMapper mapper = createGoogleObjectMapper();
final GoogleCloudStorageInputSource withPrefixes =
- new GoogleCloudStorageInputSource(STORAGE, INPUT_DATA_CONFIG,
ImmutableList.of(), PREFIXES, null, null);
+ new GoogleCloudStorageInputSource(STORAGE, INPUT_DATA_CONFIG,
ImmutableList.of(), PREFIXES, null, null, null);
final GoogleCloudStorageInputSource serdeWithPrefixes =
mapper.readValue(mapper.writeValueAsString(withPrefixes),
GoogleCloudStorageInputSource.class);
Assert.assertEquals(withPrefixes, serdeWithPrefixes);
@@ -144,6 +144,7 @@ public class GoogleCloudStorageInputSourceTest extends
InitializedNullHandlingTe
null,
null,
ImmutableList.of(new CloudObjectLocation("foo", "bar/file.gz")),
+ null,
null
);
final GoogleCloudStorageInputSource serdeWithObjects =
@@ -156,7 +157,7 @@ public class GoogleCloudStorageInputSourceTest extends
InitializedNullHandlingTe
{
GoogleCloudStorageInputSource inputSource =
- new GoogleCloudStorageInputSource(STORAGE, INPUT_DATA_CONFIG,
EXPECTED_URIS, ImmutableList.of(), null, null);
+ new GoogleCloudStorageInputSource(STORAGE, INPUT_DATA_CONFIG,
EXPECTED_URIS, ImmutableList.of(), null, null, null);
Stream<InputSplit<List<CloudObjectLocation>>> splits =
inputSource.createSplits(
new JsonInputFormat(JSONPathSpec.DEFAULT, null, null, null, null),
@@ -174,6 +175,7 @@ public class GoogleCloudStorageInputSourceTest extends
InitializedNullHandlingTe
URIS_BEFORE_GLOB,
null,
null,
+ null,
"**.csv"
);
@@ -184,6 +186,26 @@ public class GoogleCloudStorageInputSourceTest extends
InitializedNullHandlingTe
Assert.assertEquals(EXPECTED_OBJECTS,
splits.map(InputSplit::get).collect(Collectors.toList()));
}
+ @Test
+ public void testWithUrisFilter()
+ {
+ GoogleCloudStorageInputSource inputSource = new
GoogleCloudStorageInputSource(
+ STORAGE,
+ INPUT_DATA_CONFIG,
+ URIS_BEFORE_GLOB,
+ null,
+ null,
+ "*.csv",
+ null
+ );
+
+ Stream<InputSplit<List<CloudObjectLocation>>> splits =
inputSource.createSplits(
+ new JsonInputFormat(JSONPathSpec.DEFAULT, null, null, null, null),
+ null
+ );
+ Assert.assertEquals(EXPECTED_OBJECTS,
splits.map(InputSplit::get).collect(Collectors.toList()));
+ }
+
@Test
public void testIllegalObjectsAndPrefixes()
{
@@ -195,6 +217,7 @@ public class GoogleCloudStorageInputSourceTest extends
InitializedNullHandlingTe
null,
PREFIXES,
EXPECTED_OBJECTS.get(0),
+ null,
"**.csv"
);
}
@@ -210,6 +233,7 @@ public class GoogleCloudStorageInputSourceTest extends
InitializedNullHandlingTe
URIS_BEFORE_GLOB,
PREFIXES,
null,
+ null,
"**.csv"
);
}
@@ -226,7 +250,7 @@ public class GoogleCloudStorageInputSourceTest extends
InitializedNullHandlingTe
EasyMock.replay(INPUT_DATA_CONFIG);
GoogleCloudStorageInputSource inputSource =
- new GoogleCloudStorageInputSource(STORAGE, INPUT_DATA_CONFIG, null,
PREFIXES, null, null);
+ new GoogleCloudStorageInputSource(STORAGE, INPUT_DATA_CONFIG, null,
PREFIXES, null, null, null);
Stream<InputSplit<List<CloudObjectLocation>>> splits =
inputSource.createSplits(
new JsonInputFormat(JSONPathSpec.DEFAULT, null, null, null, null),
@@ -248,7 +272,7 @@ public class GoogleCloudStorageInputSourceTest extends
InitializedNullHandlingTe
EasyMock.replay(INPUT_DATA_CONFIG);
GoogleCloudStorageInputSource inputSource =
- new GoogleCloudStorageInputSource(STORAGE, INPUT_DATA_CONFIG, null,
PREFIXES, null, null);
+ new GoogleCloudStorageInputSource(STORAGE, INPUT_DATA_CONFIG, null,
PREFIXES, null, null, null);
Stream<InputSplit<List<CloudObjectLocation>>> splits =
inputSource.createSplits(
new JsonInputFormat(JSONPathSpec.DEFAULT, null, null, null, null),
@@ -280,6 +304,7 @@ public class GoogleCloudStorageInputSourceTest extends
InitializedNullHandlingTe
null,
PREFIXES,
null,
+ null,
null
);
@@ -324,6 +349,7 @@ public class GoogleCloudStorageInputSourceTest extends
InitializedNullHandlingTe
null,
PREFIXES,
null,
+ null,
null
);
diff --git
a/extensions-core/s3-extensions/src/main/java/org/apache/druid/data/input/s3/S3InputSource.java
b/extensions-core/s3-extensions/src/main/java/org/apache/druid/data/input/s3/S3InputSource.java
index 4d0d4f0a94..c0ee349ec7 100644
---
a/extensions-core/s3-extensions/src/main/java/org/apache/druid/data/input/s3/S3InputSource.java
+++
b/extensions-core/s3-extensions/src/main/java/org/apache/druid/data/input/s3/S3InputSource.java
@@ -111,6 +111,7 @@ public class S3InputSource extends CloudObjectInputSource
@JsonProperty("uris") @Nullable List<URI> uris,
@JsonProperty("prefixes") @Nullable List<URI> prefixes,
@JsonProperty("objects") @Nullable List<CloudObjectLocation> objects,
+ @Deprecated @JsonProperty("filter") @Nullable String filter,
@JsonProperty("objectGlob") @Nullable String objectGlob,
@JsonProperty("properties") @Nullable S3InputSourceConfig
s3InputSourceConfig,
@JsonProperty("proxyConfig") @Nullable AWSProxyConfig awsProxyConfig,
@@ -118,7 +119,7 @@ public class S3InputSource extends CloudObjectInputSource
@JsonProperty("clientConfig") @Nullable AWSClientConfig awsClientConfig
)
{
- super(S3StorageDruidModule.SCHEME, uris, prefixes, objects, objectGlob);
+ super(S3StorageDruidModule.SCHEME, uris, prefixes, objects, filter,
objectGlob);
this.inputDataConfig = Preconditions.checkNotNull(inputDataConfig,
"S3DataSegmentPusherConfig");
Preconditions.checkNotNull(s3Client, "s3Client");
this.s3InputSourceConfig = s3InputSourceConfig;
@@ -202,6 +203,7 @@ public class S3InputSource extends CloudObjectInputSource
uris,
prefixes,
objects,
+ null,
objectGlob,
s3InputSourceConfig,
awsProxyConfig,
@@ -234,6 +236,7 @@ public class S3InputSource extends CloudObjectInputSource
uris,
prefixes,
objects,
+ null,
objectGlob,
s3InputSourceConfig,
awsProxyConfig,
@@ -343,6 +346,7 @@ public class S3InputSource extends CloudObjectInputSource
null,
null,
split.get(),
+ getFilter(),
getObjectGlob(),
getS3InputSourceConfig(),
getAwsProxyConfig(),
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]