This is an automated email from the ASF dual-hosted git repository.

gian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 857693f5cf8 Decorate sampling response with system fields if specified (#15536)
857693f5cf8 is described below

commit 857693f5cf8e48fdfd02ef111ca7f7f739fbf558
Author: zachjsh <[email protected]>
AuthorDate: Wed Dec 13 15:16:59 2023 -0500

    Decorate sampling response with system fields if specified (#15536)
    
    * * decorate sampling response with system fields if specified
    
    * * add unit test
---
 .../input/impl/InputEntityIteratingReader.java     | 11 +++-
 .../input/impl/InputEntityIteratingReaderTest.java | 74 ++++++++++++++++++++++
 2 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/InputEntityIteratingReader.java b/processing/src/main/java/org/apache/druid/data/input/impl/InputEntityIteratingReader.java
index ca67388b1a2..a0479c181e9 100644
--- a/processing/src/main/java/org/apache/druid/data/input/impl/InputEntityIteratingReader.java
+++ b/processing/src/main/java/org/apache/druid/data/input/impl/InputEntityIteratingReader.java
@@ -34,6 +34,7 @@ import org.apache.druid.java.util.common.parsers.CloseableIterator;
 import java.io.File;
 import java.io.IOException;
 import java.util.function.Function;
+import java.util.stream.Collectors;
 
 /**
  * InputSourceReader iterating multiple {@link InputEntity}s. This class could 
be used for
@@ -86,9 +87,17 @@ public class InputEntityIteratingReader implements InputSourceReader
   {
     return createIterator(entity -> {
       // InputEntityReader is stateful and so a new one should be created per 
entity.
+      final Function<InputRow, InputRow> systemFieldDecorator = 
systemFieldDecoratorFactory.decorator(entity);
       try {
         final InputEntityReader reader = 
inputFormat.createReader(inputRowSchema, entity, temporaryDirectory);
-        return reader.sample();
+        return reader.sample()
+            .map(i -> InputRowListPlusRawValues.ofList(i.getRawValuesList(),
+                i.getInputRows() == null
+                    ? null
+                    : i.getInputRows().stream().map(
+                        systemFieldDecorator).collect(Collectors.toList()),
+                i.getParseException()
+            ));
       }
       catch (IOException e) {
         throw new RuntimeException(e);
diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/InputEntityIteratingReaderTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/InputEntityIteratingReaderTest.java
index 70b75d23955..744c29dba2a 100644
--- a/processing/src/test/java/org/apache/druid/data/input/impl/InputEntityIteratingReaderTest.java
+++ b/processing/src/test/java/org/apache/druid/data/input/impl/InputEntityIteratingReaderTest.java
@@ -23,9 +23,12 @@ import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Iterables;
 import org.apache.druid.data.input.ColumnsFilter;
 import org.apache.druid.data.input.InputRow;
+import org.apache.druid.data.input.InputRowListPlusRawValues;
 import org.apache.druid.data.input.InputRowSchema;
 import org.apache.druid.data.input.InputStats;
+import org.apache.druid.data.input.impl.systemfield.SystemField;
 import 
org.apache.druid.data.input.impl.systemfield.SystemFieldDecoratorFactory;
+import org.apache.druid.data.input.impl.systemfield.SystemFields;
 import org.apache.druid.java.util.common.CloseableIterators;
 import org.apache.druid.java.util.common.DateTimes;
 import org.apache.druid.java.util.common.StringUtils;
@@ -44,6 +47,7 @@ import java.net.URISyntaxException;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.util.ArrayList;
+import java.util.EnumSet;
 import java.util.List;
 
 public class InputEntityIteratingReaderTest extends InitializedNullHandlingTest
@@ -110,6 +114,76 @@ public class InputEntityIteratingReaderTest extends InitializedNullHandlingTest
     }
   }
 
+  @Test
+  public void testSampleWithSystemFields() throws IOException
+  { // Checks that rows returned by reader.sample() carry the URI and PATH system fields of the file they came from.
+    final int numFiles = 5;
+    final List<File> files = new ArrayList<>();
+    for (int i = 0; i < numFiles; i++) { // Each file gets two CSV rows (time,name,score); the second row has no trailing newline.
+      final File file = temporaryFolder.newFile("test_" + i);
+      files.add(file);
+      try (Writer writer = Files.newBufferedWriter(file.toPath(), 
StandardCharsets.UTF_8)) {
+        writer.write(StringUtils.format("%d,%s,%d\n", 20190101 + i, "name_" + 
i, i));
+        writer.write(StringUtils.format("%d,%s,%d", 20190102 + i, "name_" + (i 
+ 1), i + 1));
+      }
+    }
+
+    LocalInputSource inputSource = new LocalInputSource(
+        temporaryFolder.getRoot(),
+        "test_*",
+        null,
+        new SystemFields(EnumSet.of(SystemField.URI, SystemField.PATH))); // Input source configured with the URI and PATH system fields.
+    final InputEntityIteratingReader reader = new InputEntityIteratingReader(
+        new InputRowSchema(
+            new TimestampSpec("time", "yyyyMMdd", null),
+            new DimensionsSpec(
+                DimensionsSpec.getDefaultSchemas(ImmutableList.of(
+                    "time",
+                    "name",
+                    "score",
+                    SystemField.URI.getFieldName(),
+                    SystemField.PATH.getFieldName()
+                ))
+            ),
+            ColumnsFilter.all()
+        ),
+        new CsvInputFormat(
+            ImmutableList.of("time", "name", "score"),
+            null,
+            null,
+            false,
+            0
+        ),
+        CloseableIterators.withEmptyBaggage(
+            files.stream().flatMap(file -> ImmutableList.of(new 
FileEntity(file)).stream()).iterator()
+        ),
+        SystemFieldDecoratorFactory.fromInputSource(inputSource), // Decorator factory built from the input source declared above.
+        temporaryFolder.newFolder()
+    );
+
+    try (CloseableIterator<InputRowListPlusRawValues> iterator = 
reader.sample()) {
+      int i = 0; // Index into files of the file whose two rows are expected next.
+      while (iterator.hasNext()) {
+        InputRow row = iterator.next().getInputRows().get(0);
+        Assert.assertEquals(DateTimes.of(StringUtils.format("2019-01-%02d", i 
+ 1)), row.getTimestamp());
+        Assert.assertEquals(StringUtils.format("name_%d", i), 
Iterables.getOnlyElement(row.getDimension("name")));
+        Assert.assertEquals(Integer.toString(i), 
Iterables.getOnlyElement(row.getDimension("score")));
+        Assert.assertEquals(files.get(i).toURI().toString(), 
row.getDimension(SystemField.URI.getFieldName()).get(0));
+        Assert.assertEquals(files.get(i).getAbsolutePath(), 
row.getDimension(SystemField.PATH.getFieldName()).get(0));
+
+        Assert.assertTrue(iterator.hasNext()); // The second row of the same file must follow.
+        row = iterator.next().getInputRows().get(0);
+        Assert.assertEquals(DateTimes.of(StringUtils.format("2019-01-%02d", i 
+ 2)), row.getTimestamp());
+        Assert.assertEquals(StringUtils.format("name_%d", i + 1), 
Iterables.getOnlyElement(row.getDimension("name")));
+        Assert.assertEquals(Integer.toString(i + 1), 
Iterables.getOnlyElement(row.getDimension("score")));
+        Assert.assertEquals(files.get(i).toURI().toString(), 
row.getDimension(SystemField.URI.getFieldName()).get(0));
+        Assert.assertEquals(files.get(i).getAbsolutePath(), 
row.getDimension(SystemField.PATH.getFieldName()).get(0));
+        i++;
+      }
+      Assert.assertEquals(numFiles, i); // One loop pass per file, so all files were sampled.
+    }
+  }
+
   @Test
   public void testIncorrectURI() throws IOException, URISyntaxException
   {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to