>From Hussain Towaileb <[email protected]>: Hussain Towaileb has submitted this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19354 )
Change subject: [ASTERIXDB-3509]: COPY TO CSV Perf Issue ...................................................................... [ASTERIXDB-3509]: COPY TO CSV Perf Issue Details: Instance creation of csv printerFactory was getting created for all the data in the records. Change that to single instance creation per query. Change-Id: I55b114c81707e57307b52a1828952fa56847b2dc Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19354 Reviewed-by: Hussain Towaileb <[email protected]> Integration-Tests: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> --- M asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AObjectPrinterFactory.java M asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/ANullPrinterFactory.java M asterixdb/asterix-om/src/main/java/org/apache/asterix/om/pointables/printer/csv/APrintVisitor.java M asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AStringPrinterFactory.java 4 files changed, 33 insertions(+), 21 deletions(-) Approvals: Hussain Towaileb: Looks good to me, approved Jenkins: Verified; Verified Objections: Anon. E. Moose #1000171: Violations found diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/ANullPrinterFactory.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/ANullPrinterFactory.java index 3d3ca3e..101d7c2 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/ANullPrinterFactory.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/ANullPrinterFactory.java @@ -19,7 +19,6 @@ package org.apache.asterix.dataflow.data.nontagged.printers.csv; import java.io.PrintStream; -import java.util.concurrent.ConcurrentHashMap; import org.apache.hyracks.algebricks.data.IPrinter; import org.apache.hyracks.algebricks.data.IPrinterFactory; @@ -27,8 +26,6 @@ public class ANullPrinterFactory implements IPrinterFactory { private static final long serialVersionUID = 1L; private static final String DEFAULT_NULL_STRING = ""; - // Store the information about the instance based on the parameters - private static final ConcurrentHashMap<String, ANullPrinterFactory> instanceCache = new ConcurrentHashMap<>(); private String nullString; private ANullPrinterFactory(String nullString) { @@ -36,8 +33,7 @@ } public static ANullPrinterFactory createInstance(String nullString) { - String key = CSVUtils.generateKey(nullString); - return instanceCache.computeIfAbsent(key, k -> new ANullPrinterFactory(nullString)); + return new ANullPrinterFactory(nullString); } private final IPrinter PRINTER = (byte[] b, int s, int l, PrintStream ps) -> { diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AObjectPrinterFactory.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AObjectPrinterFactory.java index 20b1ef0..fdde82c 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AObjectPrinterFactory.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AObjectPrinterFactory.java @@ -27,7 +27,6 @@ import java.io.PrintStream; import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; import org.apache.asterix.om.pointables.ARecordVisitablePointable; import org.apache.asterix.om.pointables.base.DefaultOpenFieldType; @@ -43,7 +42,8 @@ public class AObjectPrinterFactory implements IPrinterFactory { private static final long serialVersionUID = 1L; - private static final ConcurrentHashMap<String, AObjectPrinterFactory> instanceCache = new ConcurrentHashMap<>(); + private final IPrinter nullPrinter; + private final IPrinter stringPrinter; private ARecordType itemType; private Map<String, String> configuration; private boolean emptyFieldAsNull; @@ -53,12 +53,15 @@ this.configuration = configuration; String emptyFieldAsNullStr = configuration.get(KEY_EMPTY_FIELD_AS_NULL); this.emptyFieldAsNull = emptyFieldAsNullStr != null && Boolean.parseBoolean(emptyFieldAsNullStr); + this.nullPrinter = ANullPrinterFactory.createInstance(configuration.get(KEY_NULL)).createPrinter(); + this.stringPrinter = + AStringPrinterFactory.createInstance(configuration.get(KEY_QUOTE), configuration.get(KEY_FORCE_QUOTE), + configuration.get(KEY_ESCAPE), configuration.get(KEY_DELIMITER)).createPrinter(); + } public static AObjectPrinterFactory createInstance(ARecordType itemType, Map<String, String> configuration) { - // generate a unique identifier based on the parameters and hash the instance corresponding to it. - String key = CSVUtils.generateKey(itemType, configuration); - return instanceCache.computeIfAbsent(key, k -> new AObjectPrinterFactory(itemType, configuration)); + return new AObjectPrinterFactory(itemType, configuration); } public boolean printFlatValue(ATypeTag typeTag, byte[] b, int s, int l, PrintStream ps) @@ -78,7 +81,7 @@ return true; case MISSING: case NULL: - ANullPrinterFactory.createInstance(configuration.get(KEY_NULL)).createPrinter().print(b, s, l, ps); + nullPrinter.print(b, s, l, ps); return true; case BOOLEAN: ABooleanPrinterFactory.PRINTER.print(b, s, l, ps); @@ -130,12 +133,9 @@ return true; case STRING: if (emptyFieldAsNull && CSVUtils.isEmptyString(b, s, l)) { - ANullPrinterFactory.createInstance(configuration.get(KEY_NULL)).createPrinter().print(b, s, l, ps); + nullPrinter.print(b, s, l, ps); } else { - AStringPrinterFactory - .createInstance(configuration.get(KEY_QUOTE), configuration.get(KEY_FORCE_QUOTE), - configuration.get(KEY_ESCAPE), configuration.get(KEY_DELIMITER)) - .createPrinter().print(b, s, l, ps); + stringPrinter.print(b, s, l, ps); } return true; case BINARY: diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AStringPrinterFactory.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AStringPrinterFactory.java index ae368bd..d17b7cb 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AStringPrinterFactory.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AStringPrinterFactory.java @@ -25,7 +25,6 @@ import java.io.IOException; import java.io.PrintStream; -import java.util.concurrent.ConcurrentHashMap; import org.apache.asterix.dataflow.data.nontagged.printers.PrintTools; import org.apache.hyracks.algebricks.data.IPrinter; @@ -34,7 +33,6 @@ public class AStringPrinterFactory implements IPrinterFactory { private static final long serialVersionUID = 1L; - private static final ConcurrentHashMap<String, AStringPrinterFactory> instanceCache = new ConcurrentHashMap<>(); private static final String NONE = "none"; private String quote; private Boolean forceQuote; @@ -51,8 +49,7 @@ public static AStringPrinterFactory createInstance(String quote, String forceQuoteStr, String escape, String delimiter) { boolean forceQuote = forceQuoteStr == null || Boolean.parseBoolean(forceQuoteStr); - String key = CSVUtils.generateKey(quote, forceQuoteStr, escape, delimiter); - return instanceCache.computeIfAbsent(key, k -> new AStringPrinterFactory(quote, forceQuote, escape, delimiter)); + return new AStringPrinterFactory(quote, forceQuote, escape, delimiter); } private final IPrinter PRINTER = (byte[] b, int s, int l, PrintStream ps) -> { diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/pointables/printer/csv/APrintVisitor.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/pointables/printer/csv/APrintVisitor.java index 22c502b..bbe5286 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/pointables/printer/csv/APrintVisitor.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/pointables/printer/csv/APrintVisitor.java @@ -44,6 +44,7 @@ public class APrintVisitor extends AbstractPrintVisitor { private final ARecordType itemType; private final Map<String, String> configuration; + private AObjectPrinterFactory objectPrinterFactory; public APrintVisitor(ARecordType itemType, Map<String, String> configuration) { super(); @@ -67,6 +68,9 @@ @Override protected boolean printFlatValue(ATypeTag typeTag, byte[] b, int s, int l, PrintStream ps) throws HyracksDataException { - return AObjectPrinterFactory.createInstance(itemType, configuration).printFlatValue(typeTag, b, s, l, ps); + if (objectPrinterFactory == null) { + objectPrinterFactory = AObjectPrinterFactory.createInstance(itemType, configuration); + } + return objectPrinterFactory.printFlatValue(typeTag, b, s, l, ps); } } -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19354 To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Change-Id: I55b114c81707e57307b52a1828952fa56847b2dc Gerrit-Change-Number: 19354 Gerrit-PatchSet: 6 Gerrit-Owner: Utsav Singh <[email protected]> Gerrit-Reviewer: Anon. E. Moose #1000171 Gerrit-Reviewer: Hussain Towaileb <[email protected]> Gerrit-Reviewer: Jenkins <[email protected]> Gerrit-Reviewer: Murtadha Hubail <[email protected]> Gerrit-MessageType: merged
