>From Hussain Towaileb <[email protected]>:

Hussain Towaileb has submitted this change. ( 
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19354 )

Change subject: [ASTERIXDB-3509]: COPY TO CSV Perf Issue
......................................................................

[ASTERIXDB-3509]: COPY TO CSV Perf Issue

Details: Instance creation of csv printerFactory was getting created for all 
the data in the records. Change that to single instance creation per query.

Change-Id: I55b114c81707e57307b52a1828952fa56847b2dc
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19354
Reviewed-by: Hussain Towaileb <[email protected]>
Integration-Tests: Jenkins <[email protected]>
Tested-by: Jenkins <[email protected]>
---
M 
asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AObjectPrinterFactory.java
M 
asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/ANullPrinterFactory.java
M 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/pointables/printer/csv/APrintVisitor.java
M 
asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AStringPrinterFactory.java
4 files changed, 33 insertions(+), 21 deletions(-)

Approvals:
  Hussain Towaileb: Looks good to me, approved
  Jenkins: Verified; Verified

Objections:
  Anon. E. Moose #1000171: Violations found




diff --git 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/ANullPrinterFactory.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/ANullPrinterFactory.java
index 3d3ca3e..101d7c2 100644
--- 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/ANullPrinterFactory.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/ANullPrinterFactory.java
@@ -19,7 +19,6 @@
 package org.apache.asterix.dataflow.data.nontagged.printers.csv;

 import java.io.PrintStream;
-import java.util.concurrent.ConcurrentHashMap;

 import org.apache.hyracks.algebricks.data.IPrinter;
 import org.apache.hyracks.algebricks.data.IPrinterFactory;
@@ -27,8 +26,6 @@
 public class ANullPrinterFactory implements IPrinterFactory {
     private static final long serialVersionUID = 1L;
     private static final String DEFAULT_NULL_STRING = "";
-    // Store the information about the instance based on the parameters
-    private static final ConcurrentHashMap<String, ANullPrinterFactory> 
instanceCache = new ConcurrentHashMap<>();
     private String nullString;

     private ANullPrinterFactory(String nullString) {
@@ -36,8 +33,7 @@
     }

     public static ANullPrinterFactory createInstance(String nullString) {
-        String key = CSVUtils.generateKey(nullString);
-        return instanceCache.computeIfAbsent(key, k -> new 
ANullPrinterFactory(nullString));
+        return new ANullPrinterFactory(nullString);
     }

     private final IPrinter PRINTER = (byte[] b, int s, int l, PrintStream ps) 
-> {
diff --git 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AObjectPrinterFactory.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AObjectPrinterFactory.java
index 20b1ef0..fdde82c 100644
--- 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AObjectPrinterFactory.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AObjectPrinterFactory.java
@@ -27,7 +27,6 @@

 import java.io.PrintStream;
 import java.util.Map;
-import java.util.concurrent.ConcurrentHashMap;

 import org.apache.asterix.om.pointables.ARecordVisitablePointable;
 import org.apache.asterix.om.pointables.base.DefaultOpenFieldType;
@@ -43,7 +42,8 @@

 public class AObjectPrinterFactory implements IPrinterFactory {
     private static final long serialVersionUID = 1L;
-    private static final ConcurrentHashMap<String, AObjectPrinterFactory> 
instanceCache = new ConcurrentHashMap<>();
+    private final IPrinter nullPrinter;
+    private final IPrinter stringPrinter;
     private ARecordType itemType;
     private Map<String, String> configuration;
     private boolean emptyFieldAsNull;
@@ -53,12 +53,15 @@
         this.configuration = configuration;
         String emptyFieldAsNullStr = 
configuration.get(KEY_EMPTY_FIELD_AS_NULL);
         this.emptyFieldAsNull = emptyFieldAsNullStr != null && 
Boolean.parseBoolean(emptyFieldAsNullStr);
+        this.nullPrinter = 
ANullPrinterFactory.createInstance(configuration.get(KEY_NULL)).createPrinter();
+        this.stringPrinter =
+                
AStringPrinterFactory.createInstance(configuration.get(KEY_QUOTE), 
configuration.get(KEY_FORCE_QUOTE),
+                        configuration.get(KEY_ESCAPE), 
configuration.get(KEY_DELIMITER)).createPrinter();
+
     }

     public static AObjectPrinterFactory createInstance(ARecordType itemType, 
Map<String, String> configuration) {
-        // generate a unique identifier based on the parameters and hash the 
instance corresponding to it.
-        String key = CSVUtils.generateKey(itemType, configuration);
-        return instanceCache.computeIfAbsent(key, k -> new 
AObjectPrinterFactory(itemType, configuration));
+        return new AObjectPrinterFactory(itemType, configuration);
     }

     public boolean printFlatValue(ATypeTag typeTag, byte[] b, int s, int l, 
PrintStream ps)
@@ -78,7 +81,7 @@
                 return true;
             case MISSING:
             case NULL:
-                
ANullPrinterFactory.createInstance(configuration.get(KEY_NULL)).createPrinter().print(b,
 s, l, ps);
+                nullPrinter.print(b, s, l, ps);
                 return true;
             case BOOLEAN:
                 ABooleanPrinterFactory.PRINTER.print(b, s, l, ps);
@@ -130,12 +133,9 @@
                 return true;
             case STRING:
                 if (emptyFieldAsNull && CSVUtils.isEmptyString(b, s, l)) {
-                    
ANullPrinterFactory.createInstance(configuration.get(KEY_NULL)).createPrinter().print(b,
 s, l, ps);
+                    nullPrinter.print(b, s, l, ps);
                 } else {
-                    AStringPrinterFactory
-                            .createInstance(configuration.get(KEY_QUOTE), 
configuration.get(KEY_FORCE_QUOTE),
-                                    configuration.get(KEY_ESCAPE), 
configuration.get(KEY_DELIMITER))
-                            .createPrinter().print(b, s, l, ps);
+                    stringPrinter.print(b, s, l, ps);
                 }
                 return true;
             case BINARY:
diff --git 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AStringPrinterFactory.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AStringPrinterFactory.java
index ae368bd..d17b7cb 100644
--- 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AStringPrinterFactory.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AStringPrinterFactory.java
@@ -25,7 +25,6 @@

 import java.io.IOException;
 import java.io.PrintStream;
-import java.util.concurrent.ConcurrentHashMap;

 import org.apache.asterix.dataflow.data.nontagged.printers.PrintTools;
 import org.apache.hyracks.algebricks.data.IPrinter;
@@ -34,7 +33,6 @@

 public class AStringPrinterFactory implements IPrinterFactory {
     private static final long serialVersionUID = 1L;
-    private static final ConcurrentHashMap<String, AStringPrinterFactory> 
instanceCache = new ConcurrentHashMap<>();
     private static final String NONE = "none";
     private String quote;
     private Boolean forceQuote;
@@ -51,8 +49,7 @@
     public static AStringPrinterFactory createInstance(String quote, String 
forceQuoteStr, String escape,
             String delimiter) {
         boolean forceQuote = forceQuoteStr == null || 
Boolean.parseBoolean(forceQuoteStr);
-        String key = CSVUtils.generateKey(quote, forceQuoteStr, escape, 
delimiter);
-        return instanceCache.computeIfAbsent(key, k -> new 
AStringPrinterFactory(quote, forceQuote, escape, delimiter));
+        return new AStringPrinterFactory(quote, forceQuote, escape, delimiter);
     }

     private final IPrinter PRINTER = (byte[] b, int s, int l, PrintStream ps) 
-> {
diff --git 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/pointables/printer/csv/APrintVisitor.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/pointables/printer/csv/APrintVisitor.java
index 22c502b..bbe5286 100644
--- 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/pointables/printer/csv/APrintVisitor.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/pointables/printer/csv/APrintVisitor.java
@@ -44,6 +44,7 @@
 public class APrintVisitor extends AbstractPrintVisitor {
     private final ARecordType itemType;
     private final Map<String, String> configuration;
+    private AObjectPrinterFactory objectPrinterFactory;

     public APrintVisitor(ARecordType itemType, Map<String, String> 
configuration) {
         super();
@@ -67,6 +68,9 @@
     @Override
     protected boolean printFlatValue(ATypeTag typeTag, byte[] b, int s, int l, 
PrintStream ps)
             throws HyracksDataException {
-        return AObjectPrinterFactory.createInstance(itemType, 
configuration).printFlatValue(typeTag, b, s, l, ps);
+        if (objectPrinterFactory == null) {
+            objectPrinterFactory = 
AObjectPrinterFactory.createInstance(itemType, configuration);
+        }
+        return objectPrinterFactory.printFlatValue(typeTag, b, s, l, ps);
     }
 }

--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19354
To unsubscribe, or for help writing mail filters, visit 
https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: I55b114c81707e57307b52a1828952fa56847b2dc
Gerrit-Change-Number: 19354
Gerrit-PatchSet: 6
Gerrit-Owner: Utsav Singh <[email protected]>
Gerrit-Reviewer: Anon. E. Moose #1000171
Gerrit-Reviewer: Hussain Towaileb <[email protected]>
Gerrit-Reviewer: Jenkins <[email protected]>
Gerrit-Reviewer: Murtadha Hubail <[email protected]>
Gerrit-MessageType: merged

Reply via email to