This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new ca900e4  Optimize GroovyExpressionEvaluator (#5257)
ca900e4 is described below

commit ca900e4f52f65738d5cb03ab10484f6c97bcd68f
Author: Neha Pawar <[email protected]>
AuthorDate: Thu Apr 16 12:56:46 2020 -0700

    Optimize GroovyExpressionEvaluator (#5257)
---
 .../perf/BenchmarkGroovyExpressionEvaluation.java  | 190 +++++++++++++++++++++
 .../evaluators/GroovyExpressionEvaluator.java      |  39 ++---
 .../spi/utils/SchemaFieldExtractorUtilsTest.java   |  17 +-
 3 files changed, 220 insertions(+), 26 deletions(-)

diff --git 
a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkGroovyExpressionEvaluation.java
 
b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkGroovyExpressionEvaluation.java
new file mode 100644
index 0000000..170e537
--- /dev/null
+++ 
b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkGroovyExpressionEvaluation.java
@@ -0,0 +1,190 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.perf;
+
+import groovy.lang.Binding;
+import groovy.lang.GroovyClassLoader;
+import groovy.lang.GroovyCodeSource;
+import groovy.lang.GroovyShell;
+import groovy.lang.Script;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+import org.apache.commons.lang3.RandomStringUtils;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.options.ChainedOptionsBuilder;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+import org.openjdk.jmh.runner.options.TimeValue;
+
+
+@State(Scope.Benchmark)
+@Fork(value = 1, jvmArgs = {"-server", "-Xmx8G", 
"-XX:MaxDirectMemorySize=16G"})
+public class BenchmarkGroovyExpressionEvaluation {
+
+  private final GroovyClassLoader _groovyClassLoader = new GroovyClassLoader();
+  private final Random _random = new Random();
+
+  private String _concatScriptText;
+  private GroovyCodeSource _concatCodeSource;
+  private String _maxScriptText;
+  private GroovyCodeSource _maxCodeSource;
+
+  private Binding _concatBinding;
+  private Script _concatScript;
+  private Script _concatGCLScript;
+  private Binding _maxBinding;
+  private Script _maxScript;
+  private Script _maxGCLScript;
+
+  @Setup
+  public void setup()
+      throws IllegalAccessException, InstantiationException {
+    _concatScriptText = "firstName + ' ' + lastName";
+    _concatBinding = new Binding();
+    _concatScript = new GroovyShell(_concatBinding).parse(_concatScriptText);
+    _concatCodeSource = new GroovyCodeSource(_concatScriptText, 
Math.abs(_concatScriptText.hashCode()) + ".groovy",
+        GroovyShell.DEFAULT_CODE_BASE);
+    _concatGCLScript = (Script) 
_groovyClassLoader.parseClass(_concatCodeSource).newInstance();
+
+    _maxScriptText = "longList.max{ it.toBigDecimal() }";
+    _maxBinding = new Binding();
+    _maxScript = new GroovyShell(_maxBinding).parse(_maxScriptText);
+    _maxCodeSource = new GroovyCodeSource(_maxScriptText, 
Math.abs(_maxScriptText.hashCode()) + ".groovy",
+        GroovyShell.DEFAULT_CODE_BASE);
+    _maxGCLScript = (Script) 
_groovyClassLoader.parseClass(_maxCodeSource).newInstance();
+  }
+
+  private String getFirstName() {
+    return RandomStringUtils.randomAlphabetic(10);
+  }
+
+  private String getLastName() {
+    return RandomStringUtils.randomAlphabetic(20);
+  }
+
+  private List<String> getLongList() {
+    int listLength = _random.nextInt(100) + 10;
+    List<String> longList = new ArrayList<>(listLength);
+    for (int i = 0; i < listLength; i++) {
+      longList.add(String.valueOf(_random.nextInt()));
+    }
+    return longList;
+  }
+
+  @Benchmark
+  @BenchmarkMode(Mode.AverageTime)
+  @OutputTimeUnit(TimeUnit.MICROSECONDS)
+  public void javaConcat() {
+    getFullNameJava(getFirstName(), getLastName());
+  }
+
+  @Benchmark
+  @BenchmarkMode(Mode.AverageTime)
+  @OutputTimeUnit(TimeUnit.MICROSECONDS)
+  public void groovyShellConcat() {
+    getFullNameGroovyShell(getFirstName(), getLastName());
+  }
+
+  @Benchmark
+  @BenchmarkMode(Mode.AverageTime)
+  @OutputTimeUnit(TimeUnit.MICROSECONDS)
+  public void groovyCodeSourceConcat() {
+    getFullNameGroovyCodeSource(getFirstName(), getLastName());
+  }
+
+  @Benchmark
+  @BenchmarkMode(Mode.AverageTime)
+  @OutputTimeUnit(TimeUnit.MICROSECONDS)
+  public void javaMax() {
+    List<String> longList = getLongList();
+    getMaxJava(longList);
+  }
+
+  @Benchmark
+  @BenchmarkMode(Mode.AverageTime)
+  @OutputTimeUnit(TimeUnit.MICROSECONDS)
+  public void groovyShellMax() {
+    List<String> longList = getLongList();
+    getMaxGroovyShell(longList);
+  }
+
+  @Benchmark
+  @BenchmarkMode(Mode.AverageTime)
+  @OutputTimeUnit(TimeUnit.MICROSECONDS)
+  public void groovyCodeSourceMax() {
+    List<String> longList = getLongList();
+    getMaxGroovyCodeSource(longList);
+  }
+
+  private String getFullNameJava(String firstName, String lastName) {
+    return String.join(" ", firstName, lastName);
+  }
+
+  private Object getFullNameGroovyShell(String firstName, String lastName) {
+    _concatBinding.setVariable("firstName", firstName);
+    _concatBinding.setVariable("lastName", lastName);
+    return _concatScript.run();
+  }
+
+  private Object getFullNameGroovyCodeSource(String firstName, String 
lastName) {
+    _concatBinding.setVariable("firstName", firstName);
+    _concatBinding.setVariable("lastName", lastName);
+    _concatGCLScript.setBinding(_concatBinding);
+    return _concatGCLScript.run();
+  }
+
+  private int getMaxJava(List<String> longList) {
+    int maxInt = Integer.MIN_VALUE;
+    for (String value : longList) {
+      int number = Integer.parseInt(value);
+      if (number > maxInt) {
+        maxInt = number;
+      }
+    }
+    return maxInt;
+  }
+
+  private Object getMaxGroovyShell(List<String> longList) {
+    _maxBinding.setVariable("longList", longList);
+    return _maxScript.run();
+  }
+
+  private Object getMaxGroovyCodeSource(List<String> longList) {
+    _maxBinding.setVariable("longList", longList);
+    _maxGCLScript.setBinding(_maxBinding);
+    return _maxGCLScript.run();
+  }
+
+  public static void main(String[] args)
+      throws Exception {
+    ChainedOptionsBuilder opt = new 
OptionsBuilder().include(BenchmarkGroovyExpressionEvaluation.class.getSimpleName())
+        
.warmupTime(TimeValue.seconds(10)).warmupIterations(1).measurementTime(TimeValue.seconds(30))
+        .measurementIterations(3).forks(1);
+    new Runner(opt.build()).run();
+  }
+}
diff --git 
a/pinot-spi/src/main/java/org/apache/pinot/spi/data/function/evaluators/GroovyExpressionEvaluator.java
 
b/pinot-spi/src/main/java/org/apache/pinot/spi/data/function/evaluators/GroovyExpressionEvaluator.java
index ba718e6..68ecfa6 100644
--- 
a/pinot-spi/src/main/java/org/apache/pinot/spi/data/function/evaluators/GroovyExpressionEvaluator.java
+++ 
b/pinot-spi/src/main/java/org/apache/pinot/spi/data/function/evaluators/GroovyExpressionEvaluator.java
@@ -18,13 +18,13 @@
  */
 package org.apache.pinot.spi.data.function.evaluators;
 
+import com.google.common.base.Preconditions;
 import com.google.common.base.Splitter;
 import groovy.lang.Binding;
 import groovy.lang.GroovyShell;
+import groovy.lang.Script;
 import java.util.Collections;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import org.apache.pinot.spi.data.readers.GenericRow;
@@ -54,21 +54,21 @@ public class GroovyExpressionEvaluator implements 
ExpressionEvaluator {
   private static final String SCRIPT_GROUP_NAME = "script";
   private static final String ARGUMENTS_SEPARATOR = ",";
 
-  private List<String> _arguments;
-  private String _script;
+  private final List<String> _arguments;
+  private final Binding _binding;
+  private final Script _script;
 
   public GroovyExpressionEvaluator(String transformExpression) {
     Matcher matcher = GROOVY_FUNCTION_PATTERN.matcher(transformExpression);
-    if (matcher.matches()) {
-      _script = matcher.group(SCRIPT_GROUP_NAME);
-
-      String arguments = matcher.group(ARGUMENTS_GROUP_NAME);
-      if (arguments != null) {
-        _arguments = 
Splitter.on(ARGUMENTS_SEPARATOR).trimResults().splitToList(arguments);
-      } else {
-        _arguments = Collections.emptyList();
-      }
+    Preconditions.checkState(matcher.matches(), "Invalid transform expression: 
%s", transformExpression);
+    String arguments = matcher.group(ARGUMENTS_GROUP_NAME);
+    if (arguments != null) {
+      _arguments = 
Splitter.on(ARGUMENTS_SEPARATOR).trimResults().splitToList(arguments);
+    } else {
+      _arguments = Collections.emptyList();
     }
+    _binding = new Binding();
+    _script = new 
GroovyShell(_binding).parse(matcher.group(SCRIPT_GROUP_NAME));
   }
 
   public static String getGroovyExpressionPrefix() {
@@ -82,21 +82,14 @@ public class GroovyExpressionEvaluator implements 
ExpressionEvaluator {
 
   @Override
   public Object evaluate(GenericRow genericRow) {
-    Map<String, Object> params = new HashMap<>();
     for (String argument : _arguments) {
       Object value = genericRow.getValue(argument);
       if (value == null) {
-        // TODO: disallow evaluation of any of the params is null? Or give 
complete control to function?
+        // FIXME: if any param is null a) exit OR b) assume function handles 
it ?
         return null;
       }
-      params.put(argument, value);
-    }
-
-    Binding binding = new Binding();
-    for (String argument : _arguments) {
-      binding.setVariable(argument, params.get(argument));
+      _binding.setVariable(argument, value);
     }
-    GroovyShell shell = new GroovyShell(binding);
-    return shell.evaluate(_script);
+    return _script.run();
   }
 }
diff --git 
a/pinot-spi/src/test/java/org/apache/pinot/spi/utils/SchemaFieldExtractorUtilsTest.java
 
b/pinot-spi/src/test/java/org/apache/pinot/spi/utils/SchemaFieldExtractorUtilsTest.java
index 5035f4b..c8cd285 100644
--- 
a/pinot-spi/src/test/java/org/apache/pinot/spi/utils/SchemaFieldExtractorUtilsTest.java
+++ 
b/pinot-spi/src/test/java/org/apache/pinot/spi/utils/SchemaFieldExtractorUtilsTest.java
@@ -141,20 +141,31 @@ public class SchemaFieldExtractorUtilsTest {
     pinotSchema.addField(timeFieldSpec);
     Assert.assertFalse(SchemaFieldExtractorUtils.validate(pinotSchema));
 
+    // incorrect groovy function syntax
+    pinotSchema = new Schema();
+    dimensionFieldSpec = new DimensionFieldSpec("dim1", 
FieldSpec.DataType.STRING, true);
+    dimensionFieldSpec.setTransformFunction("Groovy(function, argument3)");
+    pinotSchema.addField(dimensionFieldSpec);
+    Assert.assertFalse(SchemaFieldExtractorUtils.validate(pinotSchema));
+
+    // valid schema, empty arguments
+    pinotSchema = new Schema();
+    dimensionFieldSpec = new DimensionFieldSpec("dim1", 
FieldSpec.DataType.STRING, true);
+    dimensionFieldSpec.setTransformFunction("Groovy({function})");
+    pinotSchema.addField(dimensionFieldSpec);
+    Assert.assertTrue(SchemaFieldExtractorUtils.validate(pinotSchema));
+
     // valid schema
     pinotSchema = new Schema();
     dimensionFieldSpec = new DimensionFieldSpec("dim1", 
FieldSpec.DataType.STRING, true);
     dimensionFieldSpec.setTransformFunction("Groovy({function}, argument1, 
argument2, argument3)");
     pinotSchema.addField(dimensionFieldSpec);
-
     metricFieldSpec = new MetricFieldSpec("m1", FieldSpec.DataType.LONG);
     metricFieldSpec.setTransformFunction("Groovy({function}, m2, m3)");
     pinotSchema.addField(metricFieldSpec);
-
     timeFieldSpec = new TimeFieldSpec("time", FieldSpec.DataType.LONG, 
TimeUnit.MILLISECONDS);
     timeFieldSpec.setTransformFunction("Groovy({function}, millis)");
     pinotSchema.addField(timeFieldSpec);
-
     Assert.assertTrue(SchemaFieldExtractorUtils.validate(pinotSchema));
 
     // valid time field spec


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to