This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new cb23045  HIVE-25582: Empty result when using offset limit with MR (#2693) (Zhihua Deng reviewed by Laszlo Bodor and Zoltan Haindrich)
cb23045 is described below

commit cb23045f92c62bc43ef5739532b486b524d99e03
Author: dengzh <dengzhhu...@gmail.com>
AuthorDate: Tue Nov 23 16:02:58 2021 +0800

    HIVE-25582: Empty result when using offset limit with MR (#2693) (Zhihua Deng reviewed by Laszlo Bodor and Zoltan Haindrich)
---
 .../test/resources/testconfiguration.properties    |  1 +
 .../apache/hadoop/hive/ql/exec/ObjectCache.java    |  1 -
 .../apache/hadoop/hive/ql/exec/mr/ObjectCache.java | 23 ++++--
 .../test/queries/clientpositive/offset_limit_mr.q  | 12 +++
 .../results/clientpositive/offset_limit_mr.q.out   | 88 ++++++++++++++++++++++
 5 files changed, 118 insertions(+), 7 deletions(-)

diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 638af07..6b887f5 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -239,6 +239,7 @@ mr.query.files=\
   masking_5.q,\
   nonmr_fetch.q,\
   nonreserved_keywords_input37.q,\
+  offset_limit_mr.q,\
   parenthesis_star_by.q,\
   partition_vs_table_metadata.q,\
   row__id.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java
index cf04e1d..c9282b3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java
@@ -48,7 +48,6 @@ public interface ObjectCache {
    *
    * @param <T>
    * @param key
-   *          function to generate the object if it's not there
    * @return the last cached object with the key, null if none.
    */
   public <T> T retrieve(String key) throws HiveException;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java
index 5bb96e3..0acf6d7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java
@@ -18,7 +18,9 @@
 
 package org.apache.hadoop.hive.ql.exec.mr;
 
+import java.util.Map;
 import java.util.concurrent.Callable;
+import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
@@ -26,36 +28,45 @@ import java.util.concurrent.TimeoutException;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 
 /**
- * ObjectCache. No-op implementation on MR we don't have a means to reuse
- * Objects between runs of the same task.
+ * ObjectCache. Simple implementation on MR we don't have a means to reuse
+ * Objects between runs of the same task, this acts as a local cache.
  *
  */
 public class ObjectCache implements org.apache.hadoop.hive.ql.exec.ObjectCache {
 
   private static final Logger LOG = LoggerFactory.getLogger(ObjectCache.class.getName());
 
+  private final Map<String, Object> cache = new ConcurrentHashMap<>();
+
   @Override
   public void release(String key) {
-    // nothing to do
     LOG.debug("{} no longer needed", key);
+    cache.remove(key);
   }
 
   @Override
   public <T> T retrieve(String key) throws HiveException {
-    return retrieve(key, null);
+    return (T) cache.get(key);
   }
 
   @Override
   public <T> T retrieve(String key, Callable<T> fn) throws HiveException {
+    T value = (T) cache.get(key);
+    if (value != null || fn == null) {
+      return value;
+    }
     try {
       LOG.debug("Creating {}", key);
-      return fn.call();
+      value = fn.call();
     } catch (Exception e) {
       throw new HiveException(e);
     }
+    T previous = (T) cache.putIfAbsent(key, value);
+    return previous != null ? previous : value;
   }
 
   @Override
@@ -94,6 +105,6 @@ public class ObjectCache implements org.apache.hadoop.hive.ql.exec.ObjectCache {
 
   @Override
   public void remove(String key) {
-    // nothing to do
+    cache.remove(key);
   }
 }
diff --git a/ql/src/test/queries/clientpositive/offset_limit_mr.q b/ql/src/test/queries/clientpositive/offset_limit_mr.q
new file mode 100644
index 0000000..caba496
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/offset_limit_mr.q
@@ -0,0 +1,12 @@
+--! qt:dataset:src
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300;
+
diff --git a/ql/src/test/results/clientpositive/offset_limit_mr.q.out b/ql/src/test/results/clientpositive/offset_limit_mr.q.out
new file mode 100644
index 0000000..44f5491
--- /dev/null
+++ b/ql/src/test/results/clientpositive/offset_limit_mr.q.out
@@ -0,0 +1,88 @@
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+116    116.0
+118    236.0
+119    357.0
+12     24.0
+120    240.0
+125    250.0
+126    126.0
+128    384.0
+129    258.0
+131    131.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0      0.0
+10     10.0
+100    200.0
+103    206.0
+104    208.0
+105    105.0
+11     11.0
+111    111.0
+113    226.0
+114    114.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+10     10.0
+100    200.0
+103    206.0
+104    208.0
+105    105.0
+11     11.0
+111    111.0
+113    226.0
+114    114.0
+116    116.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+86     86.0
+87     87.0
+9      9.0
+90     270.0
+92     92.0
+95     190.0
+96     96.0
+97     194.0
+98     196.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+86     86.0
+87     87.0
+9      9.0
+90     270.0
+92     92.0
+95     190.0
+96     96.0
+97     194.0
+98     196.0

Reply via email to