This is an automated email from the ASF dual-hosted git repository. kgyrtkirk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new cb23045 HIVE-25582: Empty result when using offset limit with MR (#2693) (Zhihua Deng reviewed by Laszlo Bodor and Zoltan Haindrich) cb23045 is described below commit cb23045f92c62bc43ef5739532b486b524d99e03 Author: dengzh <dengzhhu...@gmail.com> AuthorDate: Tue Nov 23 16:02:58 2021 +0800 HIVE-25582: Empty result when using offset limit with MR (#2693) (Zhihua Deng reviewed by Laszlo Bodor and Zoltan Haindrich) --- .../test/resources/testconfiguration.properties | 1 + .../apache/hadoop/hive/ql/exec/ObjectCache.java | 1 - .../apache/hadoop/hive/ql/exec/mr/ObjectCache.java | 23 ++++-- .../test/queries/clientpositive/offset_limit_mr.q | 12 +++ .../results/clientpositive/offset_limit_mr.q.out | 88 ++++++++++++++++++++++ 5 files changed, 118 insertions(+), 7 deletions(-) diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 638af07..6b887f5 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -239,6 +239,7 @@ mr.query.files=\ masking_5.q,\ nonmr_fetch.q,\ nonreserved_keywords_input37.q,\ + offset_limit_mr.q,\ parenthesis_star_by.q,\ partition_vs_table_metadata.q,\ row__id.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java index cf04e1d..c9282b3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java @@ -48,7 +48,6 @@ public interface ObjectCache { * * @param <T> * @param key - * function to generate the object if it's not there * @return the last cached object with the key, null if none. */ public <T> T retrieve(String key) throws HiveException; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java index 5bb96e3..0acf6d7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java @@ -18,7 +18,9 @@ package org.apache.hadoop.hive.ql.exec.mr; +import java.util.Map; import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; @@ -26,36 +28,45 @@ import java.util.concurrent.TimeoutException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import org.apache.hadoop.hive.ql.metadata.HiveException; /** - * ObjectCache. No-op implementation on MR we don't have a means to reuse - * Objects between runs of the same task. + * ObjectCache. Simple implementation on MR we don't have a means to reuse + * Objects between runs of the same task, this acts as a local cache. * */ public class ObjectCache implements org.apache.hadoop.hive.ql.exec.ObjectCache { private static final Logger LOG = LoggerFactory.getLogger(ObjectCache.class.getName()); + private final Map<String, Object> cache = new ConcurrentHashMap<>(); + @Override public void release(String key) { - // nothing to do LOG.debug("{} no longer needed", key); + cache.remove(key); } @Override public <T> T retrieve(String key) throws HiveException { - return retrieve(key, null); + return (T) cache.get(key); } @Override public <T> T retrieve(String key, Callable<T> fn) throws HiveException { + T value = (T) cache.get(key); + if (value != null || fn == null) { + return value; + } try { LOG.debug("Creating {}", key); - return fn.call(); + value = fn.call(); } catch (Exception e) { throw new HiveException(e); } + T previous = (T) cache.putIfAbsent(key, value); + return previous != null ? previous : value; } @Override @@ -94,6 +105,6 @@ public class ObjectCache implements org.apache.hadoop.hive.ql.exec.ObjectCache { @Override public void remove(String key) { - // nothing to do + cache.remove(key); } } diff --git a/ql/src/test/queries/clientpositive/offset_limit_mr.q b/ql/src/test/queries/clientpositive/offset_limit_mr.q new file mode 100644 index 0000000..caba496 --- /dev/null +++ b/ql/src/test/queries/clientpositive/offset_limit_mr.q @@ -0,0 +1,12 @@ +--! qt:dataset:src + +SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10; + +SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10; + +SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10; + +SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100; + +SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300; + diff --git a/ql/src/test/results/clientpositive/offset_limit_mr.q.out b/ql/src/test/results/clientpositive/offset_limit_mr.q.out new file mode 100644 index 0000000..44f5491 --- /dev/null +++ b/ql/src/test/results/clientpositive/offset_limit_mr.q.out @@ -0,0 +1,88 @@ +PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +116 116.0 +118 236.0 +119 357.0 +12 24.0 +120 240.0 +125 250.0 +126 126.0 +128 384.0 +129 258.0 +131 131.0 +PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 226.0 +114 114.0 +PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 226.0 +114 114.0 +116 116.0 +PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +86 86.0 +87 87.0 +9 9.0 +90 270.0 +92 92.0 +95 190.0 +96 96.0 +97 194.0 +98 196.0 +PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +86 86.0 +87 87.0 +9 9.0 +90 270.0 +92 92.0 +95 190.0 +96 96.0 +97 194.0 +98 196.0