Repository: hive
Updated Branches:
  refs/heads/branch-1 e35b908ea -> f4020cfce


HIVE-12084 : Hive queries with ORDER BY and large LIMIT fails with 
OutOfMemoryError Java heap space (Hari Subramaniyan, reviewed by Laljo John 
Pullokkaran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f4020cfc
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f4020cfc
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f4020cfc

Branch: refs/heads/branch-1
Commit: f4020cfce034cdcf91e362c106c15508b0ac16d8
Parents: e35b908
Author: Hari Subramaniyan <harisan...@apache.org>
Authored: Fri Oct 23 10:52:46 2015 -0700
Committer: Hari Subramaniyan <harisan...@apache.org>
Committed: Fri Oct 23 10:53:53 2015 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/exec/TopNHash.java    |  2 +-
 ql/src/test/queries/clientpositive/topn.q       | 13 ++++++
 ql/src/test/results/clientpositive/topn.q.out   | 42 ++++++++++++++++++++
 3 files changed, 56 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/f4020cfc/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNHash.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNHash.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNHash.java
index 484006a..8859add 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNHash.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNHash.java
@@ -104,7 +104,7 @@ public class TopNHash {
     }
 
     // limit * 64 : compensation of arrays for key/value/hashcodes
-    this.threshold = (long) (memUsage * Runtime.getRuntime().maxMemory()) - 
topN * 64;
+    this.threshold = (long) (memUsage * Runtime.getRuntime().freeMemory()) - 
topN * 64L;
     if (threshold < 0) {
       return;
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/f4020cfc/ql/src/test/queries/clientpositive/topn.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/topn.q 
b/ql/src/test/queries/clientpositive/topn.q
new file mode 100644
index 0000000..4821b5a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/topn.q
@@ -0,0 +1,13 @@
+CREATE TABLE `sample_07` ( `code` string , `description` string , `total_emp` 
int , `salary` int ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS 
TextFile;
+set hive.limit.pushdown.memory.usage=0.9999999;
+
+select * from sample_07 order by salary LIMIT 999999999;
+
+SELECT * FROM (
+SELECT *, rank() over(PARTITION BY code ORDER BY salary DESC) as rank
+FROM sample_07
+) ranked_claim
+WHERE ranked_claim.rank < 2
+ORDER BY code;
+
+select sum(total_emp) over(partition by salary+salary order by code) from 
sample_07 limit 9999999;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/f4020cfc/ql/src/test/results/clientpositive/topn.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/topn.q.out 
b/ql/src/test/results/clientpositive/topn.q.out
new file mode 100644
index 0000000..4846d8f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/topn.q.out
@@ -0,0 +1,42 @@
+PREHOOK: query: CREATE TABLE `sample_07` ( `code` string , `description` 
string , `total_emp` int , `salary` int ) ROW FORMAT DELIMITED FIELDS 
TERMINATED BY '\t' STORED AS TextFile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sample_07
+POSTHOOK: query: CREATE TABLE `sample_07` ( `code` string , `description` 
string , `total_emp` int , `salary` int ) ROW FORMAT DELIMITED FIELDS 
TERMINATED BY '\t' STORED AS TextFile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sample_07
+PREHOOK: query: select * from sample_07 order by salary LIMIT 999999999
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sample_07
+#### A masked pattern was here ####
+POSTHOOK: query: select * from sample_07 order by salary LIMIT 999999999
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sample_07
+#### A masked pattern was here ####
+PREHOOK: query: SELECT * FROM (
+SELECT *, rank() over(PARTITION BY code ORDER BY salary DESC) as rank
+FROM sample_07
+) ranked_claim
+WHERE ranked_claim.rank < 2
+ORDER BY code
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sample_07
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM (
+SELECT *, rank() over(PARTITION BY code ORDER BY salary DESC) as rank
+FROM sample_07
+) ranked_claim
+WHERE ranked_claim.rank < 2
+ORDER BY code
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sample_07
+#### A masked pattern was here ####
+PREHOOK: query: select sum(total_emp) over(partition by salary+salary order by 
code) from sample_07 limit 9999999
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sample_07
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(total_emp) over(partition by salary+salary order 
by code) from sample_07 limit 9999999
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sample_07
+#### A masked pattern was here ####

Reply via email to