[2/6] kylin git commit: TopN merge performance

2016-12-03 Thread lidong
TopN merge performance


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/e7d31938
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/e7d31938
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/e7d31938

Branch: refs/heads/yang21-hbase102
Commit: e7d31938ef9204a671c9894f02da4a9d3ab81b42
Parents: d3ecb0d
Author: shaofengshi 
Authored: Sun Dec 4 09:39:45 2016 +0800
Committer: shaofengshi 
Committed: Sun Dec 4 09:39:45 2016 +0800

--
 .../apache/kylin/measure/topn/TopNCounter.java  | 49 +++-
 1 file changed, 17 insertions(+), 32 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/kylin/blob/e7d31938/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
--
diff --git 
a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java 
b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
index cf9978a..0d0726c 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
@@ -26,11 +26,9 @@ import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 
-import com.google.common.collect.Maps;
 import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
+import com.google.common.collect.Maps;
 
 /**
  * Modified from the StreamSummary.java in 
https://github.com/addthis/stream-lib
@@ -157,41 +155,28 @@ public class TopNCounter<T> implements 
Iterable<Counter<T>> {
  * @return
  */
 public TopNCounter<T> merge(TopNCounter<T> another) {
-double m1 = 0.0, m2 = 0.0;
-if (this.size() >= this.capacity) {
-m1 = this.counterList.getLast().count;
-}
-
-if (another.size() >= another.capacity) {
-m2 = another.counterList.getLast().count;
-}
-
-Set duplicateItems = Sets.newHashSet();
-List notDuplicateItems = Lists.newArrayList();
-
-for (Map.Entry entry : this.counterMap.entrySet()) {
-T item = entry.getKey();
-Counter existing = another.counterMap.get(item);
-if (existing != null) {
-duplicateItems.add(item);
-} else {
-notDuplicateItems.add(item);
+boolean thisFull = this.size() >= this.capacity;
+boolean anotherFull = another.size() >= another.capacity;
+double m1 = thisFull ? this.counterList.getLast().count : 0.0;
+double m2 = anotherFull ? another.counterList.getLast().count : 0.0;
+
+if (thisFull == true) {
+for (Counter entry : another.counterMap.values()) {
+entry.count += m1;
 }
 }
 
-for (T item : duplicateItems) {
-this.offer(item, another.counterMap.get(item).count);
-}
-
-for (T item : notDuplicateItems) {
-this.offer(item, m2);
+if (anotherFull == true) {
+for (Counter entry : this.counterMap.values()) {
+entry.count += m2;
+}
 }
 
 for (Map.Entry entry : another.counterMap.entrySet()) {
-T item = entry.getKey();
-if (duplicateItems.contains(item) == false) {
-double counter = entry.getValue().count;
-this.offer(item, counter + m1);
+if (counterMap.containsKey(entry.getKey())) {
+this.offer(entry.getValue().getItem(), anotherFull ? (thisFull 
? entry.getValue().count - m2 - m1 : entry.getValue().count - m2) : (thisFull ? 
(entry.getValue().count - m1) : entry.getValue().count));
+} else {
+this.offer(entry.getValue().getItem(), entry.getValue().count);
 }
 }
 



[4/6] kylin git commit: KYLIN-1528 Create a branch for v1.5 with HBase 1.x API

2016-12-03 Thread lidong
http://git-wip-us.apache.org/repos/asf/kylin/blob/306fa777/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/HBaseStreamingOutput.java
--
diff --git 
a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/HBaseStreamingOutput.java
 
b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/HBaseStreamingOutput.java
index 0e95102..c59fb33 100644
--- 
a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/HBaseStreamingOutput.java
+++ 
b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/HBaseStreamingOutput.java
@@ -27,7 +27,8 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.client.HTableInterface;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Table;
 import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.persistence.ResourceStore;
 import org.apache.kylin.cube.CubeSegment;
@@ -56,7 +57,7 @@ public class HBaseStreamingOutput implements IStreamingOutput 
{
 try {
 CubeSegment cubeSegment = (CubeSegment) buildable;
 
-final HTableInterface hTable;
+final Table hTable;
 hTable = createHTable(cubeSegment);
 List cuboidWriters = Lists.newArrayList();
 cuboidWriters.add(new HBaseCuboidWriter(cubeSegment, hTable));
@@ -88,10 +89,10 @@ public class HBaseStreamingOutput implements 
IStreamingOutput {
 }
 }
 
-private HTableInterface createHTable(final CubeSegment cubeSegment) throws 
IOException {
+private Table createHTable(final CubeSegment cubeSegment) throws 
IOException {
 final String hTableName = cubeSegment.getStorageLocationIdentifier();
 CubeHTableUtil.createHTable(cubeSegment, null);
-final HTableInterface hTable = 
HBaseConnection.get(KylinConfig.getInstanceFromEnv().getStorageUrl()).getTable(hTableName);
+final Table hTable = 
HBaseConnection.get(KylinConfig.getInstanceFromEnv().getStorageUrl()).getTable(TableName.valueOf(hTableName));
 logger.info("hTable:" + hTableName + " for segment:" + 
cubeSegment.getName() + " created!");
 return hTable;
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/306fa777/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/MergeGCStep.java
--
diff --git 
a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/MergeGCStep.java
 
b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/MergeGCStep.java
index 5b2441c..2f7e164 100644
--- 
a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/MergeGCStep.java
+++ 
b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/MergeGCStep.java
@@ -24,11 +24,11 @@ import java.util.Collections;
 import java.util.List;
 
 import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
 import org.apache.kylin.common.KylinConfig;
-import org.apache.kylin.common.util.Bytes;
 import org.apache.kylin.job.exception.ExecuteException;
 import org.apache.kylin.job.execution.AbstractExecutable;
 import org.apache.kylin.job.execution.ExecutableContext;
@@ -69,19 +69,20 @@ public class MergeGCStep extends AbstractExecutable {
 List<String> oldTables = getOldHTables();
 if (oldTables != null && oldTables.size() > 0) {
 String metadataUrlPrefix = 
KylinConfig.getInstanceFromEnv().getMetadataUrlPrefix();
-Configuration conf = 
HBaseConnection.getCurrentHBaseConfiguration();
-HBaseAdmin admin = null;
+Admin admin = null;
 try {
-admin = new HBaseAdmin(conf);
+Connection conn = 
HBaseConnection.get(KylinConfig.getInstanceFromEnv().getStorageUrl());
+admin = conn.getAdmin();
+
 for (String table : oldTables) {
-if (admin.tableExists(table)) {
-HTableDescriptor tableDescriptor = 
admin.getTableDescriptor(Bytes.toBytes(table));
+if (admin.tableExists(TableName.valueOf(table))) {
+HTableDescriptor tableDescriptor = 
admin.getTableDescriptor(TableName.valueOf((table)));
 String host = 
tableDescriptor.getValue(IRealizationConstants.HTableTag);
 if (metadataUrlPrefix.equalsIgnoreCase(host)) {
-if (admin.isTableEnabled(table)) {
-admin.disableTable(table);
+

[1/6] kylin git commit: refine mapper and reducer log [Forced Update!]

2016-12-03 Thread lidong
Repository: kylin
Updated Branches:
  refs/heads/yang21-hbase102 11b7fff75 -> bb9234e33 (forced update)


refine mapper and reducer log


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/d3ecb0d9
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/d3ecb0d9
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/d3ecb0d9

Branch: refs/heads/yang21-hbase102
Commit: d3ecb0d9c381dbb035c7cada7d3c798e24fef1d1
Parents: 8001887
Author: Hongbin Ma 
Authored: Thu Dec 1 18:01:55 2016 +0800
Committer: Hongbin Ma 
Committed: Thu Dec 1 18:01:55 2016 +0800

--
 .../org/apache/kylin/engine/mr/KylinMapper.java | 17 +++---
 .../apache/kylin/engine/mr/KylinReducer.java| 17 +++---
 .../engine/mr/steps/BaseCuboidMapperBase.java   |  1 -
 .../kylin/engine/mr/steps/CuboidReducer.java| 21 ++--
 .../engine/mr/steps/HiveToBaseCuboidMapper.java | 10 ++
 .../engine/mr/steps/InMemCuboidMapper.java  | 34 
 .../engine/mr/steps/InMemCuboidReducer.java | 20 ++--
 .../kylin/engine/mr/steps/NDCuboidMapper.java   | 22 ++---
 8 files changed, 70 insertions(+), 72 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/kylin/blob/d3ecb0d9/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinMapper.java
--
diff --git 
a/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinMapper.java 
b/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinMapper.java
index a01f7a2..c5af2fe 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinMapper.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinMapper.java
@@ -18,18 +18,21 @@
 
 package org.apache.kylin.engine.mr;
 
-import java.io.IOException;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.kylin.engine.mr.common.BatchConstants;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.IOException;
+
 /**
  */
 public class KylinMapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT> extends 
Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT> {
 private static final Logger logger = 
LoggerFactory.getLogger(KylinMapper.class);
 
+protected int mapCounter = 0;
+
 protected void bindCurrentConfiguration(Configuration conf) {
 logger.info("The conf for current mapper will be " + 
System.identityHashCode(conf));
 HadoopUtil.setCurrentConfiguration(conf);
@@ -38,6 +41,10 @@ public class KylinMapper 
extends Mapper.Context context) throws IOException, InterruptedException {
 try {
+if (mapCounter++ % BatchConstants.NORMAL_RECORD_LOG_THRESHOLD == 
0) {
+logger.info("Accepting Mapper Key with ordinal: " + 
mapCounter);
+}
+
 doMap(key, value, context);
 } catch (IOException ex) { // KYLIN-2170
 logger.error("", ex);
@@ -53,11 +60,11 @@ public class KylinMapper 
extends Mapper.Context context) throws IOException, InterruptedException {
 super.map(key, value, context);
 }
-
+
 @Override
 final protected void cleanup(Mapper.Context context) throws IOException, InterruptedException {
 try {
@@ -76,7 +83,7 @@ public class KylinMapper 
extends Mapper.Context 
context) throws IOException, InterruptedException {
 }
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/d3ecb0d9/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinReducer.java
--
diff --git 
a/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinReducer.java 
b/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinReducer.java
index 2b63ce0..83266ea 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinReducer.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinReducer.java
@@ -18,18 +18,22 @@
 
 package org.apache.kylin.engine.mr;
 
-import java.io.IOException;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.kylin.engine.mr.common.BatchConstants;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.IOException;
+
 /**
  

[6/6] kylin git commit: KYLIN-2233 Support HBase 1.0.2

2016-12-03 Thread lidong
KYLIN-2233 Support HBase 1.0.2

Signed-off-by: Yang Li 


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/bb9234e3
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/bb9234e3
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/bb9234e3

Branch: refs/heads/yang21-hbase102
Commit: bb9234e33db93c052b12cceca5f74b68222b7828
Parents: 306fa77
Author: Cheng Wang 
Authored: Wed Oct 12 14:39:24 2016 +0800
Committer: lidongsjtu 
Committed: Sun Dec 4 15:45:28 2016 +0800

--
 pom.xml   |  2 +-
 .../storage/hbase/cube/v1/RegionScannerAdapter.java   | 11 ++-
 .../coprocessor/observer/AggregateRegionObserver.java |  4 ++--
 .../v1/coprocessor/observer/AggregationScanner.java   | 14 --
 .../observer/ObserverAggregationCache.java| 10 ++
 .../observer/AggregateRegionObserverTest.java | 12 +++-
 .../cube/v1/filter/TestFuzzyRowFilterV2EndToEnd.java  |  3 +--
 7 files changed, 15 insertions(+), 41 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/kylin/blob/bb9234e3/pom.xml
--
diff --git a/pom.xml b/pom.xml
index 1df14f4..ad70ee3 100644
--- a/pom.xml
+++ b/pom.xml
@@ -54,7 +54,7 @@
 1.2.1
 
 
-<hbase-hadoop2.version>1.1.1</hbase-hadoop2.version>
+<hbase-hadoop2.version>1.0.2</hbase-hadoop2.version>
 0.8.1
 
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/bb9234e3/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/RegionScannerAdapter.java
--
diff --git 
a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/RegionScannerAdapter.java
 
b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/RegionScannerAdapter.java
index 0ade920..40da772 100644
--- 
a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/RegionScannerAdapter.java
+++ 
b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/RegionScannerAdapter.java
@@ -26,7 +26,6 @@ import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.ResultScanner;
 import org.apache.hadoop.hbase.regionserver.RegionScanner;
-import org.apache.hadoop.hbase.regionserver.ScannerContext;
 
 /**
  * @author yangli9
@@ -51,7 +50,7 @@ public class RegionScannerAdapter implements RegionScanner {
 }
 
 @Override
-public boolean next(List<Cell> result, ScannerContext scannerContext) 
throws IOException {
+public boolean next(List<Cell> result, int limit) throws IOException {
 return next(result);
 }
 
@@ -61,7 +60,7 @@ public class RegionScannerAdapter implements RegionScanner {
 }
 
 @Override
-public boolean nextRaw(List<Cell> result, ScannerContext scannerContext) 
throws IOException {
+public boolean nextRaw(List<Cell> result, int limit) throws IOException {
 return next(result);
 }
 
@@ -94,10 +93,4 @@ public class RegionScannerAdapter implements RegionScanner {
 public long getMvccReadPoint() {
 return Long.MAX_VALUE;
 }
-
-@Override
-public int getBatch() {
-return -1;
-}
-
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/bb9234e3/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/coprocessor/observer/AggregateRegionObserver.java
--
diff --git 
a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/coprocessor/observer/AggregateRegionObserver.java
 
b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/coprocessor/observer/AggregateRegionObserver.java
index 7e25e4c..7139ca7 100644
--- 
a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/coprocessor/observer/AggregateRegionObserver.java
+++ 
b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/coprocessor/observer/AggregateRegionObserver.java
@@ -26,7 +26,7 @@ import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
 import org.apache.hadoop.hbase.coprocessor.ObserverContext;
 import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
-import org.apache.hadoop.hbase.regionserver.Region;
+import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost;
 import org.apache.hadoop.hbase.regionserver.RegionScanner;
 import org.apache.kylin.gridtable.StorageSideBehavior;
@@ -99,7 +99,7 @@ public class AggregateRegionObserver extends 
BaseRegionObserver {
 // start/end region operation & sync on scanner is suggested by the
 // javadoc of RegionScanner.nextRaw()
 // FIXME: will the 

[5/6] kylin git commit: KYLIN-1528 Create a branch for v1.5 with HBase 1.x API

2016-12-03 Thread lidong
KYLIN-1528 Create a branch for v1.5 with HBase 1.x API


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/306fa777
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/306fa777
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/306fa777

Branch: refs/heads/yang21-hbase102
Commit: 306fa777c00723de132edf1507ca7721dd5601ca
Parents: 698b542
Author: shaofengshi 
Authored: Wed Mar 23 17:07:05 2016 +0800
Committer: lidongsjtu 
Committed: Sun Dec 4 15:41:00 2016 +0800

--
 examples/test_case_data/sandbox/hbase-site.xml  | 19 +---
 .../kylin/provision/BuildCubeWithEngine.java| 13 ++-
 pom.xml | 12 +--
 .../kylin/rest/security/AclHBaseStorage.java|  4 +-
 .../rest/security/MockAclHBaseStorage.java  |  8 +-
 .../apache/kylin/rest/security/MockHTable.java  | 95 
 .../rest/security/RealAclHBaseStorage.java  |  9 +-
 .../apache/kylin/rest/service/AclService.java   | 25 +++---
 .../apache/kylin/rest/service/CubeService.java  | 36 +++-
 .../apache/kylin/rest/service/QueryService.java | 24 +++--
 .../apache/kylin/rest/service/UserService.java  | 17 ++--
 .../kylin/storage/hbase/HBaseConnection.java| 44 -
 .../kylin/storage/hbase/HBaseResourceStore.java | 31 +++
 .../kylin/storage/hbase/HBaseStorage.java   |  3 +-
 .../storage/hbase/cube/SimpleHBaseStore.java| 20 ++---
 .../hbase/cube/v1/CubeSegmentTupleIterator.java | 11 +--
 .../storage/hbase/cube/v1/CubeStorageQuery.java |  6 +-
 .../hbase/cube/v1/RegionScannerAdapter.java | 10 ++-
 .../cube/v1/SerializedHBaseTupleIterator.java   |  4 +-
 .../observer/AggregateRegionObserver.java   |  4 +-
 .../observer/AggregationScanner.java| 14 ++-
 .../observer/ObserverAggregationCache.java  | 10 ++-
 .../coprocessor/observer/ObserverEnabler.java   |  4 +-
 .../hbase/cube/v2/CubeHBaseEndpointRPC.java | 13 +--
 .../storage/hbase/cube/v2/CubeHBaseScanRPC.java |  9 +-
 .../coprocessor/endpoint/CubeVisitService.java  |  4 +-
 .../storage/hbase/steps/CubeHTableUtil.java | 16 ++--
 .../storage/hbase/steps/DeprecatedGCStep.java   | 23 ++---
 .../storage/hbase/steps/HBaseCuboidWriter.java  |  7 +-
 .../hbase/steps/HBaseStreamingOutput.java   |  9 +-
 .../kylin/storage/hbase/steps/MergeGCStep.java  | 23 ++---
 .../storage/hbase/util/CleanHtableCLI.java  | 12 +--
 .../storage/hbase/util/CubeMigrationCLI.java| 36 
 .../hbase/util/CubeMigrationCheckCLI.java   | 17 ++--
 .../hbase/util/DeployCoprocessorCLI.java| 22 ++---
 .../hbase/util/ExtendCubeToHybridCLI.java   |  8 +-
 .../hbase/util/GridTableHBaseBenchmark.java | 34 +++
 .../kylin/storage/hbase/util/HBaseClean.java| 18 ++--
 .../hbase/util/HBaseRegionSizeCalculator.java   | 35 
 .../kylin/storage/hbase/util/HBaseUsage.java|  9 +-
 .../storage/hbase/util/HbaseStreamingInput.java | 30 +++
 .../hbase/util/HtableAlterMetadataCLI.java  |  9 +-
 .../storage/hbase/util/OrphanHBaseCleanJob.java | 19 ++--
 .../kylin/storage/hbase/util/PingHBaseCLI.java  | 15 ++--
 .../kylin/storage/hbase/util/RowCounterCLI.java | 11 +--
 .../storage/hbase/util/StorageCleanupJob.java   | 20 +++--
 .../storage/hbase/util/UpdateHTableHostCLI.java | 17 ++--
 .../observer/AggregateRegionObserverTest.java   | 26 ++
 .../v1/filter/TestFuzzyRowFilterV2EndToEnd.java |  5 +-
 .../org/apache/kylin/tool/CubeMigrationCLI.java | 36 
 .../kylin/tool/CubeMigrationCheckCLI.java   | 16 ++--
 .../kylin/tool/ExtendCubeToHybridCLI.java   |  8 +-
 .../apache/kylin/tool/StorageCleanupJob.java| 20 +++--
 53 files changed, 450 insertions(+), 500 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/kylin/blob/306fa777/examples/test_case_data/sandbox/hbase-site.xml
--
diff --git a/examples/test_case_data/sandbox/hbase-site.xml 
b/examples/test_case_data/sandbox/hbase-site.xml
index 46d5345..734908e 100644
--- a/examples/test_case_data/sandbox/hbase-site.xml
+++ b/examples/test_case_data/sandbox/hbase-site.xml
@@ -190,22 +190,5 @@
 zookeeper.znode.parent
 /hbase-unsecure
 
-
-hbase.client.pause
-100
-General client pause value.  Used mostly as value to wait
-before running a retry of a failed get, region lookup, etc.
-See hbase.client.retries.number for description of how we backoff 
from
-this initial pause amount and how this pause works w/ 
retries.
-
-
-hbase.client.retries.number
-5
-Maximum retries.  Used as maximum for all retryable
-operations such as the getting of a cell's value, starting a row 
update,
-   

[1/6] kylin git commit: refine mapper and reducer log [Forced Update!]

2016-12-03 Thread lidong
Repository: kylin
Updated Branches:
  refs/heads/yang21-cdh5.7 c02911381 -> d5956a425 (forced update)


refine mapper and reducer log


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/d3ecb0d9
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/d3ecb0d9
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/d3ecb0d9

Branch: refs/heads/yang21-cdh5.7
Commit: d3ecb0d9c381dbb035c7cada7d3c798e24fef1d1
Parents: 8001887
Author: Hongbin Ma 
Authored: Thu Dec 1 18:01:55 2016 +0800
Committer: Hongbin Ma 
Committed: Thu Dec 1 18:01:55 2016 +0800

--
 .../org/apache/kylin/engine/mr/KylinMapper.java | 17 +++---
 .../apache/kylin/engine/mr/KylinReducer.java| 17 +++---
 .../engine/mr/steps/BaseCuboidMapperBase.java   |  1 -
 .../kylin/engine/mr/steps/CuboidReducer.java| 21 ++--
 .../engine/mr/steps/HiveToBaseCuboidMapper.java | 10 ++
 .../engine/mr/steps/InMemCuboidMapper.java  | 34 
 .../engine/mr/steps/InMemCuboidReducer.java | 20 ++--
 .../kylin/engine/mr/steps/NDCuboidMapper.java   | 22 ++---
 8 files changed, 70 insertions(+), 72 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/kylin/blob/d3ecb0d9/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinMapper.java
--
diff --git 
a/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinMapper.java 
b/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinMapper.java
index a01f7a2..c5af2fe 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinMapper.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinMapper.java
@@ -18,18 +18,21 @@
 
 package org.apache.kylin.engine.mr;
 
-import java.io.IOException;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.kylin.engine.mr.common.BatchConstants;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.IOException;
+
 /**
  */
 public class KylinMapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT> extends 
Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT> {
 private static final Logger logger = 
LoggerFactory.getLogger(KylinMapper.class);
 
+protected int mapCounter = 0;
+
 protected void bindCurrentConfiguration(Configuration conf) {
 logger.info("The conf for current mapper will be " + 
System.identityHashCode(conf));
 HadoopUtil.setCurrentConfiguration(conf);
@@ -38,6 +41,10 @@ public class KylinMapper 
extends Mapper.Context context) throws IOException, InterruptedException {
 try {
+if (mapCounter++ % BatchConstants.NORMAL_RECORD_LOG_THRESHOLD == 
0) {
+logger.info("Accepting Mapper Key with ordinal: " + 
mapCounter);
+}
+
 doMap(key, value, context);
 } catch (IOException ex) { // KYLIN-2170
 logger.error("", ex);
@@ -53,11 +60,11 @@ public class KylinMapper 
extends Mapper.Context context) throws IOException, InterruptedException {
 super.map(key, value, context);
 }
-
+
 @Override
 final protected void cleanup(Mapper.Context context) throws IOException, InterruptedException {
 try {
@@ -76,7 +83,7 @@ public class KylinMapper 
extends Mapper.Context 
context) throws IOException, InterruptedException {
 }
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/d3ecb0d9/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinReducer.java
--
diff --git 
a/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinReducer.java 
b/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinReducer.java
index 2b63ce0..83266ea 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinReducer.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinReducer.java
@@ -18,18 +18,22 @@
 
 package org.apache.kylin.engine.mr;
 
-import java.io.IOException;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.kylin.engine.mr.common.BatchConstants;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.IOException;
+
 /**
  */
 

[6/6] kylin git commit: KYLIN-1672 support kylin on cdh 5.7

2016-12-03 Thread lidong
KYLIN-1672 support kylin on cdh 5.7

Signed-off-by: Li Yang 


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/d5956a42
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/d5956a42
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/d5956a42

Branch: refs/heads/yang21-cdh5.7
Commit: d5956a4256ba37580a29060a0e06ec781980df2d
Parents: 306fa77
Author: Lynne Jiang 
Authored: Mon May 16 03:33:27 2016 -0700
Committer: lidongsjtu 
Committed: Sun Dec 4 15:43:03 2016 +0800

--
 build/conf/kylin.properties |   3 +
 dev-support/test_all_against_hdp_2_2_4_2_2.sh   |   0
 .../kylin/engine/mr/steps/MockupMapContext.java |  15 +-
 examples/test_case_data/sandbox/core-site.xml   | 146 +++---
 examples/test_case_data/sandbox/hbase-site.xml  | 162 ++
 examples/test_case_data/sandbox/hdfs-site.xml   | 259 ++
 examples/test_case_data/sandbox/mapred-site.xml | 398 ++-
 examples/test_case_data/sandbox/yarn-site.xml   | 496 ++-
 pom.xml |  16 +-
 server/pom.xml  |  36 ++
 .../storage/hbase/steps/MockupMapContext.java   |  19 +-
 tool/pom.xml|  12 +
 12 files changed, 431 insertions(+), 1131 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/kylin/blob/d5956a42/build/conf/kylin.properties
--
diff --git a/build/conf/kylin.properties b/build/conf/kylin.properties
index ed86bdb..ebd8cde 100644
--- a/build/conf/kylin.properties
+++ b/build/conf/kylin.properties
@@ -118,6 +118,9 @@ kylin.job.mapreduce.mapper.input.rows=100
 
 kylin.job.step.timeout=7200
 
+# for secure cdh, filtering hive dependency is risky, so filter nothing
+kylin.job.dependency.filterlist=[^,]+
+
 ### CUBE ###
 
 # 'auto', 'inmem', 'layer' or 'random' for testing

http://git-wip-us.apache.org/repos/asf/kylin/blob/d5956a42/dev-support/test_all_against_hdp_2_2_4_2_2.sh
--
diff --git a/dev-support/test_all_against_hdp_2_2_4_2_2.sh 
b/dev-support/test_all_against_hdp_2_2_4_2_2.sh
old mode 100644
new mode 100755

http://git-wip-us.apache.org/repos/asf/kylin/blob/d5956a42/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/MockupMapContext.java
--
diff --git 
a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/MockupMapContext.java
 
b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/MockupMapContext.java
index 847071d..9900465 100644
--- 
a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/MockupMapContext.java
+++ 
b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/MockupMapContext.java
@@ -77,6 +77,7 @@ public class MockupMapContext {
 outKV[0] = key;
 outKV[1] = value;
 }
+
 }
 
 @Override
@@ -99,6 +100,7 @@ public class MockupMapContext {
 throw new NotImplementedException();
 }
 
+
 @Override
 public float getProgress() {
 throw new NotImplementedException();
@@ -195,17 +197,17 @@ public class MockupMapContext {
 }
 
 @Override
-public RawComparator getSortComparator() {
+public boolean userClassesTakesPrecedence() {
 throw new NotImplementedException();
 }
 
 @Override
-public String getJar() {
+public RawComparator getSortComparator() {
 throw new NotImplementedException();
 }
 
 @Override
-public RawComparator getGroupingComparator() {
+public String getJar() {
 throw new NotImplementedException();
 }
 
@@ -221,7 +223,7 @@ public class MockupMapContext {
 
 @Override
 public boolean getProfileEnabled() {
-throw new NotImplementedException();
+return false;
 }
 
 @Override
@@ -308,6 +310,11 @@ public class MockupMapContext {
 public RawComparator getCombinerKeyGroupingComparator() {
 throw new NotImplementedException();
 }
+
+@Override
+public RawComparator getGroupingComparator() {
+return null;
+}
 });
 }
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/d5956a42/examples/test_case_data/sandbox/core-site.xml
--
diff --git a/examples/test_case_data/sandbox/core-site.xml 

[2/6] kylin git commit: TopN merge performance

2016-12-03 Thread lidong
TopN merge performance


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/e7d31938
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/e7d31938
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/e7d31938

Branch: refs/heads/yang21-cdh5.7
Commit: e7d31938ef9204a671c9894f02da4a9d3ab81b42
Parents: d3ecb0d
Author: shaofengshi 
Authored: Sun Dec 4 09:39:45 2016 +0800
Committer: shaofengshi 
Committed: Sun Dec 4 09:39:45 2016 +0800

--
 .../apache/kylin/measure/topn/TopNCounter.java  | 49 +++-
 1 file changed, 17 insertions(+), 32 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/kylin/blob/e7d31938/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
--
diff --git 
a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java 
b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
index cf9978a..0d0726c 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
@@ -26,11 +26,9 @@ import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 
-import com.google.common.collect.Maps;
 import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
+import com.google.common.collect.Maps;
 
 /**
  * Modified from the StreamSummary.java in 
https://github.com/addthis/stream-lib
@@ -157,41 +155,28 @@ public class TopNCounter<T> implements 
Iterable<Counter<T>> {
  * @return
  */
 public TopNCounter<T> merge(TopNCounter<T> another) {
-double m1 = 0.0, m2 = 0.0;
-if (this.size() >= this.capacity) {
-m1 = this.counterList.getLast().count;
-}
-
-if (another.size() >= another.capacity) {
-m2 = another.counterList.getLast().count;
-}
-
-Set duplicateItems = Sets.newHashSet();
-List notDuplicateItems = Lists.newArrayList();
-
-for (Map.Entry entry : this.counterMap.entrySet()) {
-T item = entry.getKey();
-Counter existing = another.counterMap.get(item);
-if (existing != null) {
-duplicateItems.add(item);
-} else {
-notDuplicateItems.add(item);
+boolean thisFull = this.size() >= this.capacity;
+boolean anotherFull = another.size() >= another.capacity;
+double m1 = thisFull ? this.counterList.getLast().count : 0.0;
+double m2 = anotherFull ? another.counterList.getLast().count : 0.0;
+
+if (thisFull == true) {
+for (Counter entry : another.counterMap.values()) {
+entry.count += m1;
 }
 }
 
-for (T item : duplicateItems) {
-this.offer(item, another.counterMap.get(item).count);
-}
-
-for (T item : notDuplicateItems) {
-this.offer(item, m2);
+if (anotherFull == true) {
+for (Counter entry : this.counterMap.values()) {
+entry.count += m2;
+}
 }
 
 for (Map.Entry entry : another.counterMap.entrySet()) {
-T item = entry.getKey();
-if (duplicateItems.contains(item) == false) {
-double counter = entry.getValue().count;
-this.offer(item, counter + m1);
+if (counterMap.containsKey(entry.getKey())) {
+this.offer(entry.getValue().getItem(), anotherFull ? (thisFull 
? entry.getValue().count - m2 - m1 : entry.getValue().count - m2) : (thisFull ? 
(entry.getValue().count - m1) : entry.getValue().count));
+} else {
+this.offer(entry.getValue().getItem(), entry.getValue().count);
 }
 }
 



[3/6] kylin git commit: minor: TopN merge performance improvement further

2016-12-03 Thread lidong
minor: TopN merge performance improvement further


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/698b542a
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/698b542a
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/698b542a

Branch: refs/heads/yang21-cdh5.7
Commit: 698b542a1201b55f8176f1822d47b1783853304c
Parents: e7d3193
Author: shaofengshi 
Authored: Sun Dec 4 10:39:37 2016 +0800
Committer: shaofengshi 
Committed: Sun Dec 4 10:39:37 2016 +0800

--
 .../src/main/java/org/apache/kylin/measure/topn/TopNCounter.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/kylin/blob/698b542a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
--
diff --git 
a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java 
b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
index 0d0726c..eabc941 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
@@ -173,8 +173,8 @@ public class TopNCounter implements Iterable 
{
 }
 
 for (Map.Entry entry : another.counterMap.entrySet()) {
-if (counterMap.containsKey(entry.getKey())) {
-this.offer(entry.getValue().getItem(), anotherFull ? (thisFull 
? entry.getValue().count - m2 - m1 : entry.getValue().count - m2) : (thisFull ? 
(entry.getValue().count - m1) : entry.getValue().count));
+if (this.counterMap.containsKey(entry.getKey())) {
+this.offer(entry.getValue().getItem(), (entry.getValue().count 
- m2 - m1));
 } else {
 this.offer(entry.getValue().getItem(), entry.getValue().count);
 }



[5/6] kylin git commit: KYLIN-1528 Create a branch for v1.5 with HBase 1.x API

2016-12-03 Thread lidong
KYLIN-1528 Create a branch for v1.5 with HBase 1.x API


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/306fa777
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/306fa777
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/306fa777

Branch: refs/heads/yang21-cdh5.7
Commit: 306fa777c00723de132edf1507ca7721dd5601ca
Parents: 698b542
Author: shaofengshi 
Authored: Wed Mar 23 17:07:05 2016 +0800
Committer: lidongsjtu 
Committed: Sun Dec 4 15:41:00 2016 +0800

--
 examples/test_case_data/sandbox/hbase-site.xml  | 19 +---
 .../kylin/provision/BuildCubeWithEngine.java| 13 ++-
 pom.xml | 12 +--
 .../kylin/rest/security/AclHBaseStorage.java|  4 +-
 .../rest/security/MockAclHBaseStorage.java  |  8 +-
 .../apache/kylin/rest/security/MockHTable.java  | 95 
 .../rest/security/RealAclHBaseStorage.java  |  9 +-
 .../apache/kylin/rest/service/AclService.java   | 25 +++---
 .../apache/kylin/rest/service/CubeService.java  | 36 +++-
 .../apache/kylin/rest/service/QueryService.java | 24 +++--
 .../apache/kylin/rest/service/UserService.java  | 17 ++--
 .../kylin/storage/hbase/HBaseConnection.java| 44 -
 .../kylin/storage/hbase/HBaseResourceStore.java | 31 +++
 .../kylin/storage/hbase/HBaseStorage.java   |  3 +-
 .../storage/hbase/cube/SimpleHBaseStore.java| 20 ++---
 .../hbase/cube/v1/CubeSegmentTupleIterator.java | 11 +--
 .../storage/hbase/cube/v1/CubeStorageQuery.java |  6 +-
 .../hbase/cube/v1/RegionScannerAdapter.java | 10 ++-
 .../cube/v1/SerializedHBaseTupleIterator.java   |  4 +-
 .../observer/AggregateRegionObserver.java   |  4 +-
 .../observer/AggregationScanner.java| 14 ++-
 .../observer/ObserverAggregationCache.java  | 10 ++-
 .../coprocessor/observer/ObserverEnabler.java   |  4 +-
 .../hbase/cube/v2/CubeHBaseEndpointRPC.java | 13 +--
 .../storage/hbase/cube/v2/CubeHBaseScanRPC.java |  9 +-
 .../coprocessor/endpoint/CubeVisitService.java  |  4 +-
 .../storage/hbase/steps/CubeHTableUtil.java | 16 ++--
 .../storage/hbase/steps/DeprecatedGCStep.java   | 23 ++---
 .../storage/hbase/steps/HBaseCuboidWriter.java  |  7 +-
 .../hbase/steps/HBaseStreamingOutput.java   |  9 +-
 .../kylin/storage/hbase/steps/MergeGCStep.java  | 23 ++---
 .../storage/hbase/util/CleanHtableCLI.java  | 12 +--
 .../storage/hbase/util/CubeMigrationCLI.java| 36 
 .../hbase/util/CubeMigrationCheckCLI.java   | 17 ++--
 .../hbase/util/DeployCoprocessorCLI.java| 22 ++---
 .../hbase/util/ExtendCubeToHybridCLI.java   |  8 +-
 .../hbase/util/GridTableHBaseBenchmark.java | 34 +++
 .../kylin/storage/hbase/util/HBaseClean.java| 18 ++--
 .../hbase/util/HBaseRegionSizeCalculator.java   | 35 
 .../kylin/storage/hbase/util/HBaseUsage.java|  9 +-
 .../storage/hbase/util/HbaseStreamingInput.java | 30 +++
 .../hbase/util/HtableAlterMetadataCLI.java  |  9 +-
 .../storage/hbase/util/OrphanHBaseCleanJob.java | 19 ++--
 .../kylin/storage/hbase/util/PingHBaseCLI.java  | 15 ++--
 .../kylin/storage/hbase/util/RowCounterCLI.java | 11 +--
 .../storage/hbase/util/StorageCleanupJob.java   | 20 +++--
 .../storage/hbase/util/UpdateHTableHostCLI.java | 17 ++--
 .../observer/AggregateRegionObserverTest.java   | 26 ++
 .../v1/filter/TestFuzzyRowFilterV2EndToEnd.java |  5 +-
 .../org/apache/kylin/tool/CubeMigrationCLI.java | 36 
 .../kylin/tool/CubeMigrationCheckCLI.java   | 16 ++--
 .../kylin/tool/ExtendCubeToHybridCLI.java   |  8 +-
 .../apache/kylin/tool/StorageCleanupJob.java| 20 +++--
 53 files changed, 450 insertions(+), 500 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/kylin/blob/306fa777/examples/test_case_data/sandbox/hbase-site.xml
--
diff --git a/examples/test_case_data/sandbox/hbase-site.xml 
b/examples/test_case_data/sandbox/hbase-site.xml
index 46d5345..734908e 100644
--- a/examples/test_case_data/sandbox/hbase-site.xml
+++ b/examples/test_case_data/sandbox/hbase-site.xml
@@ -190,22 +190,5 @@
 zookeeper.znode.parent
 /hbase-unsecure
 
-
-hbase.client.pause
-100
-General client pause value.  Used mostly as value to wait
-before running a retry of a failed get, region lookup, etc.
-See hbase.client.retries.number for description of how we backoff 
from
-this initial pause amount and how this pause works w/ 
retries.
-
-
-hbase.client.retries.number
-5
-Maximum retries.  Used as maximum for all retryable
-operations such as the getting of a cell's value, starting a row 
update,
- 

[2/5] kylin git commit: TopN merge performance

2016-12-03 Thread lidong
TopN merge performance


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/e7d31938
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/e7d31938
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/e7d31938

Branch: refs/heads/yang21-hbase1.x
Commit: e7d31938ef9204a671c9894f02da4a9d3ab81b42
Parents: d3ecb0d
Author: shaofengshi 
Authored: Sun Dec 4 09:39:45 2016 +0800
Committer: shaofengshi 
Committed: Sun Dec 4 09:39:45 2016 +0800

--
 .../apache/kylin/measure/topn/TopNCounter.java  | 49 +++-
 1 file changed, 17 insertions(+), 32 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/kylin/blob/e7d31938/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
--
diff --git 
a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java 
b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
index cf9978a..0d0726c 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
@@ -26,11 +26,9 @@ import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 
-import com.google.common.collect.Maps;
 import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
+import com.google.common.collect.Maps;
 
 /**
  * Modified from the StreamSummary.java in 
https://github.com/addthis/stream-lib
@@ -157,41 +155,28 @@ public class TopNCounter implements 
Iterable {
  * @return
  */
 public TopNCounter merge(TopNCounter another) {
-double m1 = 0.0, m2 = 0.0;
-if (this.size() >= this.capacity) {
-m1 = this.counterList.getLast().count;
-}
-
-if (another.size() >= another.capacity) {
-m2 = another.counterList.getLast().count;
-}
-
-Set duplicateItems = Sets.newHashSet();
-List notDuplicateItems = Lists.newArrayList();
-
-for (Map.Entry entry : this.counterMap.entrySet()) {
-T item = entry.getKey();
-Counter existing = another.counterMap.get(item);
-if (existing != null) {
-duplicateItems.add(item);
-} else {
-notDuplicateItems.add(item);
+boolean thisFull = this.size() >= this.capacity;
+boolean anotherFull = another.size() >= another.capacity;
+double m1 = thisFull ? this.counterList.getLast().count : 0.0;
+double m2 = anotherFull ? another.counterList.getLast().count : 0.0;
+
+if (thisFull == true) {
+for (Counter entry : another.counterMap.values()) {
+entry.count += m1;
 }
 }
 
-for (T item : duplicateItems) {
-this.offer(item, another.counterMap.get(item).count);
-}
-
-for (T item : notDuplicateItems) {
-this.offer(item, m2);
+if (anotherFull == true) {
+for (Counter entry : this.counterMap.values()) {
+entry.count += m2;
+}
 }
 
 for (Map.Entry entry : another.counterMap.entrySet()) {
-T item = entry.getKey();
-if (duplicateItems.contains(item) == false) {
-double counter = entry.getValue().count;
-this.offer(item, counter + m1);
+if (counterMap.containsKey(entry.getKey())) {
+this.offer(entry.getValue().getItem(), anotherFull ? (thisFull 
? entry.getValue().count - m2 - m1 : entry.getValue().count - m2) : (thisFull ? 
(entry.getValue().count - m1) : entry.getValue().count));
+} else {
+this.offer(entry.getValue().getItem(), entry.getValue().count);
 }
 }
 



[1/5] kylin git commit: refine mapper and reducer log [Forced Update!]

2016-12-03 Thread lidong
Repository: kylin
Updated Branches:
  refs/heads/yang21-hbase1.x 215329ea0 -> 306fa777c (forced update)


refine mapper and reducer log


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/d3ecb0d9
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/d3ecb0d9
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/d3ecb0d9

Branch: refs/heads/yang21-hbase1.x
Commit: d3ecb0d9c381dbb035c7cada7d3c798e24fef1d1
Parents: 8001887
Author: Hongbin Ma 
Authored: Thu Dec 1 18:01:55 2016 +0800
Committer: Hongbin Ma 
Committed: Thu Dec 1 18:01:55 2016 +0800

--
 .../org/apache/kylin/engine/mr/KylinMapper.java | 17 +++---
 .../apache/kylin/engine/mr/KylinReducer.java| 17 +++---
 .../engine/mr/steps/BaseCuboidMapperBase.java   |  1 -
 .../kylin/engine/mr/steps/CuboidReducer.java| 21 ++--
 .../engine/mr/steps/HiveToBaseCuboidMapper.java | 10 ++
 .../engine/mr/steps/InMemCuboidMapper.java  | 34 
 .../engine/mr/steps/InMemCuboidReducer.java | 20 ++--
 .../kylin/engine/mr/steps/NDCuboidMapper.java   | 22 ++---
 8 files changed, 70 insertions(+), 72 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/kylin/blob/d3ecb0d9/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinMapper.java
--
diff --git 
a/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinMapper.java 
b/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinMapper.java
index a01f7a2..c5af2fe 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinMapper.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinMapper.java
@@ -18,18 +18,21 @@
 
 package org.apache.kylin.engine.mr;
 
-import java.io.IOException;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.kylin.engine.mr.common.BatchConstants;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.IOException;
+
 /**
  */
 public class KylinMapper extends 
Mapper {
 private static final Logger logger = 
LoggerFactory.getLogger(KylinMapper.class);
 
+protected int mapCounter = 0;
+
 protected void bindCurrentConfiguration(Configuration conf) {
 logger.info("The conf for current mapper will be " + 
System.identityHashCode(conf));
 HadoopUtil.setCurrentConfiguration(conf);
@@ -38,6 +41,10 @@ public class KylinMapper 
extends Mapper.Context context) throws IOException, InterruptedException {
 try {
+if (mapCounter++ % BatchConstants.NORMAL_RECORD_LOG_THRESHOLD == 
0) {
+logger.info("Accepting Mapper Key with ordinal: " + 
mapCounter);
+}
+
 doMap(key, value, context);
 } catch (IOException ex) { // KYLIN-2170
 logger.error("", ex);
@@ -53,11 +60,11 @@ public class KylinMapper 
extends Mapper.Context context) throws IOException, InterruptedException {
 super.map(key, value, context);
 }
-
+
 @Override
 final protected void cleanup(Mapper.Context context) throws IOException, InterruptedException {
 try {
@@ -76,7 +83,7 @@ public class KylinMapper 
extends Mapper.Context 
context) throws IOException, InterruptedException {
 }
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/d3ecb0d9/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinReducer.java
--
diff --git 
a/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinReducer.java 
b/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinReducer.java
index 2b63ce0..83266ea 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinReducer.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/KylinReducer.java
@@ -18,18 +18,22 @@
 
 package org.apache.kylin.engine.mr;
 
-import java.io.IOException;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.kylin.engine.mr.common.BatchConstants;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.IOException;
+
 /**
  

[5/5] kylin git commit: KYLIN-1528 Create a branch for v1.5 with HBase 1.x API

2016-12-03 Thread lidong
KYLIN-1528 Create a branch for v1.5 with HBase 1.x API


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/306fa777
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/306fa777
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/306fa777

Branch: refs/heads/yang21-hbase1.x
Commit: 306fa777c00723de132edf1507ca7721dd5601ca
Parents: 698b542
Author: shaofengshi 
Authored: Wed Mar 23 17:07:05 2016 +0800
Committer: lidongsjtu 
Committed: Sun Dec 4 15:41:00 2016 +0800

--
 examples/test_case_data/sandbox/hbase-site.xml  | 19 +---
 .../kylin/provision/BuildCubeWithEngine.java| 13 ++-
 pom.xml | 12 +--
 .../kylin/rest/security/AclHBaseStorage.java|  4 +-
 .../rest/security/MockAclHBaseStorage.java  |  8 +-
 .../apache/kylin/rest/security/MockHTable.java  | 95 
 .../rest/security/RealAclHBaseStorage.java  |  9 +-
 .../apache/kylin/rest/service/AclService.java   | 25 +++---
 .../apache/kylin/rest/service/CubeService.java  | 36 +++-
 .../apache/kylin/rest/service/QueryService.java | 24 +++--
 .../apache/kylin/rest/service/UserService.java  | 17 ++--
 .../kylin/storage/hbase/HBaseConnection.java| 44 -
 .../kylin/storage/hbase/HBaseResourceStore.java | 31 +++
 .../kylin/storage/hbase/HBaseStorage.java   |  3 +-
 .../storage/hbase/cube/SimpleHBaseStore.java| 20 ++---
 .../hbase/cube/v1/CubeSegmentTupleIterator.java | 11 +--
 .../storage/hbase/cube/v1/CubeStorageQuery.java |  6 +-
 .../hbase/cube/v1/RegionScannerAdapter.java | 10 ++-
 .../cube/v1/SerializedHBaseTupleIterator.java   |  4 +-
 .../observer/AggregateRegionObserver.java   |  4 +-
 .../observer/AggregationScanner.java| 14 ++-
 .../observer/ObserverAggregationCache.java  | 10 ++-
 .../coprocessor/observer/ObserverEnabler.java   |  4 +-
 .../hbase/cube/v2/CubeHBaseEndpointRPC.java | 13 +--
 .../storage/hbase/cube/v2/CubeHBaseScanRPC.java |  9 +-
 .../coprocessor/endpoint/CubeVisitService.java  |  4 +-
 .../storage/hbase/steps/CubeHTableUtil.java | 16 ++--
 .../storage/hbase/steps/DeprecatedGCStep.java   | 23 ++---
 .../storage/hbase/steps/HBaseCuboidWriter.java  |  7 +-
 .../hbase/steps/HBaseStreamingOutput.java   |  9 +-
 .../kylin/storage/hbase/steps/MergeGCStep.java  | 23 ++---
 .../storage/hbase/util/CleanHtableCLI.java  | 12 +--
 .../storage/hbase/util/CubeMigrationCLI.java| 36 
 .../hbase/util/CubeMigrationCheckCLI.java   | 17 ++--
 .../hbase/util/DeployCoprocessorCLI.java| 22 ++---
 .../hbase/util/ExtendCubeToHybridCLI.java   |  8 +-
 .../hbase/util/GridTableHBaseBenchmark.java | 34 +++
 .../kylin/storage/hbase/util/HBaseClean.java| 18 ++--
 .../hbase/util/HBaseRegionSizeCalculator.java   | 35 
 .../kylin/storage/hbase/util/HBaseUsage.java|  9 +-
 .../storage/hbase/util/HbaseStreamingInput.java | 30 +++
 .../hbase/util/HtableAlterMetadataCLI.java  |  9 +-
 .../storage/hbase/util/OrphanHBaseCleanJob.java | 19 ++--
 .../kylin/storage/hbase/util/PingHBaseCLI.java  | 15 ++--
 .../kylin/storage/hbase/util/RowCounterCLI.java | 11 +--
 .../storage/hbase/util/StorageCleanupJob.java   | 20 +++--
 .../storage/hbase/util/UpdateHTableHostCLI.java | 17 ++--
 .../observer/AggregateRegionObserverTest.java   | 26 ++
 .../v1/filter/TestFuzzyRowFilterV2EndToEnd.java |  5 +-
 .../org/apache/kylin/tool/CubeMigrationCLI.java | 36 
 .../kylin/tool/CubeMigrationCheckCLI.java   | 16 ++--
 .../kylin/tool/ExtendCubeToHybridCLI.java   |  8 +-
 .../apache/kylin/tool/StorageCleanupJob.java| 20 +++--
 53 files changed, 450 insertions(+), 500 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/kylin/blob/306fa777/examples/test_case_data/sandbox/hbase-site.xml
--
diff --git a/examples/test_case_data/sandbox/hbase-site.xml 
b/examples/test_case_data/sandbox/hbase-site.xml
index 46d5345..734908e 100644
--- a/examples/test_case_data/sandbox/hbase-site.xml
+++ b/examples/test_case_data/sandbox/hbase-site.xml
@@ -190,22 +190,5 @@
 zookeeper.znode.parent
 /hbase-unsecure
 
-
-hbase.client.pause
-100
-General client pause value.  Used mostly as value to wait
-before running a retry of a failed get, region lookup, etc.
-See hbase.client.retries.number for description of how we backoff 
from
-this initial pause amount and how this pause works w/ 
retries.
-
-
-hbase.client.retries.number
-5
-Maximum retries.  Used as maximum for all retryable
-operations such as the getting of a cell's value, starting a row 
update,
-   

[3/5] kylin git commit: minor: TopN merge performance improvement further

2016-12-03 Thread lidong
minor: TopN merge performance improvement further


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/698b542a
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/698b542a
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/698b542a

Branch: refs/heads/yang21-hbase1.x
Commit: 698b542a1201b55f8176f1822d47b1783853304c
Parents: e7d3193
Author: shaofengshi 
Authored: Sun Dec 4 10:39:37 2016 +0800
Committer: shaofengshi 
Committed: Sun Dec 4 10:39:37 2016 +0800

--
 .../src/main/java/org/apache/kylin/measure/topn/TopNCounter.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/kylin/blob/698b542a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
--
diff --git 
a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java 
b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
index 0d0726c..eabc941 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
@@ -173,8 +173,8 @@ public class TopNCounter implements Iterable 
{
 }
 
 for (Map.Entry entry : another.counterMap.entrySet()) {
-if (counterMap.containsKey(entry.getKey())) {
-this.offer(entry.getValue().getItem(), anotherFull ? (thisFull 
? entry.getValue().count - m2 - m1 : entry.getValue().count - m2) : (thisFull ? 
(entry.getValue().count - m1) : entry.getValue().count));
+if (this.counterMap.containsKey(entry.getKey())) {
+this.offer(entry.getValue().getItem(), (entry.getValue().count 
- m2 - m1));
 } else {
 this.offer(entry.getValue().getItem(), entry.getValue().count);
 }



[4/5] kylin git commit: KYLIN-1528 Create a branch for v1.5 with HBase 1.x API

2016-12-03 Thread lidong
http://git-wip-us.apache.org/repos/asf/kylin/blob/306fa777/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/HBaseStreamingOutput.java
--
diff --git 
a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/HBaseStreamingOutput.java
 
b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/HBaseStreamingOutput.java
index 0e95102..c59fb33 100644
--- 
a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/HBaseStreamingOutput.java
+++ 
b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/HBaseStreamingOutput.java
@@ -27,7 +27,8 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.client.HTableInterface;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Table;
 import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.persistence.ResourceStore;
 import org.apache.kylin.cube.CubeSegment;
@@ -56,7 +57,7 @@ public class HBaseStreamingOutput implements IStreamingOutput 
{
 try {
 CubeSegment cubeSegment = (CubeSegment) buildable;
 
-final HTableInterface hTable;
+final Table hTable;
 hTable = createHTable(cubeSegment);
 List cuboidWriters = Lists.newArrayList();
 cuboidWriters.add(new HBaseCuboidWriter(cubeSegment, hTable));
@@ -88,10 +89,10 @@ public class HBaseStreamingOutput implements 
IStreamingOutput {
 }
 }
 
-private HTableInterface createHTable(final CubeSegment cubeSegment) throws 
IOException {
+private Table createHTable(final CubeSegment cubeSegment) throws 
IOException {
 final String hTableName = cubeSegment.getStorageLocationIdentifier();
 CubeHTableUtil.createHTable(cubeSegment, null);
-final HTableInterface hTable = 
HBaseConnection.get(KylinConfig.getInstanceFromEnv().getStorageUrl()).getTable(hTableName);
+final Table hTable = 
HBaseConnection.get(KylinConfig.getInstanceFromEnv().getStorageUrl()).getTable(TableName.valueOf(hTableName));
 logger.info("hTable:" + hTableName + " for segment:" + 
cubeSegment.getName() + " created!");
 return hTable;
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/306fa777/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/MergeGCStep.java
--
diff --git 
a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/MergeGCStep.java
 
b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/MergeGCStep.java
index 5b2441c..2f7e164 100644
--- 
a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/MergeGCStep.java
+++ 
b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/MergeGCStep.java
@@ -24,11 +24,11 @@ import java.util.Collections;
 import java.util.List;
 
 import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
 import org.apache.kylin.common.KylinConfig;
-import org.apache.kylin.common.util.Bytes;
 import org.apache.kylin.job.exception.ExecuteException;
 import org.apache.kylin.job.execution.AbstractExecutable;
 import org.apache.kylin.job.execution.ExecutableContext;
@@ -69,19 +69,20 @@ public class MergeGCStep extends AbstractExecutable {
 List oldTables = getOldHTables();
 if (oldTables != null && oldTables.size() > 0) {
 String metadataUrlPrefix = 
KylinConfig.getInstanceFromEnv().getMetadataUrlPrefix();
-Configuration conf = 
HBaseConnection.getCurrentHBaseConfiguration();
-HBaseAdmin admin = null;
+Admin admin = null;
 try {
-admin = new HBaseAdmin(conf);
+Connection conn = 
HBaseConnection.get(KylinConfig.getInstanceFromEnv().getStorageUrl());
+admin = conn.getAdmin();
+
 for (String table : oldTables) {
-if (admin.tableExists(table)) {
-HTableDescriptor tableDescriptor = 
admin.getTableDescriptor(Bytes.toBytes(table));
+if (admin.tableExists(TableName.valueOf(table))) {
+HTableDescriptor tableDescriptor = 
admin.getTableDescriptor(TableName.valueOf((table)));
 String host = 
tableDescriptor.getValue(IRealizationConstants.HTableTag);
 if (metadataUrlPrefix.equalsIgnoreCase(host)) {
-if (admin.isTableEnabled(table)) {
-admin.disableTable(table);
+

kylin git commit: minor: TopN merge performance improvement further

2016-12-03 Thread shaofengshi
Repository: kylin
Updated Branches:
  refs/heads/yang21 e7d31938e -> 698b542a1


minor: TopN merge performance improvement further


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/698b542a
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/698b542a
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/698b542a

Branch: refs/heads/yang21
Commit: 698b542a1201b55f8176f1822d47b1783853304c
Parents: e7d3193
Author: shaofengshi 
Authored: Sun Dec 4 10:39:37 2016 +0800
Committer: shaofengshi 
Committed: Sun Dec 4 10:39:37 2016 +0800

--
 .../src/main/java/org/apache/kylin/measure/topn/TopNCounter.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/kylin/blob/698b542a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
--
diff --git 
a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java 
b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
index 0d0726c..eabc941 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
@@ -173,8 +173,8 @@ public class TopNCounter implements Iterable 
{
 }
 
 for (Map.Entry entry : another.counterMap.entrySet()) {
-if (counterMap.containsKey(entry.getKey())) {
-this.offer(entry.getValue().getItem(), anotherFull ? (thisFull 
? entry.getValue().count - m2 - m1 : entry.getValue().count - m2) : (thisFull ? 
(entry.getValue().count - m1) : entry.getValue().count));
+if (this.counterMap.containsKey(entry.getKey())) {
+this.offer(entry.getValue().getItem(), (entry.getValue().count 
- m2 - m1));
 } else {
 this.offer(entry.getValue().getItem(), entry.getValue().count);
 }



kylin git commit: TopN merge performance

2016-12-03 Thread shaofengshi
Repository: kylin
Updated Branches:
  refs/heads/yang21 d3ecb0d9c -> e7d31938e


TopN merge performance


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/e7d31938
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/e7d31938
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/e7d31938

Branch: refs/heads/yang21
Commit: e7d31938ef9204a671c9894f02da4a9d3ab81b42
Parents: d3ecb0d
Author: shaofengshi 
Authored: Sun Dec 4 09:39:45 2016 +0800
Committer: shaofengshi 
Committed: Sun Dec 4 09:39:45 2016 +0800

--
 .../apache/kylin/measure/topn/TopNCounter.java  | 49 +++-
 1 file changed, 17 insertions(+), 32 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/kylin/blob/e7d31938/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
--
diff --git 
a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java 
b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
index cf9978a..0d0726c 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java
@@ -26,11 +26,9 @@ import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 
-import com.google.common.collect.Maps;
 import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
+import com.google.common.collect.Maps;
 
 /**
  * Modified from the StreamSummary.java in 
https://github.com/addthis/stream-lib
@@ -157,41 +155,28 @@ public class TopNCounter implements 
Iterable {
  * @return
  */
 public TopNCounter merge(TopNCounter another) {
-double m1 = 0.0, m2 = 0.0;
-if (this.size() >= this.capacity) {
-m1 = this.counterList.getLast().count;
-}
-
-if (another.size() >= another.capacity) {
-m2 = another.counterList.getLast().count;
-}
-
-Set duplicateItems = Sets.newHashSet();
-List notDuplicateItems = Lists.newArrayList();
-
-for (Map.Entry entry : this.counterMap.entrySet()) {
-T item = entry.getKey();
-Counter existing = another.counterMap.get(item);
-if (existing != null) {
-duplicateItems.add(item);
-} else {
-notDuplicateItems.add(item);
+boolean thisFull = this.size() >= this.capacity;
+boolean anotherFull = another.size() >= another.capacity;
+double m1 = thisFull ? this.counterList.getLast().count : 0.0;
+double m2 = anotherFull ? another.counterList.getLast().count : 0.0;
+
+if (thisFull == true) {
+for (Counter entry : another.counterMap.values()) {
+entry.count += m1;
 }
 }
 
-for (T item : duplicateItems) {
-this.offer(item, another.counterMap.get(item).count);
-}
-
-for (T item : notDuplicateItems) {
-this.offer(item, m2);
+if (anotherFull == true) {
+for (Counter entry : this.counterMap.values()) {
+entry.count += m2;
+}
 }
 
 for (Map.Entry entry : another.counterMap.entrySet()) {
-T item = entry.getKey();
-if (duplicateItems.contains(item) == false) {
-double counter = entry.getValue().count;
-this.offer(item, counter + m1);
+if (counterMap.containsKey(entry.getKey())) {
+this.offer(entry.getValue().getItem(), anotherFull ? (thisFull 
? entry.getValue().count - m2 - m1 : entry.getValue().count - m2) : (thisFull ? 
(entry.getValue().count - m1) : entry.getValue().count));
+} else {
+this.offer(entry.getValue().getItem(), entry.getValue().count);
 }
 }
 



svn commit: r1772467 - in /kylin/site: ./ blog/ blog/2016/12/ blog/2016/12/04/ blog/2016/12/04/release-v1.6.0/ cn/blog/2016/12/ cn/blog/2016/12/04/ cn/blog/2016/12/04/release-v1.6.0/

2016-12-03 Thread lidong
Author: lidong
Date: Sat Dec  3 13:16:28 2016
New Revision: 1772467

URL: http://svn.apache.org/viewvc?rev=1772467&view=rev
Log:
add 160 release ntoes

Added:
kylin/site/blog/2016/12/
kylin/site/blog/2016/12/04/
kylin/site/blog/2016/12/04/release-v1.6.0/
kylin/site/blog/2016/12/04/release-v1.6.0/index.html
kylin/site/cn/blog/2016/12/
kylin/site/cn/blog/2016/12/04/
kylin/site/cn/blog/2016/12/04/release-v1.6.0/
kylin/site/cn/blog/2016/12/04/release-v1.6.0/index.html
Modified:
kylin/site/blog/index.html
kylin/site/feed.xml

Added: kylin/site/blog/2016/12/04/release-v1.6.0/index.html
URL: 
http://svn.apache.org/viewvc/kylin/site/blog/2016/12/04/release-v1.6.0/index.html?rev=1772467&view=auto
==
--- kylin/site/blog/2016/12/04/release-v1.6.0/index.html (added)
+++ kylin/site/blog/2016/12/04/release-v1.6.0/index.html Sat Dec  3 13:16:28 
2016
@@ -0,0 +1,287 @@
+
+
+
+   
+
+
+  
+  
+  
+
+  Apache Kylin | Apache Kylin v1.6.0 Release Announcement
+  
+  
+  
+
+
+
+
+
+
+
+
+
+
+
+
+
+  
+  
+  
+  
+
+  http://kylin.apache.org/blog/2016/12/04/release-v1.6.0/;>
+  http://kylin.apache.org/feed.xml; />
+
+
+
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+  
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+  //oringal tracker for kylin.io
+  ga('create', 'UA-55534813-1', 'auto');
+  //new tracker for kylin.apache.org
+  ga('create', 'UA-55534813-2', 'auto', {'name':'toplevel'});
+
+  ga('send', 'pageview');
+  ga('toplevel.send', 'pageview');
+
+
+
+
+ 
+ 
+
+
+   
+   
+
+
+  
+  
+  Apache Kylin™ Extreme OLAP Engine for Big 
Data 
+
+  
+  
+
+  
+  
+  
+
+
+
+  
+Toggle navigation
+
+
+
+  
+ 
+
+
+
+
+  
+ Home
+  Docs
+  Download
+  Community
+  Development
+  Blog
+  中文版  
+  https://twitter.com/apachekylin; target="_blank" 
class="fa fa-twitter fa-lg" title="Twitter: @ApacheKylin" >
+  https://github.com/apache/kylin; target="_blank" 
class="fa fa-github-alt fa-lg" title="Github: apache/kylin" >  
+  https://www.facebook.com/kylinio; target="_blank" 
class="fa fa-facebook fa-lg" title="Facebook: kylin.io" >   
+
+
+  
+
+ 
+
+   
+   
+   
+   Apache Kylin™ 
Technical Blog
+   
+   
+
+   
+   
+
+   
+
+
+
+  
+Apache Kylin v1.6.0 Release Announcement
+Dec 4, 2016 • Shaofeng Shi
+  
+
+  
+The Apache Kylin community is pleased to announce the release of Apache 
Kylin v1.6.0.
+
+Apache Kylin is an open source Distributed Analytics Engine designed to 
provide SQL interface and multi-dimensional analysis (OLAP) on Hadoop 
supporting extremely large datasets.
+
+This is a major release after 1.5.4, with the reliable and scalable support 
for using Apache Kafka as data source; this enables user to build cubes 
directly from streaming data (without loading to Apache Hive), reducing the 
data latency from days/hours to minutes.
+
+Apache Kylin 1.6.0 resolved 102 issues including bug fixes, improvements, 
and new features. All of the changes can be found in the https://kylin.apache.org/docs16/release_notes.html;>release notes.
+
+Change Highlights
+
+
+  Scalable streaming cubing https://issues.apache.org/jira/browse/KYLIN-1726;>KYLIN-1726
+  TopN counter merge performance improvement https://issues.apache.org/jira/browse/KYLIN-1917;>KYLIN-1917
+  Support Embedded Structure JSON Message https://issues.apache.org/jira/browse/KYLIN-1919;>KYLIN-1919
+  More robust approach to hive schema changes https://issues.apache.org/jira/browse/KYLIN-2012;>KYLIN-2012
+  TimedJsonStreamParser should support other time format https://issues.apache.org/jira/browse/KYLIN-2054;>KYLIN-2054
+  Add an encoder for Boolean type https://issues.apache.org/jira/browse/KYLIN-2055;>KYLIN-2055
+  Allow concurrent build/refresh/merge https://issues.apache.org/jira/browse/KYLIN-2070;>KYLIN-2070
+  Support to change streaming configuration https://issues.apache.org/jira/browse/KYLIN-2082;>KYLIN-2082
+
+
+To download Apache Kylin v1.6.0 source code or binary package, visit the http://kylin.apache.org/download;>download page.
+
+Upgrade
+
+Follow the upgrade guide.
+
+Support
+
+Any issue or question,
+open JIRA to Apache Kylin project: https://issues.apache.org/jira/browse/KYLIN/;>https://issues.apache.org/jira/browse/KYLIN/
+or
+send mail to Apache Kylin dev mailing list: 
+
+Great thanks to 

kylin git commit: add 160 release ntoes

2016-12-03 Thread shaofengshi
Repository: kylin
Updated Branches:
  refs/heads/document 407ee5f37 -> 3c6a9a63d


add 160 release ntoes


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/3c6a9a63
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/3c6a9a63
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/3c6a9a63

Branch: refs/heads/document
Commit: 3c6a9a63d7ede52d899c634fd9dba98667be7100
Parents: 407ee5f
Author: shaofengshi 
Authored: Sat Dec 3 21:11:55 2016 +0800
Committer: shaofengshi 
Committed: Sat Dec 3 21:11:55 2016 +0800

--
 .../_posts/blog/2016-12-04-release-v1.6.0.cn.md | 42 +++
 .../_posts/blog/2016-12-04-release-v1.6.0.md| 43 
 2 files changed, 85 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/kylin/blob/3c6a9a63/website/_posts/blog/2016-12-04-release-v1.6.0.cn.md
--
diff --git a/website/_posts/blog/2016-12-04-release-v1.6.0.cn.md 
b/website/_posts/blog/2016-12-04-release-v1.6.0.cn.md
new file mode 100644
index 000..b5cbe01
--- /dev/null
+++ b/website/_posts/blog/2016-12-04-release-v1.6.0.cn.md
@@ -0,0 +1,42 @@
+---
+layout: post-blog
+title:  Apache Kylin v1.6.0 正式发布
+date:   2016-12-04 21:00:00
+author: Shaofeng Shi
+categories: blog
+---
+
+Apache Kylin社区非常高兴宣布Apache Kylin v1.6.0正式发布。
+
+Apache 
Kylin是一个开源的分布式分析引擎,提供Hadoop之上的SQL查询接口及多维分析(OLAP)能力,支持对超大规模数据进行秒级查询。
+
+Apache Kylin v1.6.0带来了更可靠更易于管理的从Apache 
Kafka流中直接构建Cube的能力,使得用户可以在更多场景中更自然地进行数据分析,使得数据从产生到被检索到的延迟,从以前的一天或数小时,降低到数分钟。
 Apache Kylin 1.6.0修复了102个issue,包括缺陷,改进和新功能,详见[release notes]( 
+https://kylin.apache.org/docs16/release_notes.html).
+
+
+## 主要变化
+
+- 可伸缩的流式Cube构建 
[KYLIN-1726](https://issues.apache.org/jira/browse/KYLIN-1726) 
+- TopN性能增强 
[KYLIN-1917](https://issues.apache.org/jira/browse/KYLIN-1917) 
+- 支持Kafka的嵌入格式的JSON消息 
[KYLIN-1919](https://issues.apache.org/jira/browse/KYLIN-1919) 
+- 可靠同步hive表模式更改 
[KYLIN-2012](https://issues.apache.org/jira/browse/KYLIN-2012) 
+- 支持更多Kafka消息的时间戳格式 
[KYLIN-2054](https://issues.apache.org/jira/browse/KYLIN-2054) 
+- 增加Boolean编码 
[KYLIN-2055](https://issues.apache.org/jira/browse/KYLIN-2055) 
+- 支持多segment并行构建/合并/刷新 
[KYLIN-2070](https://issues.apache.org/jira/browse/KYLIN-2070) 
+- 支持更新流式表模式和配置的修改 
[KYLIN-2082](https://issues.apache.org/jira/browse/KYLIN-2082) 
+
+
+下载Apache Kylin v1.6.0源代码及二进制安装包
,请访问[下载](http://kylin.apache.org/cn/download/)页面.
+
+__升级__
+
+参见[升级指南](/docs16/howto/howto_upgrade.html).
+
+__支持__
+
+升级和使用过程中有任何问题,请:
+提交至Kylin的JIRA: 
[https://issues.apache.org/jira/browse/KYLIN/](https://issues.apache.org/jira/browse/KYLIN/)
+或者
+发送邮件到Apache Kylin邮件列表: 
[dev@kylin.apache.org](mailto:dev@kylin.apache.org)
+
+_感谢每一位朋友的参与和贡献!_
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/kylin/blob/3c6a9a63/website/_posts/blog/2016-12-04-release-v1.6.0.md
--
diff --git a/website/_posts/blog/2016-12-04-release-v1.6.0.md 
b/website/_posts/blog/2016-12-04-release-v1.6.0.md
new file mode 100644
index 0000000..1190019
--- /dev/null
+++ b/website/_posts/blog/2016-12-04-release-v1.6.0.md
@@ -0,0 +1,43 @@
+---
+layout: post-blog
+title:  Apache Kylin v1.6.0 Release Announcement
+date:   2016-12-04 20:00:00
+author: Shaofeng Shi
+categories: blog
+---
+
+The Apache Kylin community is pleased to announce the release of Apache Kylin 
v1.6.0.
+
+Apache Kylin is an open source Distributed Analytics Engine designed to 
provide SQL interface and multi-dimensional analysis (OLAP) on Hadoop 
supporting extremely large datasets.
+
+This is a major release after 1.5.4, with reliable and scalable support 
for using Apache Kafka as a data source; this enables users to build cubes 
directly from streaming data (without loading to Apache Hive), reducing the 
data latency from days/hours to minutes. 
+
+Apache Kylin 1.6.0 resolved 102 issues including bug fixes, improvements, and 
new features. All of the changes can be found in the [release notes]( 
+https://kylin.apache.org/docs16/release_notes.html).
+
+
+## Change Highlights
+
+- Scalable