Repository: kylin Updated Branches: refs/heads/master 2e03c9c38 -> 82ae39aa2
KYLIN-1541 IntegerDimEnc, custom dimension encoding for integers Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/82ae39aa Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/82ae39aa Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/82ae39aa Branch: refs/heads/master Commit: 82ae39aa21356390485ba292c790b800d536a329 Parents: 2e03c9c Author: Li Yang <[email protected]> Authored: Mon Mar 28 18:00:46 2016 +0800 Committer: Li Yang <[email protected]> Committed: Mon Mar 28 18:01:06 2016 +0800 ---------------------------------------------------------------------- .../dimension/DimensionEncodingFactory.java | 1 + .../apache/kylin/dimension/IntegerDimEnc.java | 171 +++++++++++++++++++ 2 files changed, 172 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/82ae39aa/core-metadata/src/main/java/org/apache/kylin/dimension/DimensionEncodingFactory.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/dimension/DimensionEncodingFactory.java b/core-metadata/src/main/java/org/apache/kylin/dimension/DimensionEncodingFactory.java index af5551d..812a5b4 100644 --- a/core-metadata/src/main/java/org/apache/kylin/dimension/DimensionEncodingFactory.java +++ b/core-metadata/src/main/java/org/apache/kylin/dimension/DimensionEncodingFactory.java @@ -74,6 +74,7 @@ public abstract class DimensionEncodingFactory { // built-in encodings, note dictionary is a special case map.put(FixedLenDimEnc.ENCODING_NAME, new FixedLenDimEnc.Factory()); + map.put(IntegerDimEnc.ENCODING_NAME, new IntegerDimEnc.Factory()); // custom encodings String[] clsNames = KylinConfig.getInstanceFromEnv().getCubeDimensionCustomEncodingFactories(); http://git-wip-us.apache.org/repos/asf/kylin/blob/82ae39aa/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEnc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEnc.java b/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEnc.java new file mode 100644 index 0000000..15c46a6 --- /dev/null +++ b/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEnc.java @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.dimension; + +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.nio.ByteBuffer; +import java.util.Arrays; + +import org.apache.kylin.common.util.Bytes; +import org.apache.kylin.common.util.BytesUtil; +import org.apache.kylin.metadata.datatype.DataTypeSerializer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class IntegerDimEnc extends DimensionEncoding { + + private static Logger logger = LoggerFactory.getLogger(IntegerDimEnc.class); + + private static final long CAP[] = { 0, 0xffL, 0xffffL, 0xffffffL, 0xffffffffL, 0xffffffffffL, 0xffffffffffffL, 0xffffffffffffffL, Long.MAX_VALUE }; + + public static final String ENCODING_NAME = "int"; + + public static class Factory extends DimensionEncodingFactory { + @Override + public String getSupportedEncodingName() { + return ENCODING_NAME; + } + + @Override + public DimensionEncoding createDimensionEncoding(String encodingName, String[] args) { + return new IntegerDimEnc(Integer.parseInt(args[0])); + } + }; + + // ============================================================================ + + private int fixedLen; + + transient private int avoidVerbose = 0; + + public IntegerDimEnc(int len) { + if (len <= 0 || len >= CAP.length) + throw new IllegalArgumentException(); + + this.fixedLen = len; + } + + @Override + public int getLengthOfEncoding() { + return fixedLen; + } + + @Override + public void encode(byte[] value, int valueLen, byte[] output, int outputOffset) { + if (value == null) { + Arrays.fill(output, outputOffset, outputOffset + fixedLen, NULL); + return; + } + + encode(Bytes.toString(value, 0, valueLen), output, outputOffset); + } + + void encode(String valueStr, byte[] output, int outputOffset) { + if (valueStr == null) { + Arrays.fill(output, outputOffset, outputOffset + fixedLen, NULL); + return; + } + + long integer = Long.parseLong(valueStr); + if (integer > CAP[fixedLen]) { + if (avoidVerbose++ % 10000 == 0) { + logger.warn("Expect at most " + fixedLen + " bytes, but got " + valueStr + ", will truncate, hit times:" + avoidVerbose); + } + } + + BytesUtil.writeLong(integer, output, outputOffset, fixedLen); + } + + @Override + public String decode(byte[] bytes, int offset, int len) { + if (isNull(bytes, offset, len)) { + return null; + } + + long integer = BytesUtil.readLong(bytes, offset, len); + return String.valueOf(integer); + } + + @Override + public DataTypeSerializer<Object> asDataTypeSerializer() { + return new IntegerSerializer(); + } + + public class IntegerSerializer extends DataTypeSerializer<Object> { + // be thread-safe and avoid repeated obj creation + private ThreadLocal<byte[]> current = new ThreadLocal<byte[]>(); + + private byte[] currentBuf() { + byte[] buf = current.get(); + if (buf == null) { + buf = new byte[fixedLen]; + current.set(buf); + } + return buf; + } + + @Override + public void serialize(Object value, ByteBuffer out) { + byte[] buf = currentBuf(); + String valueStr = value == null ? null : value.toString(); + encode(valueStr, buf, 0); + out.put(buf); + } + + @Override + public Object deserialize(ByteBuffer in) { + byte[] buf = currentBuf(); + in.get(buf); + return decode(buf, 0, buf.length); + } + + @Override + public int peekLength(ByteBuffer in) { + return fixedLen; + } + + @Override + public int maxLength() { + return fixedLen; + } + + @Override + public int getStorageBytesEstimate() { + return fixedLen; + } + + @Override + public Object valueOf(String str) { + return str; + } + } + + @Override + public void writeExternal(ObjectOutput out) throws IOException { + out.writeShort(fixedLen); + } + + @Override + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + fixedLen = in.readShort(); + } + +}
