KYLIN-2215 Refactor DimensionEncoding.encode(byte[]) to encode(String)
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/a6e3ccd7 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/a6e3ccd7 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/a6e3ccd7 Branch: refs/heads/master Commit: a6e3ccd7b22b19e03b5e3af568148816fe4a563a Parents: d4a18c1 Author: Yang Li <liy...@apache.org> Authored: Sat Nov 19 16:07:10 2016 +0800 Committer: Yang Li <liy...@apache.org> Committed: Sat Nov 19 16:07:10 2016 +0800 ---------------------------------------------------------------------- .../kylin/cube/gridtable/TrimmedDimEnc.java | 2 +- .../kylin/cube/kv/AbstractRowKeyEncoder.java | 2 +- .../apache/kylin/cube/kv/FuzzyMaskEncoder.java | 4 +- .../org/apache/kylin/cube/kv/RowConstants.java | 3 - .../apache/kylin/cube/kv/RowKeyColumnIO.java | 4 +- .../org/apache/kylin/cube/kv/RowKeyEncoder.java | 33 +++------ .../apache/kylin/cube/kv/RowKeyDecoderTest.java | 19 +++-- .../apache/kylin/cube/kv/RowKeyEncoderTest.java | 42 +++++------ .../gridtable/DimEncodingPreserveOrderTest.java | 18 +---- .../kylin/dimension/AbstractDateDimEnc.java | 17 +---- .../apache/kylin/dimension/BooleanDimEnc.java | 19 ++--- .../kylin/dimension/DictionaryDimEnc.java | 7 +- .../kylin/dimension/DimensionEncoding.java | 4 +- .../apache/kylin/dimension/FixedLenDimEnc.java | 10 ++- .../kylin/dimension/FixedLenHexDimEnc.java | 12 +-- .../org/apache/kylin/dimension/IntDimEnc.java | 12 +-- .../apache/kylin/dimension/IntegerDimEnc.java | 12 +-- .../apache/kylin/dimension/IntegerDimEncV2.java | 12 +-- .../kylin/dimension/OneMoreByteVLongDimEnc.java | 12 +-- .../kylin/measure/topn/TopNMeasureType.java | 7 +- .../kylin/dimension/BooleanDimEncTest.java | 6 +- .../kylin/dimension/FixedLenHexDimEncTest.java | 6 +- .../apache/kylin/dimension/IntDimEncTest.java | 6 +- .../kylin/dimension/IntegerDimEncTest.java | 6 +- .../dimension/OneMoreByteVLongDimEncTest.java | 6 +- .../engine/mr/steps/BaseCuboidMapperBase.java | 77 
+++++++------------- .../engine/mr/steps/HiveToBaseCuboidMapper.java | 9 +-- .../coprocessor/CoprocessorProjector.java | 4 +- .../common/coprocessor/FilterDecorator.java | 4 +- 29 files changed, 122 insertions(+), 253 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/core-cube/src/main/java/org/apache/kylin/cube/gridtable/TrimmedDimEnc.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/gridtable/TrimmedDimEnc.java b/core-cube/src/main/java/org/apache/kylin/cube/gridtable/TrimmedDimEnc.java index c4bee8a..6d05668 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/gridtable/TrimmedDimEnc.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/gridtable/TrimmedDimEnc.java @@ -44,7 +44,7 @@ public class TrimmedDimEnc extends DimensionEncoding { } @Override - public void encode(byte[] value, int valueLen, byte[] output, int outputOffset) { + public void encode(String valueStr, byte[] output, int outputOffset) { throw new UnsupportedOperationException(); } http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/core-cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java b/core-cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java index 37b33aa..bfe6eb4 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java @@ -84,5 +84,5 @@ public abstract class AbstractRowKeyEncoder { abstract public byte[] encode(Map<TblColRef, String> valueMap); - abstract public byte[] encode(byte[][] values); + abstract public byte[] encode(String[] values); } 
http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/core-cube/src/main/java/org/apache/kylin/cube/kv/FuzzyMaskEncoder.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/kv/FuzzyMaskEncoder.java b/core-cube/src/main/java/org/apache/kylin/cube/kv/FuzzyMaskEncoder.java index 94db94b..0cbb7d2 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/kv/FuzzyMaskEncoder.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/kv/FuzzyMaskEncoder.java @@ -77,8 +77,8 @@ public class FuzzyMaskEncoder extends RowKeyEncoder { } @Override - protected void fillColumnValue(TblColRef column, int columnLen, byte[] value, int valueLen, byte[] outputValue, int outputValueOffset) { - if (value == null) { + protected void fillColumnValue(TblColRef column, int columnLen, String valueStr, byte[] outputValue, int outputValueOffset) { + if (valueStr == null) { Arrays.fill(outputValue, outputValueOffset, outputValueOffset + columnLen, RowConstants.BYTE_ONE); } else { Arrays.fill(outputValue, outputValueOffset, outputValueOffset + columnLen, RowConstants.BYTE_ZERO); http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/core-cube/src/main/java/org/apache/kylin/cube/kv/RowConstants.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/kv/RowConstants.java b/core-cube/src/main/java/org/apache/kylin/cube/kv/RowConstants.java index 809e0a3..ec0d39d 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/kv/RowConstants.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/kv/RowConstants.java @@ -44,7 +44,4 @@ public class RowConstants { public static final int ROWKEY_BUFFER_SIZE = 65 * 256;// a little more than 64 dimensions * 256 bytes each - // marker class - public static final byte[][] BYTE_ARR_MARKER = new byte[0][]; - } 
http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java b/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java index a1c7792..b3facd2 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java @@ -47,12 +47,12 @@ public class RowKeyColumnIO { return dimEncMap.getDictionary(col); } - public void writeColumn(TblColRef col, byte[] value, int valueLen, int roundingFlag, byte defaultValue, byte[] output, int outputOffset) { + public void writeColumn(TblColRef col, String value, int roundingFlag, byte defaultValue, byte[] output, int outputOffset) { DimensionEncoding dimEnc = dimEncMap.get(col); if (dimEnc instanceof DictionaryDimEnc) dimEnc = ((DictionaryDimEnc) dimEnc).copy(roundingFlag, defaultValue); - dimEnc.encode(value, valueLen, output, outputOffset); + dimEnc.encode(value, output, outputOffset); } public String readColumnString(TblColRef col, byte[] bytes, int offset, int length) { http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyEncoder.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyEncoder.java b/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyEncoder.java index 672e1bc..bf20de1 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyEncoder.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyEncoder.java @@ -18,14 +18,12 @@ package org.apache.kylin.cube.kv; -import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.kylin.common.util.ByteArray; -import 
org.apache.kylin.common.util.Bytes; import org.apache.kylin.common.util.BytesUtil; import org.apache.kylin.common.util.ImmutableBitSet; import org.apache.kylin.common.util.ShardingHash; @@ -132,36 +130,23 @@ public class RowKeyEncoder extends AbstractRowKeyEncoder { @Override public byte[] encode(Map<TblColRef, String> valueMap) { - List<byte[]> valueList = new ArrayList<byte[]>(); - for (TblColRef bdCol : cuboid.getColumns()) { - String value = valueMap.get(bdCol); - valueList.add(valueStringToBytes(value)); + List<TblColRef> columns = cuboid.getColumns(); + String[] values = new String[columns.size()]; + for (int i = 0; i < columns.size(); i++) { + values[i] = valueMap.get(columns.get(i)); } - byte[][] values = valueList.toArray(RowConstants.BYTE_ARR_MARKER); return encode(values); } - public byte[] valueStringToBytes(String value) { - if (value == null) - return null; - else - return Bytes.toBytes(value); - } - @Override - public byte[] encode(byte[][] values) { + public byte[] encode(String[] values) { byte[] bytes = new byte[this.getBytesLength()]; int offset = getHeaderLength(); for (int i = 0; i < cuboid.getColumns().size(); i++) { TblColRef column = cuboid.getColumns().get(i); int colLength = colIO.getColumnLength(column); - byte[] value = values[i]; - if (value == null) { - fillColumnValue(column, colLength, null, 0, bytes, offset); - } else { - fillColumnValue(column, colLength, value, value.length, bytes, offset); - } + fillColumnValue(column, colLength, values[i], bytes, offset); offset += colLength; } @@ -185,14 +170,14 @@ public class RowKeyEncoder extends AbstractRowKeyEncoder { //return offset; } - protected void fillColumnValue(TblColRef column, int columnLen, byte[] value, int valueLen, byte[] outputValue, int outputValueOffset) { + protected void fillColumnValue(TblColRef column, int columnLen, String valueStr, byte[] outputValue, int outputValueOffset) { // special null value case - if (value == null) { + if (valueStr == null) { 
Arrays.fill(outputValue, outputValueOffset, outputValueOffset + columnLen, defaultValue()); return; } - colIO.writeColumn(column, value, valueLen, 0, this.blankByte, outputValue, outputValueOffset); + colIO.writeColumn(column, valueStr, 0, this.blankByte, outputValue, outputValueOffset); } protected byte defaultValue() { http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/core-cube/src/test/java/org/apache/kylin/cube/kv/RowKeyDecoderTest.java ---------------------------------------------------------------------- diff --git a/core-cube/src/test/java/org/apache/kylin/cube/kv/RowKeyDecoderTest.java b/core-cube/src/test/java/org/apache/kylin/cube/kv/RowKeyDecoderTest.java index 9b34b2a..1d1d147 100644 --- a/core-cube/src/test/java/org/apache/kylin/cube/kv/RowKeyDecoderTest.java +++ b/core-cube/src/test/java/org/apache/kylin/cube/kv/RowKeyDecoderTest.java @@ -23,7 +23,6 @@ import static org.junit.Assert.assertEquals; import java.io.IOException; import java.util.List; -import org.apache.kylin.common.util.Bytes; import org.apache.kylin.common.util.DateFormat; import org.apache.kylin.common.util.LocalFileMetadataTestCase; import org.apache.kylin.cube.CubeInstance; @@ -81,15 +80,15 @@ public class RowKeyDecoderTest extends LocalFileMetadataTestCase { CubeInstance cube = CubeManager.getInstance(getTestConfig()).getCube("TEST_KYLIN_CUBE_WITHOUT_SLR_READY"); CubeDesc cubeDesc = cube.getDescriptor(); - byte[][] data = new byte[8][]; - data[0] = Bytes.toBytes("2012-12-15"); - data[1] = Bytes.toBytes("11848"); - data[2] = Bytes.toBytes("Health & Beauty"); - data[3] = Bytes.toBytes("Fragrances"); - data[4] = Bytes.toBytes("Women"); - data[5] = Bytes.toBytes("刊登格式测试");// UTF-8 - data[6] = Bytes.toBytes("0"); - data[7] = Bytes.toBytes("15"); + String[] data = new String[8]; + data[0] = "2012-12-15"; + data[1] = "11848"; + data[2] = "Health & Beauty"; + data[3] = "Fragrances"; + data[4] = "Women"; + data[5] = "刊登格式测试";// UTF-8 + data[6] = "0"; + data[7] = "15"; 
long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc); Cuboid baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId); http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/core-cube/src/test/java/org/apache/kylin/cube/kv/RowKeyEncoderTest.java ---------------------------------------------------------------------- diff --git a/core-cube/src/test/java/org/apache/kylin/cube/kv/RowKeyEncoderTest.java b/core-cube/src/test/java/org/apache/kylin/cube/kv/RowKeyEncoderTest.java index c32af71..75e2458 100644 --- a/core-cube/src/test/java/org/apache/kylin/cube/kv/RowKeyEncoderTest.java +++ b/core-cube/src/test/java/org/apache/kylin/cube/kv/RowKeyEncoderTest.java @@ -55,15 +55,15 @@ public class RowKeyEncoderTest extends LocalFileMetadataTestCase { CubeDesc cubeDesc = cube.getDescriptor(); // String data = // "2013-08-18Abbigliamento e accessoriDonna: AccessoriSciarpFoulard e ScialliAuctionItalyRegular"; - byte[][] data = new byte[8][]; - data[0] = Bytes.toBytes("2012-12-15"); - data[1] = Bytes.toBytes("11848"); - data[2] = Bytes.toBytes("Health & Beauty"); - data[3] = Bytes.toBytes("Fragrances"); - data[4] = Bytes.toBytes("Women"); - data[5] = Bytes.toBytes("FP-GTC"); - data[6] = Bytes.toBytes("0"); - data[7] = Bytes.toBytes("15"); + String[] data = new String[8]; + data[0] = "2012-12-15"; + data[1] = "11848"; + data[2] = "Health & Beauty"; + data[3] = "Fragrances"; + data[4] = "Women"; + data[5] = "FP-GTC"; + data[6] = "0"; + data[7] = "15"; long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc); Cuboid baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId); @@ -85,16 +85,16 @@ public class RowKeyEncoderTest extends LocalFileMetadataTestCase { CubeDesc cubeDesc = cube.getDescriptor(); // String data = // "1234567892013-08-18Abbigliamento e accessoriDonna: AccessoriSciarpFoulard e ScialliAuctionItalyRegular"; - byte[][] data = new byte[9][]; - data[0] = Bytes.toBytes("123456789"); - data[1] = Bytes.toBytes("2012-12-15"); - data[2] = Bytes.toBytes("11848"); - data[3] = 
Bytes.toBytes("Health & Beauty"); - data[4] = Bytes.toBytes("Fragrances"); - data[5] = Bytes.toBytes("Women"); - data[6] = Bytes.toBytes("FP-GTC"); - data[7] = Bytes.toBytes("0"); - data[8] = Bytes.toBytes("15"); + String[] data = new String[9]; + data[0] = "123456789"; + data[1] = "2012-12-15"; + data[2] = "11848"; + data[3] = "Health & Beauty"; + data[4] = "Fragrances"; + data[5] = "Women"; + data[6] = "FP-GTC"; + data[7] = "0"; + data[8] = "15"; long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc); Cuboid baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId); @@ -121,8 +121,8 @@ public class RowKeyEncoderTest extends LocalFileMetadataTestCase { CubeDesc cubeDesc = cube.getDescriptor(); // String data = // "1234567892013-08-18Abbigliamento e accessoriDonna: AccessoriSciarpFoulard e ScialliAuctionItalyRegular"; - byte[][] data = new byte[9][]; - data[0] = Bytes.toBytes("123456789"); + String[] data = new String[9]; + data[0] = "123456789"; data[1] = null; data[2] = null; data[3] = null; http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/core-cube/src/test/java/org/apache/kylin/gridtable/DimEncodingPreserveOrderTest.java ---------------------------------------------------------------------- diff --git a/core-cube/src/test/java/org/apache/kylin/gridtable/DimEncodingPreserveOrderTest.java b/core-cube/src/test/java/org/apache/kylin/gridtable/DimEncodingPreserveOrderTest.java index d572e56..7d6af24 100644 --- a/core-cube/src/test/java/org/apache/kylin/gridtable/DimEncodingPreserveOrderTest.java +++ b/core-cube/src/test/java/org/apache/kylin/gridtable/DimEncodingPreserveOrderTest.java @@ -24,7 +24,6 @@ import java.util.ArrayList; import java.util.List; import org.apache.kylin.common.util.ByteArray; -import org.apache.kylin.common.util.Bytes; import org.apache.kylin.dimension.DimensionEncoding; import org.apache.kylin.dimension.FixedLenHexDimEnc; import org.apache.kylin.dimension.IntegerDimEncV2; @@ -110,19 +109,10 @@ public class DimEncodingPreserveOrderTest { } 
private ByteArray encode(DimensionEncoding enc, Object value) { - if (value != null) { - byte[] buf = new byte[enc.getLengthOfEncoding()]; - - String valueStr = "" + value; - byte[] bytes = Bytes.toBytes(valueStr); - - enc.encode(bytes, bytes.length, buf, 0); - return new ByteArray(buf); - } else { - byte[] buf = new byte[enc.getLengthOfEncoding()]; - enc.encode(null, 0, buf, 0); - return new ByteArray(buf); - } + byte[] buf = new byte[enc.getLengthOfEncoding()]; + String valueStr = value == null ? null : value.toString(); + enc.encode(valueStr, buf, 0); + return new ByteArray(buf); } @Test http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/core-metadata/src/main/java/org/apache/kylin/dimension/AbstractDateDimEnc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/dimension/AbstractDateDimEnc.java b/core-metadata/src/main/java/org/apache/kylin/dimension/AbstractDateDimEnc.java index e6d4be5..ec6347f 100644 --- a/core-metadata/src/main/java/org/apache/kylin/dimension/AbstractDateDimEnc.java +++ b/core-metadata/src/main/java/org/apache/kylin/dimension/AbstractDateDimEnc.java @@ -22,7 +22,6 @@ import java.io.IOException; import java.io.ObjectInput; import java.io.ObjectOutput; import java.io.Serializable; -import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; import java.util.Arrays; @@ -54,21 +53,7 @@ public class AbstractDateDimEnc extends DimensionEncoding { } @Override - public void encode(byte[] value, int valueLen, byte[] output, int outputOffset) { - if (value == null) { - Arrays.fill(output, outputOffset, outputOffset + fixedLen, NULL); - return; - } - - try { - String str = new String(value, 0, valueLen, "ISO-8859-1"); - encode(str, output, outputOffset); - } catch (UnsupportedEncodingException e) { - // never happen - } - } - - void encode(String value, byte[] output, int outputOffset) { + public void encode(String value, byte[] output, int 
outputOffset) { if (value == null) { Arrays.fill(output, outputOffset, outputOffset + fixedLen, NULL); return; http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/core-metadata/src/main/java/org/apache/kylin/dimension/BooleanDimEnc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/dimension/BooleanDimEnc.java b/core-metadata/src/main/java/org/apache/kylin/dimension/BooleanDimEnc.java index a88d1a2..c3f4c11 100644 --- a/core-metadata/src/main/java/org/apache/kylin/dimension/BooleanDimEnc.java +++ b/core-metadata/src/main/java/org/apache/kylin/dimension/BooleanDimEnc.java @@ -25,11 +25,11 @@ import java.nio.ByteBuffer; import java.util.Arrays; import java.util.Map; -import com.google.common.collect.Maps; -import org.apache.kylin.common.util.Bytes; import org.apache.kylin.common.util.BytesUtil; import org.apache.kylin.metadata.datatype.DataTypeSerializer; +import com.google.common.collect.Maps; + /** * Encoding Boolean values to bytes */ @@ -75,24 +75,15 @@ public class BooleanDimEnc extends DimensionEncoding { } @Override - public void encode(byte[] value, int valueLen, byte[] output, int outputOffset) { + public void encode(String value, byte[] output, int outputOffset) { if (value == null) { Arrays.fill(output, outputOffset, outputOffset + fixedLen, NULL); return; } - encode(Bytes.toString(value, 0, valueLen), output, outputOffset); - } - - void encode(String valueStr, byte[] output, int outputOffset) { - if (valueStr == null) { - Arrays.fill(output, outputOffset, outputOffset + fixedLen, NULL); - return; - } - - Integer encodeValue = map.get(valueStr); + Integer encodeValue = map.get(value); if (encodeValue == null) { - throw new IllegalArgumentException("Value '" + valueStr + "' is not a recognized boolean value."); + throw new IllegalArgumentException("Value '" + value + "' is not a recognized boolean value."); } BytesUtil.writeLong(encodeValue, output, outputOffset, 
fixedLen); http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/core-metadata/src/main/java/org/apache/kylin/dimension/DictionaryDimEnc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/dimension/DictionaryDimEnc.java b/core-metadata/src/main/java/org/apache/kylin/dimension/DictionaryDimEnc.java index b6bc7fb..500b410 100644 --- a/core-metadata/src/main/java/org/apache/kylin/dimension/DictionaryDimEnc.java +++ b/core-metadata/src/main/java/org/apache/kylin/dimension/DictionaryDimEnc.java @@ -23,7 +23,6 @@ import java.io.ObjectInput; import java.io.ObjectOutput; import java.nio.ByteBuffer; -import org.apache.kylin.common.util.Bytes; import org.apache.kylin.common.util.BytesUtil; import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.metadata.datatype.DataTypeSerializer; @@ -86,15 +85,15 @@ public class DictionaryDimEnc extends DimensionEncoding { } @Override - public void encode(byte[] value, int valueLen, byte[] output, int outputOffset) { + public void encode(String valueStr, byte[] output, int outputOffset) { try { - int id = dict.getIdFromValueBytes(value, 0, valueLen, roundingFlag); + int id = dict.getIdFromValue(valueStr, roundingFlag); BytesUtil.writeUnsigned(id, output, outputOffset, fixedLen); } catch (IllegalArgumentException ex) { for (int i = outputOffset; i < outputOffset + fixedLen; i++) { output[i] = defaultByte; } - logger.error("Can't translate value " + Bytes.toString(value, 0, valueLen) + " to dictionary ID, roundingFlag " + roundingFlag + ". Using default value " + String.format("\\x%02X", defaultByte)); + logger.error("Can't translate value " + valueStr + " to dictionary ID, roundingFlag " + roundingFlag + ". 
Using default value " + String.format("\\x%02X", defaultByte)); } } http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/core-metadata/src/main/java/org/apache/kylin/dimension/DimensionEncoding.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/dimension/DimensionEncoding.java b/core-metadata/src/main/java/org/apache/kylin/dimension/DimensionEncoding.java index c6ec607..6dc97a3 100644 --- a/core-metadata/src/main/java/org/apache/kylin/dimension/DimensionEncoding.java +++ b/core-metadata/src/main/java/org/apache/kylin/dimension/DimensionEncoding.java @@ -66,8 +66,8 @@ public abstract class DimensionEncoding implements Externalizable { /** return the fixed length of encoded bytes */ abstract public int getLengthOfEncoding(); - /** encode given value (a string in byte form) to bytes, note the NULL convention */ - abstract public void encode(byte[] value, int valueLen, byte[] output, int outputOffset); + /** encode given value to bytes, note the NULL convention */ + abstract public void encode(String value, byte[] output, int outputOffset); /** decode given bytes to value string, note the NULL convention */ abstract public String decode(byte[] bytes, int offset, int len); http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/core-metadata/src/main/java/org/apache/kylin/dimension/FixedLenDimEnc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/dimension/FixedLenDimEnc.java b/core-metadata/src/main/java/org/apache/kylin/dimension/FixedLenDimEnc.java index bc47364..b219766 100644 --- a/core-metadata/src/main/java/org/apache/kylin/dimension/FixedLenDimEnc.java +++ b/core-metadata/src/main/java/org/apache/kylin/dimension/FixedLenDimEnc.java @@ -89,12 +89,14 @@ public class FixedLenDimEnc extends DimensionEncoding { } @Override - public void encode(byte[] value, int valueLen, byte[] output, int 
outputOffset) { - if (value == null) { + public void encode(String valueStr, byte[] output, int outputOffset) { + if (valueStr == null) { Arrays.fill(output, outputOffset, outputOffset + fixedLen, NULL); return; } + byte[] value = Bytes.toBytes(valueStr); + int valueLen = value.length; if (valueLen > fixedLen) { if (avoidVerbose++ % 10000 == 0) { logger.warn("Expect at most " + fixedLen + " bytes, but got " + valueLen + ", will truncate, value string: " + Bytes.toString(value, 0, valueLen) + " times:" + avoidVerbose); @@ -142,8 +144,8 @@ public class FixedLenDimEnc extends DimensionEncoding { @Override public void serialize(Object value, ByteBuffer out) { byte[] buf = currentBuf(); - byte[] bytes = value == null ? null : Bytes.toBytes(value.toString()); - encode(bytes, bytes == null ? 0 : bytes.length, buf, 0); + String str = value == null ? null : value.toString(); + encode(str, buf, 0); out.put(buf); } http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/core-metadata/src/main/java/org/apache/kylin/dimension/FixedLenHexDimEnc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/dimension/FixedLenHexDimEnc.java b/core-metadata/src/main/java/org/apache/kylin/dimension/FixedLenHexDimEnc.java index fbf1ccf..83118fc 100644 --- a/core-metadata/src/main/java/org/apache/kylin/dimension/FixedLenHexDimEnc.java +++ b/core-metadata/src/main/java/org/apache/kylin/dimension/FixedLenHexDimEnc.java @@ -156,14 +156,16 @@ public class FixedLenHexDimEnc extends DimensionEncoding { } @Override - public void encode(byte[] value, int valueLen, byte[] output, int outputOffset) { - if (value == null) { + public void encode(String valueStr, byte[] output, int outputOffset) { + if (valueStr == null) { Arrays.fill(output, outputOffset, outputOffset + bytelen, NULL); return; } + byte[] value = Bytes.toBytes(valueStr); + int valueLen = value.length; int endOffset = outputOffset + bytelen; - + if (valueLen 
> hexLength) { if (avoidVerbose++ % 10000 == 0) { logger.warn("Expect at most " + hexLength + " bytes, but got " + valueLen + ", will truncate, value string: " + Bytes.toString(value, 0, valueLen) + " times:" + avoidVerbose); @@ -237,8 +239,8 @@ public class FixedLenHexDimEnc extends DimensionEncoding { @Override public void serialize(Object value, ByteBuffer out) { byte[] buf = currentBuf(); - byte[] bytes = value == null ? null : Bytes.toBytes(value.toString()); - encode(bytes, bytes == null ? 0 : bytes.length, buf, 0); + String str = value == null ? null : value.toString(); + encode(str, buf, 0); out.put(buf); } http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/core-metadata/src/main/java/org/apache/kylin/dimension/IntDimEnc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/dimension/IntDimEnc.java b/core-metadata/src/main/java/org/apache/kylin/dimension/IntDimEnc.java index 88af716..f25f2a6 100644 --- a/core-metadata/src/main/java/org/apache/kylin/dimension/IntDimEnc.java +++ b/core-metadata/src/main/java/org/apache/kylin/dimension/IntDimEnc.java @@ -24,7 +24,6 @@ import java.io.ObjectOutput; import java.nio.ByteBuffer; import java.util.Arrays; -import org.apache.kylin.common.util.Bytes; import org.apache.kylin.common.util.BytesUtil; import org.apache.kylin.metadata.datatype.DataTypeSerializer; import org.slf4j.Logger; @@ -78,16 +77,7 @@ public class IntDimEnc extends DimensionEncoding { } @Override - public void encode(byte[] value, int valueLen, byte[] output, int outputOffset) { - if (value == null) { - Arrays.fill(output, outputOffset, outputOffset + fixedLen, NULL); - return; - } - - encode(Bytes.toString(value, 0, valueLen), output, outputOffset); - } - - void encode(String valueStr, byte[] output, int outputOffset) { + public void encode(String valueStr, byte[] output, int outputOffset) { if (valueStr == null) { Arrays.fill(output, outputOffset, outputOffset + 
fixedLen, NULL); return; http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEnc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEnc.java b/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEnc.java index c417e37..44d0e73 100644 --- a/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEnc.java +++ b/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEnc.java @@ -24,7 +24,6 @@ import java.io.ObjectOutput; import java.nio.ByteBuffer; import java.util.Arrays; -import org.apache.kylin.common.util.Bytes; import org.apache.kylin.common.util.BytesUtil; import org.apache.kylin.metadata.datatype.DataTypeSerializer; import org.slf4j.Logger; @@ -91,16 +90,7 @@ public class IntegerDimEnc extends DimensionEncoding { } @Override - public void encode(byte[] value, int valueLen, byte[] output, int outputOffset) { - if (value == null) { - Arrays.fill(output, outputOffset, outputOffset + fixedLen, NULL); - return; - } - - encode(Bytes.toString(value, 0, valueLen), output, outputOffset); - } - - void encode(String valueStr, byte[] output, int outputOffset) { + public void encode(String valueStr, byte[] output, int outputOffset) { if (valueStr == null) { Arrays.fill(output, outputOffset, outputOffset + fixedLen, NULL); return; http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEncV2.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEncV2.java b/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEncV2.java index 1a54664..6012359 100644 --- a/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEncV2.java +++ 
b/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEncV2.java @@ -24,7 +24,6 @@ import java.io.ObjectOutput; import java.nio.ByteBuffer; import java.util.Arrays; -import org.apache.kylin.common.util.Bytes; import org.apache.kylin.common.util.BytesUtil; import org.apache.kylin.metadata.datatype.DataTypeSerializer; import org.slf4j.Logger; @@ -95,16 +94,7 @@ public class IntegerDimEncV2 extends DimensionEncoding { } @Override - public void encode(byte[] value, int valueLen, byte[] output, int outputOffset) { - if (value == null) { - Arrays.fill(output, outputOffset, outputOffset + fixedLen, NULL); - return; - } - - encode(Bytes.toString(value, 0, valueLen), output, outputOffset); - } - - void encode(String valueStr, byte[] output, int outputOffset) { + public void encode(String valueStr, byte[] output, int outputOffset) { if (valueStr == null) { Arrays.fill(output, outputOffset, outputOffset + fixedLen, NULL); return; http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/core-metadata/src/main/java/org/apache/kylin/dimension/OneMoreByteVLongDimEnc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/dimension/OneMoreByteVLongDimEnc.java b/core-metadata/src/main/java/org/apache/kylin/dimension/OneMoreByteVLongDimEnc.java index 3ba9d2f..993aac3 100644 --- a/core-metadata/src/main/java/org/apache/kylin/dimension/OneMoreByteVLongDimEnc.java +++ b/core-metadata/src/main/java/org/apache/kylin/dimension/OneMoreByteVLongDimEnc.java @@ -24,7 +24,6 @@ import java.io.ObjectOutput; import java.nio.ByteBuffer; import java.util.Arrays; -import org.apache.kylin.common.util.Bytes; import org.apache.kylin.common.util.BytesUtil; import org.apache.kylin.metadata.datatype.DataTypeSerializer; import org.slf4j.Logger; @@ -86,16 +85,7 @@ public class OneMoreByteVLongDimEnc extends DimensionEncoding { } @Override - public void encode(byte[] value, int valueLen, byte[] output, int 
outputOffset) { - if (value == null) { - Arrays.fill(output, outputOffset, outputOffset + byteLen, NULL); - return; - } - - encode(Bytes.toString(value, 0, valueLen), output, outputOffset); - } - - void encode(String valueStr, byte[] output, int outputOffset) { + public void encode(String valueStr, byte[] output, int outputOffset) { if (valueStr == null) { Arrays.fill(output, outputOffset, outputOffset + byteLen, NULL); return; http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNMeasureType.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNMeasureType.java b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNMeasureType.java index 14ce85c..c29af6c 100644 --- a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNMeasureType.java +++ b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNMeasureType.java @@ -26,7 +26,6 @@ import java.util.Map; import org.apache.commons.lang3.StringUtils; import org.apache.kylin.common.util.ByteArray; -import org.apache.kylin.common.util.Bytes; import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.dimension.DictionaryDimEnc; import org.apache.kylin.dimension.DimensionEncoding; @@ -147,8 +146,7 @@ public class TopNMeasureType extends MeasureType<TopNCounter<ByteArray>> { if (values[i + 1] == null) { Arrays.fill(key.array(), offset, offset + dimensionEncodings[i].getLengthOfEncoding(), DimensionEncoding.NULL); } else { - byte[] valueBytes = Bytes.toBytes(values[i + 1]); - dimensionEncodings[i].encode(valueBytes, valueBytes.length, key.array(), offset); + dimensionEncodings[i].encode(values[i + 1], key.array(), offset); } offset += dimensionEncodings[i].getLengthOfEncoding(); } @@ -197,8 +195,7 @@ public class TopNMeasureType extends MeasureType<TopNCounter<ByteArray>> { int innerBuffOffset = 0; for (int i = 0; i < 
dimensionEncodings.length; i++) { String dimValue = dimensionEncodings[i].decode(c.getItem().array(), offset, dimensionEncodings[i].getLengthOfEncoding()); - byte[] dimValueBytes = Bytes.toBytes(dimValue); - newDimensionEncodings[i].encode(dimValueBytes, dimValueBytes.length, newIdBuf, bufOffset + innerBuffOffset); + newDimensionEncodings[i].encode(dimValue, newIdBuf, bufOffset + innerBuffOffset); innerBuffOffset += newDimensionEncodings[i].getLengthOfEncoding(); offset += dimensionEncodings[i].getLengthOfEncoding(); } http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/core-metadata/src/test/java/org/apache/kylin/dimension/BooleanDimEncTest.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/test/java/org/apache/kylin/dimension/BooleanDimEncTest.java b/core-metadata/src/test/java/org/apache/kylin/dimension/BooleanDimEncTest.java index c6c1416..b40675f 100644 --- a/core-metadata/src/test/java/org/apache/kylin/dimension/BooleanDimEncTest.java +++ b/core-metadata/src/test/java/org/apache/kylin/dimension/BooleanDimEncTest.java @@ -20,7 +20,6 @@ package org.apache.kylin.dimension; import java.nio.ByteBuffer; -import org.apache.kylin.common.util.Bytes; import org.apache.kylin.metadata.datatype.DataTypeSerializer; import org.junit.Assert; import org.junit.Test; @@ -32,7 +31,7 @@ public class BooleanDimEncTest { BooleanDimEnc enc = new BooleanDimEnc(); byte[] buf = new byte[enc.getLengthOfEncoding()]; - enc.encode(null, 0, buf, 0); + enc.encode(null, buf, 0); Assert.assertTrue(DimensionEncoding.isNull(buf, 0, buf.length)); String decode = enc.decode(buf, 0, buf.length); Assert.assertEquals(null, decode); @@ -63,8 +62,7 @@ public class BooleanDimEncTest { private void testEncodeDecode(BooleanDimEnc enc, String valueStr) { byte[] buf = new byte[enc.getLengthOfEncoding()]; - byte[] bytes = Bytes.toBytes(valueStr); - enc.encode(bytes, bytes.length, buf, 0); + enc.encode(valueStr, buf, 0); String decode = 
enc.decode(buf, 0, buf.length); Assert.assertEquals(valueStr, decode); } http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/core-metadata/src/test/java/org/apache/kylin/dimension/FixedLenHexDimEncTest.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/test/java/org/apache/kylin/dimension/FixedLenHexDimEncTest.java b/core-metadata/src/test/java/org/apache/kylin/dimension/FixedLenHexDimEncTest.java index 6f0540b..d9a1a0f 100644 --- a/core-metadata/src/test/java/org/apache/kylin/dimension/FixedLenHexDimEncTest.java +++ b/core-metadata/src/test/java/org/apache/kylin/dimension/FixedLenHexDimEncTest.java @@ -20,7 +20,6 @@ package org.apache.kylin.dimension; import java.nio.ByteBuffer; -import org.apache.kylin.common.util.Bytes; import org.apache.kylin.metadata.datatype.DataTypeSerializer; import org.junit.Assert; import org.junit.Test; @@ -45,7 +44,7 @@ public class FixedLenHexDimEncTest { FixedLenHexDimEnc enc = new FixedLenHexDimEnc(i); byte[] buf = new byte[enc.getLengthOfEncoding()]; - enc.encode(null, 0, buf, 0); + enc.encode(null, buf, 0); Assert.assertTrue(DimensionEncoding.isNull(buf, 0, buf.length)); String decode = enc.decode(buf, 0, buf.length); Assert.assertEquals(null, decode); @@ -131,8 +130,7 @@ public class FixedLenHexDimEncTest { private void testEncodeDecode(FixedLenHexDimEnc enc, String value) { byte[] buf = new byte[enc.getLengthOfEncoding()]; String valueStr = value; - byte[] bytes = Bytes.toBytes(valueStr); - enc.encode(bytes, bytes.length, buf, 0); + enc.encode(valueStr, buf, 0); String decode = enc.decode(buf, 0, buf.length); Assert.assertEquals(valueStr, decode); } http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/core-metadata/src/test/java/org/apache/kylin/dimension/IntDimEncTest.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/test/java/org/apache/kylin/dimension/IntDimEncTest.java 
b/core-metadata/src/test/java/org/apache/kylin/dimension/IntDimEncTest.java index d228dd5..0f55b3e 100644 --- a/core-metadata/src/test/java/org/apache/kylin/dimension/IntDimEncTest.java +++ b/core-metadata/src/test/java/org/apache/kylin/dimension/IntDimEncTest.java @@ -20,7 +20,6 @@ package org.apache.kylin.dimension; import java.nio.ByteBuffer; -import org.apache.kylin.common.util.Bytes; import org.apache.kylin.metadata.datatype.DataTypeSerializer; import org.junit.Assert; import org.junit.Test; @@ -54,7 +53,7 @@ public class IntDimEncTest { IntDimEnc enc = new IntDimEnc(i); byte[] buf = new byte[enc.getLengthOfEncoding()]; - enc.encode(null, 0, buf, 0); + enc.encode(null, buf, 0); Assert.assertTrue(DimensionEncoding.isNull(buf, 0, buf.length)); String decode = enc.decode(buf, 0, buf.length); Assert.assertEquals(null, decode); @@ -92,8 +91,7 @@ public class IntDimEncTest { private void testEncodeDecode(IntDimEnc enc, long value) { byte[] buf = new byte[enc.getLengthOfEncoding()]; String valueStr = "" + value; - byte[] bytes = Bytes.toBytes(valueStr); - enc.encode(bytes, bytes.length, buf, 0); + enc.encode(valueStr, buf, 0); String decode = enc.decode(buf, 0, buf.length); Assert.assertEquals(valueStr, decode); } http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/core-metadata/src/test/java/org/apache/kylin/dimension/IntegerDimEncTest.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/test/java/org/apache/kylin/dimension/IntegerDimEncTest.java b/core-metadata/src/test/java/org/apache/kylin/dimension/IntegerDimEncTest.java index 9924053..0c67bcd 100644 --- a/core-metadata/src/test/java/org/apache/kylin/dimension/IntegerDimEncTest.java +++ b/core-metadata/src/test/java/org/apache/kylin/dimension/IntegerDimEncTest.java @@ -22,7 +22,6 @@ import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; -import org.apache.kylin.common.util.Bytes; import 
org.apache.kylin.metadata.datatype.DataTypeSerializer; import org.junit.Assert; import org.junit.BeforeClass; @@ -96,7 +95,7 @@ public class IntegerDimEncTest { IntegerDimEncV2 enc = new IntegerDimEncV2(i); byte[] buf = new byte[enc.getLengthOfEncoding()]; - enc.encode(null, 0, buf, 0); + enc.encode(null, buf, 0); Assert.assertTrue(DimensionEncoding.isNull(buf, 0, buf.length)); String decode = enc.decode(buf, 0, buf.length); Assert.assertEquals(null, decode); @@ -132,8 +131,7 @@ public class IntegerDimEncTest { private void testEncodeDecode(IntegerDimEncV2 enc, long value) { String valueStr = "" + value; byte[] buf = new byte[enc.getLengthOfEncoding()]; - byte[] bytes = Bytes.toBytes(valueStr); - enc.encode(bytes, bytes.length, buf, 0); + enc.encode(valueStr, buf, 0); String decode = enc.decode(buf, 0, buf.length); Assert.assertEquals(valueStr, decode); } http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/core-metadata/src/test/java/org/apache/kylin/dimension/OneMoreByteVLongDimEncTest.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/test/java/org/apache/kylin/dimension/OneMoreByteVLongDimEncTest.java b/core-metadata/src/test/java/org/apache/kylin/dimension/OneMoreByteVLongDimEncTest.java index ba6ab55..17af5c1 100644 --- a/core-metadata/src/test/java/org/apache/kylin/dimension/OneMoreByteVLongDimEncTest.java +++ b/core-metadata/src/test/java/org/apache/kylin/dimension/OneMoreByteVLongDimEncTest.java @@ -20,7 +20,6 @@ package org.apache.kylin.dimension; import java.nio.ByteBuffer; -import org.apache.kylin.common.util.Bytes; import org.apache.kylin.metadata.datatype.DataTypeSerializer; import org.junit.Assert; import org.junit.Test; @@ -50,7 +49,7 @@ public class OneMoreByteVLongDimEncTest { OneMoreByteVLongDimEnc enc = new OneMoreByteVLongDimEnc(i); byte[] buf = new byte[enc.getLengthOfEncoding()]; - enc.encode(null, 0, buf, 0); + enc.encode(null, buf, 0); 
Assert.assertTrue(DimensionEncoding.isNull(buf, 0, buf.length)); String decode = enc.decode(buf, 0, buf.length); Assert.assertEquals(null, decode); @@ -92,8 +91,7 @@ public class OneMoreByteVLongDimEncTest { private void testEncodeDecode(OneMoreByteVLongDimEnc enc, long value) { String valueStr = "" + value; byte[] buf = new byte[enc.getLengthOfEncoding()]; - byte[] bytes = Bytes.toBytes(valueStr); - enc.encode(bytes, bytes.length, buf, 0); + enc.encode(valueStr, buf, 0); String decode = enc.decode(buf, 0, buf.length); Assert.assertEquals(valueStr, decode); } http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/BaseCuboidMapperBase.java ---------------------------------------------------------------------- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/BaseCuboidMapperBase.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/BaseCuboidMapperBase.java index dd0a031..7b719e0 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/BaseCuboidMapperBase.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/BaseCuboidMapperBase.java @@ -19,18 +19,16 @@ package org.apache.kylin.engine.mr.steps; import java.io.IOException; -import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.hadoop.io.Text; import org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.util.Bytes; -import org.apache.kylin.common.util.BytesSplitter; import org.apache.kylin.common.util.Dictionary; -import org.apache.kylin.common.util.SplittedBytes; import org.apache.kylin.cube.CubeInstance; import org.apache.kylin.cube.CubeManager; import org.apache.kylin.cube.CubeSegment; @@ -51,13 +49,13 @@ import org.apache.kylin.metadata.model.TblColRef; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.collect.Lists; 
+import com.google.common.collect.Sets; /** */ abstract public class BaseCuboidMapperBase<KEYIN, VALUEIN> extends KylinMapper<KEYIN, VALUEIN, Text, Text> { protected static final Logger logger = LoggerFactory.getLogger(BaseCuboidMapperBase.class); - public static final byte[] HIVE_NULL = Bytes.toBytes("\\N"); + public static final String HIVE_NULL = "\\N"; public static final byte[] ONE = Bytes.toBytes("1"); protected String cubeName; protected String segmentID; @@ -65,7 +63,7 @@ abstract public class BaseCuboidMapperBase<KEYIN, VALUEIN> extends KylinMapper<K protected CubeInstance cube; protected CubeDesc cubeDesc; protected CubeSegment cubeSegment; - protected List<byte[]> nullBytes; + protected Set<String> nullStrs; protected CubeJoinedFlatTableEnrich intermediateTableDesc; protected String intermediateTableRowDelimiter; protected byte byteRowDelimiter; @@ -73,8 +71,6 @@ abstract public class BaseCuboidMapperBase<KEYIN, VALUEIN> extends KylinMapper<K protected MeasureIngester<?>[] aggrIngesters; protected Map<TblColRef, Dictionary<String>> dictionaryMap; protected Object[] measures; - protected byte[][] keyBytesBuf; - protected BytesSplitter bytesSplitter; protected AbstractRowKeyEncoder rowKeyEncoder; protected BufferedMeasureCodec measureCodec; private int errorRecordCounter; @@ -105,15 +101,11 @@ abstract public class BaseCuboidMapperBase<KEYIN, VALUEIN> extends KylinMapper<K intermediateTableDesc = new CubeJoinedFlatTableEnrich(EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc); - bytesSplitter = new BytesSplitter(200, 16384); rowKeyEncoder = AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid); measureCodec = new BufferedMeasureCodec(cubeDesc.getMeasures()); measures = new Object[cubeDesc.getMeasures().size()]; - int colCount = cubeDesc.getRowkey().getRowKeyColumns().length; - keyBytesBuf = new byte[colCount][]; - aggrIngesters = MeasureIngester.create(cubeDesc.getMeasures()); dictionaryMap = cubeSegment.buildDictionaryMap(); @@ -121,46 
+113,40 @@ abstract public class BaseCuboidMapperBase<KEYIN, VALUEIN> extends KylinMapper<K } private void initNullBytes() { - nullBytes = Lists.newArrayList(); - nullBytes.add(HIVE_NULL); + nullStrs = Sets.newHashSet(); + nullStrs.add(HIVE_NULL); String[] nullStrings = cubeDesc.getNullStrings(); if (nullStrings != null) { for (String s : nullStrings) { - nullBytes.add(Bytes.toBytes(s)); + nullStrs.add(s); } } } - protected boolean isNull(byte[] v) { - for (byte[] nullByte : nullBytes) { - if (Bytes.equals(v, nullByte)) - return true; - } - return false; + protected boolean isNull(String v) { + return nullStrs.contains(v); } - protected byte[] buildKey(SplittedBytes[] splitBuffers) { + protected byte[] buildKey(String[] flatRow) { int[] rowKeyColumnIndexes = intermediateTableDesc.getRowKeyColumnIndexes(); - for (int i = 0; i < baseCuboid.getColumns().size(); i++) { - int index = rowKeyColumnIndexes[i]; - keyBytesBuf[i] = Arrays.copyOf(splitBuffers[index].value, splitBuffers[index].length); - if (isNull(keyBytesBuf[i])) { - keyBytesBuf[i] = null; - } + List<TblColRef> columns = baseCuboid.getColumns(); + String[] colValues = new String[columns.size()]; + for (int i = 0; i < columns.size(); i++) { + colValues[i] = getCell(rowKeyColumnIndexes[i], flatRow); } - return rowKeyEncoder.encode(keyBytesBuf); + return rowKeyEncoder.encode(colValues); } - private ByteBuffer buildValue(SplittedBytes[] splitBuffers) { + private ByteBuffer buildValue(String[] flatRow) { for (int i = 0; i < measures.length; i++) { - measures[i] = buildValueOf(i, splitBuffers); + measures[i] = buildValueOf(i, flatRow); } return measureCodec.encode(measures); } - private Object buildValueOf(int idxOfMeasure, SplittedBytes[] splitBuffers) { + private Object buildValueOf(int idxOfMeasure, String[] flatRow) { MeasureDesc measure = cubeDesc.getMeasures().get(idxOfMeasure); FunctionDesc function = measure.getFunction(); int[] colIdxOnFlatTable = 
intermediateTableDesc.getMeasureColumnIndexes()[idxOfMeasure]; @@ -176,7 +162,7 @@ abstract public class BaseCuboidMapperBase<KEYIN, VALUEIN> extends KylinMapper<K if (function.isCount()) { value = "1"; } else if (param.isColumnType()) { - value = getCell(colIdxOnFlatTable[colParamIdx++], splitBuffers); + value = getCell(colIdxOnFlatTable[colParamIdx++], flatRow); } else { value = param.getValue(); } @@ -186,34 +172,25 @@ abstract public class BaseCuboidMapperBase<KEYIN, VALUEIN> extends KylinMapper<K return aggrIngesters[idxOfMeasure].valueOf(inputToMeasure, measure, dictionaryMap); } - private String getCell(int i, SplittedBytes[] splitBuffers) { - byte[] bytes = Arrays.copyOf(splitBuffers[i].value, splitBuffers[i].length); - if (isNull(bytes)) + private String getCell(int i, String[] flatRow) { + if (isNull(flatRow[i])) return null; else - return Bytes.toString(bytes); + return flatRow[i]; } - protected void outputKV(Context context) throws IOException, InterruptedException { - byte[] rowKey = buildKey(bytesSplitter.getSplitBuffers()); + protected void outputKV(String[] flatRow, Context context) throws IOException, InterruptedException { + byte[] rowKey = buildKey(flatRow); outputKey.set(rowKey, 0, rowKey.length); - ByteBuffer valueBuf = buildValue(bytesSplitter.getSplitBuffers()); + ByteBuffer valueBuf = buildValue(flatRow); outputValue.set(valueBuf.array(), 0, valueBuf.position()); context.write(outputKey, outputValue); } - protected byte[][] convertUTF8Bytes(String[] row) throws UnsupportedEncodingException { - byte[][] result = new byte[row.length][]; - for (int i = 0; i < row.length; i++) { - result[i] = row[i] == null ? 
HIVE_NULL : row[i].getBytes("UTF-8"); - } - return result; - } - - protected void handleErrorRecord(BytesSplitter bytesSplitter, Exception ex) throws IOException { + protected void handleErrorRecord(String[] flatRow, Exception ex) throws IOException { - logger.error("Insane record: " + bytesSplitter, ex); + logger.error("Insane record: " + Arrays.toString(flatRow), ex); // TODO expose errorRecordCounter as hadoop counter errorRecordCounter++; http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/HiveToBaseCuboidMapper.java ---------------------------------------------------------------------- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/HiveToBaseCuboidMapper.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/HiveToBaseCuboidMapper.java index d9c5312..9fa20ae 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/HiveToBaseCuboidMapper.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/HiveToBaseCuboidMapper.java @@ -44,15 +44,12 @@ public class HiveToBaseCuboidMapper<KEYIN> extends BaseCuboidMapperBase<KEYIN, O logger.info("Handled " + counter + " records!"); } + String[] row = flatTableInputFormat.parseMapperInput(value); try { - //put a record into the shared bytesSplitter - String[] row = flatTableInputFormat.parseMapperInput(value); - bytesSplitter.setBuffers(convertUTF8Bytes(row)); - //take care of the data in bytesSplitter - outputKV(context); + outputKV(row, context); } catch (Exception ex) { - handleErrorRecord(bytesSplitter, ex); + handleErrorRecord(row, ex); } } http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/common/coprocessor/CoprocessorProjector.java ---------------------------------------------------------------------- diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/common/coprocessor/CoprocessorProjector.java 
b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/common/coprocessor/CoprocessorProjector.java index 65c5f92..f6332f4 100644 --- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/common/coprocessor/CoprocessorProjector.java +++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/common/coprocessor/CoprocessorProjector.java @@ -45,13 +45,13 @@ public class CoprocessorProjector { } @Override - protected void fillColumnValue(TblColRef column, int columnLen, byte[] value, int valueLen, byte[] outputValue, int outputValueOffset) { + protected void fillColumnValue(TblColRef column, int columnLen, String valueStr, byte[] outputValue, int outputValueOffset) { byte bits = dimensionColumns.contains(column) ? (byte) 0xff : 0x00; Arrays.fill(outputValue, outputValueOffset, outputValueOffset + columnLen, bits); } }; - byte[] mask = rowKeyMaskEncoder.encode(new byte[cuboid.getColumns().size()][]); + byte[] mask = rowKeyMaskEncoder.encode(new String[cuboid.getColumns().size()]); return new CoprocessorProjector(mask, dimensionColumns.size() != 0); } http://git-wip-us.apache.org/repos/asf/kylin/blob/a6e3ccd7/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/common/coprocessor/FilterDecorator.java ---------------------------------------------------------------------- diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/common/coprocessor/FilterDecorator.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/common/coprocessor/FilterDecorator.java index ea4b504..5ab4117 100644 --- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/common/coprocessor/FilterDecorator.java +++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/common/coprocessor/FilterDecorator.java @@ -21,7 +21,6 @@ package org.apache.kylin.storage.hbase.common.coprocessor; import java.util.Collection; import java.util.Set; -import org.apache.kylin.common.util.Bytes; import org.apache.kylin.common.util.Dictionary; import 
org.apache.kylin.cube.kv.RowKeyColumnIO; import org.apache.kylin.dict.BuiltInFunctionTransformer; @@ -200,9 +199,8 @@ public class FilterDecorator implements TupleFilterSerializer.Decorator { } private String translate(TblColRef column, String v, int roundingFlag) { - byte[] value = Bytes.toBytes(v); byte[] id = new byte[dimEncMap.get(column).getLengthOfEncoding()]; - columnIO.writeColumn(column, value, value.length, roundingFlag, DimensionEncoding.NULL, id, 0); + columnIO.writeColumn(column, v, roundingFlag, DimensionEncoding.NULL, id, 0); return Dictionary.dictIdToString(id, 0, id.length); } } \ No newline at end of file