Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/13680#discussion_r69483421 --- Diff: sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java --- @@ -341,63 +325,115 @@ public UnsafeArrayData copy() { return arrayCopy; } - public static UnsafeArrayData fromPrimitiveArray(int[] arr) { - if (arr.length > (Integer.MAX_VALUE - 4) / 8) { - throw new UnsupportedOperationException("Cannot convert this array to unsafe format as " + - "it's too big."); - } + @Override + public boolean[] toBooleanArray() { + int size = numElements(); + boolean[] values = new boolean[size]; + Platform.copyMemory( + baseObject, baseOffset + headerInBytes, values, Platform.BYTE_ARRAY_OFFSET, size); + return values; + } - final int offsetRegionSize = 4 * arr.length; - final int valueRegionSize = 4 * arr.length; - final int totalSize = 4 + offsetRegionSize + valueRegionSize; - final byte[] data = new byte[totalSize]; + @Override + public byte[] toByteArray() { + int size = numElements(); + byte[] values = new byte[size]; + Platform.copyMemory( + baseObject, baseOffset + headerInBytes, values, Platform.BYTE_ARRAY_OFFSET, size); + return values; + } - Platform.putInt(data, Platform.BYTE_ARRAY_OFFSET, arr.length); + @Override + public short[] toShortArray() { + int size = numElements(); + short[] values = new short[size]; + Platform.copyMemory( + baseObject, baseOffset + headerInBytes, values, Platform.SHORT_ARRAY_OFFSET, size * 2); + return values; + } - int offsetPosition = Platform.BYTE_ARRAY_OFFSET + 4; - int valueOffset = 4 + offsetRegionSize; - for (int i = 0; i < arr.length; i++) { - Platform.putInt(data, offsetPosition, valueOffset); - offsetPosition += 4; - valueOffset += 4; - } + @Override + public int[] toIntArray() { + int size = numElements(); + int[] values = new int[size]; + Platform.copyMemory( + baseObject, baseOffset + headerInBytes, values, Platform.INT_ARRAY_OFFSET, size * 4); + return values; + } - 
Platform.copyMemory(arr, Platform.INT_ARRAY_OFFSET, data, - Platform.BYTE_ARRAY_OFFSET + 4 + offsetRegionSize, valueRegionSize); + @Override + public long[] toLongArray() { + int size = numElements(); + long[] values = new long[size]; + Platform.copyMemory( + baseObject, baseOffset + headerInBytes, values, Platform.LONG_ARRAY_OFFSET, size * 8); + return values; + } - UnsafeArrayData result = new UnsafeArrayData(); - result.pointTo(data, Platform.BYTE_ARRAY_OFFSET, totalSize); - return result; + @Override + public float[] toFloatArray() { + int size = numElements(); + float[] values = new float[size]; + Platform.copyMemory( + baseObject, baseOffset + headerInBytes, values, Platform.FLOAT_ARRAY_OFFSET, size * 4); + return values; } - public static UnsafeArrayData fromPrimitiveArray(double[] arr) { - if (arr.length > (Integer.MAX_VALUE - 4) / 12) { + @Override + public double[] toDoubleArray() { + int size = numElements(); + double[] values = new double[size]; + Platform.copyMemory( + baseObject, baseOffset + headerInBytes, values, Platform.DOUBLE_ARRAY_OFFSET, size * 8); + return values; + } + + private static UnsafeArrayData fromPrimitiveArray( + Object arr, int offset, int length, int elementSize) { + final long headerSize = calculateHeaderPortionInBytes(length); + final long valueRegionSize = (long)elementSize * (long)length; + final long allocationSize = (headerSize + valueRegionSize + 7) / 8; --- End diff -- updated these names
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes to enable it, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org