bbotella commented on code in PR #3741: URL: https://github.com/apache/cassandra/pull/3741#discussion_r1918919796
########## src/java/org/apache/cassandra/cql3/functions/FormatFcts.java: ########## @@ -0,0 +1,434 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.cql3.functions; + +import java.math.RoundingMode; +import java.nio.ByteBuffer; +import java.text.DecimalFormat; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.TimeUnit; + +import org.apache.cassandra.config.DataStorageSpec.DataStorageUnit; +import org.apache.cassandra.config.DurationSpec; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.UTF8Type; +import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.io.util.FileUtils; +import org.apache.cassandra.utils.Pair; + +import static java.util.concurrent.TimeUnit.DAYS; +import static java.util.concurrent.TimeUnit.HOURS; +import static java.util.concurrent.TimeUnit.MILLISECONDS; +import static java.util.concurrent.TimeUnit.MINUTES; +import static java.util.concurrent.TimeUnit.SECONDS; +import static org.apache.cassandra.config.DataStorageSpec.DataStorageUnit.BYTES; +import static org.apache.cassandra.config.DataStorageSpec.DataStorageUnit.GIBIBYTES; +import static 
org.apache.cassandra.config.DataStorageSpec.DataStorageUnit.KIBIBYTES; +import static org.apache.cassandra.config.DataStorageSpec.DataStorageUnit.MEBIBYTES; +import static org.apache.cassandra.cql3.CQL3Type.Native.ASCII; +import static org.apache.cassandra.cql3.CQL3Type.Native.BIGINT; +import static org.apache.cassandra.cql3.CQL3Type.Native.INT; +import static org.apache.cassandra.cql3.CQL3Type.Native.SMALLINT; +import static org.apache.cassandra.cql3.CQL3Type.Native.TEXT; +import static org.apache.cassandra.cql3.CQL3Type.Native.TINYINT; +import static org.apache.cassandra.cql3.CQL3Type.Native.VARINT; +import static org.apache.cassandra.cql3.functions.FunctionParameter.fixed; +import static org.apache.cassandra.cql3.functions.FunctionParameter.optional; + +public class FormatFcts +{ + public static void addFunctionsTo(NativeFunctions functions) + { + functions.add(FormatBytesFct.factory()); + functions.add(FormatTimeFct.factory()); + } + + /** + * Converts numeric value in a column to a value of specified unit. + * <p> + * If the function call contains just one argument - value to convert - then it will be + * looked at as the value is of unit 'ms' and it will be converted to a value of a unit which is closes to it. E.g. + * If a value is (60 * 1000 + 1) then the unit will be in minutes and converted value will be 1. + * <p> + * If the function call contains two arguments - value to convert and a unit - then it will be looked at + * as the unit of such value is 'ms' and it will be converted into the value of the second (unit) argument. + * <p> + * If the function call contains three arguments - value to covert and source and target unit - then the value + * will be considered of a unit of the second argument, and it will be converted + * into a value of the third (unit) argument. 
+ * <p> + * Examples: + * <pre> + * format_time(val) + * format_time(val, 'm') = format_time(val, 'ms', 'm') + * format_time(val, 's', 'm') + * format_time(val, 's', 'h') + * format_time(val, 's', 'd') + * format_time(val, 's') = format_time(val, 'ms', 's') + * format_time(val, 'h') = format_time(val, 'ms', 'h') + * </pre> + * <p> + * It is possible to convert values of a bigger unit to values of a smaller unit, e.g. this is possible: + * + * <pre> + * format_time(val, 'm', 's') + * </pre> + * <p> + * Values can be max of Double.MAX_VALUE, If the conversion produces overflown value, Double.MAX_VALUE will be returned. + * <p> + * Supported units are: d, h, m, s, ms, us, µs, ns + * <p> + * Supported column types on which this function is possible to be applied: + * <pre>INT, TINYINT, SMALLINT, BIGINT, VARINT, ASCII, TEXT</pre> + * For ASCII and TEXT types, text of such column has to be a non-negative number. + * <p> + * The conversion of negative values is not supported. + */ + public static class FormatTimeFct extends NativeScalarFunction + { + private static final String FUNCTION_NAME = "format_time"; + + private FormatTimeFct(AbstractType<?>... 
argsTypes) + { + super(FUNCTION_NAME, UTF8Type.instance, argsTypes); + } + + @Override + public ByteBuffer execute(Arguments arguments) throws InvalidRequestException + { + if (arguments.get(0) == null) + return null; + + if (arguments.containsNulls()) + throw new InvalidRequestException("none of the arguments may be null"); + + long value = getValue(arguments); + + if (value < 0) + throw new InvalidRequestException("value must be non-negative"); + + if (arguments.size() == 1) + { + Pair<Double, String> convertedValue = convertValue(value); + return UTF8Type.instance.fromString(format(convertedValue.left) + ' ' + convertedValue.right); + } + + TimeUnit sourceUnit; + TimeUnit targetUnit; + String targetUnitAsString; + + if (arguments.size() == 2) + { + sourceUnit = MILLISECONDS; + targetUnitAsString = arguments.get(1); + } + else + { + sourceUnit = validateUnit(arguments.get(1)); + targetUnitAsString = arguments.get(2); + } + + targetUnit = validateUnit(targetUnitAsString); + + double convertedValue = convertValue(value, sourceUnit, targetUnit); + return UTF8Type.instance.fromString(format(convertedValue) + ' ' + targetUnitAsString); + } + + private TimeUnit validateUnit(String unitAsString) + { + try + { + return DurationSpec.fromSymbol(unitAsString); + } + catch (Exception ex) + { + throw new InvalidRequestException(ex.getMessage()); + } + } + + private Pair<Double, String> convertValue(long valueToConvert) + { + String[] units = {"d", "h", "m", "s"}; + TimeUnit[] timeUnits = {DAYS, HOURS, MINUTES, SECONDS}; + + for (int i = 0; i < timeUnits.length; i++) + { + double convertedValue = convertValue(valueToConvert, MILLISECONDS, timeUnits[i]); Review Comment: I understand this is clearer from a readability perspective, but I don't think there's a need to iterate over `timeUnits` every time. Basically, we are trying to compare an amount of milliseconds to a fixed limit of milliseconds (e.g., a day is 86400000 milliseconds, and so on for the other units).
So, directly comparing the valueToConvert inside `if/elses` (we only have 4) should be a bit more efficient than doing all the conversions. What do you think? ########## src/java/org/apache/cassandra/cql3/functions/FormatFcts.java: ########## @@ -0,0 +1,434 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.cassandra.cql3.functions; + +import java.math.RoundingMode; +import java.nio.ByteBuffer; +import java.text.DecimalFormat; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.TimeUnit; + +import org.apache.cassandra.config.DataStorageSpec.DataStorageUnit; +import org.apache.cassandra.config.DurationSpec; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.UTF8Type; +import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.io.util.FileUtils; +import org.apache.cassandra.utils.Pair; + +import static java.util.concurrent.TimeUnit.DAYS; +import static java.util.concurrent.TimeUnit.HOURS; +import static java.util.concurrent.TimeUnit.MILLISECONDS; +import static java.util.concurrent.TimeUnit.MINUTES; +import static java.util.concurrent.TimeUnit.SECONDS; +import static org.apache.cassandra.config.DataStorageSpec.DataStorageUnit.BYTES; +import static org.apache.cassandra.config.DataStorageSpec.DataStorageUnit.GIBIBYTES; +import static org.apache.cassandra.config.DataStorageSpec.DataStorageUnit.KIBIBYTES; +import static org.apache.cassandra.config.DataStorageSpec.DataStorageUnit.MEBIBYTES; +import static org.apache.cassandra.cql3.CQL3Type.Native.ASCII; +import static org.apache.cassandra.cql3.CQL3Type.Native.BIGINT; +import static org.apache.cassandra.cql3.CQL3Type.Native.INT; +import static org.apache.cassandra.cql3.CQL3Type.Native.SMALLINT; +import static org.apache.cassandra.cql3.CQL3Type.Native.TEXT; +import static org.apache.cassandra.cql3.CQL3Type.Native.TINYINT; +import static org.apache.cassandra.cql3.CQL3Type.Native.VARINT; +import static org.apache.cassandra.cql3.functions.FunctionParameter.fixed; +import static org.apache.cassandra.cql3.functions.FunctionParameter.optional; + +public class FormatFcts +{ + public static void addFunctionsTo(NativeFunctions functions) + { + functions.add(FormatBytesFct.factory()); + 
functions.add(FormatTimeFct.factory()); + } + + /** + * Converts numeric value in a column to a value of specified unit. + * <p> + * If the function call contains just one argument - value to convert - then it will be + * looked at as the value is of unit 'ms' and it will be converted to a value of a unit which is closes to it. E.g. Review Comment: ```suggestion * looked at as the value is of unit 'ms' and it will be converted to a value of a unit which is closest to it. E.g. ``` ########## test/unit/org/apache/cassandra/cql3/functions/FormatBytesFctTest.java: ########## @@ -0,0 +1,359 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.cassandra.cql3.functions; + +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +import org.junit.Test; + +import org.apache.cassandra.cql3.CQL3Type; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.exceptions.InvalidRequestException; + +import static java.util.List.of; +import static org.apache.cassandra.cql3.CQL3Type.Native.ASCII; +import static org.apache.cassandra.cql3.CQL3Type.Native.BIGINT; +import static org.apache.cassandra.cql3.CQL3Type.Native.INT; +import static org.apache.cassandra.cql3.CQL3Type.Native.SMALLINT; +import static org.apache.cassandra.cql3.CQL3Type.Native.TEXT; +import static org.apache.cassandra.cql3.CQL3Type.Native.TINYINT; +import static org.apache.cassandra.cql3.CQL3Type.Native.VARINT; +import static org.apache.cassandra.cql3.functions.FormatFcts.format; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.quicktheories.QuickTheory.qt; +import static org.quicktheories.generators.SourceDSL.integers; + +public class FormatBytesFctTest extends CQLTester +{ + @Test + public void testOneValueArgumentExact() + { + createTable(of(INT), new Object[][]{ { 1, 1073741825 }, + { 2, 1073741823 }, + { 3, 0 } }); // 0 B + assertRows(execute("select format_bytes(col1) from %s where pk = 1"), row("1 GiB")); + assertRows(execute("select format_bytes(col1) from %s where pk = 2"), row("1024 MiB")); + assertRows(execute("select format_bytes(col1) from %s where pk = 3"), row("0 B")); + } + + @Test + public void testOneValueArgumentDecimalRoundup() + { + createTable(of(INT), new Object[][]{ { 1, 1563401650 }, + { 2, 1072441589 }, + { 3, 102775 }, + { 4, 102 } }); + assertRows(execute("select format_bytes(col1) from %s where pk = 1"), row("1.46 GiB")); // 1.4560 + assertRows(execute("select format_bytes(col1) from %s where pk = 2"), row("1022.76 MiB")); // 1022.7599 + assertRows(execute("select format_bytes(col1) from %s where pk = 3"), 
row("100.37 KiB")); // 100.3662 + assertRows(execute("select format_bytes(col1) from %s where pk = 4"), row("102 B")); + } + + @Test + public void testOneValueArgumentDecimalRoundDown() + { + createTable(of(INT), new Object[][]{ { 1, 1557999386 }, + { 2, 1072433201 }, + { 3, 102769 }, + { 4, 102 } }); + assertRows(execute("select format_bytes(col1) from %s where pk = 1"), row("1.45 GiB")); // 1.451 + assertRows(execute("select format_bytes(col1) from %s where pk = 2"), row("1022.75 MiB")); // 1022.752 + assertRows(execute("select format_bytes(col1) from %s where pk = 3"), row("100.36 KiB")); // 100.3613 + assertRows(execute("select format_bytes(col1) from %s where pk = 4"), row("102 B")); + } + + @Test + public void testValueAndUnitArgumentsExact() + { + createTable(of(INT), new Object[][]{ { 1, 1073741825 }, + { 2, 0 } }); + assertRows(execute("select format_bytes(col1, 'B') from %s where pk = 1"), row("1073741825 B")); + assertRows(execute("select format_bytes(col1, 'KiB') from %s where pk = 1"), row("1048576 KiB")); + assertRows(execute("select format_bytes(col1, 'MiB') from %s where pk = 1"), row("1024 MiB")); + assertRows(execute("select format_bytes(col1, 'GiB') from %s where pk = 1"), row("1 GiB")); + + assertRows(execute("select format_bytes(col1, 'B') from %s where pk = 2"), row("0 B")); + assertRows(execute("select format_bytes(col1, 'KiB') from %s where pk = 2"), row("0 KiB")); + assertRows(execute("select format_bytes(col1, 'MiB') from %s where pk = 2"), row("0 MiB")); + assertRows(execute("select format_bytes(col1, 'GiB') from %s where pk = 2"), row("0 GiB")); + } + + @Test + public void testValueAndUnitArgumentsDecimal() + { + createTable(of(INT), new Object[][]{ { 1, 1563401650 }, + { 2, 1557999336 } }); + assertRows(execute("select format_bytes(col1, 'B') from %s where pk = 1"), row("1563401650 B")); + assertRows(execute("select format_bytes(col1, 'KiB') from %s where pk = 1"), row("1526759.42 KiB")); + assertRows(execute("select format_bytes(col1, 
'MiB') from %s where pk = 1"), row("1490.98 MiB")); + assertRows(execute("select format_bytes(col1, 'GiB') from %s where pk = 1"), row("1.46 GiB")); + + assertRows(execute("select format_bytes(col1, 'B') from %s where pk = 2"), row("1557999336 B")); + assertRows(execute("select format_bytes(col1, 'KiB') from %s where pk = 2"), row("1521483.73 KiB")); + assertRows(execute("select format_bytes(col1, 'MiB') from %s where pk = 2"), row("1485.82 MiB")); + assertRows(execute("select format_bytes(col1, 'GiB') from %s where pk = 2"), row("1.45 GiB")); + } + + @Test + public void testValueWithSourceAndTargetArgumentExact() + { + createTable(of(INT), new Object[][]{ { 1, 1073741825 }, + { 2, 1 }, + { 3, 0 } }); + assertRows(execute("select format_bytes(col1, 'B', 'B') from %s where pk = 1"), row("1073741825 B")); + assertRows(execute("select format_bytes(col1, 'B', 'KiB') from %s where pk = 1"), row("1048576 KiB")); + assertRows(execute("select format_bytes(col1, 'B', 'MiB') from %s where pk = 1"), row("1024 MiB")); + assertRows(execute("select format_bytes(col1, 'B', 'GiB') from %s where pk = 1"), row("1 GiB")); + + assertRows(execute("select format_bytes(col1, 'GiB', 'GiB') from %s where pk = 2"), row("1 GiB")); + assertRows(execute("select format_bytes(col1, 'GiB', 'MiB') from %s where pk = 2"), row("1024 MiB")); + assertRows(execute("select format_bytes(col1, 'GiB', 'KiB') from %s where pk = 2"), row("1048576 KiB")); + assertRows(execute("select format_bytes(col1, 'GiB', 'B') from %s where pk = 2"), row("1073741824 B")); + + assertRows(execute("select format_bytes(col1, 'GiB', 'GiB') from %s where pk = 3"), row("0 GiB")); + assertRows(execute("select format_bytes(col1, 'GiB', 'MiB') from %s where pk = 3"), row("0 MiB")); + assertRows(execute("select format_bytes(col1, 'GiB', 'KiB') from %s where pk = 3"), row("0 KiB")); + assertRows(execute("select format_bytes(col1, 'GiB', 'B') from %s where pk = 3"), row("0 B")); + } + + @Test + public void 
testValueWithSourceAndTargetArgumentDecimal() + { + createTable(of(INT), new Object[][]{ { 1, 1563401650 }, + { 2, 1557999336 },}); + assertRows(execute("select format_bytes(col1, 'B', 'B') from %s where pk = 1"), row("1563401650 B")); + assertRows(execute("select format_bytes(col1, 'B', 'KiB') from %s where pk = 1"), row("1526759.42 KiB")); + assertRows(execute("select format_bytes(col1, 'B', 'MiB') from %s where pk = 1"), row("1490.98 MiB")); + assertRows(execute("select format_bytes(col1, 'B', 'GiB') from %s where pk = 1"), row("1.46 GiB")); + + assertRows(execute("select format_bytes(col1, 'B', 'B') from %s where pk = 2"), row("1557999336 B")); + assertRows(execute("select format_bytes(col1, 'B', 'KiB') from %s where pk = 2"), row("1521483.73 KiB")); + assertRows(execute("select format_bytes(col1, 'B', 'MiB') from %s where pk = 2"), row("1485.82 MiB")); + assertRows(execute("select format_bytes(col1, 'B', 'GiB') from %s where pk = 2"), row("1.45 GiB")); + } + + @Test + public void testFuzzNumberGenerators() + { + createTable("CREATE TABLE %s (pk int primary key, col1 int)"); + + qt().withExamples(1024).forAll(integers().allPositive()).checkAssert( + (randInt) -> { + execute("INSERT INTO %s (pk, col1) VALUES (?, ?)", 1, randInt); + + assertRows(execute("select format_bytes(col1, 'MiB') from %s where pk = 1"), row(format(randInt / 1024.0 / 1024.0) + " MiB")); + assertRows(execute("select format_bytes(col1, 'KiB', 'GiB') from %s where pk = 1"), row(format(randInt / 1024.0 / 1024.0) + " GiB")); + assertRows(execute("select format_bytes(col1, 'B', 'GiB') from %s where pk = 1"), row(format(randInt / 1024.0 / 1024.0 / 1024.0 ) + " GiB")); + }); + } + + @Test + public void testOverflow() + { + createTable(of(BIGINT, INT, SMALLINT, TINYINT), + new Object[][]{ { 1, + 1073741825L * 1024 + 1, + Integer.MAX_VALUE - 1, + Short.MAX_VALUE - 1, + Byte.MAX_VALUE - 1 }, + { 2, + 1073741825L * 1024 + 1, + Integer.MAX_VALUE, + Short.MAX_VALUE, + Byte.MAX_VALUE } }); + + // this 
will stop at Long.MAX_VALUE + assertRows(execute("select format_bytes(col1, 'GiB', 'B') from %s where pk = 1"), row("9223372036854776000 B")); + assertRows(execute("select format_bytes(col2, 'GiB', 'B') from %s where pk = 1"), row("2305843007066210300 B")); + assertRows(execute("select format_bytes(col3, 'GiB', 'B') from %s where pk = 1"), row("35182224605184 B")); + assertRows(execute("select format_bytes(col4, 'GiB', 'B') from %s where pk = 1"), row("135291469824 B")); + + assertRows(execute("select format_bytes(col2, 'GiB', 'B') from %s where pk = 2"), row("2305843008139952130 B")); + assertRows(execute("select format_bytes(col3, 'GiB', 'B') from %s where pk = 2"), row("35183298347008 B")); + assertRows(execute("select format_bytes(col4, 'GiB', 'B') from %s where pk = 2"), row("136365211648 B")); + } + + @Test + public void testAllSupportedColumnTypes() + { + createTable(of(INT, TINYINT, SMALLINT, BIGINT, VARINT, ASCII, TEXT), + new Object[][]{ { 1, + Integer.MAX_VALUE, + Byte.MAX_VALUE, + Short.MAX_VALUE, + Long.MAX_VALUE, + Integer.MAX_VALUE, + '\'' + Integer.valueOf(Integer.MAX_VALUE).toString() + '\'', + '\'' + Integer.valueOf(Integer.MAX_VALUE).toString() + '\'', + } }); + + assertRows(execute("select format_bytes(col1) from %s where pk = 1"), row("2 GiB")); + assertRows(execute("select format_bytes(col2) from %s where pk = 1"), row("127 B")); + assertRows(execute("select format_bytes(col3) from %s where pk = 1"), row("32 KiB")); + assertRows(execute("select format_bytes(col4) from %s where pk = 1"), row("8589934592 GiB")); + assertRows(execute("select format_bytes(col5) from %s where pk = 1"), row("2 GiB")); + assertRows(execute("select format_bytes(col6) from %s where pk = 1"), row("2 GiB")); + assertRows(execute("select format_bytes(col7) from %s where pk = 1"), row("2 GiB")); + } + + @Test + public void testNegativeValueIsInvalid() + { + createDefaultTable(new Object[][]{ { "1", "-1", "-2" } }); + assertThatThrownBy(() -> execute("select 
format_bytes(col1) from %s where pk = 1")) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining("value must be non-negative"); + } + + @Test + public void testUnparsableTextIsInvalid() + { + createTable(of(TEXT), new Object[][]{ { 1, "'abc'" }, { 2, "'-1'" } }); + + assertThatThrownBy(() -> execute("select format_bytes(col1) from %s where pk = 1")) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining("unable to convert string 'abc' to a value of type long"); + + assertThatThrownBy(() -> execute("select format_bytes(col1) from %s where pk = 2")) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining("value must be non-negative"); + } + + @Test + public void testInvalidUnits() + { + createDefaultTable(new Object[][]{ { "1", "1", "2" } }); + for (String functionCall : new String[] { + "format_bytes(col1, 'abc')", + "format_bytes(col1, 'B', 'abc')", + "format_bytes(col1, 'abc', 'B')", + "format_bytes(col1, 'abc', 'abc')" + }) + { + assertThatThrownBy(() -> execute("select " + functionCall + " from %s where pk = 1")) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining("Unsupported data storage unit: abc. Supported units are: B, KiB, MiB, GiB"); + } + } + + @Test + public void testInvalidArgumentsSize() Review Comment: Can we add a test for the case where no arguments are passed? ########## test/unit/org/apache/cassandra/cql3/functions/FormatTimeFctTest.java: ########## @@ -0,0 +1,321 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.cql3.functions; + +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +import org.junit.Test; + +import org.apache.cassandra.cql3.CQL3Type; +import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.exceptions.InvalidRequestException; + +import static java.util.List.of; +import static org.apache.cassandra.cql3.CQL3Type.Native.ASCII; +import static org.apache.cassandra.cql3.CQL3Type.Native.BIGINT; +import static org.apache.cassandra.cql3.CQL3Type.Native.INT; +import static org.apache.cassandra.cql3.CQL3Type.Native.SMALLINT; +import static org.apache.cassandra.cql3.CQL3Type.Native.TEXT; +import static org.apache.cassandra.cql3.CQL3Type.Native.TINYINT; +import static org.apache.cassandra.cql3.CQL3Type.Native.VARINT; +import static org.apache.cassandra.cql3.functions.FormatFcts.format; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import org.quicktheories.WithQuickTheories; + +public class FormatTimeFctTest extends CQLTester implements WithQuickTheories +{ + @Test + public void testOneValueArgument() + { + createTable(of(INT), new Object[][]{ { 1, 7200001 }, // 2h + 1ms + { 2, 7199999 }, // 2h - 1ms + { 3, 0 } }); // 0 B + assertRows(execute("select format_time(col1) from %s where pk = 1"), row("2 h")); + assertRows(execute("select format_time(col1) from %s where pk = 2"), row("2 h")); + assertRows(execute("select format_time(col1) from %s where pk = 3"), row("0 ms")); + } + + @Test + public void testOneValueArgumentDecimal() + { + 
createTable(of(INT), new Object[][]{ { 1, 9000000 }, // 2.5h + { 2, 7704000 }, // 2.14h + { 3, 7848000 } }); // 2.18h + assertRows(execute("select format_time(col1) from %s where pk = 1"), row("2.5 h")); + assertRows(execute("select format_time(col1) from %s where pk = 2"), row("2.14 h")); + assertRows(execute("select format_time(col1) from %s where pk = 3"), row("2.18 h")); + } + + @Test + public void testValueAndUnitArguments() + { + createTable(of(INT), new Object[][]{ { 1, 1073741826 }, + { 2, 0 }}); + assertRows(execute("select format_time(col1, 's') from %s where pk = 1"), row("1073741.83 s")); + assertRows(execute("select format_time(col1, 'm') from %s where pk = 1"), row("17895.7 m")); + assertRows(execute("select format_time(col1, 'h') from %s where pk = 1"), row("298.26 h")); + assertRows(execute("select format_time(col1, 'd') from %s where pk = 1"), row("12.43 d")); + + assertRows(execute("select format_time(col1, 's') from %s where pk = 2"), row("0 s")); + assertRows(execute("select format_time(col1, 'm') from %s where pk = 2"), row("0 m")); + assertRows(execute("select format_time(col1, 'h') from %s where pk = 2"), row("0 h")); + assertRows(execute("select format_time(col1, 'd') from %s where pk = 2"), row("0 d")); + } + + @Test + public void testValueWithSourceAndTargetArgument() + { + createTable(of(INT), new Object[][]{ { 1, 1073741826 }, + { 2, 1 }, + { 3, 0 } }); + assertRows(execute("select format_time(col1, 'ns', 'us') from %s where pk = 1"), row("1073741.83 us")); + assertRows(execute("select format_time(col1, 'ns', 'ms') from %s where pk = 1"), row("1073.74 ms")); + assertRows(execute("select format_time(col1, 'ns', 's') from %s where pk = 1"), row("1.07 s")); + assertRows(execute("select format_time(col1, 'ns', 'm') from %s where pk = 1"), row("0.02 m")); + + assertRows(execute("select format_time(col1, 'us', 'ns') from %s where pk = 1"), row("1073741826000 ns")); + assertRows(execute("select format_time(col1, 'us', 'ms') from %s where pk = 
1"), row("1073741.83 ms")); + assertRows(execute("select format_time(col1, 'us', 's') from %s where pk = 1"), row("1073.74 s")); + assertRows(execute("select format_time(col1, 'us', 'm') from %s where pk = 1"), row("17.9 m")); + assertRows(execute("select format_time(col1, 'us', 'h') from %s where pk = 1"), row("0.3 h")); + assertRows(execute("select format_time(col1, 'us', 'd') from %s where pk = 1"), row("0.01 d")); + + assertRows(execute("select format_time(col1, 'ms', 'ms') from %s where pk = 1"), row("1073741826 ms")); + assertRows(execute("select format_time(col1, 'ms', 's') from %s where pk = 1"), row("1073741.83 s")); + assertRows(execute("select format_time(col1, 'ms', 'm') from %s where pk = 1"), row("17895.7 m")); + assertRows(execute("select format_time(col1, 'ms', 'h') from %s where pk = 1"), row("298.26 h")); + assertRows(execute("select format_time(col1, 'ms', 'd') from %s where pk = 1"), row("12.43 d")); + + assertRows(execute("select format_time(col1, 'd', 'd') from %s where pk = 2"), row("1 d")); + assertRows(execute("select format_time(col1, 'd', 'h') from %s where pk = 2"), row("24 h")); + assertRows(execute("select format_time(col1, 'd', 'm') from %s where pk = 2"), row("1440 m")); + assertRows(execute("select format_time(col1, 'd', 's') from %s where pk = 2"), row("86400 s")); + + assertRows(execute("select format_time(col1, 'd', 'd') from %s where pk = 3"), row("0 d")); + assertRows(execute("select format_time(col1, 'd', 'h') from %s where pk = 3"), row("0 h")); + assertRows(execute("select format_time(col1, 'd', 'm') from %s where pk = 3"), row("0 m")); + assertRows(execute("select format_time(col1, 'd', 's') from %s where pk = 3"), row("0 s")); + assertRows(execute("select format_time(col1, 'd', 'ms') from %s where pk = 3"), row("0 ms")); + assertRows(execute("select format_time(col1, 'd', 'us') from %s where pk = 3"), row("0 us")); + } + + @Test + public void testNoOverflow() + { + createTable(of(BIGINT, INT, SMALLINT, TINYINT), + new 
Object[][]{ { 1, + Long.MAX_VALUE - 1, + Integer.MAX_VALUE - 1, + Short.MAX_VALUE - 1, + Byte.MAX_VALUE - 1 }, + { 2, + Long.MAX_VALUE, + Integer.MAX_VALUE, + Short.MAX_VALUE, + Byte.MAX_VALUE } }); + + // Won't overlfow because the value is one less than the Double.MAX_VALUE + assertRows(execute("select format_time(col1, 'd', 'ns') from %s where pk = 1"), row("796899343984252600000000000000000 ns")); + assertRows(execute("select format_time(col2, 'd', 'ns') from %s where pk = 1"), row("185542587014400000000000 ns")); + assertRows(execute("select format_time(col3, 'd', 'ns') from %s where pk = 1"), row("2830982400000000000 ns")); + assertRows(execute("select format_time(col4, 'd', 'ns') from %s where pk = 1"), row("10886400000000000 ns")); + + assertRows(execute("select format_time(col1, 'd', 'ns') from %s where pk = 2"), row("796899343984252600000000000000000 ns")); + assertRows(execute("select format_time(col2, 'd', 'ns') from %s where pk = 2"), row("185542587100800000000000 ns")); + assertRows(execute("select format_time(col3, 'd', 'ns') from %s where pk = 2"), row("2831068800000000000 ns")); + assertRows(execute("select format_time(col4, 'd', 'ns') from %s where pk = 2"), row("10972800000000000 ns")); + } + + @Test + public void testAllSupportedColumnTypes() + { + createTable(of(INT, TINYINT, SMALLINT, BIGINT, VARINT, ASCII, TEXT), + new Object[][]{ { 1, + Integer.MAX_VALUE, + Byte.MAX_VALUE, + Short.MAX_VALUE, + Long.MAX_VALUE, + Integer.MAX_VALUE, + '\'' + Integer.valueOf(Integer.MAX_VALUE).toString() + '\'', + '\'' + Integer.valueOf(Integer.MAX_VALUE).toString() + '\'', + } }); + + assertRows(execute("select format_time(col1) from %s where pk = 1"), row("24.86 d")); + assertRows(execute("select format_time(col2) from %s where pk = 1"), row("127 ms")); + assertRows(execute("select format_time(col3) from %s where pk = 1"), row("32.77 s")); + assertRows(execute("select format_time(col4) from %s where pk = 1"), row("106751991167.3 d")); + 
assertRows(execute("select format_time(col5) from %s where pk = 1"), row("24.86 d")); + assertRows(execute("select format_time(col6) from %s where pk = 1"), row("24.86 d")); + assertRows(execute("select format_time(col7) from %s where pk = 1"), row("24.86 d")); + } + + @Test + public void testNegativeValueIsInvalid() + { + createDefaultTable(new Object[][]{ { "1", "-1", "-2" } }); + assertThatThrownBy(() -> execute("select format_time(col1) from %s where pk = 1")) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining("value must be non-negative"); + } + + @Test + public void testUnparsableTextIsInvalid() + { + createTable(of(TEXT), new Object[][]{ { 1, "'abc'" }, { 2, "'-1'" } }); + + assertThatThrownBy(() -> execute("select format_time(col1) from %s where pk = 1")) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining("unable to convert string 'abc' to a value of type long"); + + assertThatThrownBy(() -> execute("select format_time(col1) from %s where pk = 2")) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining("value must be non-negative"); + } + + @Test + public void testInvalidUnits() + { + createDefaultTable(new Object[][]{ { "1", "1", "2" } }); + for (String functionCall : new String[] { + "format_time(col1, 'abc')", + "format_time(col1, 'd', 'abc')", + "format_time(col1, 'abc', 'd')", + "format_time(col1, 'abc', 'abc')" + }) + { + assertThatThrownBy(() -> execute("select " + functionCall + " from %s where pk = 1")) + .isInstanceOf(InvalidRequestException.class) + .hasMessageContaining("Unsupported time unit: abc. Supported units are: ns, us, ms, s, m, h, d"); + } + } + + @Test + public void testInvalidArgumentsSize() + { + createDefaultTable(new Object[][]{ { "1", "1", "2" } }); + assertThatThrownBy(() -> execute("select format_time(col1, 'ms', 's', 'h') from %s where pk = 1")) Review Comment: Same. Should we test for no arguments passed? 
########## src/java/org/apache/cassandra/cql3/functions/FormatFcts.java: ########## @@ -0,0 +1,434 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.cql3.functions; + +import java.math.RoundingMode; +import java.nio.ByteBuffer; +import java.text.DecimalFormat; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.TimeUnit; + +import org.apache.cassandra.config.DataStorageSpec.DataStorageUnit; +import org.apache.cassandra.config.DurationSpec; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.UTF8Type; +import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.io.util.FileUtils; +import org.apache.cassandra.utils.Pair; + +import static java.util.concurrent.TimeUnit.DAYS; +import static java.util.concurrent.TimeUnit.HOURS; +import static java.util.concurrent.TimeUnit.MILLISECONDS; +import static java.util.concurrent.TimeUnit.MINUTES; +import static java.util.concurrent.TimeUnit.SECONDS; +import static org.apache.cassandra.config.DataStorageSpec.DataStorageUnit.BYTES; +import static org.apache.cassandra.config.DataStorageSpec.DataStorageUnit.GIBIBYTES; +import static 
org.apache.cassandra.config.DataStorageSpec.DataStorageUnit.KIBIBYTES; +import static org.apache.cassandra.config.DataStorageSpec.DataStorageUnit.MEBIBYTES; +import static org.apache.cassandra.cql3.CQL3Type.Native.ASCII; +import static org.apache.cassandra.cql3.CQL3Type.Native.BIGINT; +import static org.apache.cassandra.cql3.CQL3Type.Native.INT; +import static org.apache.cassandra.cql3.CQL3Type.Native.SMALLINT; +import static org.apache.cassandra.cql3.CQL3Type.Native.TEXT; +import static org.apache.cassandra.cql3.CQL3Type.Native.TINYINT; +import static org.apache.cassandra.cql3.CQL3Type.Native.VARINT; +import static org.apache.cassandra.cql3.functions.FunctionParameter.fixed; +import static org.apache.cassandra.cql3.functions.FunctionParameter.optional; + +public class FormatFcts +{ + public static void addFunctionsTo(NativeFunctions functions) + { + functions.add(FormatBytesFct.factory()); + functions.add(FormatTimeFct.factory()); + } + + /** + * Converts numeric value in a column to a value of specified unit. + * <p> + * If the function call contains just one argument - value to convert - then it will be + * looked at as the value is of unit 'ms' and it will be converted to a value of a unit which is closest to it. E.g. + * If a value is (60 * 1000 + 1) then the unit will be in minutes and converted value will be 1. + * <p> + * If the function call contains two arguments - value to convert and a unit - then it will be looked at + * as the unit of such value is 'ms' and it will be converted into the value of the second (unit) argument. + * <p> + * If the function call contains three arguments - value to convert and source and target unit - then the value + * will be considered of a unit of the second argument, and it will be converted + * into a value of the third (unit) argument. 
+ * <p> + * Examples: + * <pre> + * format_time(val) + * format_time(val, 'm') = format_time(val, 'ms', 'm') + * format_time(val, 's', 'm') + * format_time(val, 's', 'h') + * format_time(val, 's', 'd') + * format_time(val, 's') = format_time(val, 'ms', 's') + * format_time(val, 'h') = format_time(val, 'ms', 'h') + * </pre> + * <p> + * It is possible to convert values of a bigger unit to values of a smaller unit, e.g. this is possible: + * + * <pre> + * format_time(val, 'm', 's') + * </pre> + * <p> + * Values can be at most Double.MAX_VALUE. If the conversion produces an overflown value, Double.MAX_VALUE will be returned. + * <p> + * Supported units are: d, h, m, s, ms, us, µs, ns + * <p> + * Supported column types on which this function is possible to be applied: + * <pre>INT, TINYINT, SMALLINT, BIGINT, VARINT, ASCII, TEXT</pre> + * For ASCII and TEXT types, text of such column has to be a non-negative number. + * <p> + * The conversion of negative values is not supported. + */ + public static class FormatTimeFct extends NativeScalarFunction + { + private static final String FUNCTION_NAME = "format_time"; + + private FormatTimeFct(AbstractType<?>... 
argsTypes) + { + super(FUNCTION_NAME, UTF8Type.instance, argsTypes); + } + + @Override + public ByteBuffer execute(Arguments arguments) throws InvalidRequestException + { + if (arguments.get(0) == null) + return null; + + if (arguments.containsNulls()) + throw new InvalidRequestException("none of the arguments may be null"); + + long value = getValue(arguments); + + if (value < 0) + throw new InvalidRequestException("value must be non-negative"); + + if (arguments.size() == 1) + { + Pair<Double, String> convertedValue = convertValue(value); + return UTF8Type.instance.fromString(format(convertedValue.left) + ' ' + convertedValue.right); + } + + TimeUnit sourceUnit; + TimeUnit targetUnit; + String targetUnitAsString; + + if (arguments.size() == 2) + { + sourceUnit = MILLISECONDS; + targetUnitAsString = arguments.get(1); + } + else + { + sourceUnit = validateUnit(arguments.get(1)); + targetUnitAsString = arguments.get(2); + } + + targetUnit = validateUnit(targetUnitAsString); + + double convertedValue = convertValue(value, sourceUnit, targetUnit); + return UTF8Type.instance.fromString(format(convertedValue) + ' ' + targetUnitAsString); + } + + private TimeUnit validateUnit(String unitAsString) + { + try + { + return DurationSpec.fromSymbol(unitAsString); + } + catch (Exception ex) + { + throw new InvalidRequestException(ex.getMessage()); + } + } + + private Pair<Double, String> convertValue(long valueToConvert) + { + String[] units = {"d", "h", "m", "s"}; + TimeUnit[] timeUnits = {DAYS, HOURS, MINUTES, SECONDS}; + + for (int i = 0; i < timeUnits.length; i++) + { + double convertedValue = convertValue(valueToConvert, MILLISECONDS, timeUnits[i]); + if (convertedValue >= 1.0) + { + return Pair.create(convertedValue, units[i]); + } + } + return Pair.create((double) valueToConvert, "ms"); + } + + private Double convertValue(long valueToConvert, TimeUnit sourceUnit, TimeUnit targetUnit) + { + try { + double conversionFactor = getConversionFactor(sourceUnit, targetUnit); + 
return valueToConvert * conversionFactor; + } + catch (ArithmeticException ex) { + return Double.MAX_VALUE; + } + } + + private static double getConversionFactor(TimeUnit sourceUnit, TimeUnit targetUnit) { + // Define conversion factors between units + double nanosPerSourceUnit = getNanosPerUnit(sourceUnit); + double nanosPerTargetUnit = getNanosPerUnit(targetUnit); + + // Calculate the conversion factor + return nanosPerSourceUnit / nanosPerTargetUnit; + } + + private static double getNanosPerUnit(TimeUnit unit) { + switch (unit) { + case NANOSECONDS: return 1.0; + case MICROSECONDS: return 1_000.0; + case MILLISECONDS: return 1_000_000.0; + case SECONDS: return 1_000_000_000.0; + case MINUTES: return 60.0 * 1_000_000_000.0; + case HOURS: return 3600.0 * 1_000_000_000.0; + case DAYS: return 86400.0 * 1_000_000_000.0; + default: throw new IllegalArgumentException("Unsupported time unit: " + unit); + } + } + + public static FunctionFactory factory() + { + return new FunctionFactory(FUNCTION_NAME, + fixed(INT, TINYINT, SMALLINT, BIGINT, VARINT, ASCII, TEXT), + optional(fixed(ASCII)), + optional(fixed(ASCII))) + { + @Override + protected NativeFunction doGetOrCreateFunction(List<AbstractType<?>> argTypes, AbstractType<?> receiverType) + { + if (argTypes.isEmpty() || argTypes.size() > 3) + throw invalidNumberOfArgumentsException(); + + return new FormatTimeFct(argTypes.toArray(new AbstractType<?>[0])); + } + }; + } + } + + private static final DecimalFormat decimalFormat; + + static + { + decimalFormat = new DecimalFormat("#.##"); + decimalFormat.setRoundingMode(RoundingMode.HALF_UP); + } + + /** + * Formats a double value to a string with two decimal places. + * <p> + * Supported column types on which this function is possible to be applied: + * <pre>DOUBLE</pre> + */ + public static String format(double value) + { + return decimalFormat.format(value); + } + + /** + * Converts numeric value in a column to a size value of specified unit. 
+ * <p> + * If the function call contains just one argument - value to convert - then it will be + * looked at as the value is of unit 'B' and it will be converted to a value of a unit which is closest to it. E.g. + * If a value is (1024 * 1024 + 1) then the unit will be in MiB and converted value will be 1. + * <p> + * If the function call contains two arguments - value to convert and a unit - then it will be looked at + * as the unit of such value is 'B' and it will be converted into the value of the second (unit) argument. + * <p> + * If the function call contains three arguments - value to convert and source and target unit - then the value + * will be considered of a unit of the second argument, and it will be converted + * into a value of the third (unit) argument. + * <p> + * Examples: + * <pre> + * format_bytes(val) = format_bytes(val, 'B', 'MiB') + * format_bytes(val, 'B', 'MiB') + * format_bytes(val, 'B', 'GiB') + * format_bytes(val, 'KiB', 'GiB') + * format_bytes(val, 'MiB') = format_bytes(val, 'B', 'MiB') + * format_bytes(val, 'GiB') = format_bytes(val, 'B', 'GiB') + * </pre> + * <p> + * It is possible to convert values of a bigger unit to values of a smaller unit, e.g. this is possible: + * + * <pre> + * format_bytes(val, 'GiB', 'B') + * </pre> + * <p> + * Values can be at most Long.MAX_VALUE. If the conversion produces an overflown value, Long.MAX_VALUE will be returned. + * Note that the actual return value will be 9223372036854776000 due to the limitations of double precision. + * <p> + * Supported units are: B, KiB, MiB, GiB + * <p> + * Supported column types on which this function is possible to be applied: + * <pre>INT, TINYINT, SMALLINT, BIGINT, VARINT, ASCII, TEXT</pre> + * For ASCII and TEXT types, text of such column has to be a non-negative number. + * <p> + * + * The conversion of negative values is not supported. 
+ */ + public static class FormatBytesFct extends NativeScalarFunction + { + private static final String FUNCTION_NAME = "format_bytes"; + + private FormatBytesFct(AbstractType<?>... argsTypes) + { + super(FUNCTION_NAME, UTF8Type.instance, argsTypes); + } + + @Override + public ByteBuffer execute(Arguments arguments) throws InvalidRequestException + { + if (arguments.get(0) == null) + return null; + + if (arguments.containsNulls()) + throw new InvalidRequestException("none of the arguments may be null"); + + long value = (long) getValue(arguments); + + if (value < 0) + throw new InvalidRequestException("value must be non-negative"); Review Comment: This validation logic seems to be duplicated. Could we extract it to a private method? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]

