yifan-c commented on code in PR #71: URL: https://github.com/apache/cassandra-analytics/pull/71#discussion_r1737158816
########## cassandra-analytics-spark-converter/src/main/java/org/apache/cassandra/spark/data/converter/types/BinaryTraits.java: ########## @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.cassandra.spark.data.converter.types; + +import java.nio.ByteBuffer; + +import org.apache.cassandra.bridge.BigNumberConfig; +import org.apache.cassandra.spark.data.CqlField; +import org.apache.cassandra.spark.utils.ByteBufferUtils; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.catalyst.expressions.GenericInternalRow; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.DataTypes; +import org.jetbrains.annotations.NotNull; + +public interface BinaryTraits extends SparkType Review Comment: I was confused by the name. At the first sight, I thought it was a `scala` source code, since `trait` is the scala's interface. Can you please not use the word `trait` for java interface? This patch add many traits. ########## cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/reader/DataTypeSerializationTests.java: ########## @@ -152,151 +153,153 @@ public void testUUID() public void testLong() { qt().forAll(TestUtils.bridges()).checkAssert(bridge -> { - assertTrue(bridge.bigint().deserialize(bridge.bigint().serialize(Long.MAX_VALUE)) instanceof Long); - assertEquals(Long.MAX_VALUE, bridge.bigint().deserialize( - ByteBuffer.allocate(8).putLong(0, Long.MAX_VALUE))); + assertTrue(bridge.bigint().deserializeToType(bridge.typeConverter(), bridge.bigint().serialize(Long.MAX_VALUE)) instanceof Long); + assertEquals(Long.MAX_VALUE, bridge.bigint().deserializeToType(bridge.typeConverter(), + ByteBuffer.allocate(8).putLong(0, Long.MAX_VALUE))); qt().forAll(integers().all()) - .checkAssert(integer -> assertEquals((long) integer, bridge.bigint().deserialize( - bridge.bigint().serialize((long) integer)))); - assertEquals(Long.MAX_VALUE, bridge.bigint().deserialize(bridge.bigint().serialize(Long.MAX_VALUE))); - assertEquals(Long.MIN_VALUE, bridge.bigint().deserialize(bridge.bigint().serialize(Long.MIN_VALUE))); + .checkAssert(integer -> assertEquals((long) integer, bridge.bigint().deserializeToType(bridge.typeConverter(), + bridge.bigint().serialize((long) integer)))); + assertEquals(Long.MAX_VALUE, bridge.bigint().deserializeToJavaType(bridge.bigint().serialize(Long.MAX_VALUE))); + assertEquals(Long.MIN_VALUE, bridge.bigint().deserializeToJavaType(bridge.bigint().serialize(Long.MIN_VALUE))); qt().withExamples(MAX_TESTS) .forAll(longs().all()) - .checkAssert(aLong -> assertEquals(aLong, bridge.bigint().deserialize( - bridge.bigint().serialize(aLong)))); + .checkAssert(aLong -> assertEquals(aLong, bridge.bigint().deserializeToType(bridge.typeConverter(), + bridge.bigint().serialize(aLong)))); }); } @Test public void testDecimal() { qt().forAll(TestUtils.bridges()).checkAssert(bridge -> { - assertTrue(bridge.decimal().deserialize( - bridge.decimal().serialize(BigDecimal.valueOf(500L))) instanceof Decimal); - assertEquals(Decimal.apply(500), bridge.decimal().deserialize( - bridge.decimal().serialize(BigDecimal.valueOf(500L)))); - assertNotSame(Decimal.apply(501), bridge.decimal().deserialize( - bridge.decimal().serialize(BigDecimal.valueOf(500L)))); - assertEquals(Decimal.apply(-1), bridge.decimal().deserialize( - bridge.decimal().serialize(BigDecimal.valueOf(-1L)))); - assertEquals(Decimal.apply(Long.MAX_VALUE), bridge.decimal().deserialize( - bridge.decimal().serialize(BigDecimal.valueOf(Long.MAX_VALUE)))); - assertEquals(Decimal.apply(Long.MIN_VALUE), bridge.decimal().deserialize( - bridge.decimal().serialize(BigDecimal.valueOf(Long.MIN_VALUE)))); - assertEquals(Decimal.apply(Integer.MAX_VALUE), bridge.decimal().deserialize( - bridge.decimal().serialize(BigDecimal.valueOf(Integer.MAX_VALUE)))); - assertEquals(Decimal.apply(Integer.MIN_VALUE), bridge.decimal().deserialize( - bridge.decimal().serialize(BigDecimal.valueOf(Integer.MIN_VALUE)))); + assertTrue(bridge.decimal().deserializeToType(bridge.typeConverter(), + bridge.decimal().serialize(BigDecimal.valueOf(500L))) instanceof Decimal); + assertEquals(Decimal.apply(500), bridge.decimal().deserializeToType(bridge.typeConverter(), + bridge.decimal().serialize(BigDecimal.valueOf(500L)))); + assertNotSame(Decimal.apply(501), bridge.decimal().deserializeToType(bridge.typeConverter(), + bridge.decimal().serialize(BigDecimal.valueOf(500L)))); + assertEquals(Decimal.apply(-1), bridge.decimal().deserializeToType(bridge.typeConverter(), + bridge.decimal().serialize(BigDecimal.valueOf(-1L)))); + assertEquals(Decimal.apply(Long.MAX_VALUE), bridge.decimal().deserializeToType(bridge.typeConverter(), + bridge.decimal().serialize(BigDecimal.valueOf(Long.MAX_VALUE)))); + assertEquals(Decimal.apply(Long.MIN_VALUE), bridge.decimal().deserializeToType(bridge.typeConverter(), + bridge.decimal().serialize(BigDecimal.valueOf(Long.MIN_VALUE)))); + assertEquals(Decimal.apply(Integer.MAX_VALUE), bridge.decimal().deserializeToType(bridge.typeConverter(), + bridge.decimal() + .serialize(BigDecimal.valueOf(Integer.MAX_VALUE)))); + assertEquals(Decimal.apply(Integer.MIN_VALUE), bridge.decimal().deserializeToType(bridge.typeConverter(), + bridge.decimal() + .serialize(BigDecimal.valueOf(Integer.MIN_VALUE)))); Review Comment: not easy to read. Can you extract a helper method? For example, ``` Object toDecimal(CassandraBridge bridge, long value) { return bridge.decimal().deserializeToType(bridge.typeConverter(), bridge.decimal().serialize(BigDecimal.valueOf(value))); } ``` If it looks good, can you update the other places with the same pattern? ########## cassandra-analytics-spark-converter/src/main/java/org/apache/cassandra/spark/data/converter/types/complex/List.java: ########## @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.cassandra.spark.data.converter.types.complex; + +import org.apache.cassandra.spark.data.CqlField; +import org.apache.cassandra.spark.data.converter.SparkSqlTypeConverter; + +public class List implements ListTrait Review Comment: Not a fan of the name. To easy to be confused with `java.util.List`. Same for "Map" and "Set" ########## cassandra-analytics-spark-converter/src/main/java/org/apache/cassandra/spark/data/converter/types/Duration.java: ########## @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.cassandra.spark.data.converter.types; + +public class Duration implements NotImplementedTraits Review Comment: It potentially causes confusion with `java.time.Duration`, but I am not strong on renaming it. ########## cassandra-analytics-spark-converter/src/main/java/org/apache/cassandra/spark/data/converter/types/Blob.java: ########## @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.cassandra.spark.data.converter.types; + +public class Blob implements BinaryTraits +{ + public static final org.apache.cassandra.spark.data.converter.types.Blob INSTANCE = new org.apache.cassandra.spark.data.converter.types.Blob(); Review Comment: and here, and many other classes in the package ########## cassandra-analytics-spark-converter/src/main/java/org/apache/cassandra/spark/data/converter/types/BigInt.java: ########## @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.cassandra.spark.data.converter.types; + +public class BigInt implements LongTraits +{ + public static final org.apache.cassandra.spark.data.converter.types.BigInt INSTANCE = new org.apache.cassandra.spark.data.converter.types.BigInt(); Review Comment: redundant class qualifier ########## cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/data/CqlFieldComparators.java: ########## @@ -38,52 +39,52 @@ public class CqlFieldComparators extends VersionRunner { Review Comment: Test class not named with "Test" ########## cassandra-analytics-spark-converter/src/main/java/org/apache/cassandra/spark/data/converter/types/Ascii.java: ########## @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.cassandra.spark.data.converter.types; + +public class Ascii implements StringTraits +{ + public static final org.apache.cassandra.spark.data.converter.types.Ascii INSTANCE = new org.apache.cassandra.spark.data.converter.types.Ascii(); Review Comment: The class qualifier is redundant. ```suggestion public static final Ascii INSTANCE = new Ascii(); ``` ########## cassandra-analytics-spark-converter/src/main/java/org/apache/cassandra/spark/data/converter/SparkSqlTypeConverter.java: ########## @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.cassandra.spark.data.converter; + +import org.apache.cassandra.bridge.BigNumberConfig; +import org.apache.cassandra.spark.data.CqlField; +import org.apache.cassandra.spark.data.TypeConverter; +import org.apache.cassandra.spark.data.converter.types.SparkType; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.catalyst.expressions.GenericInternalRow; +import org.apache.spark.sql.types.DataType; + +public interface SparkSqlTypeConverter extends TypeConverter +{ + Object nativeSparkSqlRowValue(CqlField.CqlType cqlType, GenericInternalRow row, int position); + + Object nativeSparkSqlRowValue(CqlField.CqlType cqlType, Row row, int position); + + default Object sparkSqlRowValue(CqlField cqlField, GenericInternalRow row, int position) + { + return sparkSqlRowValue(cqlField.type(), row, position); + } + + Object sparkSqlRowValue(CqlField.CqlType cqlType, GenericInternalRow row, int position); + + default Object sparkSqlRowValue(CqlField cqlField, Row row, int position) + { + return sparkSqlRowValue(cqlField.type(), row, position); + } + + Object sparkSqlRowValue(CqlField.CqlType cqlType, Row row, int position); + + default DataType sparkSqlType(CqlField.CqlType cqlType) + { + return sparkSqlType(cqlType, BigNumberConfig.DEFAULT); + } + + SparkType toSparkType(CqlField.CqlType cqlType); + + default DataType sparkSqlType(CqlField cqlField, BigNumberConfig bigNumberConfig) + { + return sparkSqlType(cqlField.type(), bigNumberConfig); + } + + DataType sparkSqlType(CqlField.CqlType cqlType, BigNumberConfig bigNumberConfig); + + default Object toTestRowType(CqlField cqlField, Object value) + { + return toTestRowType(cqlField.type(), value); + } + + default Object toTestRowType(CqlField.CqlType cqlType, Object value) + { + return toSparkType(cqlType).toTestRowType(value); + } +} Review Comment: javadoc for all methods and the interface. ########## cassandra-analytics-spark-converter/src/main/java/org/apache/cassandra/spark/data/converter/types/Double.java: ########## @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.cassandra.spark.data.converter.types; + +import org.apache.cassandra.bridge.BigNumberConfig; +import org.apache.cassandra.spark.data.CqlField; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.catalyst.expressions.GenericInternalRow; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.DataTypes; + +public class Double implements SparkType Review Comment: Please rename the class. ########## cassandra-analytics-spark-converter/src/main/java/org/apache/cassandra/spark/data/converter/types/Float.java: ########## @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.cassandra.spark.data.converter.types; + +import org.apache.cassandra.bridge.BigNumberConfig; +import org.apache.cassandra.spark.data.CqlField; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.catalyst.expressions.GenericInternalRow; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.DataTypes; + +public class Float implements SparkType Review Comment: Rename this class to avoid conflict with `java.lang.Float` ########## cassandra-analytics-spark-converter/src/main/java/org/apache/cassandra/spark/data/converter/types/Boolean.java: ########## @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.cassandra.spark.data.converter.types; + +import org.apache.cassandra.bridge.BigNumberConfig; +import org.apache.cassandra.spark.data.CqlField; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.catalyst.expressions.GenericInternalRow; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.DataTypes; + +public class Boolean implements SparkType Review Comment: I would avoid naming it as `Boolean`. At the callsites, it only causes confusion with `java.lang.Boolean` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org For additional commands, e-mail: commits-h...@cassandra.apache.org