This is an automated email from the ASF dual-hosted git repository. haonan pushed a commit to branch optimize_infer_type in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit 757bc0a5cc492f978978f36a5eec7591be3eb05d Author: HTHou <[email protected]> AuthorDate: Fri Mar 22 18:31:10 2024 +0800 Optimize the String value type infer --- .../java/org/apache/iotdb/db/conf/IoTDBConfig.java | 17 ++------- .../org/apache/iotdb/db/conf/IoTDBDescriptor.java | 4 --- .../apache/iotdb/db/utils/TypeInferenceUtils.java | 40 ++++++++++++---------- .../iotdb/db/utils/TypeInferenceUtilsTest.java | 37 ++++++++++++-------- .../resources/conf/iotdb-common.properties | 11 ++---- 5 files changed, 50 insertions(+), 59 deletions(-) diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java index 6be4d80e36d..4c0b30bc947 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java @@ -630,17 +630,14 @@ public class IoTDBConfig { /** Register time series as which type when receiving boolean string "true" or "false" */ private TSDataType booleanStringInferType = TSDataType.BOOLEAN; - /** Register time series as which type when receiving an integer string "67" */ - private TSDataType integerStringInferType = TSDataType.FLOAT; - /** * register time series as which type when receiving an integer string and using float may lose - * precision num > 2 ^ 24 + * precision */ - private TSDataType longStringInferType = TSDataType.DOUBLE; + private TSDataType integerStringInferType = TSDataType.INT64; /** register time series as which type when receiving a floating number string "6.7" */ - private TSDataType floatingStringInferType = TSDataType.FLOAT; + private TSDataType floatingStringInferType = TSDataType.DOUBLE; /** * register time series as which type when receiving the Literal NaN. Values can be DOUBLE, FLOAT @@ -2212,14 +2209,6 @@ public class IoTDBConfig { this.integerStringInferType = integerStringInferType; } - public void setLongStringInferType(TSDataType longStringInferType) { - this.longStringInferType = longStringInferType; - } - - public TSDataType getLongStringInferType() { - return longStringInferType; - } - public TSDataType getFloatingStringInferType() { return floatingStringInferType; } diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java index 3a3693e3b7b..73e6b6b256c 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java @@ -1298,10 +1298,6 @@ public class IoTDBDescriptor { TSDataType.valueOf( properties.getProperty( "integer_string_infer_type", conf.getIntegerStringInferType().toString()))); - conf.setLongStringInferType( - TSDataType.valueOf( - properties.getProperty( - "long_string_infer_type", conf.getLongStringInferType().toString()))); conf.setFloatingStringInferType( TSDataType.valueOf( properties.getProperty( diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/utils/TypeInferenceUtils.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/utils/TypeInferenceUtils.java index 187225f622a..1e6723ba637 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/utils/TypeInferenceUtils.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/utils/TypeInferenceUtils.java @@ -31,7 +31,6 @@ import org.apache.iotdb.db.queryengine.plan.expression.multi.FunctionExpression; import org.apache.iotdb.db.queryengine.plan.expression.multi.builtin.BuiltInScalarFunctionHelper; import org.apache.iotdb.db.queryengine.plan.expression.multi.builtin.BuiltInScalarFunctionHelperFactory; import org.apache.iotdb.db.utils.constant.SqlConstant; -import org.apache.iotdb.tsfile.common.constant.TsFileConstant; import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; import org.apache.commons.lang3.StringUtils; @@ -47,9 +46,6 @@ public class TypeInferenceUtils { private static final TSDataType integerStringInferType = IoTDBDescriptor.getInstance().getConfig().getIntegerStringInferType(); - private static final TSDataType longStringInferType = - IoTDBDescriptor.getInstance().getConfig().getLongStringInferType(); - private static final TSDataType floatingStringInferType = IoTDBDescriptor.getInstance().getConfig().getFloatingStringInferType(); @@ -75,6 +71,15 @@ public class TypeInferenceUtils { || s.equalsIgnoreCase(SqlConstant.BOOLEAN_FALSE); } + private static boolean isLong(String s) { + try { + Long.parseLong(s); + } catch (NumberFormatException e) { + return false; + } + return true; + } + private static boolean isConvertFloatPrecisionLack(String s) { try { return Long.parseLong(s) > (1 << 24); @@ -86,15 +91,22 @@ public class TypeInferenceUtils { /** Get predicted DataType of the given value */ public static TSDataType getPredictedDataType(Object value, boolean inferType) { - if (inferType) { + if (value instanceof Boolean) { + return TSDataType.BOOLEAN; + } else if (value instanceof Integer) { + return TSDataType.INT32; + } else if (value instanceof Long) { + return TSDataType.INT64; + } else if (value instanceof Float) { + return TSDataType.FLOAT; + } else if (value instanceof Double) { + return TSDataType.DOUBLE; + } else if (inferType) { String strValue = value.toString(); if (isBoolean(strValue)) { return booleanStringInferType; } else if (isNumber(strValue)) { - if (!strValue.contains(TsFileConstant.PATH_SEPARATOR)) { - if (isConvertFloatPrecisionLack(StringUtils.trim(strValue))) { - return longStringInferType; - } + if (isLong(StringUtils.trim(strValue))) { return integerStringInferType; } else { return floatingStringInferType; @@ -107,16 +119,6 @@ public class TypeInferenceUtils { } else { return TSDataType.TEXT; } - } else if (value instanceof Boolean) { - return TSDataType.BOOLEAN; - } else if (value instanceof Integer) { - return TSDataType.INT32; - } else if (value instanceof Long) { - return TSDataType.INT64; - } else if (value instanceof Float) { - return TSDataType.FLOAT; - } else if (value instanceof Double) { - return TSDataType.DOUBLE; } return TSDataType.TEXT; diff --git a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/utils/TypeInferenceUtilsTest.java b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/utils/TypeInferenceUtilsTest.java index 89b8b00bd64..a51e64a2839 100644 --- a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/utils/TypeInferenceUtilsTest.java +++ b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/utils/TypeInferenceUtilsTest.java @@ -19,7 +19,9 @@ package org.apache.iotdb.db.utils; +import org.apache.iotdb.db.conf.IoTDBConfig; import org.apache.iotdb.db.conf.IoTDBDescriptor; +import org.apache.iotdb.db.exception.query.QueryProcessException; import org.apache.iotdb.tsfile.common.constant.TsFileConstant; import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; @@ -60,7 +62,8 @@ public class TypeInferenceUtilsTest { } @Test - public void testInferType() { + public void testInferType() throws QueryProcessException { + IoTDBConfig config = IoTDBDescriptor.getInstance().getConfig(); Object[] values = { 123, "abc", @@ -76,26 +79,32 @@ public class TypeInferenceUtilsTest { "16777217", // 2^24 + 1 "16777216", // 2^24 "271840880000000000000000", + "4.9387406015404442E17", + "4E5", + "1.0", }; - TSDataType[] encodings = { - IoTDBDescriptor.getInstance().getConfig().getIntegerStringInferType(), + TSDataType[] inferredTypes = { + TSDataType.INT32, TSDataType.TEXT, - IoTDBDescriptor.getInstance().getConfig().getFloatingStringInferType(), + TSDataType.DOUBLE, TSDataType.BOOLEAN, TSDataType.FLOAT, - IoTDBDescriptor.getInstance().getConfig().getIntegerStringInferType(), - IoTDBDescriptor.getInstance().getConfig().getFloatingStringInferType(), - IoTDBDescriptor.getInstance().getConfig().getLongStringInferType(), - IoTDBDescriptor.getInstance().getConfig().getBooleanStringInferType(), - IoTDBDescriptor.getInstance().getConfig().getIntegerStringInferType(), - IoTDBDescriptor.getInstance().getConfig().getIntegerStringInferType(), - IoTDBDescriptor.getInstance().getConfig().getLongStringInferType(), - IoTDBDescriptor.getInstance().getConfig().getIntegerStringInferType(), - IoTDBDescriptor.getInstance().getConfig().getLongStringInferType(), + config.getIntegerStringInferType(), + config.getFloatingStringInferType(), + config.getIntegerStringInferType(), + config.getBooleanStringInferType(), + config.getIntegerStringInferType(), + config.getIntegerStringInferType(), + config.getIntegerStringInferType(), + config.getIntegerStringInferType(), + config.getFloatingStringInferType(), + config.getFloatingStringInferType(), + config.getFloatingStringInferType(), + config.getFloatingStringInferType() }; for (int i = 0; i < values.length; i++) { - assertEquals(encodings[i], TypeInferenceUtils.getPredictedDataType(values[i], true)); + assertEquals(inferredTypes[i], TypeInferenceUtils.getPredictedDataType(values[i], true)); } } diff --git a/iotdb-core/node-commons/src/assembly/resources/conf/iotdb-common.properties b/iotdb-core/node-commons/src/assembly/resources/conf/iotdb-common.properties index 13adcd1a86f..d8e9691329a 100644 --- a/iotdb-core/node-commons/src/assembly/resources/conf/iotdb-common.properties +++ b/iotdb-core/node-commons/src/assembly/resources/conf/iotdb-common.properties @@ -316,18 +316,13 @@ data_replication_factor=1 # Datatype: TSDataType # boolean_string_infer_type=BOOLEAN -# register time series as which type when receiving an integer string "67" +# register time series as which type when receiving an integer string and using float or double may lose precision # Datatype: TSDataType -# integer_string_infer_type=FLOAT - -# register time series as which type when receiving an integer string and using float may lose precision -# num > 2 ^ 24 -# Datatype: TSDataType -# long_string_infer_type=DOUBLE +# integer_string_infer_type=DOUBLE # register time series as which type when receiving a floating number string "6.7" # Datatype: TSDataType -# floating_string_infer_type=FLOAT +# floating_string_infer_type=DOUBLE # register time series as which type when receiving the Literal NaN. Values can be DOUBLE, FLOAT or TEXT # Datatype: TSDataType
