Fix date filter issue (CARBONDATA-603)
Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/ea04fce1 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/ea04fce1 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/ea04fce1 Branch: refs/heads/branch-1.0 Commit: ea04fce1d6df2a56ec8e40f6dd4716ab61b8d6f8 Parents: 913d266 Author: kumarvishal <kumarvishal.1...@gmail.com> Authored: Wed Jan 11 20:29:31 2017 +0800 Committer: ravipesala <ravi.pes...@gmail.com> Committed: Fri Jan 20 13:41:01 2017 +0530 ---------------------------------------------------------------------- .../cache/dictionary/ColumnDictionaryInfo.java | 6 +-- .../core/constants/CarbonCommonConstants.java | 9 ++++ .../DirectDictionaryGenerator.java | 1 + .../DirectDictionaryKeyGeneratorFactory.java | 8 +--- .../DateDirectDictionaryGenerator.java | 32 +++++++------ .../core/scan/expression/ExpressionResult.java | 12 +++-- .../carbondata/core/scan/filter/FilterUtil.java | 19 +++++--- .../visitor/CustomTypeDictionaryVisitor.java | 28 ++++++++---- .../apache/carbondata/core/util/CarbonUtil.java | 22 +++++++++ .../carbondata/core/util/DataTypeUtil.java | 24 +++++++++- .../sortindex/CarbonDictionarySortModel.java | 7 ++- .../DateDataTypeDirectDictionaryTest.scala | 6 +-- ...TypeDirectDictionaryWithNoDictTestCase.scala | 6 +-- .../DateDataTypeNullDataTest.scala | 6 +-- .../spark/rdd/CarbonDataLoadRDD.scala | 18 +++++--- .../spark/rdd/CarbonGlobalDictionaryRDD.scala | 18 +++++--- .../spark/rdd/NewCarbonDataLoadRDD.scala | 9 ++-- .../carbondata/spark/util/CarbonScalaUtil.scala | 11 ++--- .../spark/util/GlobalDictionaryUtil.scala | 47 +++++++++++--------- .../execution/command/carbonTableSchema.scala | 3 ++ .../util/ExternalColumnDictionaryTestCase.scala | 3 ++ ...GlobalDictionaryUtilConcurrentTestCase.scala | 3 ++ .../util/GlobalDictionaryUtilTestCase.scala | 3 ++ .../execution/command/carbonTableSchema.scala | 3 ++ 
.../processing/model/CarbonLoadModel.java | 38 ++++++++++++---- 25 files changed, 232 insertions(+), 110 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ea04fce1/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ColumnDictionaryInfo.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ColumnDictionaryInfo.java b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ColumnDictionaryInfo.java index 9774f19..d3599da 100644 --- a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ColumnDictionaryInfo.java +++ b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ColumnDictionaryInfo.java @@ -27,7 +27,6 @@ import java.util.concurrent.atomic.AtomicReference; import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.util.ByteUtil; -import org.apache.carbondata.core.util.CarbonProperties; import org.apache.carbondata.core.util.CarbonUtil; import org.apache.carbondata.core.util.DataTypeUtil; /** @@ -285,9 +284,8 @@ public class ColumnDictionaryInfo extends AbstractColumnDictionaryInfo { .compare((Boolean.parseBoolean(dictionaryVal)), (Boolean.parseBoolean(memberVal))); case DATE: case TIMESTAMP: - SimpleDateFormat parser = new SimpleDateFormat(CarbonProperties.getInstance() - .getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, - CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)); + String format = CarbonUtil.getFormatFromProperty(dataType); + SimpleDateFormat parser = new SimpleDateFormat(format); Date dateToStr; Date dictionaryDate; dateToStr = parser.parse(memberVal); 
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ea04fce1/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java index 6726d83..d1c5eb1 100644 --- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java +++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java @@ -380,6 +380,15 @@ public final class CarbonCommonConstants { * CARBON_TIMESTAMP_DEFAULT_FORMAT */ public static final String CARBON_TIMESTAMP_FORMAT = "carbon.timestamp.format"; + + /** + * CARBON_DATE_DEFAULT_FORMAT + */ + public static final String CARBON_DATE_DEFAULT_FORMAT = "yyyy-MM-dd"; + /** + * CARBON_DATE_FORMAT + */ + public static final String CARBON_DATE_FORMAT = "carbon.date.format"; /** * STORE_LOCATION_HDFS */ http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ea04fce1/core/src/main/java/org/apache/carbondata/core/keygenerator/directdictionary/DirectDictionaryGenerator.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/keygenerator/directdictionary/DirectDictionaryGenerator.java b/core/src/main/java/org/apache/carbondata/core/keygenerator/directdictionary/DirectDictionaryGenerator.java index 97f1899..469fe1e 100644 --- a/core/src/main/java/org/apache/carbondata/core/keygenerator/directdictionary/DirectDictionaryGenerator.java +++ b/core/src/main/java/org/apache/carbondata/core/keygenerator/directdictionary/DirectDictionaryGenerator.java @@ -57,6 +57,7 @@ public interface DirectDictionaryGenerator { /** * Return value datatype for this generator + * * @return */ DataType getReturnType(); 
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ea04fce1/core/src/main/java/org/apache/carbondata/core/keygenerator/directdictionary/DirectDictionaryKeyGeneratorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/keygenerator/directdictionary/DirectDictionaryKeyGeneratorFactory.java b/core/src/main/java/org/apache/carbondata/core/keygenerator/directdictionary/DirectDictionaryKeyGeneratorFactory.java index 8bf0148..e20ebb2 100644 --- a/core/src/main/java/org/apache/carbondata/core/keygenerator/directdictionary/DirectDictionaryKeyGeneratorFactory.java +++ b/core/src/main/java/org/apache/carbondata/core/keygenerator/directdictionary/DirectDictionaryKeyGeneratorFactory.java @@ -16,11 +16,10 @@ */ package org.apache.carbondata.core.keygenerator.directdictionary; -import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.keygenerator.directdictionary.timestamp.DateDirectDictionaryGenerator; import org.apache.carbondata.core.keygenerator.directdictionary.timestamp.TimeStampDirectDictionaryGenerator; import org.apache.carbondata.core.metadata.datatype.DataType; -import org.apache.carbondata.core.util.CarbonProperties; +import org.apache.carbondata.core.util.CarbonUtil; /** * Factory for DirectDictionary Key generator @@ -57,9 +56,6 @@ public final class DirectDictionaryKeyGeneratorFactory { } public static DirectDictionaryGenerator getDirectDictionaryGenerator(DataType dataType) { - return getDirectDictionaryGenerator(dataType, - CarbonProperties.getInstance() - .getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, - CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)); + return getDirectDictionaryGenerator(dataType, CarbonUtil.getFormatFromProperty(dataType)); } } 
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ea04fce1/core/src/main/java/org/apache/carbondata/core/keygenerator/directdictionary/timestamp/DateDirectDictionaryGenerator.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/keygenerator/directdictionary/timestamp/DateDirectDictionaryGenerator.java b/core/src/main/java/org/apache/carbondata/core/keygenerator/directdictionary/timestamp/DateDirectDictionaryGenerator.java index 8401b80..5572f70 100644 --- a/core/src/main/java/org/apache/carbondata/core/keygenerator/directdictionary/timestamp/DateDirectDictionaryGenerator.java +++ b/core/src/main/java/org/apache/carbondata/core/keygenerator/directdictionary/timestamp/DateDirectDictionaryGenerator.java @@ -18,7 +18,9 @@ package org.apache.carbondata.core.keygenerator.directdictionary.timestamp; import java.text.ParseException; import java.text.SimpleDateFormat; +import java.util.Calendar; import java.util.Date; +import java.util.TimeZone; import org.apache.carbondata.common.logging.LogService; import org.apache.carbondata.common.logging.LogServiceFactory; @@ -33,13 +35,17 @@ import org.apache.carbondata.core.util.CarbonProperties; */ public class DateDirectDictionaryGenerator implements DirectDictionaryGenerator { - static final int cutOffDate = Integer.MAX_VALUE >> 1; - static final long SECONDS_PER_DAY = 60 * 60 * 24L; - static final long MILLIS_PER_DAY = SECONDS_PER_DAY * 1000L; - + private static final long SECONDS_PER_DAY = 60 * 60 * 24L; + private static final long MILLIS_PER_DAY = SECONDS_PER_DAY * 1000L; private ThreadLocal<SimpleDateFormat> simpleDateFormatLocal = new ThreadLocal<>(); + //Java TimeZone has no mention of thread safety. Use thread local instance to be safe. 
+ private ThreadLocal<TimeZone> threadLocalLocalTimeZone = new ThreadLocal() { + @Override protected TimeZone initialValue() { + return Calendar.getInstance().getTimeZone(); + } + }; private String dateFormat; /** @@ -48,16 +54,14 @@ public class DateDirectDictionaryGenerator implements DirectDictionaryGenerator private static final LogService LOGGER = LogServiceFactory.getLogService(DateDirectDictionaryGenerator.class.getName()); - public DateDirectDictionaryGenerator(String dateFormat) { this.dateFormat = dateFormat; initialize(); } public DateDirectDictionaryGenerator() { - this(CarbonProperties.getInstance() - .getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, - CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)); + this(CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, + CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT)); } /** @@ -127,7 +131,7 @@ public class DateDirectDictionaryGenerator implements DirectDictionaryGenerator if (key == 1) { return null; } - return key - cutOffDate; + return key; } private int generateDirectSurrogateKeyForNonTimestampType(String memberStr) { @@ -135,9 +139,8 @@ public class DateDirectDictionaryGenerator implements DirectDictionaryGenerator try { timeValue = Long.valueOf(memberStr) / 1000; } catch (NumberFormatException e) { - LOGGER.debug( - "Cannot convert " + memberStr + " Long type value. Value considered as null." + e - .getMessage()); + LOGGER.debug("Cannot convert " + memberStr + " Long type value. Value considered as null." 
+ e + .getMessage()); } if (timeValue == -1) { return 1; @@ -147,11 +150,12 @@ public class DateDirectDictionaryGenerator implements DirectDictionaryGenerator } private int generateKey(long timeValue) { - int key = (int)Math.floor((double)timeValue / MILLIS_PER_DAY) + cutOffDate; + long milli = timeValue + threadLocalLocalTimeZone.get().getOffset(timeValue); + int key = (int) Math.floor((double) milli / MILLIS_PER_DAY); return key; } - public void initialize(){ + public void initialize() { if (simpleDateFormatLocal.get() == null) { simpleDateFormatLocal.set(new SimpleDateFormat(dateFormat)); simpleDateFormatLocal.get().setLenient(false); http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ea04fce1/core/src/main/java/org/apache/carbondata/core/scan/expression/ExpressionResult.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/ExpressionResult.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/ExpressionResult.java index 090ce0e..1e57bf2 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/expression/ExpressionResult.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/ExpressionResult.java @@ -28,7 +28,7 @@ import java.util.List; import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.scan.expression.exception.FilterIllegalMemberException; -import org.apache.carbondata.core.util.CarbonProperties; +import org.apache.carbondata.core.util.CarbonUtil; public class ExpressionResult implements Comparable<ExpressionResult> { @@ -175,9 +175,8 @@ public class ExpressionResult implements Comparable<ExpressionResult> { switch (this.getDataType()) { case DATE: case TIMESTAMP: - SimpleDateFormat parser = new SimpleDateFormat(CarbonProperties.getInstance() - 
.getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, - CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)); + String format = CarbonUtil.getFormatFromProperty(this.getDataType()); + SimpleDateFormat parser = new SimpleDateFormat(format); if (value instanceof Timestamp) { return parser.format((Timestamp) value); } else if (value instanceof java.sql.Date) { @@ -523,9 +522,8 @@ public class ExpressionResult implements Comparable<ExpressionResult> { return val1.compareTo(val2); case DATE: case TIMESTAMP: - SimpleDateFormat parser = new SimpleDateFormat(CarbonProperties.getInstance() - .getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, - CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)); + String format= CarbonUtil.getFormatFromProperty(o.dataType); + SimpleDateFormat parser = new SimpleDateFormat(format); Date date1 = null; Date date2 = null; date1 = parser.parse(this.getString()); http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ea04fce1/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java index 65a118e..14f45c1 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java @@ -1078,9 +1078,8 @@ public final class FilterUtil { .compare((Boolean.parseBoolean(dictionaryVal)), (Boolean.parseBoolean(memberVal))); case DATE: case TIMESTAMP: - SimpleDateFormat parser = new SimpleDateFormat(CarbonProperties.getInstance() - .getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, - CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)); + String format= CarbonUtil.getFormatFromProperty(dataType); + SimpleDateFormat parser = new SimpleDateFormat(format); Date dateToStr; Date 
dictionaryDate; dateToStr = parser.parse(memberVal); @@ -1203,9 +1202,17 @@ public final class FilterUtil { if (CarbonCommonConstants.MEMBER_DEFAULT_VAL.equals(filterMember1)) { return 1; } - SimpleDateFormat parser = new SimpleDateFormat(CarbonProperties.getInstance() - .getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, - CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)); + String format= null; + if(dataType==DataType.DATE) { + format = CarbonProperties.getInstance() + .getProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, + CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT); + } else { + format = CarbonProperties.getInstance() + .getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, + CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT); + } + SimpleDateFormat parser = new SimpleDateFormat(format); Date date1 = null; Date date2 = null; date1 = parser.parse(filterMember1); http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ea04fce1/core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/resolverinfo/visitor/CustomTypeDictionaryVisitor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/resolverinfo/visitor/CustomTypeDictionaryVisitor.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/resolverinfo/visitor/CustomTypeDictionaryVisitor.java index 4dcb306..257cca0 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/resolverinfo/visitor/CustomTypeDictionaryVisitor.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/resolverinfo/visitor/CustomTypeDictionaryVisitor.java @@ -23,6 +23,7 @@ import java.util.List; import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator; import 
org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryKeyGeneratorFactory; +import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.scan.expression.ColumnExpression; import org.apache.carbondata.core.scan.expression.exception.FilterIllegalMemberException; import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException; @@ -41,7 +42,7 @@ public class CustomTypeDictionaryVisitor implements ResolvedFilterInfoVisitorInt * @param visitableObj * @param metadata * @throws FilterUnsupportedException,if exception occurs while evaluating - * filter models. + * filter models. */ public void populateFilterResolvedInfo(DimColumnResolvedFilterInfo visitableObj, FilterResolverMetadata metadata) throws FilterUnsupportedException { @@ -54,7 +55,8 @@ public class CustomTypeDictionaryVisitor implements ResolvedFilterInfoVisitorInt throw new FilterUnsupportedException(e); } resolvedFilterObject = getDirectDictionaryValKeyMemberForFilter(metadata.getColumnExpression(), - evaluateResultListFinal, metadata.isIncludeFilter()); + evaluateResultListFinal, metadata.isIncludeFilter(), + metadata.getColumnExpression().getDimension().getDataType()); if (!metadata.isIncludeFilter() && null != resolvedFilterObject && !resolvedFilterObject .getFilterList().contains(CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY)) { // Adding default surrogate key of null member inorder to not display the same while @@ -68,13 +70,13 @@ public class CustomTypeDictionaryVisitor implements ResolvedFilterInfoVisitorInt private DimColumnFilterInfo getDirectDictionaryValKeyMemberForFilter( ColumnExpression columnExpression, List<String> evaluateResultListFinal, - boolean isIncludeFilter) { + boolean isIncludeFilter, DataType dataType) { List<Integer> surrogates = new ArrayList<Integer>(20); DirectDictionaryGenerator directDictionaryGenerator = DirectDictionaryKeyGeneratorFactory 
.getDirectDictionaryGenerator(columnExpression.getDimension().getDataType()); // Reading the dictionary value direct - getSurrogateValuesForDictionary(evaluateResultListFinal, surrogates, - directDictionaryGenerator); + getSurrogateValuesForDictionary(evaluateResultListFinal, surrogates, directDictionaryGenerator, + dataType); Collections.sort(surrogates); DimColumnFilterInfo columnFilterInfo = null; @@ -87,10 +89,18 @@ public class CustomTypeDictionaryVisitor implements ResolvedFilterInfoVisitorInt } private void getSurrogateValuesForDictionary(List<String> evaluateResultListFinal, - List<Integer> surrogates, DirectDictionaryGenerator directDictionaryGenerator) { - String timeFormat = CarbonProperties.getInstance() - .getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, - CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT); + List<Integer> surrogates, DirectDictionaryGenerator directDictionaryGenerator, + DataType dataType) { + String timeFormat = null; + if (dataType == DataType.DATE) { + timeFormat = CarbonProperties.getInstance() + .getProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, + CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT); + } else { + timeFormat = CarbonProperties.getInstance() + .getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, + CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT); + } for (String filterMember : evaluateResultListFinal) { surrogates .add(directDictionaryGenerator.generateDirectSurrogateKey(filterMember, timeFormat)); http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ea04fce1/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java index 440c448..07ac127 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java +++ 
b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java @@ -1420,5 +1420,27 @@ public final class CarbonUtil { } return false; } + + /** + * Below method will be used to get the format for + * date or timestamp data type from property. This + * is added to avoid the code duplication + * + * @param dataType + * @return format + */ + public static String getFormatFromProperty(DataType dataType) { + switch (dataType) { + case DATE: + return CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, + CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT); + case TIMESTAMP: + return CarbonProperties.getInstance() + .getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, + CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT); + default: + return null; + } + } } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ea04fce1/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java index 4294563..074c00d 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java @@ -45,7 +45,7 @@ public final class DataTypeUtil { LogServiceFactory.getLogService(DataTypeUtil.class.getName()); private static final Map<String, String> dataTypeDisplayNames; - private static final ThreadLocal<DateFormat> formatter = new ThreadLocal<DateFormat>() { + private static final ThreadLocal<DateFormat> timeStampformatter = new ThreadLocal<DateFormat>() { @Override protected DateFormat initialValue() { return new SimpleDateFormat( @@ -54,6 +54,15 @@ public final class DataTypeUtil { } }; + private static final ThreadLocal<DateFormat> dateformatter = new ThreadLocal<DateFormat>() { + @Override + protected DateFormat 
initialValue() { + return new SimpleDateFormat( + CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, + CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT)); + } + }; + static { dataTypeDisplayNames = new HashMap<String, String>(16); dataTypeDisplayNames.put(DataType.DATE.toString(), DataType.DATE.getName()); @@ -256,12 +265,23 @@ public final class DataTypeUtil { } return Long.parseLong(data); case DATE: + if (data.isEmpty()) { + return null; + } + try { + Date dateToStr = dateformatter.get().parse(data); + return dateToStr.getTime() * 1000; + } catch (ParseException e) { + LOGGER.error("Cannot convert" + data + " to Time/Long type value" + e.getMessage()); + return null; + } + case TIMESTAMP: if (data.isEmpty()) { return null; } try { - Date dateToStr = formatter.get().parse(data); + Date dateToStr = timeStampformatter.get().parse(data); return dateToStr.getTime() * 1000; } catch (ParseException e) { LOGGER.error("Cannot convert" + data + " to Time/Long type value" + e.getMessage()); http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ea04fce1/core/src/main/java/org/apache/carbondata/core/writer/sortindex/CarbonDictionarySortModel.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/writer/sortindex/CarbonDictionarySortModel.java b/core/src/main/java/org/apache/carbondata/core/writer/sortindex/CarbonDictionarySortModel.java index 46a23e7..df40a1d 100644 --- a/core/src/main/java/org/apache/carbondata/core/writer/sortindex/CarbonDictionarySortModel.java +++ b/core/src/main/java/org/apache/carbondata/core/writer/sortindex/CarbonDictionarySortModel.java @@ -22,7 +22,7 @@ import java.util.Date; import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.metadata.datatype.DataType; -import org.apache.carbondata.core.util.CarbonProperties; +import org.apache.carbondata.core.util.CarbonUtil; /** * 
Dictionary sort model class holds the member byte value and corresponding key value. @@ -102,9 +102,8 @@ public class CarbonDictionarySortModel implements Comparable<CarbonDictionarySor return val1.compareTo(val2); case DATE: case TIMESTAMP: - SimpleDateFormat parser = new SimpleDateFormat(CarbonProperties.getInstance() - .getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, - CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)); + String format= CarbonUtil.getFormatFromProperty(dataType); + SimpleDateFormat parser = new SimpleDateFormat(format); Date date1 = null; Date date2 = null; try { http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ea04fce1/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/DateDataTypeDirectDictionaryTest.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/DateDataTypeDirectDictionaryTest.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/DateDataTypeDirectDictionaryTest.scala index 1c96f08..e1d9369 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/DateDataTypeDirectDictionaryTest.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/DateDataTypeDirectDictionaryTest.scala @@ -53,7 +53,7 @@ class DateDataTypeDirectDictionaryTest extends QueryTest with BeforeAndAfterAll ) CarbonProperties.getInstance() - .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy-MM-dd") + .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, "yyyy-MM-dd") val csvFilePath = s"$resourcesPath/datasamplefordate.csv" sql("LOAD DATA local inpath '" + csvFilePath + "' INTO TABLE directDictionaryTable OPTIONS" + "('DELIMITER'= ',', 'QUOTECHAR'= '\"')" ) @@ -63,7 +63,7 @@ class 
DateDataTypeDirectDictionaryTest extends QueryTest with BeforeAndAfterAll case x: Throwable => x.printStackTrace() CarbonProperties.getInstance() - .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy") + .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, "dd-MM-yyyy") } } @@ -142,7 +142,7 @@ class DateDataTypeDirectDictionaryTest extends QueryTest with BeforeAndAfterAll sql("drop table directDictionaryTable") sql("drop table directDictionaryTable_hive") CarbonProperties.getInstance() - .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy") + .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, "dd-MM-yyyy") CarbonProperties.getInstance().addProperty("carbon.direct.dictionary", "false") } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ea04fce1/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/DateDataTypeDirectDictionaryWithNoDictTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/DateDataTypeDirectDictionaryWithNoDictTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/DateDataTypeDirectDictionaryWithNoDictTestCase.scala index 42517db..2ad8592 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/DateDataTypeDirectDictionaryWithNoDictTestCase.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/DateDataTypeDirectDictionaryWithNoDictTestCase.scala @@ -47,7 +47,7 @@ class DateDataTypeDirectDictionaryWithNoDictTestCase extends QueryTest with Befo ) CarbonProperties.getInstance() - .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy-MM-dd HH:mm:ss") + 
.addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, "yyyy-MM-dd") val csvFilePath = s"$resourcesPath/datasample.csv" sql("LOAD DATA local inpath '" + csvFilePath + "' INTO TABLE directDictionaryTable OPTIONS" + "('DELIMITER'= ',', 'QUOTECHAR'= '\"')"); @@ -55,7 +55,7 @@ class DateDataTypeDirectDictionaryWithNoDictTestCase extends QueryTest with Befo case x: Throwable => x.printStackTrace() CarbonProperties.getInstance() - .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy") + .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, "dd-MM-yyyy") } } @@ -90,7 +90,7 @@ class DateDataTypeDirectDictionaryWithNoDictTestCase extends QueryTest with Befo override def afterAll { sql("drop table directDictionaryTable") CarbonProperties.getInstance() - .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy") + .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, "dd-MM-yyyy") CarbonProperties.getInstance().addProperty("carbon.direct.dictionary", "false") } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ea04fce1/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/DateDataTypeNullDataTest.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/DateDataTypeNullDataTest.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/DateDataTypeNullDataTest.scala index cd98210..0215a7b 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/DateDataTypeNullDataTest.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/DateDataTypeNullDataTest.scala @@ -45,7 +45,7 @@ class DateDataTypeNullDataTest extends QueryTest with BeforeAndAfterAll { ) 
CarbonProperties.getInstance() - .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd") + .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, "yyyy/MM/dd") val csvFilePath = s"$resourcesPath/datasamplenull.csv" sql("LOAD DATA LOCAL INPATH '" + csvFilePath + "' INTO TABLE timestampTyeNullData").collect(); @@ -53,7 +53,7 @@ class DateDataTypeNullDataTest extends QueryTest with BeforeAndAfterAll { case x: Throwable => x.printStackTrace() CarbonProperties.getInstance() - .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy") + .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, "dd-MM-yyyy") } } @@ -74,7 +74,7 @@ class DateDataTypeNullDataTest extends QueryTest with BeforeAndAfterAll { override def afterAll { sql("drop table timestampTyeNullData") CarbonProperties.getInstance() - .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy") + .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, "dd-MM-yyyy") CarbonProperties.getInstance().addProperty("carbon.direct.dictionary", "false") } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ea04fce1/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataLoadRDD.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataLoadRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataLoadRDD.scala index 697c3a7..e3f2cf6 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataLoadRDD.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataLoadRDD.scala @@ -585,9 +585,12 @@ class RddIterator(rddIter: Iterator[Row], carbonLoadModel: CarbonLoadModel, context: TaskContext) extends CarbonIterator[Array[String]] { - val formatString = CarbonProperties.getInstance().getProperty(CarbonCommonConstants + val 
timeStampformatString = CarbonProperties.getInstance().getProperty(CarbonCommonConstants .CARBON_TIMESTAMP_FORMAT, CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT) - val format = new SimpleDateFormat(formatString) + val timeStampFormat = new SimpleDateFormat(timeStampformatString) + val dateFormatString = CarbonProperties.getInstance().getProperty(CarbonCommonConstants + .CARBON_DATE_FORMAT, CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT) + val dateFormat = new SimpleDateFormat(dateFormatString) val delimiterLevel1 = carbonLoadModel.getComplexDelimiterLevel1 val delimiterLevel2 = carbonLoadModel.getComplexDelimiterLevel2 val serializationNullFormat = @@ -599,7 +602,7 @@ class RddIterator(rddIter: Iterator[Row], val columns = new Array[String](row.length) for (i <- 0 until columns.length) { columns(i) = CarbonScalaUtil.getString(row.get(i), serializationNullFormat, - delimiterLevel1, delimiterLevel2, format) + delimiterLevel1, delimiterLevel2, timeStampFormat, dateFormat) } columns } @@ -612,9 +615,12 @@ class RddIterator(rddIter: Iterator[Row], class RddIteratorForUpdate(rddIter: Iterator[Row], carbonLoadModel: CarbonLoadModel) extends java.util.Iterator[Array[String]] { - val formatString = CarbonProperties.getInstance().getProperty(CarbonCommonConstants + val timeStampformatString = CarbonProperties.getInstance().getProperty(CarbonCommonConstants .CARBON_TIMESTAMP_FORMAT, CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT) - val format = new SimpleDateFormat(formatString) + val timeStampFormat = new SimpleDateFormat(timeStampformatString) + val dateFormatString = CarbonProperties.getInstance().getProperty(CarbonCommonConstants + .CARBON_DATE_FORMAT, CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT) + val dateFormat = new SimpleDateFormat(dateFormatString) val delimiterLevel1 = carbonLoadModel.getComplexDelimiterLevel1 val delimiterLevel2 = carbonLoadModel.getComplexDelimiterLevel2 val serializationNullFormat = @@ -628,7 +634,7 @@ class 
RddIteratorForUpdate(rddIter: Iterator[Row], for (i <- 0 until row.length) { // columns(i) = CarbonScalaUtil.getStringForUpdate(row(i), delimiterLevel1, delimiterLevel2) columns(i) = CarbonScalaUtil.getString(row.get(i), serializationNullFormat, - delimiterLevel1, delimiterLevel2, format) + delimiterLevel1, delimiterLevel2, timeStampFormat, dateFormat) if (columns(i).length() > CarbonCommonConstants.DEFAULT_COLUMN_LENGTH) { sys.error(s" Error processing input: Length of parsed input (${ CarbonCommonConstants http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ea04fce1/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala index cf6d5cc..c825da6 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala @@ -161,7 +161,8 @@ case class DictionaryLoadModel(table: CarbonTableIdentifier, lockType: String, zooKeeperUrl: String, serializationNullFormat: String, - defaultTimestampFormat: String) extends Serializable + defaultTimestampFormat: String, + defaultDateFormat: String) extends Serializable case class ColumnDistinctValues(values: Array[String], rowCount: Long) extends Serializable @@ -287,7 +288,8 @@ class CarbonBlockDistinctValuesCombineRDD( val dimNum = model.dimensions.length var row: Row = null val rddIter = firstParent[Row].iterator(split, context) - val format = new SimpleDateFormat(model.defaultTimestampFormat) + val timeStampFormat = new SimpleDateFormat(model.defaultTimestampFormat) + val dateFormat = new 
SimpleDateFormat(model.defaultDateFormat) // generate block distinct value set while (rddIter.hasNext) { row = rddIter.next() @@ -295,7 +297,11 @@ class CarbonBlockDistinctValuesCombineRDD( rowCount += 1 for (i <- 0 until dimNum) { dimensionParsers(i).parseString(CarbonScalaUtil.getString(row.get(i), - model.serializationNullFormat, model.delimiters(0), model.delimiters(1), format)) + model.serializationNullFormat, + model.delimiters(0), + model.delimiters(1), + timeStampFormat, + dateFormat)) } } } @@ -340,9 +346,9 @@ class CarbonGlobalDictionaryGenerateRDD( val pathService: PathService = CarbonCommonFactory.getPathService val carbonTablePath: CarbonTablePath = pathService.getCarbonTablePath(model.hdfsLocation, model.table) - if (StringUtils.isNotBlank(model.hdfsTempLocation )) { - CarbonProperties.getInstance.addProperty(CarbonCommonConstants.HDFS_TEMP_LOCATION, - model.hdfsTempLocation) + if (StringUtils.isNotBlank(model.hdfsTempLocation)) { + CarbonProperties.getInstance.addProperty(CarbonCommonConstants.HDFS_TEMP_LOCATION, + model.hdfsTempLocation) } if (StringUtils.isNotBlank(model.lockType)) { CarbonProperties.getInstance.addProperty(CarbonCommonConstants.LOCK_TYPE, http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ea04fce1/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala index 3af07d8..9ed0913 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala @@ -399,9 +399,12 @@ class NewRddIterator(rddIter: Iterator[Row], carbonLoadModel: CarbonLoadModel, context: 
TaskContext) extends CarbonIterator[Array[AnyRef]] { - val formatString = CarbonProperties.getInstance().getProperty(CarbonCommonConstants + val timeStampformatString = CarbonProperties.getInstance().getProperty(CarbonCommonConstants .CARBON_TIMESTAMP_FORMAT, CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT) - val format = new SimpleDateFormat(formatString) + val timeStampFormat = new SimpleDateFormat(timeStampformatString) + val dateFormatString = CarbonProperties.getInstance().getProperty(CarbonCommonConstants + .CARBON_DATE_FORMAT, CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT) + val dateFormat = new SimpleDateFormat(dateFormatString) val delimiterLevel1 = carbonLoadModel.getComplexDelimiterLevel1 val delimiterLevel2 = carbonLoadModel.getComplexDelimiterLevel2 val serializationNullFormat = @@ -413,7 +416,7 @@ class NewRddIterator(rddIter: Iterator[Row], val columns = new Array[AnyRef](row.length) for (i <- 0 until columns.length) { columns(i) = CarbonScalaUtil.getString(row.get(i), serializationNullFormat, - delimiterLevel1, delimiterLevel2, format) + delimiterLevel1, delimiterLevel2, timeStampFormat, dateFormat) } columns } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ea04fce1/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/CarbonScalaUtil.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/CarbonScalaUtil.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/CarbonScalaUtil.scala index f462adb..35bd775 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/CarbonScalaUtil.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/CarbonScalaUtil.scala @@ -144,7 +144,8 @@ object CarbonScalaUtil { serializationNullFormat: String, delimiterLevel1: String, delimiterLevel2: String, - format: SimpleDateFormat, + 
timeStampFormat: SimpleDateFormat, + dateFormat: SimpleDateFormat, level: Int = 1): String = { if (value == null) { serializationNullFormat @@ -154,8 +155,8 @@ object CarbonScalaUtil { case d: java.math.BigDecimal => d.toPlainString case i: java.lang.Integer => i.toString case d: java.lang.Double => d.toString - case t: java.sql.Timestamp => format format t - case d: java.sql.Date => format format d + case t: java.sql.Timestamp => timeStampFormat format t + case d: java.sql.Date => dateFormat format d case b: java.lang.Boolean => b.toString case s: java.lang.Short => s.toString case f: java.lang.Float => f.toString @@ -169,7 +170,7 @@ object CarbonScalaUtil { val builder = new StringBuilder() s.foreach { x => builder.append(getString(x, serializationNullFormat, delimiterLevel1, - delimiterLevel2, format, level + 1)).append(delimiter) + delimiterLevel2, timeStampFormat, dateFormat, level + 1)).append(delimiter) } builder.substring(0, builder.length - 1) case m: scala.collection.Map[Any, Any] => @@ -183,7 +184,7 @@ object CarbonScalaUtil { val builder = new StringBuilder() for (i <- 0 until r.length) { builder.append(getString(r(i), serializationNullFormat, delimiterLevel1, - delimiterLevel2, format, level + 1)).append(delimiter) + delimiterLevel2, timeStampFormat, dateFormat, level + 1)).append(delimiter) } builder.substring(0, builder.length - 1) case other => other.toString http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ea04fce1/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala index f29493e..867a374 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala +++ 
b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala @@ -352,7 +352,8 @@ object GlobalDictionaryUtil { lockType, zookeeperUrl, serializationNullFormat, - carbonLoadModel.getDefaultTimestampFormat) + carbonLoadModel.getDefaultTimestampFormat, + carbonLoadModel.getDefaultDateFormat) } /** @@ -363,29 +364,29 @@ object GlobalDictionaryUtil { */ def loadDataFrame(sqlContext: SQLContext, carbonLoadModel: CarbonLoadModel): DataFrame = { - val hadoopConfiguration = new Configuration() - CommonUtil.configureCSVInputFormat(hadoopConfiguration, carbonLoadModel) - hadoopConfiguration.set(FileInputFormat.INPUT_DIR, carbonLoadModel.getFactFilePath) - val columnNames = carbonLoadModel.getCsvHeaderColumns - val schema = StructType(columnNames.map[StructField, Array[StructField]]{ column => - StructField(column, StringType) - }) - val values = new Array[String](columnNames.length) - val row = new StringArrayRow(values) - val rdd = new NewHadoopRDD[NullWritable, StringArrayWritable]( - sqlContext.sparkContext, - classOf[CSVInputFormat], - classOf[NullWritable], - classOf[StringArrayWritable], - hadoopConfiguration).setName("global dictionary").map[Row] { currentRow => - row.setValues(currentRow._2.get()) - } - sqlContext.createDataFrame(rdd, schema) + val hadoopConfiguration = new Configuration() + CommonUtil.configureCSVInputFormat(hadoopConfiguration, carbonLoadModel) + hadoopConfiguration.set(FileInputFormat.INPUT_DIR, carbonLoadModel.getFactFilePath) + val columnNames = carbonLoadModel.getCsvHeaderColumns + val schema = StructType(columnNames.map[StructField, Array[StructField]] { column => + StructField(column, StringType) + }) + val values = new Array[String](columnNames.length) + val row = new StringArrayRow(values) + val rdd = new NewHadoopRDD[NullWritable, StringArrayWritable]( + sqlContext.sparkContext, + classOf[CSVInputFormat], + classOf[NullWritable], + classOf[StringArrayWritable], + hadoopConfiguration).setName("global 
dictionary").map[Row] { currentRow => + row.setValues(currentRow._2.get()) + } + sqlContext.createDataFrame(rdd, schema) } // Hack for spark2 integration var updateTableMetadataFunc: (CarbonLoadModel, SQLContext, DictionaryLoadModel, - Array[CarbonDimension]) => Unit = _ + Array[CarbonDimension]) => Unit = _ /** * check whether global dictionary have been generated successfully or not @@ -466,7 +467,11 @@ object GlobalDictionaryUtil { colName match { case "" => colName case _ => - if (parentDimName.isEmpty) middleDimName else "." + middleDimName + if (parentDimName.isEmpty) { + middleDimName + } else { + "." + middleDimName + } } } // judge whether the column is exists http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ea04fce1/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala index f4eea84..62463fe 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala @@ -432,6 +432,9 @@ case class LoadTable( carbonLoadModel.setDefaultTimestampFormat(CarbonProperties.getInstance().getProperty( CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)) + carbonLoadModel.setDefaultDateFormat(CarbonProperties.getInstance().getProperty( + CarbonCommonConstants.CARBON_DATE_FORMAT, + CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT)) carbonLoadModel .setSerializationNullFormat( TableOptionConstant.SERIALIZATION_NULL_FORMAT.getName + "," + serializationNullFormat) 
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ea04fce1/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala index 361d183..5a986b7 100644 --- a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala +++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala @@ -144,6 +144,9 @@ class ExternalColumnDictionaryTestCase extends QueryTest with BeforeAndAfterAll carbonLoadModel.setDefaultTimestampFormat(CarbonProperties.getInstance().getProperty( CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)) + carbonLoadModel.setDefaultDateFormat(CarbonProperties.getInstance().getProperty( + CarbonCommonConstants.CARBON_DATE_FORMAT, + CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT)) carbonLoadModel.setCsvHeaderColumns(CommonUtil.getCsvHeaderColumns(carbonLoadModel)) carbonLoadModel } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ea04fce1/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala index 0206bcf..377bbaa 100644 --- a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala +++ 
b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala @@ -60,6 +60,9 @@ class GlobalDictionaryUtilConcurrentTestCase extends QueryTest with BeforeAndAft carbonLoadModel.setDefaultTimestampFormat(CarbonProperties.getInstance().getProperty( CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)) + carbonLoadModel.setDefaultDateFormat(CarbonProperties.getInstance().getProperty( + CarbonCommonConstants.CARBON_DATE_FORMAT, + CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT)) carbonLoadModel.setCsvHeaderColumns(CommonUtil.getCsvHeaderColumns(carbonLoadModel)) carbonLoadModel } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ea04fce1/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilTestCase.scala index 728a0c6..189e694 100644 --- a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilTestCase.scala +++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilTestCase.scala @@ -66,6 +66,9 @@ class GlobalDictionaryUtilTestCase extends QueryTest with BeforeAndAfterAll { carbonLoadModel.setDefaultTimestampFormat(CarbonProperties.getInstance().getProperty( CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)) + carbonLoadModel.setDefaultDateFormat(CarbonProperties.getInstance().getProperty( + CarbonCommonConstants.CARBON_DATE_FORMAT, + CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT)) carbonLoadModel.setCsvHeaderColumns(CommonUtil.getCsvHeaderColumns(carbonLoadModel)) carbonLoadModel } 
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ea04fce1/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala index c45f5a1..6fba830 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala @@ -427,6 +427,9 @@ case class LoadTable( carbonLoadModel.setDefaultTimestampFormat(CarbonProperties.getInstance().getProperty( CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)) + carbonLoadModel.setDefaultDateFormat(CarbonProperties.getInstance().getProperty( + CarbonCommonConstants.CARBON_DATE_FORMAT, + CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT)) carbonLoadModel .setSerializationNullFormat( TableOptionConstant.SERIALIZATION_NULL_FORMAT.getName + "," + serializationNullFormat) http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ea04fce1/processing/src/main/java/org/apache/carbondata/processing/model/CarbonLoadModel.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/model/CarbonLoadModel.java b/processing/src/main/java/org/apache/carbondata/processing/model/CarbonLoadModel.java index ca5acb8..a628971 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/model/CarbonLoadModel.java +++ b/processing/src/main/java/org/apache/carbondata/processing/model/CarbonLoadModel.java @@ -74,7 +74,7 @@ public class CarbonLoadModel implements Serializable { private String blocksID; /** - * Map from carbon dimension to pre 
defined dict file path + * Map from carbon dimension to pre defined dict file path */ private HashMap<CarbonDimension, String> predefDictMap; @@ -112,6 +112,8 @@ public class CarbonLoadModel implements Serializable { private String defaultTimestampFormat; + private String defaultDateFormat; + /** * defines the string that should be treated as null while loadind data */ @@ -148,7 +150,7 @@ public class CarbonLoadModel implements Serializable { } /** - * Use one pass to generate dictionary + * Use one pass to generate dictionary */ private boolean useOnePass; @@ -169,6 +171,7 @@ public class CarbonLoadModel implements Serializable { /** * get escape char + * * @return */ public String getEscapeChar() { @@ -177,6 +180,7 @@ public class CarbonLoadModel implements Serializable { /** * set escape char + * * @param escapeChar */ public void setEscapeChar(String escapeChar) { @@ -330,7 +334,6 @@ public class CarbonLoadModel implements Serializable { } /** - * * @return external column dictionary file path */ public String getColDictFilePath() { @@ -339,13 +342,13 @@ public class CarbonLoadModel implements Serializable { /** * set external column dictionary file path + * * @param colDictFilePath */ public void setColDictFilePath(String colDictFilePath) { this.colDictFilePath = colDictFilePath; } - /** * get copy with parition * @@ -529,6 +532,7 @@ public class CarbonLoadModel implements Serializable { /** * getSegmentUpdateDetails + * * @return */ public List<SegmentUpdateDetails> getSegmentUpdateDetails() { @@ -546,6 +550,7 @@ public class CarbonLoadModel implements Serializable { /** * getSegmentUpdateStatusManager + * * @return */ public SegmentUpdateStatusManager getSegmentUpdateStatusManager() { @@ -561,7 +566,6 @@ public class CarbonLoadModel implements Serializable { this.segmentUpdateStatusManager = segmentUpdateStatusManager; } - /** * @return */ @@ -610,6 +614,7 @@ public class CarbonLoadModel implements Serializable { /** * the method returns the value to be 
treated as null while data load + * * @return */ public String getSerializationNullFormat() { @@ -618,6 +623,7 @@ public class CarbonLoadModel implements Serializable { /** * the method sets the value to be treated as null while data load + * * @param serializationNullFormat */ public void setSerializationNullFormat(String serializationNullFormat) { @@ -626,6 +632,7 @@ public class CarbonLoadModel implements Serializable { /** * returns the string to enable bad record logger + * * @return */ public String getBadRecordsLoggerEnable() { @@ -634,6 +641,7 @@ public class CarbonLoadModel implements Serializable { /** * method sets the string to specify whether to enable or dissable the badrecord logger. + * * @param badRecordsLoggerEnable */ public void setBadRecordsLoggerEnable(String badRecordsLoggerEnable) { @@ -656,9 +664,13 @@ public class CarbonLoadModel implements Serializable { this.commentChar = commentChar; } - public String getDateFormat() { return dateFormat; } + public String getDateFormat() { + return dateFormat; + } - public void setDateFormat(String dateFormat) { this.dateFormat = dateFormat; } + public void setDateFormat(String dateFormat) { + this.dateFormat = dateFormat; + } public String getDefaultTimestampFormat() { return defaultTimestampFormat; @@ -683,7 +695,8 @@ public class CarbonLoadModel implements Serializable { } /** - * returns option to specify the bad record logger action + * returns option to specify the bad record logger action + * * @return */ public String getBadRecordsAction() { @@ -692,6 +705,7 @@ public class CarbonLoadModel implements Serializable { /** * set option to specify the bad record logger action + * * @param badRecordsAction */ public void setBadRecordsAction(String badRecordsAction) { @@ -738,4 +752,12 @@ public class CarbonLoadModel implements Serializable { public void setPreFetch(boolean preFetch) { this.preFetch = preFetch; } + + public String getDefaultDateFormat() { + return defaultDateFormat; + } + + public void 
setDefaultDateFormat(String defaultDateFormat) { + this.defaultDateFormat = defaultDateFormat; + } }