Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1970#discussion_r167465747
--- Diff:
processing/src/main/java/org/apache/carbondata/processing/loading/model/CarbonLoadModelBuilder.java
---
@@ -0,0 +1,322 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.processing.loading.model;
+
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.carbondata.common.Maps;
+import org.apache.carbondata.common.Strings;
+import org.apache.carbondata.common.annotations.InterfaceAudience;
+import org.apache.carbondata.common.constants.LoggerAction;
+import
org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
+import
org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
+import org.apache.carbondata.core.util.CarbonProperties;
+import org.apache.carbondata.core.util.CarbonUtil;
+import
org.apache.carbondata.processing.loading.constants.DataLoadProcessorConstants;
+import org.apache.carbondata.processing.loading.csvinput.CSVInputFormat;
+import org.apache.carbondata.processing.loading.sort.SortScopeOptions;
+import org.apache.carbondata.processing.util.TableOptionConstant;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * Builder for {@link CarbonLoadModel}
+ */
[email protected]
+public class CarbonLoadModelBuilder {
+
+ private CarbonTable table;
+
+ public CarbonLoadModelBuilder(CarbonTable table) {
+ this.table = table;
+ }
+
+ /**
+ * build CarbonLoadModel for data loading
+ * @param options Load options from user input
+ * @return a new CarbonLoadModel instance
+ */
+ public CarbonLoadModel build(
+ Map<String, String> options) throws InvalidLoadOptionException,
IOException {
+ Map<String, String> optionsFinal =
LoadOption.fillOptionWithDefaultValue(options);
+ optionsFinal.put("sort_scope", "no_sort");
+ if (!options.containsKey("fileheader")) {
+ List<CarbonColumn> csvHeader =
table.getCreateOrderColumn(table.getTableName());
+ String[] columns = new String[csvHeader.size()];
+ for (int i = 0; i < columns.length; i++) {
+ columns[i] = csvHeader.get(i).getColName();
+ }
+ optionsFinal.put("fileheader", Strings.mkString(columns, ","));
+ }
+ CarbonLoadModel model = new CarbonLoadModel();
+
+ // we have provided 'fileheader', so it hadoopConf can be null
+ build(options, optionsFinal, model, null);
+
+ // set default values
+
model.setTimestampformat(CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT);
+ model.setDateFormat(CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT);
+ model.setUseOnePass(Boolean.parseBoolean(Maps.getOrDefault(options,
"onepass", "false")));
+ model.setDictionaryServerHost(Maps.getOrDefault(options, "dicthost",
null));
+ try {
+
model.setDictionaryServerPort(Integer.parseInt(Maps.getOrDefault(options,
"dictport", "-1")));
+ } catch (NumberFormatException e) {
+ throw new InvalidLoadOptionException(e.getMessage());
+ }
+ return model;
+ }
+
+ /**
+ * build CarbonLoadModel for data loading
+ * @param options Load options from user input
+   * @param optionsFinal Load options that are populated with default values
for optional options
+ * @param carbonLoadModel The output load model
+   * @param hadoopConf needed to read the CSV header if 'fileheader' is not
set in the
+   * user-provided load options
+ */
+ public void build(
--- End diff --
This code was moved from DataLoadingUtil.scala in the carbon-spark module.
---