Github user jackylk commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/1970#discussion_r167465747
  
    --- Diff: 
processing/src/main/java/org/apache/carbondata/processing/loading/model/CarbonLoadModelBuilder.java
 ---
    @@ -0,0 +1,322 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.carbondata.processing.loading.model;
    +
    +import java.io.IOException;
    +import java.text.SimpleDateFormat;
    +import java.util.List;
    +import java.util.Map;
    +
    +import org.apache.carbondata.common.Maps;
    +import org.apache.carbondata.common.Strings;
    +import org.apache.carbondata.common.annotations.InterfaceAudience;
    +import org.apache.carbondata.common.constants.LoggerAction;
    +import 
org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException;
    +import org.apache.carbondata.core.constants.CarbonCommonConstants;
    +import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
    +import 
org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
    +import org.apache.carbondata.core.util.CarbonProperties;
    +import org.apache.carbondata.core.util.CarbonUtil;
    +import 
org.apache.carbondata.processing.loading.constants.DataLoadProcessorConstants;
    +import org.apache.carbondata.processing.loading.csvinput.CSVInputFormat;
    +import org.apache.carbondata.processing.loading.sort.SortScopeOptions;
    +import org.apache.carbondata.processing.util.TableOptionConstant;
    +
    +import org.apache.commons.lang.StringUtils;
    +import org.apache.hadoop.conf.Configuration;
    +
    +/**
    + * Builder for {@link CarbonLoadModel}
    + */
    [email protected]
    +public class CarbonLoadModelBuilder {
    +
    +  private CarbonTable table;
    +
    +  public CarbonLoadModelBuilder(CarbonTable table) {
    +    this.table = table;
    +  }
    +
    +  /**
    +   * build CarbonLoadModel for data loading
    +   * @param options Load options from user input
    +   * @return a new CarbonLoadModel instance
    +   */
    +  public CarbonLoadModel build(
    +      Map<String, String> options) throws InvalidLoadOptionException, 
IOException {
    +    Map<String, String> optionsFinal = 
LoadOption.fillOptionWithDefaultValue(options);
    +    optionsFinal.put("sort_scope", "no_sort");
    +    if (!options.containsKey("fileheader")) {
    +      List<CarbonColumn> csvHeader = 
table.getCreateOrderColumn(table.getTableName());
    +      String[] columns = new String[csvHeader.size()];
    +      for (int i = 0; i < columns.length; i++) {
    +        columns[i] = csvHeader.get(i).getColName();
    +      }
    +      optionsFinal.put("fileheader", Strings.mkString(columns, ","));
    +    }
    +    CarbonLoadModel model = new CarbonLoadModel();
    +
    +    // we have provided 'fileheader', so it hadoopConf can be null
    +    build(options, optionsFinal, model, null);
    +
    +    // set default values
    +    
model.setTimestampformat(CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT);
    +    model.setDateFormat(CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT);
    +    model.setUseOnePass(Boolean.parseBoolean(Maps.getOrDefault(options, 
"onepass", "false")));
    +    model.setDictionaryServerHost(Maps.getOrDefault(options, "dicthost", 
null));
    +    try {
    +      
model.setDictionaryServerPort(Integer.parseInt(Maps.getOrDefault(options, 
"dictport", "-1")));
    +    } catch (NumberFormatException e) {
    +      throw new InvalidLoadOptionException(e.getMessage());
    +    }
    +    return model;
    +  }
    +
    +  /**
    +   * build CarbonLoadModel for data loading
    +   * @param options Load options from user input
    +   * @param optionsFinal Load options that populated with default values 
for optional options
    +   * @param carbonLoadModel The output load model
    +   * @param hadoopConf hadoopConf is needed to read CSV header if there 
'fileheader' is not set in
    +   *                   user provided load options
    +   */
    +  public void build(
    --- End diff --
    
    This code was moved from DataLoadingUtil.scala in the carbon-spark module.


---

Reply via email to