Jackie-Jiang commented on a change in pull request #3813: Refactor Hadoop Jobs URL: https://github.com/apache/incubator-pinot/pull/3813#discussion_r255651941
########## File path: pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/SegmentCreationJob.java ########## @@ -35,328 +37,261 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.JobContext; import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; -import org.apache.pinot.common.Utils; import org.apache.pinot.common.config.SegmentsValidationAndRetentionConfig; import org.apache.pinot.common.config.TableConfig; import org.apache.pinot.common.data.Schema; -import org.apache.pinot.hadoop.job.mapper.HadoopSegmentCreationMapReduceJob; +import org.apache.pinot.common.utils.StringUtil; +import org.apache.pinot.hadoop.job.mapper.SegmentCreationMapper; import org.apache.pinot.hadoop.utils.PushLocation; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -public class SegmentCreationJob extends Configured { - private static final Logger LOGGER = LoggerFactory.getLogger(SegmentCreationJob.class); +public class SegmentCreationJob extends BaseSegmentJob { + protected static final String APPEND = "APPEND"; - private static final String PATH_TO_DEPS_JAR = "path.to.deps.jar"; - private static final String APPEND = "APPEND"; + protected final Path _inputPattern; + protected final Path _outputDir; + protected final Path _stagingDir; + protected final String _rawTableName; - private final String _jobName; - private final Properties _properties; + // Optional + protected final Path _depsJarDir; + protected final Path _schemaFile; + protected final String _defaultPermissionsMask; + protected final List<PushLocation> _pushLocations; - private final String _inputSegmentDir; - private final String _stagingDir; - private final Schema _dataSchema; - private final String _depsJarPath; - 
private final String _outputDir; - private final String _tableName; + protected FileSystem _fileSystem; - private final String _readerConfigFile; + public SegmentCreationJob(Properties properties) { + super(properties); + _conf.set("mapreduce.job.user.classpath.first", "true"); - private final String _defaultPermissionsMask; + _inputPattern = Preconditions.checkNotNull(getPathFromProperty(JobConfigConstants.PATH_TO_INPUT)); + _outputDir = Preconditions.checkNotNull(getPathFromProperty(JobConfigConstants.PATH_TO_OUTPUT)); + _stagingDir = new Path(_outputDir, UUID.randomUUID().toString()); + _rawTableName = Preconditions.checkNotNull(_properties.getProperty(JobConfigConstants.SEGMENT_TABLE_NAME)); - private String[] _hosts; - private int _port; + // Optional + _depsJarDir = getPathFromProperty(JobConfigConstants.PATH_TO_DEPS_JAR); + _schemaFile = getPathFromProperty(JobConfigConstants.PATH_TO_SCHEMA); + _defaultPermissionsMask = _properties.getProperty(JobConfigConstants.DEFAULT_PERMISSIONS_MASK); Review comment: Yes, correct. We'll do a null check before applying it. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@pinot.apache.org For additional commands, e-mail: dev-h...@pinot.apache.org