Github user nickwallen commented on a diff in the pull request:

    https://github.com/apache/metron/pull/1150#discussion_r208320208
  
    --- Diff: 
metron-analytics/metron-profiler-spark/src/main/java/org/apache/metron/profiler/spark/BatchProfiler.java
 ---
    @@ -0,0 +1,188 @@
    +/*
    + *
    + *  Licensed to the Apache Software Foundation (ASF) under one
    + *  or more contributor license agreements.  See the NOTICE file
    + *  distributed with this work for additional information
    + *  regarding copyright ownership.  The ASF licenses this file
    + *  to you under the Apache License, Version 2.0 (the
    + *  "License"); you may not use this file except in compliance
    + *  with the License.  You may obtain a copy of the License at
    + *
    + *      http://www.apache.org/licenses/LICENSE-2.0
    + *
    + *  Unless required by applicable law or agreed to in writing, software
    + *  distributed under the License is distributed on an "AS IS" BASIS,
    + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 
implied.
    + *  See the License for the specific language governing permissions and
    + *  limitations under the License.
    + *
    + */
    +package org.apache.metron.profiler.spark;
    +
    +import com.google.common.collect.Maps;
    +import org.apache.metron.common.configuration.profiler.ProfilerConfig;
    +import org.apache.metron.profiler.MessageRoute;
    +import org.apache.metron.profiler.ProfilePeriod;
    +import org.apache.metron.profiler.spark.function.HBaseWriterFunction;
    +import org.apache.metron.profiler.spark.function.MessageRouterFunction;
    +import org.apache.metron.profiler.spark.function.ProfileBuilderFunction;
    +import org.apache.spark.api.java.function.MapFunction;
    +import org.apache.spark.sql.Dataset;
    +import org.apache.spark.sql.Encoders;
    +import org.apache.spark.sql.KeyValueGroupedDataset;
    +import org.apache.spark.sql.SparkSession;
    +import org.slf4j.Logger;
    +import org.slf4j.LoggerFactory;
    +
    +import java.io.Serializable;
    +import java.lang.invoke.MethodHandles;
    +import java.util.Map;
    +import java.util.Properties;
    +import java.util.concurrent.TimeUnit;
    +
    +import static 
org.apache.metron.profiler.spark.BatchProfilerConfig.PERIOD_DURATION;
    +import static 
org.apache.metron.profiler.spark.BatchProfilerConfig.PERIOD_DURATION_UNITS;
    +import static 
org.apache.metron.profiler.spark.BatchProfilerConfig.TELEMETRY_INPUT_FORMAT;
    +import static 
org.apache.metron.profiler.spark.BatchProfilerConfig.TELEMETRY_INPUT_PATH;
    +import static org.apache.spark.sql.functions.sum;
    +
    +/**
    + * A Profiler that generates profiles by consuming data in batch from 
archived telemetry.
    + *
    + * <p>The Batch Profiler is executed in Spark.
    + */
    +public class BatchProfiler implements Serializable {
    +
    +  protected static final Logger LOG = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
    +
    +  /**
    +   * Execute the Batch Profiler.
    +   *
    +   * @param spark The spark session.
    +   * @param properties The profiler configuration properties.
    +   * @param profiles The profile definitions.
    +   * @return The number of profile measurements produced.
    +   */
    +  public long execute(SparkSession spark,
    --- End diff --
    
    This is the main method that drives the Batch Profiler.  It's a good place 
to start reviewing.


---

Reply via email to