[GitHub] spark pull request: [SPARK-14654][CORE] New accumulator API

rxin Wed, 27 Apr 2016 19:36:43 -0700

Github user rxin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/12612#discussion_r61366808
  
    --- Diff: core/src/main/scala/org/apache/spark/NewAccumulator.scala ---
    @@ -0,0 +1,356 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.spark
    +
    +import java.{lang => jl}
    +import java.io.ObjectInputStream
    +import java.util.concurrent.atomic.AtomicLong
    +import javax.annotation.concurrent.GuardedBy
    +
    +import org.apache.spark.scheduler.AccumulableInfo
    +import org.apache.spark.util.Utils
    +
    +
    +private[spark] case class AccumulatorMetadata(
    +    id: Long,
    +    name: Option[String],
    +    countFailedValues: Boolean) extends Serializable
    +
    +
    +/**
    + * The base class for accumulators, which can accumulate inputs of type `IN` and produce output of
    + * type `OUT`.  Implementations must define the following methods:
    + *  - isZero:       tells whether this accumulator holds its zero value, e.g. for a counter
    + *                  accumulator, 0 is the zero value; for a list accumulator, Nil is the zero value.
    + *  - copyAndReset: creates a new copy of this accumulator that holds the zero value, i.e. calling
    + *                  `isZero` on the copy must return true.
    + *  - add:          defines how to accumulate the inputs. e.g. it can be a 
simple `+=` for counter
    + *                  accumulator
    + *  - merge:        defines how to merge another accumulator of same type.
    + *  - localValue:   defines how to produce the output by the current state 
of this accumulator.
    + *
    + * The implementations decide how to store intermediate values, e.g. a long field for a counter
    + * accumulator, a double and a long field for an average accumulator (storing the sum and count).
    + */
    +abstract class NewAccumulator[IN, OUT] extends Serializable {
    +  private[spark] var metadata: AccumulatorMetadata = _
    +  private[this] var atDriverSide = true
    +
    +  private[spark] def register(
    +      sc: SparkContext,
    +      name: Option[String] = None,
    +      countFailedValues: Boolean = false): Unit = {
    +    if (this.metadata != null) {
    +      throw new IllegalStateException("Cannot register an Accumulator 
twice.")
    +    }
    +    this.metadata = AccumulatorMetadata(AccumulatorContext.newId(), name, 
countFailedValues)
    +    AccumulatorContext.register(this)
    +    sc.cleaner.foreach(_.registerAccumulatorForCleanup(this))
    +  }
    +
    +  final def isRegistered: Boolean =
    +    metadata != null && 
AccumulatorContext.originals.containsKey(metadata.id)
    +
    +  private def assertMetadataNotNull(): Unit = {
    +    if (metadata == null) {
    +      throw new IllegalAccessError("The metadata of this accumulator has 
not been assigned yet.")
    +    }
    +  }
    +
    +  final def id: Long = {
    +    assertMetadataNotNull()
    +    metadata.id
    +  }
    +
    +  final def name: Option[String] = {
    +    assertMetadataNotNull()
    +    metadata.name
    +  }
    +
    +  final def countFailedValues: Boolean = {
    +    assertMetadataNotNull()
    +    metadata.countFailedValues
    +  }
    +
    +  private[spark] def toInfo(update: Option[Any], value: Option[Any]): 
AccumulableInfo = {
    +    val isInternal = 
name.exists(_.startsWith(InternalAccumulator.METRICS_PREFIX))
    +    new AccumulableInfo(id, name, update, value, isInternal, 
countFailedValues)
    +  }
    +
    +  final private[spark] def isAtDriverSide: Boolean = atDriverSide
    +
    +  def isZero(): Boolean
    +
    +  def copyAndReset(): NewAccumulator[IN, OUT]
    +
    +  def add(v: IN): Unit
    +
    +  def +=(v: IN): Unit = add(v)
    --- End diff --
    
    remove this - I'd rather keep the API minimal for now.



---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to enable it, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request: [SPARK-14654][CORE] New accumulator API

Reply via email to