spark git commit: [SPARK-20665][SQL][FOLLOW-UP] Move test case to MathExpressionsSuite

2017-06-11 Thread lixiao
Repository: spark
Updated Branches:
  refs/heads/master 3476390c6 -> d14091809


[SPARK-20665][SQL][FOLLOW-UP] Move test case to MathExpressionsSuite

## What changes were proposed in this pull request?

Add a test case to MathExpressionsSuite, as a follow-up to #17906.

## How was this patch tested?

unit test cases

Author: liuxian 

Closes #18082 from 10110346/wip-lx-0524.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d1409180
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d1409180
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d1409180

Branch: refs/heads/master
Commit: d1409180932f2658daad2c6dbf5d80fdf4606dc5
Parents: 3476390
Author: liuxian 
Authored: Sun Jun 11 22:29:09 2017 -0700
Committer: Xiao Li 
Committed: Sun Jun 11 22:29:09 2017 -0700

--
 .../expressions/MathExpressionsSuite.scala  | 64 
 1 file changed, 52 insertions(+), 12 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/d1409180/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
--
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
index 6af0cde..f4d5a44 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
@@ -23,6 +23,7 @@ import com.google.common.math.LongMath
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.analysis.TypeCoercion.ImplicitTypeCasts.implicitCast
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.expressions.codegen.GenerateMutableProjection
 import org.apache.spark.sql.catalyst.optimizer.SimpleTestOptimizer
@@ -223,6 +224,14 @@ class MathExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
 def f: (Double) => Double = (x: Double) => 1 / math.tan(x)
 testUnary(Cot, f)
 checkConsistencyBetweenInterpretedAndCodegen(Cot, DoubleType)
+val nullLit = Literal.create(null, NullType)
+val intNullLit = Literal.create(null, IntegerType)
+val intLit = Literal.create(1, IntegerType)
+checkEvaluation(checkDataTypeAndCast(Cot(nullLit)), null, EmptyRow)
+checkEvaluation(checkDataTypeAndCast(Cot(intNullLit)), null, EmptyRow)
+checkEvaluation(checkDataTypeAndCast(Cot(intLit)), 1 / math.tan(1), EmptyRow)
+checkEvaluation(checkDataTypeAndCast(Cot(-intLit)), 1 / math.tan(-1), EmptyRow)
+checkEvaluation(checkDataTypeAndCast(Cot(0)), 1 / math.tan(0), EmptyRow)
   }
 
   test("atan") {
@@ -250,6 +259,11 @@ class MathExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
 checkConsistencyBetweenInterpretedAndCodegen(Cbrt, DoubleType)
   }
 
+  def checkDataTypeAndCast(expression: UnaryMathExpression): Expression = {
+val expNew = implicitCast(expression.child, expression.inputTypes(0)).getOrElse(expression)
+expression.withNewChildren(Seq(expNew))
+  }
+
   test("ceil") {
 testUnary(Ceil, (d: Double) => math.ceil(d).toLong)
 checkConsistencyBetweenInterpretedAndCodegen(Ceil, DoubleType)
@@ -262,12 +276,22 @@ class MathExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
 val doublePi: Double = 3.1415
 val floatPi: Float = 3.1415f
 val longLit: Long = 12345678901234567L
-checkEvaluation(Ceil(doublePi), 4L, EmptyRow)
-checkEvaluation(Ceil(floatPi.toDouble), 4L, EmptyRow)
-checkEvaluation(Ceil(longLit), longLit, EmptyRow)
-checkEvaluation(Ceil(-doublePi), -3L, EmptyRow)
-checkEvaluation(Ceil(-floatPi.toDouble), -3L, EmptyRow)
-checkEvaluation(Ceil(-longLit), -longLit, EmptyRow)
+val nullLit = Literal.create(null, NullType)
+val floatNullLit = Literal.create(null, FloatType)
+checkEvaluation(checkDataTypeAndCast(Ceil(doublePi)), 4L, EmptyRow)
+checkEvaluation(checkDataTypeAndCast(Ceil(floatPi)), 4L, EmptyRow)
+checkEvaluation(checkDataTypeAndCast(Ceil(longLit)), longLit, EmptyRow)
+checkEvaluation(checkDataTypeAndCast(Ceil(-doublePi)), -3L, EmptyRow)
+checkEvaluation(checkDataTypeAndCast(Ceil(-floatPi)), -3L, EmptyRow)
+checkEvaluation(checkDataTypeAndCast(Ceil(-longLit)), -longLit, EmptyRow)
+
+checkEvaluation(checkDataTypeAndCast(Ceil(nullLit)), null, EmptyRow)
+checkEvaluation(checkDataTypeAndCast(Ceil(floatNullLit)), null, EmptyRow)
+checkEvaluation(checkDataTypeAndCast(Ceil(0)), 0L, EmptyRow)
+checkEvaluation(checkDataTypeAndCast(Ceil(1)), 1L, EmptyRow)
+ 
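For context, here is a minimal sketch of the pattern the new `checkDataTypeAndCast` helper exercises (names are taken from the hunk above, but the wiring is illustrative rather than the committed test code): an integer argument is implicitly cast to the expression's expected input type before evaluation, the way the analyzer would coerce `SELECT cot(1)`.

```scala
import org.apache.spark.sql.catalyst.analysis.TypeCoercion.ImplicitTypeCasts.implicitCast
import org.apache.spark.sql.catalyst.expressions.{Cot, Expression, Literal, UnaryMathExpression}
import org.apache.spark.sql.types.IntegerType

// Illustrative only: re-wrap a unary math expression so that its child is cast
// to the expected input type (DoubleType for Cot), mirroring the suite's helper.
def withImplicitCast(expr: UnaryMathExpression): Expression = {
  val coerced = implicitCast(expr.child, expr.inputTypes.head).getOrElse(expr.child)
  expr.withNewChildren(Seq(coerced))
}

// The integer literal 1 is coerced to 1.0, so evaluation matches 1 / math.tan(1).
val cot = withImplicitCast(Cot(Literal.create(1, IntegerType)))
assert(cot.eval() == 1 / math.tan(1))
```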

spark git commit: [SPARK-20715] Store MapStatuses only in MapOutputTracker, not ShuffleMapStage

2017-06-11 Thread joshrosen
Repository: spark
Updated Branches:
  refs/heads/master f48273c13 -> 3476390c6


[SPARK-20715] Store MapStatuses only in MapOutputTracker, not ShuffleMapStage

## What changes were proposed in this pull request?

This PR refactors `ShuffleMapStage` and `MapOutputTracker` in order to simplify 
the management of `MapStatuses`, reduce driver memory consumption, and remove a 
potential source of scheduler correctness bugs.

### Background

In Spark there are currently two places where MapStatuses are tracked:

- The `MapOutputTracker` maintains an `Array[MapStatus]` storing a single 
location for each map output. This mapping is used by the `DAGScheduler` for 
determining reduce-task locality preferences (when locality-aware reduce task 
scheduling is enabled) and is also used to serve map output locations to 
executors / tasks.
- Each `ShuffleMapStage` also contains a mapping of `Array[List[MapStatus]]` 
which holds the complete set of locations where each map output could be 
available. This mapping is used to determine which map tasks need to be run 
when constructing `TaskSets` for the stage.

This duplication adds complexity and creates the potential for certain types of 
correctness bugs.  Bad things can happen if these two copies of the map output 
locations get out of sync. For instance, if the `MapOutputTracker` is missing 
locations for a map output but `ShuffleMapStage` believes that locations are 
available then tasks will fail with `MetadataFetchFailedException` but 
`ShuffleMapStage` will not be updated to reflect the missing map outputs, 
leading to situations where the stage will be reattempted (because downstream 
stages experienced fetch failures) but no task sets will be launched (because 
`ShuffleMapStage` thinks all maps are available).

I observed this behavior in a real-world deployment. I'm still not quite sure 
how the state got out of sync in the first place, but we can completely avoid 
this class of bug if we eliminate the duplicate state.

### Why we only need to track a single location for each map output

I think that storing an `Array[List[MapStatus]]` in `ShuffleMapStage` is 
unnecessary.

First, note that this adds memory/object bloat to the driver: we need one extra 
`List` per task. If you have millions of tasks across all stages, this can 
add up to a significant amount of resources.

Secondly, I believe that it's extremely uncommon that these lists will ever 
contain more than one entry. It's not impossible, but is very unlikely given 
the conditions which must occur for that to happen:

- In normal operation (no task failures) we'll only run each task once and thus 
will have at most one output.
- If speculation is enabled then it's possible that we'll have multiple 
attempts of a task. The TaskSetManager will [kill duplicate attempts of a 
task](https://github.com/apache/spark/blob/04901dd03a3f8062fd39ea38d585935ff71a9248/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala#L717)
 after a task finishes successfully, reducing the likelihood that both the 
original and speculated task will successfully register map outputs.
- There is a [comment in 
`TaskSetManager`](https://github.com/apache/spark/blob/04901dd03a3f8062fd39ea38d585935ff71a9248/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala#L113)
 which suggests that running tasks are not killed if a task set becomes a 
zombie. However:
  - If the task set becomes a zombie due to the job being cancelled then it 
doesn't matter whether we record map outputs.
  - If the task set became a zombie because of a stage failure (e.g. the map 
stage itself had a fetch failure from an upstream map stage) then I believe 
that the "failedEpoch" will be updated which may cause map outputs from 
still-running tasks to [be 
ignored](https://github.com/apache/spark/blob/04901dd03a3f8062fd39ea38d585935ff71a9248/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala#L1213).
 (I'm not 100% sure on this point, though).
- Even if you _do_ manage to record multiple map outputs for a stage, only a 
single map output is reported to / tracked by the MapOutputTracker. The only 
situation where the additional output locations could actually be read or used 
would be if a task experienced a `FetchFailure` exception. The most likely 
cause of a `FetchFailure` exception is a lost executor, which will most likely 
have caused the loss of several map tasks' output, so saving on the potential 
re-execution of a single map task isn't a huge win if we're going to have to 
recompute several other lost map outputs from other tasks which ran on that 
lost executor. Also note that the re-population of MapOutputTracker state from 
state in the ShuffleMapStage only happens after the reduce stage has failed; the 
additional location doesn't help to prevent FetchFailures but, instead, can 
only reduce the amount of work when recomputing missing parent stages.

Given this, this patch chooses to do away with tracking m
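To make the single-source-of-truth idea concrete, here is a small, self-contained sketch (simplified types and names, not Spark's actual `MapOutputTracker` API) of how the set of missing map outputs can be derived from a single array of statuses, so there is no second per-stage copy to fall out of sync:

```scala
// Simplified stand-ins for Spark's classes, for illustration only.
case class MapStatus(execId: String)

class SimpleOutputTracker(numMaps: Int) {
  // One slot per map partition; None means the output is missing.
  private val statuses = Array.fill[Option[MapStatus]](numMaps)(None)

  def registerMapOutput(partition: Int, status: MapStatus): Unit =
    statuses(partition) = Some(status)

  // Called e.g. on executor loss: invalidate every output on that executor.
  def removeOutputsOnExecutor(execId: String): Unit =
    for (i <- statuses.indices if statuses(i).exists(_.execId == execId))
      statuses(i) = None

  // The scheduler asks this single copy which map tasks still need to run, so it
  // can never disagree with a separate, per-stage list of output locations.
  def findMissingPartitions(): Seq[Int] =
    statuses.indices.filterNot(statuses(_).isDefined)
}

val tracker = new SimpleOutputTracker(numMaps = 3)
tracker.registerMapOutput(0, MapStatus("exec-1"))
tracker.registerMapOutput(1, MapStatus("exec-2"))
tracker.removeOutputsOnExecutor("exec-1")
assert(tracker.findMissingPartitions() == Seq(0, 2))
```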

spark git commit: [SPARK-18891][SQL] Support for specific Java List subtypes

2017-06-11 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/master 0538f3b0a -> f48273c13


[SPARK-18891][SQL] Support for specific Java List subtypes

## What changes were proposed in this pull request?

Add support for specific Java `List` subtypes in deserialization as well as a 
generic implicit encoder.

All `List` subtypes are supported by using either the size-specifying 
constructor (one `int` parameter) or the default constructor.
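A hedged sketch of that construction strategy (reflection based and simplified; the actual support is emitted by the generated code in `objects.scala` shown below): try the one-`int` constructor first and fall back to the default constructor.

```scala
import scala.util.Try

// Instantiate a concrete java.util.List subtype, preferring the size-specifying
// constructor and falling back to the no-arg one when it does not exist.
def newJavaList[T](cls: Class[_ <: java.util.List[T]], size: Int): java.util.List[T] = {
  val viaSizeCtor = Try(cls.getConstructor(classOf[Int]).newInstance(Integer.valueOf(size)))
  viaSizeCtor.getOrElse(cls.getConstructor().newInstance())
}

newJavaList(classOf[java.util.ArrayList[Int]], 4)   // uses ArrayList(int)
newJavaList(classOf[java.util.LinkedList[Int]], 4)  // no int constructor, uses LinkedList()
```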

Interfaces/abstract classes use the following implementations:

* `java.util.List`, `java.util.AbstractList` or 
`java.util.AbstractSequentialList` => `java.util.ArrayList`

## How was this patch tested?

```bash
build/mvn -DskipTests clean package && dev/run-tests
```

Additionally in Spark shell:

```
scala> val jlist = new java.util.LinkedList[Int]; jlist.add(1)
jlist: java.util.LinkedList[Int] = [1]
res0: Boolean = true

scala> Seq(jlist).toDS().map(_.element()).collect()
res1: Array[Int] = Array(1)
```

Author: Michal Senkyr 

Closes #18009 from michalsenkyr/dataset-java-lists.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f48273c1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f48273c1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f48273c1

Branch: refs/heads/master
Commit: f48273c13c9e9fea2d9bb6dda10fca50c588
Parents: 0538f3b
Author: Michal Senkyr 
Authored: Mon Jun 12 08:53:23 2017 +0800
Committer: Wenchen Fan 
Committed: Mon Jun 12 08:53:23 2017 +0800

--
 .../spark/sql/catalyst/JavaTypeInference.scala  | 15 ++---
 .../catalyst/expressions/objects/objects.scala  | 19 +-
 .../org/apache/spark/sql/JavaDatasetSuite.java  | 61 
 3 files changed, 83 insertions(+), 12 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/f48273c1/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
index 86a73a3..7683ee7 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
@@ -267,16 +267,11 @@ object JavaTypeInference {
 
   case c if listType.isAssignableFrom(typeToken) =>
 val et = elementType(typeToken)
-val array =
-  Invoke(
-MapObjects(
-  p => deserializerFor(et, Some(p)),
-  getPath,
-  inferDataType(et)._1),
-"array",
-ObjectType(classOf[Array[Any]]))
-
-StaticInvoke(classOf[java.util.Arrays], ObjectType(c), "asList", array 
:: Nil)
+MapObjects(
+  p => deserializerFor(et, Some(p)),
+  getPath,
+  inferDataType(et)._1,
+  customCollectionCls = Some(c))
 
   case _ if mapType.isAssignableFrom(typeToken) =>
 val (keyType, valueType) = mapKeyValueType(typeToken)

http://git-wip-us.apache.org/repos/asf/spark/blob/f48273c1/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
index 79b7b9f..5bb0feb 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
@@ -22,6 +22,7 @@ import java.lang.reflect.Modifier
 import scala.collection.mutable.Builder
 import scala.language.existentials
 import scala.reflect.ClassTag
+import scala.util.Try
 
 import org.apache.spark.{SparkConf, SparkEnv}
 import org.apache.spark.serializer._
@@ -597,8 +598,8 @@ case class MapObjects private(
 
 val (initCollection, addElement, getResult): (String, String => String, 
String) =
   customCollectionCls match {
-case Some(cls) =>
-  // collection
+case Some(cls) if classOf[Seq[_]].isAssignableFrom(cls) =>
+  // Scala sequence
   val getBuilder = s"${cls.getName}$$.MODULE$$.newBuilder()"
   val builder = ctx.freshName("collectionBuilder")
   (
@@ -609,6 +610,20 @@ case class MapObjects private(
 genValue => s"$builder.$$plus$$eq($genValue);",
 s"(${cls.getName}) $builder.result();"
   )
+case Some(cls) if classOf[java.util.List[_]].isAssignableFrom(cls) =>
+  // Java list
+  val builder = ctx.freshName("collectionBuilder

spark git commit: [SPARK-18891][SQL] Support for Scala Map collection types

2017-06-11 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/master a7c61c100 -> 0538f3b0a


[SPARK-18891][SQL] Support for Scala Map collection types

## What changes were proposed in this pull request?

Add support for arbitrary Scala `Map` types in deserialization as well as a 
generic implicit encoder.

Used the builder approach as in #16541 to construct any provided `Map` type 
upon deserialization.
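A hedged sketch of the builder idea in plain Scala (the PR itself emits equivalent Java through `CollectObjectsToMap`, as visible in the codegen dump below): any target `Map` type whose companion exposes `newBuilder` can be populated entry by entry and then materialized.

```scala
import scala.collection.immutable.TreeMap
import scala.collection.mutable.Builder

// Populate a builder entry by entry and materialize the target Map type;
// the generated deserializer follows the same shape.
def buildMap[K, V, M <: Map[K, V]](entries: Seq[(K, V)], builder: Builder[(K, V), M]): M = {
  builder.sizeHint(entries.size)
  entries.foreach(builder += _)
  builder.result()
}

// Same entries, two different target Map implementations.
val asHashMap = buildMap(Seq(1 -> "a", 2 -> "b"), Map.newBuilder[Int, String])
val asTreeMap = buildMap(Seq(2 -> "b", 1 -> "a"), TreeMap.newBuilder[Int, String])
// asTreeMap.keys come back in sorted order: 1, 2
```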

Please note that this PR also adds (ignored) tests for issue [SPARK-19104 
CompileException with Map and Case Class in Spark 
2.1.0](https://issues.apache.org/jira/browse/SPARK-19104) but doesn't solve it.

Added support for Java Maps in codegen code (encoders will be added in a 
different PR) with the following default implementations for 
interfaces/abstract classes:

* `java.util.Map`, `java.util.AbstractMap` => `java.util.HashMap`
* `java.util.SortedMap`, `java.util.NavigableMap` => `java.util.TreeMap`
* `java.util.concurrent.ConcurrentMap` => 
`java.util.concurrent.ConcurrentHashMap`
* `java.util.concurrent.ConcurrentNavigableMap` => 
`java.util.concurrent.ConcurrentSkipListMap`

Resulting codegen for `Seq(Map(1 -> 
2)).toDS().map(identity).queryExecution.debug.codegen`:

```
/* 001 */ public Object generate(Object[] references) {
/* 002 */   return new GeneratedIterator(references);
/* 003 */ }
/* 004 */
/* 005 */ final class GeneratedIterator extends 
org.apache.spark.sql.execution.BufferedRowIterator {
/* 006 */   private Object[] references;
/* 007 */   private scala.collection.Iterator[] inputs;
/* 008 */   private scala.collection.Iterator inputadapter_input;
/* 009 */   private boolean CollectObjectsToMap_loopIsNull1;
/* 010 */   private int CollectObjectsToMap_loopValue0;
/* 011 */   private boolean CollectObjectsToMap_loopIsNull3;
/* 012 */   private int CollectObjectsToMap_loopValue2;
/* 013 */   private UnsafeRow deserializetoobject_result;
/* 014 */   private 
org.apache.spark.sql.catalyst.expressions.codegen.BufferHolder 
deserializetoobject_holder;
/* 015 */   private 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter 
deserializetoobject_rowWriter;
/* 016 */   private scala.collection.immutable.Map mapelements_argValue;
/* 017 */   private UnsafeRow mapelements_result;
/* 018 */   private 
org.apache.spark.sql.catalyst.expressions.codegen.BufferHolder 
mapelements_holder;
/* 019 */   private 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter 
mapelements_rowWriter;
/* 020 */   private UnsafeRow serializefromobject_result;
/* 021 */   private 
org.apache.spark.sql.catalyst.expressions.codegen.BufferHolder 
serializefromobject_holder;
/* 022 */   private 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter 
serializefromobject_rowWriter;
/* 023 */   private 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeArrayWriter 
serializefromobject_arrayWriter;
/* 024 */   private 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeArrayWriter 
serializefromobject_arrayWriter1;
/* 025 */
/* 026 */   public GeneratedIterator(Object[] references) {
/* 027 */ this.references = references;
/* 028 */   }
/* 029 */
/* 030 */   public void init(int index, scala.collection.Iterator[] inputs) {
/* 031 */ partitionIndex = index;
/* 032 */ this.inputs = inputs;
/* 033 */ wholestagecodegen_init_0();
/* 034 */ wholestagecodegen_init_1();
/* 035 */
/* 036 */   }
/* 037 */
/* 038 */   private void wholestagecodegen_init_0() {
/* 039 */ inputadapter_input = inputs[0];
/* 040 */
/* 041 */ deserializetoobject_result = new UnsafeRow(1);
/* 042 */ this.deserializetoobject_holder = new 
org.apache.spark.sql.catalyst.expressions.codegen.BufferHolder(deserializetoobject_result,
 32);
/* 043 */ this.deserializetoobject_rowWriter = new 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(deserializetoobject_holder,
 1);
/* 044 */
/* 045 */ mapelements_result = new UnsafeRow(1);
/* 046 */ this.mapelements_holder = new 
org.apache.spark.sql.catalyst.expressions.codegen.BufferHolder(mapelements_result,
 32);
/* 047 */ this.mapelements_rowWriter = new 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(mapelements_holder,
 1);
/* 048 */ serializefromobject_result = new UnsafeRow(1);
/* 049 */ this.serializefromobject_holder = new 
org.apache.spark.sql.catalyst.expressions.codegen.BufferHolder(serializefromobject_result,
 32);
/* 050 */ this.serializefromobject_rowWriter = new 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(serializefromobject_holder,
 1);
/* 051 */ this.serializefromobject_arrayWriter = new 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeArrayWriter();
/* 052 */
/* 053 */   }
/* 054 */
/* 055 */   private void wholestagecodegen_init_1() {
/* 056 */ this.serializefromobject_arrayWriter1 = new 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeArrayWriter();
/* 057 */
/* 058 */   }
/* 059 */
/* 060 */   protected void processNext() throw

spark git commit: [SPARK-21031][SQL] Add `alterTableStats` to store spark's stats and let `alterTable` keep existing stats

2017-06-11 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/master 3a840048e -> a7c61c100


[SPARK-21031][SQL] Add `alterTableStats` to store spark's stats and let 
`alterTable` keep existing stats

## What changes were proposed in this pull request?

Currently, hive's stats are read into `CatalogStatistics`, while spark's stats 
are also persisted through `CatalogStatistics`. As a result, hive's stats can 
be unexpectedly propagated into spark's stats.

For example, for a catalog table, we read stats from hive, e.g. "totalSize", and 
put them into `CatalogStatistics`. Then, when an "ALTER TABLE" command runs, we 
store the stats in `CatalogStatistics` into the metastore as spark's stats (because 
we don't know whether they came from spark or not). But spark's stats should only 
be generated by the "ANALYZE" command, so this is unexpected behavior for "ALTER TABLE".

Secondly, now that we have spark's stats in the metastore, after inserting new 
data we still cannot get the right `sizeInBytes` in `CatalogStatistics`, even 
though hive has updated "totalSize" in the metastore, because we prefer spark's 
stats (which should not exist in this case) over hive's stats.

A running example is shown in 
[JIRA](https://issues.apache.org/jira/browse/SPARK-21031).

To fix this, we add a new method `alterTableStats` to store spark's stats, and 
let `alterTable` keep existing stats.
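As a hedged usage sketch (the method names come from the diff below; the catalog instance and table are assumed), the two code paths are now meant to diverge like this:

```scala
import org.apache.spark.sql.catalyst.catalog.{CatalogStatistics, ExternalCatalog}

val catalog: ExternalCatalog = ???  // assumed: some concrete implementation, e.g. HiveExternalCatalog

// ANALYZE TABLE ... COMPUTE STATISTICS: spark-generated stats go through the
// new, dedicated API, so only "ANALYZE" ever writes spark's stats.
val computed = CatalogStatistics(sizeInBytes = BigInt(1024), rowCount = Some(BigInt(10)))
catalog.alterTableStats("default", "t", computed)

// Other ALTER TABLE paths go through alterTable, which now leaves whatever
// stats already exist in the metastore untouched.
val table = catalog.getTable("default", "t")
catalog.alterTable(table.copy(properties = table.properties + ("example.prop" -> "value")))
```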

## How was this patch tested?

Added new tests.

Author: Zhenhua Wang 

Closes #18248 from wzhfy/separateHiveStats.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a7c61c10
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a7c61c10
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a7c61c10

Branch: refs/heads/master
Commit: a7c61c100b6e4380e8d0e588969dd7f2fd58d40c
Parents: 3a84004
Author: Zhenhua Wang 
Authored: Mon Jun 12 08:23:04 2017 +0800
Committer: Wenchen Fan 
Committed: Mon Jun 12 08:23:04 2017 +0800

--
 .../sql/catalyst/catalog/ExternalCatalog.scala  |  2 +
 .../sql/catalyst/catalog/InMemoryCatalog.scala  |  9 +++
 .../sql/catalyst/catalog/SessionCatalog.scala   | 13 
 .../catalyst/catalog/ExternalCatalogSuite.scala | 11 ++-
 .../catalyst/catalog/SessionCatalogSuite.scala  | 12 +++
 .../command/AnalyzeColumnCommand.scala  |  2 +-
 .../execution/command/AnalyzeTableCommand.scala |  2 +-
 .../spark/sql/hive/HiveExternalCatalog.scala| 68 ++---
 .../apache/spark/sql/hive/StatisticsSuite.scala | 80 +++-
 9 files changed, 132 insertions(+), 67 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/a7c61c10/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala
index 974ef90..12ba5ae 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala
@@ -160,6 +160,8 @@ abstract class ExternalCatalog
*/
   def alterTableSchema(db: String, table: String, schema: StructType): Unit
 
+  def alterTableStats(db: String, table: String, stats: CatalogStatistics): 
Unit
+
   def getTable(db: String, table: String): CatalogTable
 
   def getTableOption(db: String, table: String): Option[CatalogTable]

http://git-wip-us.apache.org/repos/asf/spark/blob/a7c61c10/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
index 8a5319b..9820522 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
@@ -312,6 +312,15 @@ class InMemoryCatalog(
 catalog(db).tables(table).table = origTable.copy(schema = schema)
   }
 
+  override def alterTableStats(
+  db: String,
+  table: String,
+  stats: CatalogStatistics): Unit = synchronized {
+requireTableExists(db, table)
+val origTable = catalog(db).tables(table).table
+catalog(db).tables(table).table = origTable.copy(stats = Some(stats))
+  }
+
   override def getTable(db: String, table: String): CatalogTable = 
synchronized {
 requireTableExists(db, table)
 catalog(db).tables(table).table

http://git-wip-us.apache.org/repos/asf/spark/blob/a7c61c10/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCa

spark git commit: Fixed typo in sql.functions

2017-06-11 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 9f4ff9552 -> 3a840048e


Fixed typo in sql.functions

## What changes were proposed in this pull request?

I fixed a typo in the Scaladoc for the method `def struct(cols: Column*): 
Column`. 'retained' was misspelt as 'remained'.

## How was this patch tested?
Before:

Creates a new struct column.
   If the input column is a column in a `DataFrame`, or a derived column 
expression
   that is named (i.e. aliased), its name would be **remained** as the 
StructField's name,
   otherwise, the newly generated StructField's name would be auto generated as
   `col` with a suffix `index + 1`, i.e. col1, col2, col3, ...

After:

   Creates a new struct column.
   If the input column is a column in a `DataFrame`, or a derived column 
expression
   that is named (i.e. aliased), its name would be **retained** as the 
StructField's name,
   otherwise, the newly generated StructField's name would be auto generated as
   `col` with a suffix `index + 1`, i.e. col1, col2, col3, ...
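For illustration, a hypothetical spark-shell snippet showing the rule the Scaladoc describes (assumes an active `SparkSession` named `spark`):

```scala
import org.apache.spark.sql.functions._

// assumes `spark` is an active SparkSession (e.g. in spark-shell)
val df = spark.range(1).toDF("id")
// "id" is a named column, so its StructField keeps the name "id";
// the unnamed expression `id + 1` gets an auto-generated name (col2, per the rule above).
df.select(struct(col("id"), col("id") + 1)).printSchema()
```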

Author: sujithjay 

Closes #18254 from sujithjay/fix-typo.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3a840048
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3a840048
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3a840048

Branch: refs/heads/master
Commit: 3a840048ed3501e06260b7c5df18cc0bbdb1505c
Parents: 9f4ff95
Author: sujithjay 
Authored: Sun Jun 11 18:23:57 2017 +0100
Committer: Sean Owen 
Committed: Sun Jun 11 18:23:57 2017 +0100

--
 sql/core/src/main/scala/org/apache/spark/sql/functions.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/3a840048/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 8d0a8c2..8d2e1f3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -1210,7 +1210,7 @@ object functions {
   /**
* Creates a new struct column.
* If the input column is a column in a `DataFrame`, or a derived column 
expression
-   * that is named (i.e. aliased), its name would be remained as the 
StructField's name,
+   * that is named (i.e. aliased), its name would be retained as the 
StructField's name,
* otherwise, the newly generated StructField's name would be auto generated 
as
* `col` with a suffix `index + 1`, i.e. col1, col2, col3, ...
*





spark git commit: [SPARK-20877][SPARKR][FOLLOWUP] clean up after test move

2017-06-11 Thread felixcheung
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 0b0be47e7 -> 26003de55


[SPARK-20877][SPARKR][FOLLOWUP] clean up after test move

clean up after big test move

unit tests, jenkins

Author: Felix Cheung 

Closes #18267 from felixcheung/rtestset2.

(cherry picked from commit 9f4ff9552470fb97ca38bb56bbf43be49a9a316c)
Signed-off-by: Felix Cheung 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/26003de5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/26003de5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/26003de5

Branch: refs/heads/branch-2.2
Commit: 26003de55ba13695649b0d874563a76d71cda88d
Parents: 0b0be47
Author: Felix Cheung 
Authored: Sun Jun 11 03:00:44 2017 -0700
Committer: Felix Cheung 
Committed: Sun Jun 11 03:13:56 2017 -0700

--
 R/pkg/.Rbuildignore |   1 +
 R/pkg/R/install.R   |   2 +-
 R/pkg/R/utils.R |   8 +-
 R/pkg/tests/fulltests/test_Serde.R  |   6 --
 R/pkg/tests/fulltests/test_Windows.R|   7 +-
 R/pkg/tests/fulltests/test_binaryFile.R |   8 --
 R/pkg/tests/fulltests/test_binary_function.R|   6 --
 R/pkg/tests/fulltests/test_broadcast.R  |   4 -
 R/pkg/tests/fulltests/test_client.R |   8 --
 R/pkg/tests/fulltests/test_context.R|  16 ---
 R/pkg/tests/fulltests/test_includePackage.R |   4 -
 .../tests/fulltests/test_mllib_classification.R |  12 +--
 R/pkg/tests/fulltests/test_mllib_clustering.R   |  14 +--
 R/pkg/tests/fulltests/test_mllib_fpm.R  |   2 +-
 .../tests/fulltests/test_mllib_recommendation.R |   2 +-
 R/pkg/tests/fulltests/test_mllib_regression.R   |  16 +--
 R/pkg/tests/fulltests/test_mllib_tree.R |  14 ++-
 .../tests/fulltests/test_parallelize_collect.R  |   8 --
 R/pkg/tests/fulltests/test_rdd.R| 102 ---
 R/pkg/tests/fulltests/test_shuffle.R|  24 -
 R/pkg/tests/fulltests/test_sparkR.R |   2 -
 R/pkg/tests/fulltests/test_sparkSQL.R   |  92 ++---
 R/pkg/tests/fulltests/test_streaming.R  |  14 +--
 R/pkg/tests/fulltests/test_take.R   |   2 -
 R/pkg/tests/fulltests/test_textFile.R   |  18 
 R/pkg/tests/fulltests/test_utils.R  |   8 --
 R/pkg/tests/run-all.R   |   2 -
 27 files changed, 32 insertions(+), 370 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/26003de5/R/pkg/.Rbuildignore
--
diff --git a/R/pkg/.Rbuildignore b/R/pkg/.Rbuildignore
index f12f8c2..18b2db6 100644
--- a/R/pkg/.Rbuildignore
+++ b/R/pkg/.Rbuildignore
@@ -6,3 +6,4 @@
 ^README\.Rmd$
 ^src-native$
 ^html$
+^tests/fulltests/*

http://git-wip-us.apache.org/repos/asf/spark/blob/26003de5/R/pkg/R/install.R
--
diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R
index 4ca7aa6..ec931be 100644
--- a/R/pkg/R/install.R
+++ b/R/pkg/R/install.R
@@ -267,7 +267,7 @@ hadoopVersionName <- function(hadoopVersion) {
 # The implementation refers to appdirs package: 
https://pypi.python.org/pypi/appdirs and
 # adapt to Spark context
 sparkCachePath <- function() {
-  if (.Platform$OS.type == "windows") {
+  if (is_windows()) {
 winAppPath <- Sys.getenv("LOCALAPPDATA", unset = NA)
 if (is.na(winAppPath)) {
   stop(paste("%LOCALAPPDATA% not found.",

http://git-wip-us.apache.org/repos/asf/spark/blob/26003de5/R/pkg/R/utils.R
--
diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R
index b19556a..7225da9 100644
--- a/R/pkg/R/utils.R
+++ b/R/pkg/R/utils.R
@@ -900,10 +900,6 @@ isAtomicLengthOne <- function(x) {
   is.atomic(x) && length(x) == 1
 }
 
-is_cran <- function() {
-  !identical(Sys.getenv("NOT_CRAN"), "true")
-}
-
 is_windows <- function() {
   .Platform$OS.type == "windows"
 }
@@ -912,6 +908,6 @@ hadoop_home_set <- function() {
   !identical(Sys.getenv("HADOOP_HOME"), "")
 }
 
-not_cran_or_windows_with_hadoop <- function() {
-  !is_cran() && (!is_windows() || hadoop_home_set())
+windows_with_hadoop <- function() {
+  !is_windows() || hadoop_home_set()
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/26003de5/R/pkg/tests/fulltests/test_Serde.R
--
diff --git a/R/pkg/tests/fulltests/test_Serde.R 
b/R/pkg/tests/fulltests/test_Serde.R
index 6e160fa..6bbd201 100644
--- a/R/pkg/tests/fulltests/test_Serde.R
+++ b/R/pkg/tests/fulltests/test_Serde.R
@@ -20,8 +20,6 @@ context("SerDe functionality")
 sparkSession <- sparkR.session(master = sparkRTestMaster, enableHiveSupport = 
FALSE)
 
 test_that

spark git commit: [SPARK-20877][SPARKR][FOLLOWUP] clean up after test move

2017-06-11 Thread felixcheung
Repository: spark
Updated Branches:
  refs/heads/master 823f1eef5 -> 9f4ff9552


[SPARK-20877][SPARKR][FOLLOWUP] clean up after test move

## What changes were proposed in this pull request?

clean up after big test move

## How was this patch tested?

unit tests, jenkins

Author: Felix Cheung 

Closes #18267 from felixcheung/rtestset2.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9f4ff955
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9f4ff955
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9f4ff955

Branch: refs/heads/master
Commit: 9f4ff9552470fb97ca38bb56bbf43be49a9a316c
Parents: 823f1ee
Author: Felix Cheung 
Authored: Sun Jun 11 03:00:44 2017 -0700
Committer: Felix Cheung 
Committed: Sun Jun 11 03:00:44 2017 -0700

--
 R/pkg/.Rbuildignore |   1 +
 R/pkg/R/install.R   |   2 +-
 R/pkg/R/utils.R |   8 +-
 R/pkg/tests/fulltests/test_Serde.R  |   6 --
 R/pkg/tests/fulltests/test_Windows.R|   7 +-
 R/pkg/tests/fulltests/test_binaryFile.R |   8 --
 R/pkg/tests/fulltests/test_binary_function.R|   6 --
 R/pkg/tests/fulltests/test_broadcast.R  |   4 -
 R/pkg/tests/fulltests/test_client.R |   8 --
 R/pkg/tests/fulltests/test_context.R|  16 ---
 R/pkg/tests/fulltests/test_includePackage.R |   4 -
 .../tests/fulltests/test_mllib_classification.R |  12 +--
 R/pkg/tests/fulltests/test_mllib_clustering.R   |  14 +--
 R/pkg/tests/fulltests/test_mllib_fpm.R  |   2 +-
 .../tests/fulltests/test_mllib_recommendation.R |   2 +-
 R/pkg/tests/fulltests/test_mllib_regression.R   |  16 +--
 R/pkg/tests/fulltests/test_mllib_tree.R |  22 ++--
 .../tests/fulltests/test_parallelize_collect.R  |   8 --
 R/pkg/tests/fulltests/test_rdd.R| 102 ---
 R/pkg/tests/fulltests/test_shuffle.R|  24 -
 R/pkg/tests/fulltests/test_sparkR.R |   2 -
 R/pkg/tests/fulltests/test_sparkSQL.R   |  92 ++---
 R/pkg/tests/fulltests/test_streaming.R  |  14 +--
 R/pkg/tests/fulltests/test_take.R   |   2 -
 R/pkg/tests/fulltests/test_textFile.R   |  18 
 R/pkg/tests/fulltests/test_utils.R  |   9 --
 R/pkg/tests/run-all.R   |   2 -
 27 files changed, 35 insertions(+), 376 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/9f4ff955/R/pkg/.Rbuildignore
--
diff --git a/R/pkg/.Rbuildignore b/R/pkg/.Rbuildignore
index f12f8c2..18b2db6 100644
--- a/R/pkg/.Rbuildignore
+++ b/R/pkg/.Rbuildignore
@@ -6,3 +6,4 @@
 ^README\.Rmd$
 ^src-native$
 ^html$
+^tests/fulltests/*

http://git-wip-us.apache.org/repos/asf/spark/blob/9f4ff955/R/pkg/R/install.R
--
diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R
index 4ca7aa6..ec931be 100644
--- a/R/pkg/R/install.R
+++ b/R/pkg/R/install.R
@@ -267,7 +267,7 @@ hadoopVersionName <- function(hadoopVersion) {
 # The implementation refers to appdirs package: 
https://pypi.python.org/pypi/appdirs and
 # adapt to Spark context
 sparkCachePath <- function() {
-  if (.Platform$OS.type == "windows") {
+  if (is_windows()) {
 winAppPath <- Sys.getenv("LOCALAPPDATA", unset = NA)
 if (is.na(winAppPath)) {
   stop(paste("%LOCALAPPDATA% not found.",

http://git-wip-us.apache.org/repos/asf/spark/blob/9f4ff955/R/pkg/R/utils.R
--
diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R
index ea45e39..91483a4 100644
--- a/R/pkg/R/utils.R
+++ b/R/pkg/R/utils.R
@@ -908,10 +908,6 @@ isAtomicLengthOne <- function(x) {
   is.atomic(x) && length(x) == 1
 }
 
-is_cran <- function() {
-  !identical(Sys.getenv("NOT_CRAN"), "true")
-}
-
 is_windows <- function() {
   .Platform$OS.type == "windows"
 }
@@ -920,6 +916,6 @@ hadoop_home_set <- function() {
   !identical(Sys.getenv("HADOOP_HOME"), "")
 }
 
-not_cran_or_windows_with_hadoop <- function() {
-  !is_cran() && (!is_windows() || hadoop_home_set())
+windows_with_hadoop <- function() {
+  !is_windows() || hadoop_home_set()
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/9f4ff955/R/pkg/tests/fulltests/test_Serde.R
--
diff --git a/R/pkg/tests/fulltests/test_Serde.R 
b/R/pkg/tests/fulltests/test_Serde.R
index 6e160fa..6bbd201 100644
--- a/R/pkg/tests/fulltests/test_Serde.R
+++ b/R/pkg/tests/fulltests/test_Serde.R
@@ -20,8 +20,6 @@ context("SerDe functionality")
 sparkSession <- sparkR.session(master = sparkRTestMaster, enableHiveSupport = 
FALSE)
 
 test_that("SerDe of primitive t

spark git commit: [SPARK-13933][BUILD] Update hadoop-2.7 profile's curator version to 2.7.1

2017-06-11 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master eb3ea3a08 -> 823f1eef5


[SPARK-13933][BUILD] Update hadoop-2.7 profile's curator version to 2.7.1

## What changes were proposed in this pull request?

Update the hadoop-2.7 profile's curator version to 2.7.1; see 
[SPARK-13933](https://issues.apache.org/jira/browse/SPARK-13933) for more details.

## How was this patch tested?

manual tests

Author: Yuming Wang 

Closes #18247 from wangyum/SPARK-13933.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/823f1eef
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/823f1eef
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/823f1eef

Branch: refs/heads/master
Commit: 823f1eef580763048b08b640090519e884f29c47
Parents: eb3ea3a
Author: Yuming Wang 
Authored: Sun Jun 11 10:05:47 2017 +0100
Committer: Sean Owen 
Committed: Sun Jun 11 10:05:47 2017 +0100

--
 dev/deps/spark-deps-hadoop-2.7 | 6 +++---
 pom.xml| 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/823f1eef/dev/deps/spark-deps-hadoop-2.7
--
diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7
index ab1de3d..9127413 100644
--- a/dev/deps/spark-deps-hadoop-2.7
+++ b/dev/deps/spark-deps-hadoop-2.7
@@ -47,9 +47,9 @@ commons-net-2.2.jar
 commons-pool-1.5.4.jar
 compress-lzf-1.0.3.jar
 core-1.1.2.jar
-curator-client-2.6.0.jar
-curator-framework-2.6.0.jar
-curator-recipes-2.6.0.jar
+curator-client-2.7.1.jar
+curator-framework-2.7.1.jar
+curator-recipes-2.7.1.jar
 datanucleus-api-jdo-3.2.6.jar
 datanucleus-core-3.2.10.jar
 datanucleus-rdbms-3.2.9.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/823f1eef/pom.xml
--
diff --git a/pom.xml b/pom.xml
index 6835ea1..5f52407 100644
--- a/pom.xml
+++ b/pom.xml
@@ -2532,6 +2532,7 @@
       <id>hadoop-2.7</id>
       <properties>
         <hadoop.version>2.7.3</hadoop.version>
+        <curator.version>2.7.1</curator.version>
       </properties>
     </profile>
 





spark git commit: [SPARK-20935][STREAMING] Always close WriteAheadLog and make it idempotent

2017-06-11 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 8da3f7041 -> eb3ea3a08


[SPARK-20935][STREAMING] Always close WriteAheadLog and make it idempotent

## What changes were proposed in this pull request?

This PR proposes that `ReceiverTracker` always close the `WriteAheadLog` when it 
stops, whatever state the tracker is in, and makes `WriteAheadLog` and its 
implementations idempotent.
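For reference, a common way to make a `close()` idempotent, shown here as a generic sketch rather than the exact change made to `FileBasedWriteAheadLog` and `BatchedWriteAheadLog`:

```scala
import java.util.concurrent.atomic.AtomicBoolean

// close() is safe to call any number of times: only the first call releases
// resources, every later call is a no-op.
abstract class IdempotentlyClosable {
  private val closed = new AtomicBoolean(false)

  protected def doClose(): Unit  // release file handles, thread pools, etc.

  final def close(): Unit = {
    if (closed.compareAndSet(false, true)) {
      doClose()
    }
  }
}

val resource = new IdempotentlyClosable {
  protected def doClose(): Unit = println("resources released")
}
resource.close()  // prints once
resource.close()  // no-op
```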

## How was this patch tested?

Added a test in `WriteAheadLogSuite`. Note that the added test passes even if 
the log is closed twice (namely, even without the changes in 
`FileBasedWriteAheadLog` and `BatchedWriteAheadLog`). It looks like both are 
already idempotent, but this serves as a sanity check.

Author: hyukjinkwon 

Closes #18224 from HyukjinKwon/streaming-closing.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/eb3ea3a0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/eb3ea3a0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/eb3ea3a0

Branch: refs/heads/master
Commit: eb3ea3a0831b26d3dc35a97566716b92868a7beb
Parents: 8da3f70
Author: hyukjinkwon 
Authored: Sun Jun 11 09:54:57 2017 +0100
Committer: Sean Owen 
Committed: Sun Jun 11 09:54:57 2017 +0100

--
 .../spark/streaming/util/WriteAheadLog.java |  2 +-
 .../streaming/scheduler/ReceiverTracker.scala   | 27 
 .../streaming/util/BatchedWriteAheadLog.scala   | 13 +-
 .../streaming/util/FileBasedWriteAheadLog.scala |  8 +++---
 .../scheduler/ReceiverTrackerSuite.scala|  2 ++
 .../streaming/util/WriteAheadLogSuite.scala |  2 ++
 6 files changed, 26 insertions(+), 28 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/eb3ea3a0/streaming/src/main/java/org/apache/spark/streaming/util/WriteAheadLog.java
--
diff --git 
a/streaming/src/main/java/org/apache/spark/streaming/util/WriteAheadLog.java 
b/streaming/src/main/java/org/apache/spark/streaming/util/WriteAheadLog.java
index 2803cad..00c5972 100644
--- a/streaming/src/main/java/org/apache/spark/streaming/util/WriteAheadLog.java
+++ b/streaming/src/main/java/org/apache/spark/streaming/util/WriteAheadLog.java
@@ -56,7 +56,7 @@ public abstract class WriteAheadLog {
   public abstract void clean(long threshTime, boolean waitForCompletion);
 
   /**
-   * Close this log and release any resources.
+   * Close this log and release any resources. It must be idempotent.
*/
   public abstract void close();
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/eb3ea3a0/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala
--
diff --git 
a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala
 
b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala
index bd7ab0b..6f130c8 100644
--- 
a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala
+++ 
b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala
@@ -165,11 +165,11 @@ class ReceiverTracker(ssc: StreamingContext, 
skipReceiverLaunch: Boolean = false
 
   /** Stop the receiver execution thread. */
   def stop(graceful: Boolean): Unit = synchronized {
-if (isTrackerStarted) {
-  // First, stop the receivers
-  trackerState = Stopping
+val isStarted: Boolean = isTrackerStarted
+trackerState = Stopping
+if (isStarted) {
   if (!skipReceiverLaunch) {
-// Send the stop signal to all the receivers
+// First, stop the receivers. Send the stop signal to all the receivers
 endpoint.askSync[Boolean](StopAllReceivers)
 
 // Wait for the Spark job that runs the receivers to be over
@@ -194,17 +194,13 @@ class ReceiverTracker(ssc: StreamingContext, 
skipReceiverLaunch: Boolean = false
   // Finally, stop the endpoint
   ssc.env.rpcEnv.stop(endpoint)
   endpoint = null
-  receivedBlockTracker.stop()
-  logInfo("ReceiverTracker stopped")
-  trackerState = Stopped
-} else if (isTrackerInitialized) {
-  trackerState = Stopping
-  // `ReceivedBlockTracker` is open when this instance is created. We 
should
-  // close this even if this `ReceiverTracker` is not started.
-  receivedBlockTracker.stop()
-  logInfo("ReceiverTracker stopped")
-  trackerState = Stopped
 }
+
+// `ReceivedBlockTracker` is open when this instance is created. We should
+// close this even if this `ReceiverTracker` is not started.
+receivedBlockTracker.stop()
+logInfo("ReceiverTracker stopped")
+trackerState = Stopped
   }
 
   /** Allocate all unallocated blocks to the given batch. */
@@ -453,9 +449,6 @@ class ReceiverTracker(ssc: StreamingContext, 
skipReceiverLaunch: Bool

spark git commit: [SPARK-21000][MESOS] Add Mesos labels support to the Spark Dispatcher

2017-06-11 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master dc4c35183 -> 8da3f7041


[SPARK-21000][MESOS] Add Mesos labels support to the Spark Dispatcher

## What changes were proposed in this pull request?

Add Mesos labels support to the Spark Dispatcher

## How was this patch tested?

unit tests

Author: Michael Gummelt 

Closes #18220 from mgummelt/SPARK-21000-dispatcher-labels.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8da3f704
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8da3f704
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8da3f704

Branch: refs/heads/master
Commit: 8da3f7041aafa71d7596b531625edb899970fec2
Parents: dc4c351
Author: Michael Gummelt 
Authored: Sun Jun 11 09:49:39 2017 +0100
Committer: Sean Owen 
Committed: Sun Jun 11 09:49:39 2017 +0100

--
 docs/running-on-mesos.md| 14 +-
 .../org/apache/spark/deploy/mesos/config.scala  |  7 +++
 .../cluster/mesos/MesosClusterScheduler.scala   | 10 ++--
 .../MesosCoarseGrainedSchedulerBackend.scala| 28 ++-
 .../cluster/mesos/MesosProtoUtils.scala | 53 
 .../mesos/MesosClusterSchedulerSuite.scala  | 27 ++
 ...esosCoarseGrainedSchedulerBackendSuite.scala | 23 -
 .../cluster/mesos/MesosProtoUtilsSuite.scala| 48 ++
 8 files changed, 157 insertions(+), 53 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/8da3f704/docs/running-on-mesos.md
--
diff --git a/docs/running-on-mesos.md b/docs/running-on-mesos.md
index 8745e76..ec130c1 100644
--- a/docs/running-on-mesos.md
+++ b/docs/running-on-mesos.md
@@ -382,8 +382,9 @@ See the [configuration page](configuration.html) for 
information on Spark config
   (none)
   
 Set the Mesos labels to add to each task. Labels are free-form key-value 
pairs.
-Key-value pairs should be separated by a colon, and commas used to list 
more than one.
-Ex. key:value,key2:value2.
+Key-value pairs should be separated by a colon, and commas used to
+list more than one.  If your label includes a colon or comma, you
+can escape it with a backslash.  Ex. key:value,key2:a\:b.
   
 
 
@@ -469,6 +470,15 @@ See the [configuration page](configuration.html) for 
information on Spark config
   
 
 
+  spark.mesos.driver.labels
+  (none)
+  
+Mesos labels to add to the driver.  See 
spark.mesos.task.labels
+for formatting information.
+  
+
+
+
   spark.mesos.driverEnv.[EnvironmentVariableName]
   (none)
   

http://git-wip-us.apache.org/repos/asf/spark/blob/8da3f704/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala
--
diff --git 
a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala
 
b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala
index 19e2533..56d697f 100644
--- 
a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala
+++ 
b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala
@@ -56,4 +56,11 @@ package object config {
   .stringConf
   .createOptional
 
+  private [spark] val DRIVER_LABELS =
+ConfigBuilder("spark.mesos.driver.labels")
+  .doc("Mesos labels to add to the driver.  Labels are free-form key-value 
pairs.  Key-value" +
+"pairs should be separated by a colon, and commas used to list more 
than one." +
+"Ex. key:value,key2:value2")
+  .stringConf
+  .createOptional
 }
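For illustration, a hedged sketch of parsing the label syntax documented above, including the backslash escaping for colons and commas (the PR's real parsing lives in the new `MesosProtoUtils`; this standalone version is only meant to show the idea):

```scala
// Illustrative parser for the documented label syntax: commas separate labels,
// a colon separates key from value, and "\:" / "\," escape literal colons and
// commas inside a key or value. Not the actual MesosProtoUtils implementation.
def parseLabels(spec: String): Map[String, String] = {
  // Split on a separator character that is not preceded by a backslash.
  def splitUnescaped(s: String, sep: Char): Seq[String] =
    s.split(s"(?<!\\\\)\\$sep").toSeq
  // Drop the escaping backslashes once the fields have been separated.
  def unescape(s: String): String = s.replaceAll("""\\(.)""", "$1")

  splitUnescaped(spec, ',').filter(_.nonEmpty).map { label =>
    splitUnescaped(label, ':') match {
      case Seq(key, value) => unescape(key) -> unescape(value)
      case _ => throw new IllegalArgumentException(s"Malformed label: $label")
    }
  }.toMap
}

// parseLabels("""key:value,key2:a\:b""") == Map("key" -> "value", "key2" -> "a:b")
```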

http://git-wip-us.apache.org/repos/asf/spark/blob/8da3f704/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala
--
diff --git 
a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala
 
b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala
index 1bc6f71..577f9a8 100644
--- 
a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala
+++ 
b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala
@@ -30,11 +30,13 @@ import org.apache.mesos.Protos.Environment.Variable
 import org.apache.mesos.Protos.TaskStatus.Reason
 
 import org.apache.spark.{SecurityManager, SparkConf, SparkException, TaskState}
+import org.apache.spark.deploy.mesos.config
 import org.apache.spark.deploy.mesos.MesosDriverDescription
 import org.apache.spark.deploy.rest.{CreateSubmissionResponse, 
KillSubmissionResponse, SubmissionStatusResponse}
 import org.apach

[1/7] spark git commit: [SPARK-20877][SPARKR] refactor tests to basic tests only for CRAN

2017-06-11 Thread felixcheung
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 815a0820b -> 0b0be47e7


http://git-wip-us.apache.org/repos/asf/spark/blob/0b0be47e/R/pkg/tests/fulltests/test_streaming.R
--
diff --git a/R/pkg/tests/fulltests/test_streaming.R 
b/R/pkg/tests/fulltests/test_streaming.R
new file mode 100644
index 000..b20b431
--- /dev/null
+++ b/R/pkg/tests/fulltests/test_streaming.R
@@ -0,0 +1,167 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+library(testthat)
+
+context("Structured Streaming")
+
+# Tests for Structured Streaming functions in SparkR
+
+sparkSession <- sparkR.session(master = sparkRTestMaster, enableHiveSupport = 
FALSE)
+
+jsonSubDir <- file.path("sparkr-test", "json", "")
+if (.Platform$OS.type == "windows") {
+  # file.path removes the empty separator on Windows, adds it back
+  jsonSubDir <- paste0(jsonSubDir, .Platform$file.sep)
+}
+jsonDir <- file.path(tempdir(), jsonSubDir)
+dir.create(jsonDir, recursive = TRUE)
+
+mockLines <- c("{\"name\":\"Michael\"}",
+   "{\"name\":\"Andy\", \"age\":30}",
+   "{\"name\":\"Justin\", \"age\":19}")
+jsonPath <- tempfile(pattern = jsonSubDir, fileext = ".tmp")
+writeLines(mockLines, jsonPath)
+
+mockLinesNa <- c("{\"name\":\"Bob\",\"age\":16,\"height\":176.5}",
+ "{\"name\":\"Alice\",\"age\":null,\"height\":164.3}",
+ "{\"name\":\"David\",\"age\":60,\"height\":null}")
+jsonPathNa <- tempfile(pattern = jsonSubDir, fileext = ".tmp")
+
+schema <- structType(structField("name", "string"),
+ structField("age", "integer"),
+ structField("count", "double"))
+
+test_that("read.stream, write.stream, awaitTermination, stopQuery", {
+  skip_on_cran()
+
+  df <- read.stream("json", path = jsonDir, schema = schema, 
maxFilesPerTrigger = 1)
+  expect_true(isStreaming(df))
+  counts <- count(group_by(df, "name"))
+  q <- write.stream(counts, "memory", queryName = "people", outputMode = 
"complete")
+
+  expect_false(awaitTermination(q, 5 * 1000))
+  callJMethod(q@ssq, "processAllAvailable")
+  expect_equal(head(sql("SELECT count(*) FROM people"))[[1]], 3)
+
+  writeLines(mockLinesNa, jsonPathNa)
+  awaitTermination(q, 5 * 1000)
+  callJMethod(q@ssq, "processAllAvailable")
+  expect_equal(head(sql("SELECT count(*) FROM people"))[[1]], 6)
+
+  stopQuery(q)
+  expect_true(awaitTermination(q, 1))
+  expect_error(awaitTermination(q), NA)
+})
+
+test_that("print from explain, lastProgress, status, isActive", {
+  skip_on_cran()
+
+  df <- read.stream("json", path = jsonDir, schema = schema)
+  expect_true(isStreaming(df))
+  counts <- count(group_by(df, "name"))
+  q <- write.stream(counts, "memory", queryName = "people2", outputMode = 
"complete")
+
+  awaitTermination(q, 5 * 1000)
+  callJMethod(q@ssq, "processAllAvailable")
+
+  expect_equal(capture.output(explain(q))[[1]], "== Physical Plan ==")
+  expect_true(any(grepl("\"description\" : \"MemorySink\"", 
capture.output(lastProgress(q)
+  expect_true(any(grepl("\"isTriggerActive\" : ", capture.output(status(q)
+
+  expect_equal(queryName(q), "people2")
+  expect_true(isActive(q))
+
+  stopQuery(q)
+})
+
+test_that("Stream other format", {
+  skip_on_cran()
+
+  parquetPath <- tempfile(pattern = "sparkr-test", fileext = ".parquet")
+  df <- read.df(jsonPath, "json", schema)
+  write.df(df, parquetPath, "parquet", "overwrite")
+
+  df <- read.stream(path = parquetPath, schema = schema)
+  expect_true(isStreaming(df))
+  counts <- count(group_by(df, "name"))
+  q <- write.stream(counts, "memory", queryName = "people3", outputMode = 
"complete")
+
+  expect_false(awaitTermination(q, 5 * 1000))
+  callJMethod(q@ssq, "processAllAvailable")
+  expect_equal(head(sql("SELECT count(*) FROM people3"))[[1]], 3)
+
+  expect_equal(queryName(q), "people3")
+  expect_true(any(grepl("\"description\" : 
\"FileStreamSource[[:print:]]+parquet",
+  capture.output(lastProgress(q)
+  expect_true(isActive(q))
+
+  stopQuery(q)
+  expect_true(awaitTermination(q, 1))
+  expect_false(isActive(q))
+
+  unlink(parquetPath)
+})
+
+test_that("Non-streaming DataFrame", {
+  skip_on_cran()
+
+  c <- as.DataFrame(cars)
+ 

[5/7] spark git commit: [SPARK-20877][SPARKR] refactor tests to basic tests only for CRAN

2017-06-11 Thread felixcheung
http://git-wip-us.apache.org/repos/asf/spark/blob/dc4c3518/R/pkg/inst/tests/testthat/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R 
b/R/pkg/inst/tests/testthat/test_sparkSQL.R
deleted file mode 100644
index c790d02..000
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ /dev/null
@@ -1,3474 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-library(testthat)
-
-context("SparkSQL functions")
-
-# Utility function for easily checking the values of a StructField
-checkStructField <- function(actual, expectedName, expectedType, 
expectedNullable) {
-  expect_equal(class(actual), "structField")
-  expect_equal(actual$name(), expectedName)
-  expect_equal(actual$dataType.toString(), expectedType)
-  expect_equal(actual$nullable(), expectedNullable)
-}
-
-markUtf8 <- function(s) {
-  Encoding(s) <- "UTF-8"
-  s
-}
-
-setHiveContext <- function(sc) {
-  if (exists(".testHiveSession", envir = .sparkREnv)) {
-hiveSession <- get(".testHiveSession", envir = .sparkREnv)
-  } else {
-# initialize once and reuse
-ssc <- callJMethod(sc, "sc")
-hiveCtx <- tryCatch({
-  newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc, FALSE)
-},
-error = function(err) {
-  skip("Hive is not build with SparkSQL, skipped")
-})
-hiveSession <- callJMethod(hiveCtx, "sparkSession")
-  }
-  previousSession <- get(".sparkRsession", envir = .sparkREnv)
-  assign(".sparkRsession", hiveSession, envir = .sparkREnv)
-  assign(".prevSparkRsession", previousSession, envir = .sparkREnv)
-  hiveSession
-}
-
-unsetHiveContext <- function() {
-  previousSession <- get(".prevSparkRsession", envir = .sparkREnv)
-  assign(".sparkRsession", previousSession, envir = .sparkREnv)
-  remove(".prevSparkRsession", envir = .sparkREnv)
-}
-
-# Tests for SparkSQL functions in SparkR
-
-filesBefore <- list.files(path = sparkRDir, all.files = TRUE)
-sparkSession <- if (not_cran_or_windows_with_hadoop()) {
-sparkR.session(master = sparkRTestMaster)
-  } else {
-sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE)
-  }
-sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", 
"getJavaSparkContext", sparkSession)
-
-mockLines <- c("{\"name\":\"Michael\"}",
-   "{\"name\":\"Andy\", \"age\":30}",
-   "{\"name\":\"Justin\", \"age\":19}")
-jsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp")
-parquetPath <- tempfile(pattern = "sparkr-test", fileext = ".parquet")
-orcPath <- tempfile(pattern = "sparkr-test", fileext = ".orc")
-writeLines(mockLines, jsonPath)
-
-# For test nafunctions, like dropna(), fillna(),...
-mockLinesNa <- c("{\"name\":\"Bob\",\"age\":16,\"height\":176.5}",
- "{\"name\":\"Alice\",\"age\":null,\"height\":164.3}",
- "{\"name\":\"David\",\"age\":60,\"height\":null}",
- "{\"name\":\"Amy\",\"age\":null,\"height\":null}",
- "{\"name\":null,\"age\":null,\"height\":null}")
-jsonPathNa <- tempfile(pattern = "sparkr-test", fileext = ".tmp")
-writeLines(mockLinesNa, jsonPathNa)
-
-# For test complex types in DataFrame
-mockLinesComplexType <-
-  c("{\"c1\":[1, 2, 3], \"c2\":[\"a\", \"b\", \"c\"], \"c3\":[1.0, 2.0, 3.0]}",
-"{\"c1\":[4, 5, 6], \"c2\":[\"d\", \"e\", \"f\"], \"c3\":[4.0, 5.0, 6.0]}",
-"{\"c1\":[7, 8, 9], \"c2\":[\"g\", \"h\", \"i\"], \"c3\":[7.0, 8.0, 9.0]}")
-complexTypeJsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp")
-writeLines(mockLinesComplexType, complexTypeJsonPath)
-
-# For test map type and struct type in DataFrame
-mockLinesMapType <- 
c("{\"name\":\"Bob\",\"info\":{\"age\":16,\"height\":176.5}}",
-  
"{\"name\":\"Alice\",\"info\":{\"age\":20,\"height\":164.3}}",
-  
"{\"name\":\"David\",\"info\":{\"age\":60,\"height\":180}}")
-mapTypeJsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp")
-writeLines(mockLinesMapType, mapTypeJsonPath)
-
-if (.Platform$OS.type == "windows") {
-  Sys.setenv(TZ = "GMT")
-}
-
-test_that("calling sparkRSQL.init returns existing SQL context", {
-  skip_on_cran()
-
-  sqlContext <- suppressWarnings(sparkRSQL.init(sc))
-  expect

[2/7] spark git commit: [SPARK-20877][SPARKR] refactor tests to basic tests only for CRAN

2017-06-11 Thread felixcheung
http://git-wip-us.apache.org/repos/asf/spark/blob/0b0be47e/R/pkg/tests/fulltests/test_sparkSQL.R
--
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R 
b/R/pkg/tests/fulltests/test_sparkSQL.R
new file mode 100644
index 000..d2d5191
--- /dev/null
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -0,0 +1,3198 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+library(testthat)
+
+context("SparkSQL functions")
+
+# Utility function for easily checking the values of a StructField
+checkStructField <- function(actual, expectedName, expectedType, 
expectedNullable) {
+  expect_equal(class(actual), "structField")
+  expect_equal(actual$name(), expectedName)
+  expect_equal(actual$dataType.toString(), expectedType)
+  expect_equal(actual$nullable(), expectedNullable)
+}
+
+markUtf8 <- function(s) {
+  Encoding(s) <- "UTF-8"
+  s
+}
+
+setHiveContext <- function(sc) {
+  if (exists(".testHiveSession", envir = .sparkREnv)) {
+hiveSession <- get(".testHiveSession", envir = .sparkREnv)
+  } else {
+# initialize once and reuse
+ssc <- callJMethod(sc, "sc")
+hiveCtx <- tryCatch({
+  newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc, FALSE)
+},
+error = function(err) {
+  skip("Hive is not build with SparkSQL, skipped")
+})
+hiveSession <- callJMethod(hiveCtx, "sparkSession")
+  }
+  previousSession <- get(".sparkRsession", envir = .sparkREnv)
+  assign(".sparkRsession", hiveSession, envir = .sparkREnv)
+  assign(".prevSparkRsession", previousSession, envir = .sparkREnv)
+  hiveSession
+}
+
+unsetHiveContext <- function() {
+  previousSession <- get(".prevSparkRsession", envir = .sparkREnv)
+  assign(".sparkRsession", previousSession, envir = .sparkREnv)
+  remove(".prevSparkRsession", envir = .sparkREnv)
+}
+
+# Tests for SparkSQL functions in SparkR
+
+filesBefore <- list.files(path = sparkRDir, all.files = TRUE)
+sparkSession <- if (not_cran_or_windows_with_hadoop()) {
+sparkR.session(master = sparkRTestMaster)
+  } else {
+sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE)
+  }
+sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", 
"getJavaSparkContext", sparkSession)
+
+mockLines <- c("{\"name\":\"Michael\"}",
+   "{\"name\":\"Andy\", \"age\":30}",
+   "{\"name\":\"Justin\", \"age\":19}")
+jsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp")
+parquetPath <- tempfile(pattern = "sparkr-test", fileext = ".parquet")
+orcPath <- tempfile(pattern = "sparkr-test", fileext = ".orc")
+writeLines(mockLines, jsonPath)
+
+# For test nafunctions, like dropna(), fillna(),...
+mockLinesNa <- c("{\"name\":\"Bob\",\"age\":16,\"height\":176.5}",
+ "{\"name\":\"Alice\",\"age\":null,\"height\":164.3}",
+ "{\"name\":\"David\",\"age\":60,\"height\":null}",
+ "{\"name\":\"Amy\",\"age\":null,\"height\":null}",
+ "{\"name\":null,\"age\":null,\"height\":null}")
+jsonPathNa <- tempfile(pattern = "sparkr-test", fileext = ".tmp")
+writeLines(mockLinesNa, jsonPathNa)
+
+# For test complex types in DataFrame
+mockLinesComplexType <-
+  c("{\"c1\":[1, 2, 3], \"c2\":[\"a\", \"b\", \"c\"], \"c3\":[1.0, 2.0, 3.0]}",
+"{\"c1\":[4, 5, 6], \"c2\":[\"d\", \"e\", \"f\"], \"c3\":[4.0, 5.0, 6.0]}",
+"{\"c1\":[7, 8, 9], \"c2\":[\"g\", \"h\", \"i\"], \"c3\":[7.0, 8.0, 9.0]}")
+complexTypeJsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp")
+writeLines(mockLinesComplexType, complexTypeJsonPath)
+
+# For test map type and struct type in DataFrame
+mockLinesMapType <- c("{\"name\":\"Bob\",\"info\":{\"age\":16,\"height\":176.5}}",
+                      "{\"name\":\"Alice\",\"info\":{\"age\":20,\"height\":164.3}}",
+                      "{\"name\":\"David\",\"info\":{\"age\":60,\"height\":180}}")
+mapTypeJsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp")
+writeLines(mockLinesMapType, mapTypeJsonPath)
+
+if (.Platform$OS.type == "windows") {
+  Sys.setenv(TZ = "GMT")
+}
+
+test_that("calling sparkRSQL.init returns existing SQL context", {
+  skip_on_cran()
+
+  sqlContext <- suppressWarnings(sparkRSQL.init(sc))
+  expect_equal(suppressWarni

[7/7] spark git commit: [SPARK-20877][SPARKR] refactor tests to basic tests only for CRAN

2017-06-11 Thread felixcheung
[SPARK-20877][SPARKR] refactor tests to basic tests only for CRAN

## What changes were proposed in this pull request?

Move all existing tests to a non-installed directory so that they are never run 
when the SparkR package is installed

For a follow-up PR:
- remove all skip_on_cran() calls in tests
- clean up test timer
- improve or change the basic tests that do run on CRAN (if anyone has a suggestion)

It looks like `R CMD build pkg` will still put pkg/tests (i.e. the full tests) 
into the source package, but `R CMD INSTALL` on such a source package does not 
install these tests (and so `R CMD check` does not run them)
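
A minimal sketch (in R) of how one might verify that the full tests are indeed not 
installed; the tarball name is hypothetical, and the package is assumed to have been 
built and installed with `R CMD build pkg` followed by `R CMD INSTALL SparkR_2.2.0.tar.gz`:

  # an installed SparkR should carry no bundled test directory by default
  system.file("tests", package = "SparkR")   # returns "" when tests are not installed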

## How was this patch tested?

- [x] unit tests, Jenkins
- [x] AppVeyor
- [x] make a source package, install it, `R CMD check` it - verify the full 
tests are not installed or run

Author: Felix Cheung 

Closes #18264 from felixcheung/rtestset.

(cherry picked from commit dc4c351837879dab26ad8fb471dc51c06832a9e4)
Signed-off-by: Felix Cheung 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0b0be47e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0b0be47e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0b0be47e

Branch: refs/heads/branch-2.2
Commit: 0b0be47e7b742d96810c60b19a9aa920242e5224
Parents: 815a082
Author: Felix Cheung 
Authored: Sun Jun 11 00:00:33 2017 -0700
Committer: Felix Cheung 
Committed: Sun Jun 11 00:00:45 2017 -0700

--
 R/pkg/inst/tests/testthat/jarTest.R |   32 -
 R/pkg/inst/tests/testthat/packageInAJarTest.R   |   30 -
 R/pkg/inst/tests/testthat/test_Serde.R  |   85 -
 R/pkg/inst/tests/testthat/test_Windows.R|   32 -
 R/pkg/inst/tests/testthat/test_basic.R  |   90 +
 R/pkg/inst/tests/testthat/test_binaryFile.R |  100 -
 .../inst/tests/testthat/test_binary_function.R  |  110 -
 R/pkg/inst/tests/testthat/test_broadcast.R  |   55 -
 R/pkg/inst/tests/testthat/test_client.R |   51 -
 R/pkg/inst/tests/testthat/test_context.R|  226 --
 R/pkg/inst/tests/testthat/test_includePackage.R |   64 -
 R/pkg/inst/tests/testthat/test_jvm_api.R|   36 -
 .../tests/testthat/test_mllib_classification.R  |  396 ---
 .../inst/tests/testthat/test_mllib_clustering.R |  328 --
 R/pkg/inst/tests/testthat/test_mllib_fpm.R  |   85 -
 .../tests/testthat/test_mllib_recommendation.R  |   67 -
 .../inst/tests/testthat/test_mllib_regression.R |  480 ---
 R/pkg/inst/tests/testthat/test_mllib_stat.R |   53 -
 R/pkg/inst/tests/testthat/test_mllib_tree.R |  226 --
 .../tests/testthat/test_parallelize_collect.R   |  120 -
 R/pkg/inst/tests/testthat/test_rdd.R|  906 -
 R/pkg/inst/tests/testthat/test_shuffle.R|  248 --
 R/pkg/inst/tests/testthat/test_sparkR.R |   48 -
 R/pkg/inst/tests/testthat/test_sparkSQL.R   | 3198 --
 R/pkg/inst/tests/testthat/test_streaming.R  |  167 -
 R/pkg/inst/tests/testthat/test_take.R   |   71 -
 R/pkg/inst/tests/testthat/test_textFile.R   |  182 -
 R/pkg/inst/tests/testthat/test_utils.R  |  247 --
 R/pkg/tests/fulltests/jarTest.R |   32 +
 R/pkg/tests/fulltests/packageInAJarTest.R   |   30 +
 R/pkg/tests/fulltests/test_Serde.R  |   85 +
 R/pkg/tests/fulltests/test_Windows.R|   32 +
 R/pkg/tests/fulltests/test_binaryFile.R |  100 +
 R/pkg/tests/fulltests/test_binary_function.R|  110 +
 R/pkg/tests/fulltests/test_broadcast.R  |   55 +
 R/pkg/tests/fulltests/test_client.R |   51 +
 R/pkg/tests/fulltests/test_context.R|  226 ++
 R/pkg/tests/fulltests/test_includePackage.R |   64 +
 R/pkg/tests/fulltests/test_jvm_api.R|   36 +
 .../tests/fulltests/test_mllib_classification.R |  396 +++
 R/pkg/tests/fulltests/test_mllib_clustering.R   |  328 ++
 R/pkg/tests/fulltests/test_mllib_fpm.R  |   85 +
 .../tests/fulltests/test_mllib_recommendation.R |   67 +
 R/pkg/tests/fulltests/test_mllib_regression.R   |  480 +++
 R/pkg/tests/fulltests/test_mllib_stat.R |   53 +
 R/pkg/tests/fulltests/test_mllib_tree.R |  226 ++
 .../tests/fulltests/test_parallelize_collect.R  |  120 +
 R/pkg/tests/fulltests/test_rdd.R|  906 +
 R/pkg/tests/fulltests/test_shuffle.R|  248 ++
 R/pkg/tests/fulltests/test_sparkR.R |   48 +
 R/pkg/tests/fulltests/test_sparkSQL.R   | 3198 ++
 R/pkg/tests/fulltests/test_streaming.R  |  167 +
 R/pkg/tests/fulltests/test_take.R   |   71 +
 R/pkg/tests/fulltests/test_textFile.R   |  182 +
 R/pkg/tests/fulltests/test_utils.R  |  247 ++
 R/pkg/tests/run-all.R   |8 +
 56 files changed, 7741 insertions(+), 7643 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/bl

[6/7] spark git commit: [SPARK-20877][SPARKR] refactor tests to basic tests only for CRAN

2017-06-11 Thread felixcheung
http://git-wip-us.apache.org/repos/asf/spark/blob/dc4c3518/R/pkg/inst/tests/testthat/test_mllib_regression.R
--
diff --git a/R/pkg/inst/tests/testthat/test_mllib_regression.R 
b/R/pkg/inst/tests/testthat/test_mllib_regression.R
deleted file mode 100644
index b05fdd3..000
--- a/R/pkg/inst/tests/testthat/test_mllib_regression.R
+++ /dev/null
@@ -1,480 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-library(testthat)
-
-context("MLlib regression algorithms, except for tree-based algorithms")
-
-# Tests for MLlib regression algorithms in SparkR
-sparkSession <- sparkR.session(master = sparkRTestMaster, enableHiveSupport = 
FALSE)
-
-test_that("formula of spark.glm", {
-  skip_on_cran()
-
-  training <- suppressWarnings(createDataFrame(iris))
-  # directly calling the spark API
-  # dot minus and intercept vs native glm
-  model <- spark.glm(training, Sepal_Width ~ . - Species + 0)
-  vals <- collect(select(predict(model, training), "prediction"))
-  rVals <- predict(glm(Sepal.Width ~ . - Species + 0, data = iris), iris)
-  expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
-
-  # feature interaction vs native glm
-  model <- spark.glm(training, Sepal_Width ~ Species:Sepal_Length)
-  vals <- collect(select(predict(model, training), "prediction"))
-  rVals <- predict(glm(Sepal.Width ~ Species:Sepal.Length, data = iris), iris)
-  expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
-
-  # glm should work with long formula
-  training <- suppressWarnings(createDataFrame(iris))
-  training$LongLongLongLongLongName <- training$Sepal_Width
-  training$VeryLongLongLongLonLongName <- training$Sepal_Length
-  training$AnotherLongLongLongLongName <- training$Species
-  model <- spark.glm(training, LongLongLongLongLongName ~ 
VeryLongLongLongLonLongName +
-AnotherLongLongLongLongName)
-  vals <- collect(select(predict(model, training), "prediction"))
-  rVals <- predict(glm(Sepal.Width ~ Sepal.Length + Species, data = iris), 
iris)
-  expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
-})
-
-test_that("spark.glm and predict", {
-  training <- suppressWarnings(createDataFrame(iris))
-  # gaussian family
-  model <- spark.glm(training, Sepal_Width ~ Sepal_Length + Species)
-  prediction <- predict(model, training)
-  expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), 
"double")
-  vals <- collect(select(prediction, "prediction"))
-  rVals <- predict(glm(Sepal.Width ~ Sepal.Length + Species, data = iris), 
iris)
-  expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
-
-  # poisson family
-  model <- spark.glm(training, Sepal_Width ~ Sepal_Length + Species,
- family = poisson(link = identity))
-  prediction <- predict(model, training)
-  expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), 
"double")
-  vals <- collect(select(prediction, "prediction"))
-  rVals <- suppressWarnings(predict(glm(Sepal.Width ~ Sepal.Length + Species,
-data = iris, family = poisson(link = 
identity)), iris))
-  expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
-
-  # Gamma family
-  x <- runif(100, -1, 1)
-  y <- rgamma(100, rate = 10 / exp(0.5 + 1.2 * x), shape = 10)
-  df <- as.DataFrame(as.data.frame(list(x = x, y = y)))
-  model <- glm(y ~ x, family = Gamma, df)
-  out <- capture.output(print(summary(model)))
-  expect_true(any(grepl("Dispersion parameter for gamma family", out)))
-
-  # tweedie family
-  model <- spark.glm(training, Sepal_Width ~ Sepal_Length + Species,
- family = "tweedie", var.power = 1.2, link.power = 0.0)
-  prediction <- predict(model, training)
-  expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), 
"double")
-  vals <- collect(select(prediction, "prediction"))
-
-  # manual calculation of the R predicted values to avoid dependence on statmod
-  #' library(statmod)
-  #' rModel <- glm(Sepal.Width ~ Sepal.Length + Species, data = iris,
-  #' family = tweedie(var.power = 1.2, link.power = 0.0))
-  #' print(coef(rModel))
-
-  rCoef <- c(0.6455409, 0.1169143, -0.3224752, -0.3282174)
-  rVals <- exp(as.numeric(model.

[5/7] spark git commit: [SPARK-20877][SPARKR] refactor tests to basic tests only for CRAN

2017-06-11 Thread felixcheung
http://git-wip-us.apache.org/repos/asf/spark/blob/0b0be47e/R/pkg/inst/tests/testthat/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R 
b/R/pkg/inst/tests/testthat/test_sparkSQL.R
deleted file mode 100644
index d2d5191..000
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ /dev/null
@@ -1,3198 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-library(testthat)
-
-context("SparkSQL functions")
-
-# Utility function for easily checking the values of a StructField
-checkStructField <- function(actual, expectedName, expectedType, 
expectedNullable) {
-  expect_equal(class(actual), "structField")
-  expect_equal(actual$name(), expectedName)
-  expect_equal(actual$dataType.toString(), expectedType)
-  expect_equal(actual$nullable(), expectedNullable)
-}
-
-markUtf8 <- function(s) {
-  Encoding(s) <- "UTF-8"
-  s
-}
-
-setHiveContext <- function(sc) {
-  if (exists(".testHiveSession", envir = .sparkREnv)) {
-hiveSession <- get(".testHiveSession", envir = .sparkREnv)
-  } else {
-# initialize once and reuse
-ssc <- callJMethod(sc, "sc")
-hiveCtx <- tryCatch({
-  newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc, FALSE)
-},
-error = function(err) {
-  skip("Hive is not build with SparkSQL, skipped")
-})
-hiveSession <- callJMethod(hiveCtx, "sparkSession")
-  }
-  previousSession <- get(".sparkRsession", envir = .sparkREnv)
-  assign(".sparkRsession", hiveSession, envir = .sparkREnv)
-  assign(".prevSparkRsession", previousSession, envir = .sparkREnv)
-  hiveSession
-}
-
-unsetHiveContext <- function() {
-  previousSession <- get(".prevSparkRsession", envir = .sparkREnv)
-  assign(".sparkRsession", previousSession, envir = .sparkREnv)
-  remove(".prevSparkRsession", envir = .sparkREnv)
-}
-
-# Tests for SparkSQL functions in SparkR
-
-filesBefore <- list.files(path = sparkRDir, all.files = TRUE)
-sparkSession <- if (not_cran_or_windows_with_hadoop()) {
-sparkR.session(master = sparkRTestMaster)
-  } else {
-sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE)
-  }
-sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", 
"getJavaSparkContext", sparkSession)
-
-mockLines <- c("{\"name\":\"Michael\"}",
-   "{\"name\":\"Andy\", \"age\":30}",
-   "{\"name\":\"Justin\", \"age\":19}")
-jsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp")
-parquetPath <- tempfile(pattern = "sparkr-test", fileext = ".parquet")
-orcPath <- tempfile(pattern = "sparkr-test", fileext = ".orc")
-writeLines(mockLines, jsonPath)
-
-# For test nafunctions, like dropna(), fillna(),...
-mockLinesNa <- c("{\"name\":\"Bob\",\"age\":16,\"height\":176.5}",
- "{\"name\":\"Alice\",\"age\":null,\"height\":164.3}",
- "{\"name\":\"David\",\"age\":60,\"height\":null}",
- "{\"name\":\"Amy\",\"age\":null,\"height\":null}",
- "{\"name\":null,\"age\":null,\"height\":null}")
-jsonPathNa <- tempfile(pattern = "sparkr-test", fileext = ".tmp")
-writeLines(mockLinesNa, jsonPathNa)
-
-# For test complex types in DataFrame
-mockLinesComplexType <-
-  c("{\"c1\":[1, 2, 3], \"c2\":[\"a\", \"b\", \"c\"], \"c3\":[1.0, 2.0, 3.0]}",
-"{\"c1\":[4, 5, 6], \"c2\":[\"d\", \"e\", \"f\"], \"c3\":[4.0, 5.0, 6.0]}",
-"{\"c1\":[7, 8, 9], \"c2\":[\"g\", \"h\", \"i\"], \"c3\":[7.0, 8.0, 9.0]}")
-complexTypeJsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp")
-writeLines(mockLinesComplexType, complexTypeJsonPath)
-
-# For test map type and struct type in DataFrame
-mockLinesMapType <- c("{\"name\":\"Bob\",\"info\":{\"age\":16,\"height\":176.5}}",
-                      "{\"name\":\"Alice\",\"info\":{\"age\":20,\"height\":164.3}}",
-                      "{\"name\":\"David\",\"info\":{\"age\":60,\"height\":180}}")
-mapTypeJsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp")
-writeLines(mockLinesMapType, mapTypeJsonPath)
-
-if (.Platform$OS.type == "windows") {
-  Sys.setenv(TZ = "GMT")
-}
-
-test_that("calling sparkRSQL.init returns existing SQL context", {
-  skip_on_cran()
-
-  sqlContext <- suppressWarnings(sparkRSQL.init(sc))
-  expect

[7/7] spark git commit: [SPARK-20877][SPARKR] refactor tests to basic tests only for CRAN

2017-06-11 Thread felixcheung
[SPARK-20877][SPARKR] refactor tests to basic tests only for CRAN

## What changes were proposed in this pull request?

Move all existing tests to a non-installed directory so that they are never run 
when the SparkR package is installed

For a follow-up PR:
- remove all skip_on_cran() calls in tests
- clean up test timer
- improve or change the basic tests that do run on CRAN (if anyone has a suggestion)

It looks like `R CMD build pkg` will still put pkg/tests (i.e. the full tests) 
into the source package, but `R CMD INSTALL` on such a source package does not 
install these tests (and so `R CMD check` does not run them)
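
As context for the skip_on_cran() cleanup listed above, this is the gating pattern 
used throughout the full tests; a minimal, self-contained sketch (the test body is 
illustrative only, not taken from the suite):

  library(testthat)

  test_that("full test gated off CRAN", {
    skip_on_cran()          # testthat skips this test during a CRAN check
    expect_equal(1 + 1, 2)  # placeholder assertion; real tests exercise SparkR APIs
  })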

## How was this patch tested?

- [x] unit tests, Jenkins
- [x] AppVeyor
- [x] make a source package, install it, `R CMD check` it - verify the full 
tests are not installed or run

Author: Felix Cheung 

Closes #18264 from felixcheung/rtestset.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dc4c3518
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dc4c3518
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dc4c3518

Branch: refs/heads/master
Commit: dc4c351837879dab26ad8fb471dc51c06832a9e4
Parents: 5301a19
Author: Felix Cheung 
Authored: Sun Jun 11 00:00:33 2017 -0700
Committer: Felix Cheung 
Committed: Sun Jun 11 00:00:33 2017 -0700

--
 R/pkg/inst/tests/testthat/jarTest.R |   32 -
 R/pkg/inst/tests/testthat/packageInAJarTest.R   |   30 -
 R/pkg/inst/tests/testthat/test_Serde.R  |   85 -
 R/pkg/inst/tests/testthat/test_Windows.R|   32 -
 R/pkg/inst/tests/testthat/test_basic.R  |   90 +
 R/pkg/inst/tests/testthat/test_binaryFile.R |  100 -
 .../inst/tests/testthat/test_binary_function.R  |  110 -
 R/pkg/inst/tests/testthat/test_broadcast.R  |   55 -
 R/pkg/inst/tests/testthat/test_client.R |   51 -
 R/pkg/inst/tests/testthat/test_context.R|  226 --
 R/pkg/inst/tests/testthat/test_includePackage.R |   64 -
 R/pkg/inst/tests/testthat/test_jvm_api.R|   36 -
 .../tests/testthat/test_mllib_classification.R  |  396 --
 .../inst/tests/testthat/test_mllib_clustering.R |  328 --
 R/pkg/inst/tests/testthat/test_mllib_fpm.R  |   85 -
 .../tests/testthat/test_mllib_recommendation.R  |   67 -
 .../inst/tests/testthat/test_mllib_regression.R |  480 ---
 R/pkg/inst/tests/testthat/test_mllib_stat.R |   53 -
 R/pkg/inst/tests/testthat/test_mllib_tree.R |  320 --
 .../tests/testthat/test_parallelize_collect.R   |  120 -
 R/pkg/inst/tests/testthat/test_rdd.R|  906 -
 R/pkg/inst/tests/testthat/test_shuffle.R|  248 --
 R/pkg/inst/tests/testthat/test_sparkR.R |   48 -
 R/pkg/inst/tests/testthat/test_sparkSQL.R   | 3474 --
 R/pkg/inst/tests/testthat/test_streaming.R  |  167 -
 R/pkg/inst/tests/testthat/test_take.R   |   71 -
 R/pkg/inst/tests/testthat/test_textFile.R   |  182 -
 R/pkg/inst/tests/testthat/test_utils.R  |  248 --
 R/pkg/tests/fulltests/jarTest.R |   32 +
 R/pkg/tests/fulltests/packageInAJarTest.R   |   30 +
 R/pkg/tests/fulltests/test_Serde.R  |   85 +
 R/pkg/tests/fulltests/test_Windows.R|   32 +
 R/pkg/tests/fulltests/test_binaryFile.R |  100 +
 R/pkg/tests/fulltests/test_binary_function.R|  110 +
 R/pkg/tests/fulltests/test_broadcast.R  |   55 +
 R/pkg/tests/fulltests/test_client.R |   51 +
 R/pkg/tests/fulltests/test_context.R|  226 ++
 R/pkg/tests/fulltests/test_includePackage.R |   64 +
 R/pkg/tests/fulltests/test_jvm_api.R|   36 +
 .../tests/fulltests/test_mllib_classification.R |  396 ++
 R/pkg/tests/fulltests/test_mllib_clustering.R   |  328 ++
 R/pkg/tests/fulltests/test_mllib_fpm.R  |   85 +
 .../tests/fulltests/test_mllib_recommendation.R |   67 +
 R/pkg/tests/fulltests/test_mllib_regression.R   |  480 +++
 R/pkg/tests/fulltests/test_mllib_stat.R |   53 +
 R/pkg/tests/fulltests/test_mllib_tree.R |  320 ++
 .../tests/fulltests/test_parallelize_collect.R  |  120 +
 R/pkg/tests/fulltests/test_rdd.R|  906 +
 R/pkg/tests/fulltests/test_shuffle.R|  248 ++
 R/pkg/tests/fulltests/test_sparkR.R |   48 +
 R/pkg/tests/fulltests/test_sparkSQL.R   | 3474 ++
 R/pkg/tests/fulltests/test_streaming.R  |  167 +
 R/pkg/tests/fulltests/test_take.R   |   71 +
 R/pkg/tests/fulltests/test_textFile.R   |  182 +
 R/pkg/tests/fulltests/test_utils.R  |  248 ++
 R/pkg/tests/run-all.R   |8 +
 56 files changed, 8112 insertions(+), 8014 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/dc4c3518/R/pkg/inst/tests/testthat/jarTest.R
-

[4/7] spark git commit: [SPARK-20877][SPARKR] refactor tests to basic tests only for CRAN

2017-06-11 Thread felixcheung
http://git-wip-us.apache.org/repos/asf/spark/blob/0b0be47e/R/pkg/inst/tests/testthat/test_streaming.R
--
diff --git a/R/pkg/inst/tests/testthat/test_streaming.R 
b/R/pkg/inst/tests/testthat/test_streaming.R
deleted file mode 100644
index b20b431..000
--- a/R/pkg/inst/tests/testthat/test_streaming.R
+++ /dev/null
@@ -1,167 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-library(testthat)
-
-context("Structured Streaming")
-
-# Tests for Structured Streaming functions in SparkR
-
-sparkSession <- sparkR.session(master = sparkRTestMaster, enableHiveSupport = 
FALSE)
-
-jsonSubDir <- file.path("sparkr-test", "json", "")
-if (.Platform$OS.type == "windows") {
-  # file.path removes the empty separator on Windows, adds it back
-  jsonSubDir <- paste0(jsonSubDir, .Platform$file.sep)
-}
-jsonDir <- file.path(tempdir(), jsonSubDir)
-dir.create(jsonDir, recursive = TRUE)
-
-mockLines <- c("{\"name\":\"Michael\"}",
-   "{\"name\":\"Andy\", \"age\":30}",
-   "{\"name\":\"Justin\", \"age\":19}")
-jsonPath <- tempfile(pattern = jsonSubDir, fileext = ".tmp")
-writeLines(mockLines, jsonPath)
-
-mockLinesNa <- c("{\"name\":\"Bob\",\"age\":16,\"height\":176.5}",
- "{\"name\":\"Alice\",\"age\":null,\"height\":164.3}",
- "{\"name\":\"David\",\"age\":60,\"height\":null}")
-jsonPathNa <- tempfile(pattern = jsonSubDir, fileext = ".tmp")
-
-schema <- structType(structField("name", "string"),
- structField("age", "integer"),
- structField("count", "double"))
-
-test_that("read.stream, write.stream, awaitTermination, stopQuery", {
-  skip_on_cran()
-
-  df <- read.stream("json", path = jsonDir, schema = schema, 
maxFilesPerTrigger = 1)
-  expect_true(isStreaming(df))
-  counts <- count(group_by(df, "name"))
-  q <- write.stream(counts, "memory", queryName = "people", outputMode = 
"complete")
-
-  expect_false(awaitTermination(q, 5 * 1000))
-  callJMethod(q@ssq, "processAllAvailable")
-  expect_equal(head(sql("SELECT count(*) FROM people"))[[1]], 3)
-
-  writeLines(mockLinesNa, jsonPathNa)
-  awaitTermination(q, 5 * 1000)
-  callJMethod(q@ssq, "processAllAvailable")
-  expect_equal(head(sql("SELECT count(*) FROM people"))[[1]], 6)
-
-  stopQuery(q)
-  expect_true(awaitTermination(q, 1))
-  expect_error(awaitTermination(q), NA)
-})
-
-test_that("print from explain, lastProgress, status, isActive", {
-  skip_on_cran()
-
-  df <- read.stream("json", path = jsonDir, schema = schema)
-  expect_true(isStreaming(df))
-  counts <- count(group_by(df, "name"))
-  q <- write.stream(counts, "memory", queryName = "people2", outputMode = 
"complete")
-
-  awaitTermination(q, 5 * 1000)
-  callJMethod(q@ssq, "processAllAvailable")
-
-  expect_equal(capture.output(explain(q))[[1]], "== Physical Plan ==")
-  expect_true(any(grepl("\"description\" : \"MemorySink\"", capture.output(lastProgress(q)))))
-  expect_true(any(grepl("\"isTriggerActive\" : ", capture.output(status(q)))))
-
-  expect_equal(queryName(q), "people2")
-  expect_true(isActive(q))
-
-  stopQuery(q)
-})
-
-test_that("Stream other format", {
-  skip_on_cran()
-
-  parquetPath <- tempfile(pattern = "sparkr-test", fileext = ".parquet")
-  df <- read.df(jsonPath, "json", schema)
-  write.df(df, parquetPath, "parquet", "overwrite")
-
-  df <- read.stream(path = parquetPath, schema = schema)
-  expect_true(isStreaming(df))
-  counts <- count(group_by(df, "name"))
-  q <- write.stream(counts, "memory", queryName = "people3", outputMode = 
"complete")
-
-  expect_false(awaitTermination(q, 5 * 1000))
-  callJMethod(q@ssq, "processAllAvailable")
-  expect_equal(head(sql("SELECT count(*) FROM people3"))[[1]], 3)
-
-  expect_equal(queryName(q), "people3")
-  expect_true(any(grepl("\"description\" : \"FileStreamSource[[:print:]]+parquet",
-                        capture.output(lastProgress(q)))))
-  expect_true(isActive(q))
-
-  stopQuery(q)
-  expect_true(awaitTermination(q, 1))
-  expect_false(isActive(q))
-
-  unlink(parquetPath)
-})
-
-test_that("Non-streaming DataFrame", {
-  skip_on_cran()
-
-  c <- as.DataFrame(cars)
-  expect_false(isStreaming(c))
-
-  expect_error(write.stream(c, "

[4/7] spark git commit: [SPARK-20877][SPARKR] refactor tests to basic tests only for CRAN

2017-06-11 Thread felixcheung
http://git-wip-us.apache.org/repos/asf/spark/blob/dc4c3518/R/pkg/inst/tests/testthat/test_streaming.R
--
diff --git a/R/pkg/inst/tests/testthat/test_streaming.R 
b/R/pkg/inst/tests/testthat/test_streaming.R
deleted file mode 100644
index b20b431..000
--- a/R/pkg/inst/tests/testthat/test_streaming.R
+++ /dev/null
@@ -1,167 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-library(testthat)
-
-context("Structured Streaming")
-
-# Tests for Structured Streaming functions in SparkR
-
-sparkSession <- sparkR.session(master = sparkRTestMaster, enableHiveSupport = 
FALSE)
-
-jsonSubDir <- file.path("sparkr-test", "json", "")
-if (.Platform$OS.type == "windows") {
-  # file.path removes the empty separator on Windows, adds it back
-  jsonSubDir <- paste0(jsonSubDir, .Platform$file.sep)
-}
-jsonDir <- file.path(tempdir(), jsonSubDir)
-dir.create(jsonDir, recursive = TRUE)
-
-mockLines <- c("{\"name\":\"Michael\"}",
-   "{\"name\":\"Andy\", \"age\":30}",
-   "{\"name\":\"Justin\", \"age\":19}")
-jsonPath <- tempfile(pattern = jsonSubDir, fileext = ".tmp")
-writeLines(mockLines, jsonPath)
-
-mockLinesNa <- c("{\"name\":\"Bob\",\"age\":16,\"height\":176.5}",
- "{\"name\":\"Alice\",\"age\":null,\"height\":164.3}",
- "{\"name\":\"David\",\"age\":60,\"height\":null}")
-jsonPathNa <- tempfile(pattern = jsonSubDir, fileext = ".tmp")
-
-schema <- structType(structField("name", "string"),
- structField("age", "integer"),
- structField("count", "double"))
-
-test_that("read.stream, write.stream, awaitTermination, stopQuery", {
-  skip_on_cran()
-
-  df <- read.stream("json", path = jsonDir, schema = schema, 
maxFilesPerTrigger = 1)
-  expect_true(isStreaming(df))
-  counts <- count(group_by(df, "name"))
-  q <- write.stream(counts, "memory", queryName = "people", outputMode = 
"complete")
-
-  expect_false(awaitTermination(q, 5 * 1000))
-  callJMethod(q@ssq, "processAllAvailable")
-  expect_equal(head(sql("SELECT count(*) FROM people"))[[1]], 3)
-
-  writeLines(mockLinesNa, jsonPathNa)
-  awaitTermination(q, 5 * 1000)
-  callJMethod(q@ssq, "processAllAvailable")
-  expect_equal(head(sql("SELECT count(*) FROM people"))[[1]], 6)
-
-  stopQuery(q)
-  expect_true(awaitTermination(q, 1))
-  expect_error(awaitTermination(q), NA)
-})
-
-test_that("print from explain, lastProgress, status, isActive", {
-  skip_on_cran()
-
-  df <- read.stream("json", path = jsonDir, schema = schema)
-  expect_true(isStreaming(df))
-  counts <- count(group_by(df, "name"))
-  q <- write.stream(counts, "memory", queryName = "people2", outputMode = 
"complete")
-
-  awaitTermination(q, 5 * 1000)
-  callJMethod(q@ssq, "processAllAvailable")
-
-  expect_equal(capture.output(explain(q))[[1]], "== Physical Plan ==")
-  expect_true(any(grepl("\"description\" : \"MemorySink\"", capture.output(lastProgress(q)))))
-  expect_true(any(grepl("\"isTriggerActive\" : ", capture.output(status(q)))))
-
-  expect_equal(queryName(q), "people2")
-  expect_true(isActive(q))
-
-  stopQuery(q)
-})
-
-test_that("Stream other format", {
-  skip_on_cran()
-
-  parquetPath <- tempfile(pattern = "sparkr-test", fileext = ".parquet")
-  df <- read.df(jsonPath, "json", schema)
-  write.df(df, parquetPath, "parquet", "overwrite")
-
-  df <- read.stream(path = parquetPath, schema = schema)
-  expect_true(isStreaming(df))
-  counts <- count(group_by(df, "name"))
-  q <- write.stream(counts, "memory", queryName = "people3", outputMode = 
"complete")
-
-  expect_false(awaitTermination(q, 5 * 1000))
-  callJMethod(q@ssq, "processAllAvailable")
-  expect_equal(head(sql("SELECT count(*) FROM people3"))[[1]], 3)
-
-  expect_equal(queryName(q), "people3")
-  expect_true(any(grepl("\"description\" : \"FileStreamSource[[:print:]]+parquet",
-                        capture.output(lastProgress(q)))))
-  expect_true(isActive(q))
-
-  stopQuery(q)
-  expect_true(awaitTermination(q, 1))
-  expect_false(isActive(q))
-
-  unlink(parquetPath)
-})
-
-test_that("Non-streaming DataFrame", {
-  skip_on_cran()
-
-  c <- as.DataFrame(cars)
-  expect_false(isStreaming(c))
-
-  expect_error(write.stream(c, "

[1/7] spark git commit: [SPARK-20877][SPARKR] refactor tests to basic tests only for CRAN

2017-06-11 Thread felixcheung
Repository: spark
Updated Branches:
  refs/heads/master 5301a19a0 -> dc4c35183


http://git-wip-us.apache.org/repos/asf/spark/blob/dc4c3518/R/pkg/tests/fulltests/test_streaming.R
--
diff --git a/R/pkg/tests/fulltests/test_streaming.R 
b/R/pkg/tests/fulltests/test_streaming.R
new file mode 100644
index 000..b20b431
--- /dev/null
+++ b/R/pkg/tests/fulltests/test_streaming.R
@@ -0,0 +1,167 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+library(testthat)
+
+context("Structured Streaming")
+
+# Tests for Structured Streaming functions in SparkR
+
+sparkSession <- sparkR.session(master = sparkRTestMaster, enableHiveSupport = 
FALSE)
+
+jsonSubDir <- file.path("sparkr-test", "json", "")
+if (.Platform$OS.type == "windows") {
+  # file.path removes the empty separator on Windows, adds it back
+  jsonSubDir <- paste0(jsonSubDir, .Platform$file.sep)
+}
+jsonDir <- file.path(tempdir(), jsonSubDir)
+dir.create(jsonDir, recursive = TRUE)
+
+mockLines <- c("{\"name\":\"Michael\"}",
+   "{\"name\":\"Andy\", \"age\":30}",
+   "{\"name\":\"Justin\", \"age\":19}")
+jsonPath <- tempfile(pattern = jsonSubDir, fileext = ".tmp")
+writeLines(mockLines, jsonPath)
+
+mockLinesNa <- c("{\"name\":\"Bob\",\"age\":16,\"height\":176.5}",
+ "{\"name\":\"Alice\",\"age\":null,\"height\":164.3}",
+ "{\"name\":\"David\",\"age\":60,\"height\":null}")
+jsonPathNa <- tempfile(pattern = jsonSubDir, fileext = ".tmp")
+
+schema <- structType(structField("name", "string"),
+ structField("age", "integer"),
+ structField("count", "double"))
+
+test_that("read.stream, write.stream, awaitTermination, stopQuery", {
+  skip_on_cran()
+
+  df <- read.stream("json", path = jsonDir, schema = schema, 
maxFilesPerTrigger = 1)
+  expect_true(isStreaming(df))
+  counts <- count(group_by(df, "name"))
+  q <- write.stream(counts, "memory", queryName = "people", outputMode = 
"complete")
+
+  expect_false(awaitTermination(q, 5 * 1000))
+  callJMethod(q@ssq, "processAllAvailable")
+  expect_equal(head(sql("SELECT count(*) FROM people"))[[1]], 3)
+
+  writeLines(mockLinesNa, jsonPathNa)
+  awaitTermination(q, 5 * 1000)
+  callJMethod(q@ssq, "processAllAvailable")
+  expect_equal(head(sql("SELECT count(*) FROM people"))[[1]], 6)
+
+  stopQuery(q)
+  expect_true(awaitTermination(q, 1))
+  expect_error(awaitTermination(q), NA)
+})
+
+test_that("print from explain, lastProgress, status, isActive", {
+  skip_on_cran()
+
+  df <- read.stream("json", path = jsonDir, schema = schema)
+  expect_true(isStreaming(df))
+  counts <- count(group_by(df, "name"))
+  q <- write.stream(counts, "memory", queryName = "people2", outputMode = 
"complete")
+
+  awaitTermination(q, 5 * 1000)
+  callJMethod(q@ssq, "processAllAvailable")
+
+  expect_equal(capture.output(explain(q))[[1]], "== Physical Plan ==")
+  expect_true(any(grepl("\"description\" : \"MemorySink\"", capture.output(lastProgress(q)))))
+  expect_true(any(grepl("\"isTriggerActive\" : ", capture.output(status(q)))))
+
+  expect_equal(queryName(q), "people2")
+  expect_true(isActive(q))
+
+  stopQuery(q)
+})
+
+test_that("Stream other format", {
+  skip_on_cran()
+
+  parquetPath <- tempfile(pattern = "sparkr-test", fileext = ".parquet")
+  df <- read.df(jsonPath, "json", schema)
+  write.df(df, parquetPath, "parquet", "overwrite")
+
+  df <- read.stream(path = parquetPath, schema = schema)
+  expect_true(isStreaming(df))
+  counts <- count(group_by(df, "name"))
+  q <- write.stream(counts, "memory", queryName = "people3", outputMode = 
"complete")
+
+  expect_false(awaitTermination(q, 5 * 1000))
+  callJMethod(q@ssq, "processAllAvailable")
+  expect_equal(head(sql("SELECT count(*) FROM people3"))[[1]], 3)
+
+  expect_equal(queryName(q), "people3")
+  expect_true(any(grepl("\"description\" : \"FileStreamSource[[:print:]]+parquet",
+                        capture.output(lastProgress(q)))))
+  expect_true(isActive(q))
+
+  stopQuery(q)
+  expect_true(awaitTermination(q, 1))
+  expect_false(isActive(q))
+
+  unlink(parquetPath)
+})
+
+test_that("Non-streaming DataFrame", {
+  skip_on_cran()
+
+  c <- as.DataFrame(cars)
+  exp

[2/7] spark git commit: [SPARK-20877][SPARKR] refactor tests to basic tests only for CRAN

2017-06-11 Thread felixcheung
http://git-wip-us.apache.org/repos/asf/spark/blob/dc4c3518/R/pkg/tests/fulltests/test_sparkSQL.R
--
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R 
b/R/pkg/tests/fulltests/test_sparkSQL.R
new file mode 100644
index 000..c790d02
--- /dev/null
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -0,0 +1,3474 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+library(testthat)
+
+context("SparkSQL functions")
+
+# Utility function for easily checking the values of a StructField
+checkStructField <- function(actual, expectedName, expectedType, 
expectedNullable) {
+  expect_equal(class(actual), "structField")
+  expect_equal(actual$name(), expectedName)
+  expect_equal(actual$dataType.toString(), expectedType)
+  expect_equal(actual$nullable(), expectedNullable)
+}
+
+markUtf8 <- function(s) {
+  Encoding(s) <- "UTF-8"
+  s
+}
+
+setHiveContext <- function(sc) {
+  if (exists(".testHiveSession", envir = .sparkREnv)) {
+hiveSession <- get(".testHiveSession", envir = .sparkREnv)
+  } else {
+# initialize once and reuse
+ssc <- callJMethod(sc, "sc")
+hiveCtx <- tryCatch({
+  newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc, FALSE)
+},
+error = function(err) {
+  skip("Hive is not build with SparkSQL, skipped")
+})
+hiveSession <- callJMethod(hiveCtx, "sparkSession")
+  }
+  previousSession <- get(".sparkRsession", envir = .sparkREnv)
+  assign(".sparkRsession", hiveSession, envir = .sparkREnv)
+  assign(".prevSparkRsession", previousSession, envir = .sparkREnv)
+  hiveSession
+}
+
+unsetHiveContext <- function() {
+  previousSession <- get(".prevSparkRsession", envir = .sparkREnv)
+  assign(".sparkRsession", previousSession, envir = .sparkREnv)
+  remove(".prevSparkRsession", envir = .sparkREnv)
+}
+
+# Tests for SparkSQL functions in SparkR
+
+filesBefore <- list.files(path = sparkRDir, all.files = TRUE)
+sparkSession <- if (not_cran_or_windows_with_hadoop()) {
+sparkR.session(master = sparkRTestMaster)
+  } else {
+sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE)
+  }
+sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", 
"getJavaSparkContext", sparkSession)
+
+mockLines <- c("{\"name\":\"Michael\"}",
+   "{\"name\":\"Andy\", \"age\":30}",
+   "{\"name\":\"Justin\", \"age\":19}")
+jsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp")
+parquetPath <- tempfile(pattern = "sparkr-test", fileext = ".parquet")
+orcPath <- tempfile(pattern = "sparkr-test", fileext = ".orc")
+writeLines(mockLines, jsonPath)
+
+# For test nafunctions, like dropna(), fillna(),...
+mockLinesNa <- c("{\"name\":\"Bob\",\"age\":16,\"height\":176.5}",
+ "{\"name\":\"Alice\",\"age\":null,\"height\":164.3}",
+ "{\"name\":\"David\",\"age\":60,\"height\":null}",
+ "{\"name\":\"Amy\",\"age\":null,\"height\":null}",
+ "{\"name\":null,\"age\":null,\"height\":null}")
+jsonPathNa <- tempfile(pattern = "sparkr-test", fileext = ".tmp")
+writeLines(mockLinesNa, jsonPathNa)
+
+# For test complex types in DataFrame
+mockLinesComplexType <-
+  c("{\"c1\":[1, 2, 3], \"c2\":[\"a\", \"b\", \"c\"], \"c3\":[1.0, 2.0, 3.0]}",
+"{\"c1\":[4, 5, 6], \"c2\":[\"d\", \"e\", \"f\"], \"c3\":[4.0, 5.0, 6.0]}",
+"{\"c1\":[7, 8, 9], \"c2\":[\"g\", \"h\", \"i\"], \"c3\":[7.0, 8.0, 9.0]}")
+complexTypeJsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp")
+writeLines(mockLinesComplexType, complexTypeJsonPath)
+
+# For test map type and struct type in DataFrame
+mockLinesMapType <- c("{\"name\":\"Bob\",\"info\":{\"age\":16,\"height\":176.5}}",
+                      "{\"name\":\"Alice\",\"info\":{\"age\":20,\"height\":164.3}}",
+                      "{\"name\":\"David\",\"info\":{\"age\":60,\"height\":180}}")
+mapTypeJsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp")
+writeLines(mockLinesMapType, mapTypeJsonPath)
+
+if (.Platform$OS.type == "windows") {
+  Sys.setenv(TZ = "GMT")
+}
+
+test_that("calling sparkRSQL.init returns existing SQL context", {
+  skip_on_cran()
+
+  sqlContext <- suppressWarnings(sparkRSQL.init(sc))
+  expect_equal(suppressWarni

[3/7] spark git commit: [SPARK-20877][SPARKR] refactor tests to basic tests only for CRAN

2017-06-11 Thread felixcheung
http://git-wip-us.apache.org/repos/asf/spark/blob/0b0be47e/R/pkg/tests/fulltests/test_mllib_fpm.R
--
diff --git a/R/pkg/tests/fulltests/test_mllib_fpm.R 
b/R/pkg/tests/fulltests/test_mllib_fpm.R
new file mode 100644
index 000..4e10ca1
--- /dev/null
+++ b/R/pkg/tests/fulltests/test_mllib_fpm.R
@@ -0,0 +1,85 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+library(testthat)
+
+context("MLlib frequent pattern mining")
+
+# Tests for MLlib frequent pattern mining algorithms in SparkR
+sparkSession <- sparkR.session(master = sparkRTestMaster, enableHiveSupport = 
FALSE)
+
+test_that("spark.fpGrowth", {
+  data <- selectExpr(createDataFrame(data.frame(items = c(
+"1,2",
+"1,2",
+"1,2,3",
+"1,3"
+  ))), "split(items, ',') as items")
+
+  model <- spark.fpGrowth(data, minSupport = 0.3, minConfidence = 0.8, 
numPartitions = 1)
+
+  itemsets <- collect(spark.freqItemsets(model))
+
+  expected_itemsets <- data.frame(
+items = I(list(list("3"), list("3", "1"), list("2"), list("2", "1"), 
list("1"))),
+freq = c(2, 2, 3, 3, 4)
+  )
+
+  expect_equivalent(expected_itemsets, itemsets)
+
+  expected_association_rules <- data.frame(
+antecedent = I(list(list("2"), list("3"))),
+consequent = I(list(list("1"), list("1"))),
+confidence = c(1, 1)
+  )
+
+  expect_equivalent(expected_association_rules, 
collect(spark.associationRules(model)))
+
+  new_data <- selectExpr(createDataFrame(data.frame(items = c(
+"1,2",
+"1,3",
+"2,3"
+  ))), "split(items, ',') as items")
+
+  expected_predictions <- data.frame(
+items = I(list(list("1", "2"), list("1", "3"), list("2", "3"))),
+prediction = I(list(list(), list(), list("1")))
+  )
+
+  expect_equivalent(expected_predictions, collect(predict(model, new_data)))
+
+  if (not_cran_or_windows_with_hadoop()) {
+modelPath <- tempfile(pattern = "spark-fpm", fileext = ".tmp")
+write.ml(model, modelPath, overwrite = TRUE)
+loaded_model <- read.ml(modelPath)
+
+expect_equivalent(
+  itemsets,
+  collect(spark.freqItemsets(loaded_model)))
+
+unlink(modelPath)
+  }
+
+  model_without_numpartitions <- spark.fpGrowth(data, minSupport = 0.3, 
minConfidence = 0.8)
+  expect_equal(
+count(spark.freqItemsets(model_without_numpartitions)),
+count(spark.freqItemsets(model))
+  )
+
+})
+
+sparkR.session.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/0b0be47e/R/pkg/tests/fulltests/test_mllib_recommendation.R
--
diff --git a/R/pkg/tests/fulltests/test_mllib_recommendation.R 
b/R/pkg/tests/fulltests/test_mllib_recommendation.R
new file mode 100644
index 000..cc8064f
--- /dev/null
+++ b/R/pkg/tests/fulltests/test_mllib_recommendation.R
@@ -0,0 +1,67 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+library(testthat)
+
+context("MLlib recommendation algorithms")
+
+# Tests for MLlib recommendation algorithms in SparkR
+sparkSession <- sparkR.session(master = sparkRTestMaster, enableHiveSupport = 
FALSE)
+
+test_that("spark.als", {
+  data <- list(list(0, 0, 4.0), list(0, 1, 2.0), list(1, 1, 3.0), list(1, 2, 
4.0),
+   list(2, 1, 1.0), list(2, 2, 5.0))
+  df <- createDataFrame(data, c("user", "item", "score"))
+  model <- spark.als(df, ratingCol = "score", userCol = "user", itemCol = 
"item",
+ rank = 10, maxIter = 5, seed = 0, regParam = 0.1)
+  stats <-

[3/7] spark git commit: [SPARK-20877][SPARKR] refactor tests to basic tests only for CRAN

2017-06-11 Thread felixcheung
http://git-wip-us.apache.org/repos/asf/spark/blob/dc4c3518/R/pkg/tests/fulltests/test_mllib_fpm.R
--
diff --git a/R/pkg/tests/fulltests/test_mllib_fpm.R 
b/R/pkg/tests/fulltests/test_mllib_fpm.R
new file mode 100644
index 000..4e10ca1
--- /dev/null
+++ b/R/pkg/tests/fulltests/test_mllib_fpm.R
@@ -0,0 +1,85 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+library(testthat)
+
+context("MLlib frequent pattern mining")
+
+# Tests for MLlib frequent pattern mining algorithms in SparkR
+sparkSession <- sparkR.session(master = sparkRTestMaster, enableHiveSupport = 
FALSE)
+
+test_that("spark.fpGrowth", {
+  data <- selectExpr(createDataFrame(data.frame(items = c(
+"1,2",
+"1,2",
+"1,2,3",
+"1,3"
+  ))), "split(items, ',') as items")
+
+  model <- spark.fpGrowth(data, minSupport = 0.3, minConfidence = 0.8, 
numPartitions = 1)
+
+  itemsets <- collect(spark.freqItemsets(model))
+
+  expected_itemsets <- data.frame(
+items = I(list(list("3"), list("3", "1"), list("2"), list("2", "1"), 
list("1"))),
+freq = c(2, 2, 3, 3, 4)
+  )
+
+  expect_equivalent(expected_itemsets, itemsets)
+
+  expected_association_rules <- data.frame(
+antecedent = I(list(list("2"), list("3"))),
+consequent = I(list(list("1"), list("1"))),
+confidence = c(1, 1)
+  )
+
+  expect_equivalent(expected_association_rules, 
collect(spark.associationRules(model)))
+
+  new_data <- selectExpr(createDataFrame(data.frame(items = c(
+"1,2",
+"1,3",
+"2,3"
+  ))), "split(items, ',') as items")
+
+  expected_predictions <- data.frame(
+items = I(list(list("1", "2"), list("1", "3"), list("2", "3"))),
+prediction = I(list(list(), list(), list("1")))
+  )
+
+  expect_equivalent(expected_predictions, collect(predict(model, new_data)))
+
+  if (not_cran_or_windows_with_hadoop()) {
+modelPath <- tempfile(pattern = "spark-fpm", fileext = ".tmp")
+write.ml(model, modelPath, overwrite = TRUE)
+loaded_model <- read.ml(modelPath)
+
+expect_equivalent(
+  itemsets,
+  collect(spark.freqItemsets(loaded_model)))
+
+unlink(modelPath)
+  }
+
+  model_without_numpartitions <- spark.fpGrowth(data, minSupport = 0.3, 
minConfidence = 0.8)
+  expect_equal(
+count(spark.freqItemsets(model_without_numpartitions)),
+count(spark.freqItemsets(model))
+  )
+
+})
+
+sparkR.session.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/dc4c3518/R/pkg/tests/fulltests/test_mllib_recommendation.R
--
diff --git a/R/pkg/tests/fulltests/test_mllib_recommendation.R 
b/R/pkg/tests/fulltests/test_mllib_recommendation.R
new file mode 100644
index 000..cc8064f
--- /dev/null
+++ b/R/pkg/tests/fulltests/test_mllib_recommendation.R
@@ -0,0 +1,67 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+library(testthat)
+
+context("MLlib recommendation algorithms")
+
+# Tests for MLlib recommendation algorithms in SparkR
+sparkSession <- sparkR.session(master = sparkRTestMaster, enableHiveSupport = 
FALSE)
+
+test_that("spark.als", {
+  data <- list(list(0, 0, 4.0), list(0, 1, 2.0), list(1, 1, 3.0), list(1, 2, 
4.0),
+   list(2, 1, 1.0), list(2, 2, 5.0))
+  df <- createDataFrame(data, c("user", "item", "score"))
+  model <- spark.als(df, ratingCol = "score", userCol = "user", itemCol = 
"item",
+ rank = 10, maxIter = 5, seed = 0, regParam = 0.1)
+  stats <-

[6/7] spark git commit: [SPARK-20877][SPARKR] refactor tests to basic tests only for CRAN

2017-06-11 Thread felixcheung
http://git-wip-us.apache.org/repos/asf/spark/blob/0b0be47e/R/pkg/inst/tests/testthat/test_mllib_regression.R
--
diff --git a/R/pkg/inst/tests/testthat/test_mllib_regression.R 
b/R/pkg/inst/tests/testthat/test_mllib_regression.R
deleted file mode 100644
index b05fdd3..000
--- a/R/pkg/inst/tests/testthat/test_mllib_regression.R
+++ /dev/null
@@ -1,480 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-library(testthat)
-
-context("MLlib regression algorithms, except for tree-based algorithms")
-
-# Tests for MLlib regression algorithms in SparkR
-sparkSession <- sparkR.session(master = sparkRTestMaster, enableHiveSupport = 
FALSE)
-
-test_that("formula of spark.glm", {
-  skip_on_cran()
-
-  training <- suppressWarnings(createDataFrame(iris))
-  # directly calling the spark API
-  # dot minus and intercept vs native glm
-  model <- spark.glm(training, Sepal_Width ~ . - Species + 0)
-  vals <- collect(select(predict(model, training), "prediction"))
-  rVals <- predict(glm(Sepal.Width ~ . - Species + 0, data = iris), iris)
-  expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
-
-  # feature interaction vs native glm
-  model <- spark.glm(training, Sepal_Width ~ Species:Sepal_Length)
-  vals <- collect(select(predict(model, training), "prediction"))
-  rVals <- predict(glm(Sepal.Width ~ Species:Sepal.Length, data = iris), iris)
-  expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
-
-  # glm should work with long formula
-  training <- suppressWarnings(createDataFrame(iris))
-  training$LongLongLongLongLongName <- training$Sepal_Width
-  training$VeryLongLongLongLonLongName <- training$Sepal_Length
-  training$AnotherLongLongLongLongName <- training$Species
-  model <- spark.glm(training, LongLongLongLongLongName ~ 
VeryLongLongLongLonLongName +
-AnotherLongLongLongLongName)
-  vals <- collect(select(predict(model, training), "prediction"))
-  rVals <- predict(glm(Sepal.Width ~ Sepal.Length + Species, data = iris), 
iris)
-  expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
-})
-
-test_that("spark.glm and predict", {
-  training <- suppressWarnings(createDataFrame(iris))
-  # gaussian family
-  model <- spark.glm(training, Sepal_Width ~ Sepal_Length + Species)
-  prediction <- predict(model, training)
-  expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), 
"double")
-  vals <- collect(select(prediction, "prediction"))
-  rVals <- predict(glm(Sepal.Width ~ Sepal.Length + Species, data = iris), 
iris)
-  expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
-
-  # poisson family
-  model <- spark.glm(training, Sepal_Width ~ Sepal_Length + Species,
- family = poisson(link = identity))
-  prediction <- predict(model, training)
-  expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), 
"double")
-  vals <- collect(select(prediction, "prediction"))
-  rVals <- suppressWarnings(predict(glm(Sepal.Width ~ Sepal.Length + Species,
-data = iris, family = poisson(link = 
identity)), iris))
-  expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
-
-  # Gamma family
-  x <- runif(100, -1, 1)
-  y <- rgamma(100, rate = 10 / exp(0.5 + 1.2 * x), shape = 10)
-  df <- as.DataFrame(as.data.frame(list(x = x, y = y)))
-  model <- glm(y ~ x, family = Gamma, df)
-  out <- capture.output(print(summary(model)))
-  expect_true(any(grepl("Dispersion parameter for gamma family", out)))
-
-  # tweedie family
-  model <- spark.glm(training, Sepal_Width ~ Sepal_Length + Species,
- family = "tweedie", var.power = 1.2, link.power = 0.0)
-  prediction <- predict(model, training)
-  expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), 
"double")
-  vals <- collect(select(prediction, "prediction"))
-
-  # manual calculation of the R predicted values to avoid dependence on statmod
-  #' library(statmod)
-  #' rModel <- glm(Sepal.Width ~ Sepal.Length + Species, data = iris,
-  #' family = tweedie(var.power = 1.2, link.power = 0.0))
-  #' print(coef(rModel))
-
-  rCoef <- c(0.6455409, 0.1169143, -0.3224752, -0.3282174)
-  rVals <- exp(as.numeric(model.