spark git commit: [SPARK-3873][YARN] Fix import ordering.

2015-12-30 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/master ee8f8d318 -> fd313


[SPARK-3873][YARN] Fix import ordering.

Author: Marcelo Vanzin 

Closes #10536 from vanzin/SPARK-3873-yarn.
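
For context, the diffs below mostly move the `org.apache.spark` imports into their own group. A hedged sketch (not part of the patch itself) of the grouping the style checker expects, using imports that appear in the files touched here: java/javax first, then scala, then third-party libraries, then `org.apache.spark`, each group blank-line separated and sorted.

```
// Sketch of the expected import grouping; not taken verbatim from any one file.
import java.io.File
import java.util.concurrent.atomic.AtomicReference

import scala.util.control.NonFatal

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

import org.apache.spark.{Logging, SparkConf}
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.util.ThreadUtils
```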


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fd31
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fd31
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fd31

Branch: refs/heads/master
Commit: fd313864e21e3a9c95577723c931357d1f16
Parents: ee8f8d3
Author: Marcelo Vanzin 
Authored: Wed Dec 30 18:20:27 2015 -0800
Committer: Reynold Xin 
Committed: Wed Dec 30 18:20:27 2015 -0800

--
 .../spark/deploy/yarn/AMDelegationTokenRenewer.scala|  2 +-
 .../apache/spark/deploy/yarn/ApplicationMaster.scala|  6 +++---
 .../spark/deploy/yarn/ApplicationMasterArguments.scala  |  5 +++--
 .../scala/org/apache/spark/deploy/yarn/Client.scala | 12 +---
 .../deploy/yarn/ClientDistributedCacheManager.scala |  2 +-
 .../deploy/yarn/ExecutorDelegationTokenUpdater.scala|  6 +++---
 .../org/apache/spark/deploy/yarn/ExecutorRunnable.scala |  4 ++--
 .../org/apache/spark/deploy/yarn/YarnAllocator.scala|  1 -
 .../org/apache/spark/deploy/yarn/YarnRMClient.scala |  2 +-
 .../apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala  |  6 +++---
 .../scheduler/cluster/YarnClientSchedulerBackend.scala  |  2 +-
 .../apache/spark/scheduler/cluster/YarnScheduler.scala  |  1 -
 12 files changed, 23 insertions(+), 26 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/fd31/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala
--
diff --git 
a/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala
 
b/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala
index 56e4741..b8daa50 100644
--- 
a/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala
+++ 
b/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala
@@ -24,9 +24,9 @@ import scala.language.postfixOps
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{FileSystem, Path}
 import org.apache.hadoop.security.UserGroupInformation
-import org.apache.spark.deploy.SparkHadoopUtil
 
 import org.apache.spark.{Logging, SparkConf}
+import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.util.ThreadUtils
 
 /*

http://git-wip-us.apache.org/repos/asf/spark/blob/fd31/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
--
diff --git 
a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala 
b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index fc742df..a01bb26 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -17,23 +17,23 @@
 
 package org.apache.spark.deploy.yarn
 
-import scala.util.control.NonFatal
-
 import java.io.{File, IOException}
 import java.lang.reflect.InvocationTargetException
 import java.net.{Socket, URL}
 import java.util.concurrent.atomic.AtomicReference
 
+import scala.util.control.NonFatal
+
 import org.apache.hadoop.fs.{FileSystem, Path}
 import org.apache.hadoop.yarn.api._
 import org.apache.hadoop.yarn.api.records._
 import org.apache.hadoop.yarn.conf.YarnConfiguration
 
-import org.apache.spark.rpc._
 import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkContext, 
SparkEnv,
   SparkException, SparkUserAppException}
 import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.deploy.history.HistoryServer
+import org.apache.spark.rpc._
 import org.apache.spark.scheduler.cluster.{CoarseGrainedSchedulerBackend, 
YarnSchedulerBackend}
 import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._
 import org.apache.spark.util._

http://git-wip-us.apache.org/repos/asf/spark/blob/fd31/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala
--
diff --git 
a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala
 
b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala
index 17d9943..5af3941 100644
--- 
a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala
+++ 
b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala
@@ -17,9 +17,10 @@
 
 package org.apache.spark.deploy.yarn
 
-import org.apache.spark.util.{MemoryParam, IntParam}

spark git commit: [SPARK-12561] Remove JobLogger in Spark 2.0.

2015-12-30 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/master 9140d9074 -> be33a0cd3


[SPARK-12561] Remove JobLogger in Spark 2.0.

It was research code and has been deprecated since 1.0.0. No one really uses it 
since they can just use event logging.
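
As a hedged aside (not part of this commit), the event-logging replacement that JobLogger's deprecation notice already points to is enabled purely through configuration; the app name, master, and log directory below are example values.

```
// Minimal sketch: enable event logging, which supersedes JobLogger's output.
import org.apache.spark.{SparkConf, SparkContext}

val conf = new SparkConf()
  .setAppName("event-logging-example")            // example value
  .setMaster("local[*]")                          // example value
  .set("spark.eventLog.enabled", "true")
  .set("spark.eventLog.dir", "/tmp/spark-events") // example directory

val sc = new SparkContext(conf)
sc.parallelize(1 to 100).count()                  // job events are written as this runs
sc.stop()
```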

Author: Reynold Xin 

Closes #10530 from rxin/SPARK-12561.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/be33a0cd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/be33a0cd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/be33a0cd

Branch: refs/heads/master
Commit: be33a0cd3def86e0aa64dab411e504abbbdfb03c
Parents: 9140d90
Author: Reynold Xin 
Authored: Wed Dec 30 18:28:08 2015 -0800
Committer: Reynold Xin 
Committed: Wed Dec 30 18:28:08 2015 -0800

--
 .../org/apache/spark/scheduler/JobLogger.scala  | 277 ---
 1 file changed, 277 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/be33a0cd/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala
--
diff --git a/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala 
b/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala
deleted file mode 100644
index f96eb8c..000
--- a/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala
+++ /dev/null
@@ -1,277 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.scheduler
-
-import java.io.{File, FileNotFoundException, IOException, PrintWriter}
-import java.text.SimpleDateFormat
-import java.util.{Date, Properties}
-
-import scala.collection.mutable.HashMap
-
-import org.apache.spark._
-import org.apache.spark.annotation.DeveloperApi
-import org.apache.spark.executor.TaskMetrics
-
-/**
- * :: DeveloperApi ::
- * A logger class to record runtime information for jobs in Spark. This class 
outputs one log file
- * for each Spark job, containing tasks start/stop and shuffle information. 
JobLogger is a subclass
- * of SparkListener, use addSparkListener to add JobLogger to a SparkContext 
after the SparkContext
- * is created. Note that each JobLogger only works for one SparkContext
- *
- * NOTE: The functionality of this class is heavily stripped down to 
accommodate for a general
- * refactor of the SparkListener interface. In its place, the 
EventLoggingListener is introduced
- * to log application information as SparkListenerEvents. To enable this 
functionality, set
- * spark.eventLog.enabled to true.
- */
-@DeveloperApi
-@deprecated("Log application information by setting spark.eventLog.enabled.", 
"1.0.0")
-class JobLogger(val user: String, val logDirName: String) extends 
SparkListener with Logging {
-
-  def this() = this(System.getProperty("user.name", ""),
-String.valueOf(System.currentTimeMillis()))
-
-  private val logDir =
-if (System.getenv("SPARK_LOG_DIR") != null) {
-  System.getenv("SPARK_LOG_DIR")
-} else {
-  "/tmp/spark-%s".format(user)
-}
-
-  private val jobIdToPrintWriter = new HashMap[Int, PrintWriter]
-  private val stageIdToJobId = new HashMap[Int, Int]
-  private val jobIdToStageIds = new HashMap[Int, Seq[Int]]
-  private val dateFormat = new ThreadLocal[SimpleDateFormat]() {
-override def initialValue(): SimpleDateFormat = new 
SimpleDateFormat("/MM/dd HH:mm:ss")
-  }
-
-  createLogDir()
-
-  /** Create a folder for log files, the folder's name is the creation time of 
jobLogger */
-  protected def createLogDir() {
-val dir = new File(logDir + "/" + logDirName + "/")
-if (dir.exists()) {
-  return
-}
-if (!dir.mkdirs()) {
-  // JobLogger should throw a exception rather than continue to construct 
this object.
-  throw new IOException("create log directory error:" + logDir + "/" + 
logDirName + "/")
-}
-  }
-
-  /**
-   * Create a log file for one job
-   * @param jobId ID of the job
-   * @throws FileNotFoundException Fail to create log file
-   */
-  protected def createLogWriter(jobId: 

spark git commit: [SPARK-12585] [SQL] move numFields to constructor of UnsafeRow

2015-12-30 Thread davies
Repository: spark
Updated Branches:
  refs/heads/master 93b52abca -> e6c77874b


[SPARK-12585] [SQL] move numFields to constructor of UnsafeRow

Right now, numFields is passed in by pointTo(), and bitSetWidthInBytes is then calculated, making pointTo() a little bit heavy.

It should instead be part of the constructor of UnsafeRow.
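
A hedged, simplified sketch of the refactoring (not the real UnsafeRow, whose actual changes are in the diff below): the per-call parameter moves into the constructor so the derived bitset width is computed once.

```
// Before: numFields (and the width derived from it) is set on every pointTo() call.
class RowBefore {
  private var numFields: Int = 0
  private var bitSetWidthInBytes: Int = 0
  def pointTo(baseOffset: Long, numFields: Int, sizeInBytes: Int): Unit = {
    this.numFields = numFields
    this.bitSetWidthInBytes = ((numFields + 63) / 64) * 8  // recomputed each call
  }
}

// After: numFields is fixed at construction, so pointTo() only records the location.
class RowAfter(val numFields: Int) {
  private val bitSetWidthInBytes: Int = ((numFields + 63) / 64) * 8  // computed once
  def pointTo(baseOffset: Long, sizeInBytes: Int): Unit = {
    // only the memory location changes between calls
  }
}
```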

Author: Davies Liu 

Closes #10528 from davies/numFields.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e6c77874
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e6c77874
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e6c77874

Branch: refs/heads/master
Commit: e6c77874b915691dead91e8d96ad9f58ba3a73db
Parents: 93b52ab
Author: Davies Liu 
Authored: Wed Dec 30 22:16:37 2015 -0800
Committer: Davies Liu 
Committed: Wed Dec 30 22:16:37 2015 -0800

--
 .../catalyst/expressions/UnsafeArrayData.java   |  4 +-
 .../sql/catalyst/expressions/UnsafeRow.java | 88 ++--
 .../sql/execution/UnsafeExternalRowSorter.java  | 16 ++--
 .../codegen/GenerateUnsafeProjection.scala  |  4 +-
 .../codegen/GenerateUnsafeRowJoiner.scala   |  4 +-
 .../GenerateUnsafeRowJoinerBitsetSuite.scala|  4 +-
 .../UnsafeFixedWidthAggregationMap.java | 10 +--
 .../sql/execution/UnsafeKVExternalSorter.java   | 24 +++---
 .../parquet/UnsafeRowParquetRecordReader.java   | 32 +++
 .../sql/execution/UnsafeRowSerializer.scala |  6 +-
 .../sql/execution/columnar/ColumnType.scala |  3 +-
 .../columnar/GenerateColumnAccessor.scala   |  4 +-
 .../datasources/text/DefaultSource.scala|  4 +-
 .../sql/execution/joins/CartesianProduct.scala  |  5 +-
 .../sql/execution/joins/HashedRelation.scala|  4 +-
 .../org/apache/spark/sql/UnsafeRowSuite.scala   | 11 ++-
 16 files changed, 86 insertions(+), 137 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/e6c77874/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java
--
diff --git 
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java
 
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java
index 3513960..3d80df2 100644
--- 
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java
+++ 
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java
@@ -270,8 +270,8 @@ public class UnsafeArrayData extends ArrayData {
 final int offset = getElementOffset(ordinal);
 if (offset < 0) return null;
 final int size = getElementSize(offset, ordinal);
-final UnsafeRow row = new UnsafeRow();
-row.pointTo(baseObject, baseOffset + offset, numFields, size);
+final UnsafeRow row = new UnsafeRow(numFields);
+row.pointTo(baseObject, baseOffset + offset, size);
 return row;
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/e6c77874/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
--
diff --git 
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
 
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
index b6979d0..7492b88 100644
--- 
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
+++ 
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
@@ -17,11 +17,7 @@
 
 package org.apache.spark.sql.catalyst.expressions;
 
-import java.io.Externalizable;
-import java.io.IOException;
-import java.io.ObjectInput;
-import java.io.ObjectOutput;
-import java.io.OutputStream;
+import java.io.*;
 import java.math.BigDecimal;
 import java.math.BigInteger;
 import java.nio.ByteBuffer;
@@ -30,26 +26,12 @@ import java.util.Collections;
 import java.util.HashSet;
 import java.util.Set;
 
-import org.apache.spark.sql.types.ArrayType;
-import org.apache.spark.sql.types.BinaryType;
-import org.apache.spark.sql.types.BooleanType;
-import org.apache.spark.sql.types.ByteType;
-import org.apache.spark.sql.types.CalendarIntervalType;
-import org.apache.spark.sql.types.DataType;
-import org.apache.spark.sql.types.DateType;
-import org.apache.spark.sql.types.Decimal;
-import org.apache.spark.sql.types.DecimalType;
-import org.apache.spark.sql.types.DoubleType;
-import org.apache.spark.sql.types.FloatType;
-import org.apache.spark.sql.types.IntegerType;
-import org.apache.spark.sql.types.LongType;
-import org.apache.spark.sql.types.MapType;
-import org.apache.spark.sql.types.NullType;
-import org.apache.spark.sql.types.ShortType;
-import 

spark git commit: House cleaning: close open pull requests created before June 1st, 2015

2015-12-30 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/master be33a0cd3 -> 7b4452ba9


House cleaning: close open pull requests created before June 1st, 2015

Closes #5358
Closes #3744
Closes #3677
Closes #3536
Closes #3249
Closes #3221
Closes #2446
Closes #3794
Closes #3815
Closes #3816
Closes #3866
Closes #4286
Closes #5184
Closes #5170
Closes #5142
Closes #5025
Closes #5005
Closes #4897
Closes #4887
Closes #4849
Closes #4632
Closes #4622
Closes #4456
Closes #4449
Closes #4417
Closes #5483
Closes #5325
Closes #6545
Closes #6449
Closes #6433
Closes #6416
Closes #6403
Closes #6386
Closes #6263
Closes #6245
Closes #6213
Closes #6155
Closes #6133
Closes #6018
Closes #5978
Closes #5869
Closes #5852
Closes #5848
Closes #5754
Closes #5598
Closes #5503
Closes #4380


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7b4452ba
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7b4452ba
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7b4452ba

Branch: refs/heads/master
Commit: 7b4452ba98d53ed646a2e744bb701702fc5371a7
Parents: be33a0c
Author: Reynold Xin 
Authored: Wed Dec 30 18:49:17 2015 -0800
Committer: Reynold Xin 
Committed: Wed Dec 30 18:49:17 2015 -0800

--

--






spark git commit: Closes #10386 since it was superseded by #10468.

2015-12-30 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/master 7b4452ba9 -> c642c3a21


Closes #10386 since it was superseded by #10468.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c642c3a2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c642c3a2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c642c3a2

Branch: refs/heads/master
Commit: c642c3a2102fd016deabcb78bd0292bbf3dd7acf
Parents: 7b4452b
Author: Reynold Xin 
Authored: Wed Dec 30 18:50:30 2015 -0800
Committer: Reynold Xin 
Committed: Wed Dec 30 18:50:30 2015 -0800

--

--






spark git commit: [SPARK-12300] [SQL] [PYSPARK] fix schema inference on local collections

2015-12-30 Thread davies
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 c069ffc2b -> 8dc654971


[SPARK-12300] [SQL] [PYSPARK] fix schema inference on local collections

Current schema inference for local Python collections halts as soon as there are no NullTypes. This is different from when we specify a sampling ratio of 1.0 on a distributed collection, and it could result in incomplete schema information.

Author: Holden Karau 

Closes #10275 from 
holdenk/SPARK-12300-fix-schmea-inferance-on-local-collections.

(cherry picked from commit d1ca634db4ca9db7f0ba7ca38a0e03bcbfec23c9)
Signed-off-by: Davies Liu 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8dc65497
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8dc65497
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8dc65497

Branch: refs/heads/branch-1.6
Commit: 8dc65497152f2c8949b08fddad853d31c4bd9ae5
Parents: c069ffc
Author: Holden Karau 
Authored: Wed Dec 30 11:14:47 2015 -0800
Committer: Davies Liu 
Committed: Wed Dec 30 11:15:12 2015 -0800

--
 python/pyspark/sql/context.py | 10 +++---
 python/pyspark/sql/tests.py   | 11 +++
 2 files changed, 14 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/8dc65497/python/pyspark/sql/context.py
--
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index b05aa2f..ba6915a 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -18,6 +18,7 @@
 import sys
 import warnings
 import json
+from functools import reduce
 
 if sys.version >= '3':
 basestring = unicode = str
@@ -236,14 +237,9 @@ class SQLContext(object):
 if type(first) is dict:
 warnings.warn("inferring schema from dict is deprecated,"
   "please use pyspark.sql.Row instead")
-schema = _infer_schema(first)
+schema = reduce(_merge_type, map(_infer_schema, data))
 if _has_nulltype(schema):
-for r in data:
-schema = _merge_type(schema, _infer_schema(r))
-if not _has_nulltype(schema):
-break
-else:
-raise ValueError("Some of types cannot be determined after 
inferring")
+raise ValueError("Some of types cannot be determined after 
inferring")
 return schema
 
 def _inferSchema(self, rdd, samplingRatio=None):

http://git-wip-us.apache.org/repos/asf/spark/blob/8dc65497/python/pyspark/sql/tests.py
--
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index 9f5f7cf..10b9917 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -353,6 +353,17 @@ class SQLTests(ReusedPySparkTestCase):
 df3 = self.sqlCtx.createDataFrame(rdd, df.schema)
 self.assertEqual(10, df3.count())
 
+def test_infer_schema_to_local(self):
+input = [{"a": 1}, {"b": "coffee"}]
+rdd = self.sc.parallelize(input)
+df = self.sqlCtx.createDataFrame(input)
+df2 = self.sqlCtx.createDataFrame(rdd, samplingRatio=1.0)
+self.assertEqual(df.schema, df2.schema)
+
+rdd = self.sc.parallelize(range(10)).map(lambda x: Row(a=x))
+df3 = self.sqlCtx.createDataFrame(rdd, df.schema)
+self.assertEqual(10, df3.count())
+
 def test_serialize_nested_array_and_map(self):
 d = [Row(l=[Row(a=1, b='s')], d={"key": Row(c=1.0, d="2")})]
 rdd = self.sc.parallelize(d)





spark git commit: [SPARK-12300] [SQL] [PYSPARK] fix schema inference on local collections

2015-12-30 Thread davies
Repository: spark
Updated Branches:
  refs/heads/master aa48164a4 -> d1ca634db


[SPARK-12300] [SQL] [PYSPARK] fix schema inference on local collections

Current schema inference for local Python collections halts as soon as there are no NullTypes. This is different from when we specify a sampling ratio of 1.0 on a distributed collection, and it could result in incomplete schema information.

Author: Holden Karau 

Closes #10275 from 
holdenk/SPARK-12300-fix-schmea-inferance-on-local-collections.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d1ca634d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d1ca634d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d1ca634d

Branch: refs/heads/master
Commit: d1ca634db4ca9db7f0ba7ca38a0e03bcbfec23c9
Parents: aa48164
Author: Holden Karau 
Authored: Wed Dec 30 11:14:47 2015 -0800
Committer: Davies Liu 
Committed: Wed Dec 30 11:14:47 2015 -0800

--
 python/pyspark/sql/context.py | 10 +++---
 python/pyspark/sql/tests.py   | 11 +++
 2 files changed, 14 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/d1ca634d/python/pyspark/sql/context.py
--
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index b05aa2f..ba6915a 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -18,6 +18,7 @@
 import sys
 import warnings
 import json
+from functools import reduce
 
 if sys.version >= '3':
 basestring = unicode = str
@@ -236,14 +237,9 @@ class SQLContext(object):
 if type(first) is dict:
 warnings.warn("inferring schema from dict is deprecated,"
   "please use pyspark.sql.Row instead")
-schema = _infer_schema(first)
+schema = reduce(_merge_type, map(_infer_schema, data))
 if _has_nulltype(schema):
-for r in data:
-schema = _merge_type(schema, _infer_schema(r))
-if not _has_nulltype(schema):
-break
-else:
-raise ValueError("Some of types cannot be determined after 
inferring")
+raise ValueError("Some of types cannot be determined after 
inferring")
 return schema
 
 def _inferSchema(self, rdd, samplingRatio=None):

http://git-wip-us.apache.org/repos/asf/spark/blob/d1ca634d/python/pyspark/sql/tests.py
--
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index 9f5f7cf..10b9917 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -353,6 +353,17 @@ class SQLTests(ReusedPySparkTestCase):
 df3 = self.sqlCtx.createDataFrame(rdd, df.schema)
 self.assertEqual(10, df3.count())
 
+def test_infer_schema_to_local(self):
+input = [{"a": 1}, {"b": "coffee"}]
+rdd = self.sc.parallelize(input)
+df = self.sqlCtx.createDataFrame(input)
+df2 = self.sqlCtx.createDataFrame(rdd, samplingRatio=1.0)
+self.assertEqual(df.schema, df2.schema)
+
+rdd = self.sc.parallelize(range(10)).map(lambda x: Row(a=x))
+df3 = self.sqlCtx.createDataFrame(rdd, df.schema)
+self.assertEqual(10, df3.count())
+
 def test_serialize_nested_array_and_map(self):
 d = [Row(l=[Row(a=1, b='s')], d={"key": Row(c=1.0, d="2")})]
 rdd = self.sc.parallelize(d)





spark git commit: [SPARK-12495][SQL] use true as default value for propagateNull in NewInstance

2015-12-30 Thread marmbrus
Repository: spark
Updated Branches:
  refs/heads/master 932cf4424 -> aa48164a4


[SPARK-12495][SQL] use true as default value for propagateNull in NewInstance

In most cases we should propagate null when calling `NewInstance`, and so far there is only one case where we should stop null propagation: creating a product/Java bean. So I think it makes more sense to propagate null by default.

This also fixes a bug when encoding a null array/map, which was first discovered in https://github.com/apache/spark/pull/10401
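
A hedged, simplified model of what null propagation means here (illustrative only, not the catalyst `NewInstance` expression or its generated code): with propagateNull enabled, a null argument makes the whole result null instead of being handed to the constructor.

```
// Simplified stand-in for the NewInstance null-propagation behaviour (illustrative only).
def newInstance[T <: AnyRef](args: Seq[AnyRef], propagateNull: Boolean = true)
                            (construct: Seq[AnyRef] => T): T = {
  if (propagateNull && args.contains(null)) {
    null.asInstanceOf[T]   // any null child nulls out the result
  } else {
    construct(args)        // only bean/product creation opts out with propagateNull = false
  }
}

// With the new default, boxing a null value yields null instead of failing:
val boxed = newInstance[java.lang.Integer](Seq(null)) { args =>
  java.lang.Integer.valueOf(args.head.toString)  // never reached for the null input
}
assert(boxed == null)
```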

Author: Wenchen Fan 

Closes #10443 from cloud-fan/encoder.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/aa48164a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/aa48164a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/aa48164a

Branch: refs/heads/master
Commit: aa48164a43bd9ed9eab53fcacbed92819e84eaf7
Parents: 932cf44
Author: Wenchen Fan 
Authored: Wed Dec 30 10:56:08 2015 -0800
Committer: Michael Armbrust 
Committed: Wed Dec 30 10:56:08 2015 -0800

--
 .../spark/sql/catalyst/JavaTypeInference.scala  | 16 ++---
 .../spark/sql/catalyst/ScalaReflection.scala| 16 ++---
 .../catalyst/encoders/ExpressionEncoder.scala   |  2 +-
 .../sql/catalyst/encoders/RowEncoder.scala  |  2 --
 .../sql/catalyst/expressions/objects.scala  | 12 +-
 .../encoders/EncoderResolutionSuite.scala   | 24 ++--
 .../encoders/ExpressionEncoderSuite.scala   |  3 +++
 7 files changed, 38 insertions(+), 37 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/aa48164a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
index a1500cb..ed153d1 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
@@ -178,19 +178,19 @@ object JavaTypeInference {
   case c if !inferExternalType(c).isInstanceOf[ObjectType] => getPath
 
   case c if c == classOf[java.lang.Short] =>
-NewInstance(c, getPath :: Nil, propagateNull = true, ObjectType(c))
+NewInstance(c, getPath :: Nil, ObjectType(c))
   case c if c == classOf[java.lang.Integer] =>
-NewInstance(c, getPath :: Nil, propagateNull = true, ObjectType(c))
+NewInstance(c, getPath :: Nil, ObjectType(c))
   case c if c == classOf[java.lang.Long] =>
-NewInstance(c, getPath :: Nil, propagateNull = true, ObjectType(c))
+NewInstance(c, getPath :: Nil, ObjectType(c))
   case c if c == classOf[java.lang.Double] =>
-NewInstance(c, getPath :: Nil, propagateNull = true, ObjectType(c))
+NewInstance(c, getPath :: Nil, ObjectType(c))
   case c if c == classOf[java.lang.Byte] =>
-NewInstance(c, getPath :: Nil, propagateNull = true, ObjectType(c))
+NewInstance(c, getPath :: Nil, ObjectType(c))
   case c if c == classOf[java.lang.Float] =>
-NewInstance(c, getPath :: Nil, propagateNull = true, ObjectType(c))
+NewInstance(c, getPath :: Nil, ObjectType(c))
   case c if c == classOf[java.lang.Boolean] =>
-NewInstance(c, getPath :: Nil, propagateNull = true, ObjectType(c))
+NewInstance(c, getPath :: Nil, ObjectType(c))
 
   case c if c == classOf[java.sql.Date] =>
 StaticInvoke(
@@ -298,7 +298,7 @@ object JavaTypeInference {
   p.getWriteMethod.getName -> setter
 }.toMap
 
-val newInstance = NewInstance(other, Nil, propagateNull = false, 
ObjectType(other))
+val newInstance = NewInstance(other, Nil, ObjectType(other), 
propagateNull = false)
 val result = InitializeJavaBean(newInstance, setters)
 
 if (path.nonEmpty) {

http://git-wip-us.apache.org/repos/asf/spark/blob/aa48164a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
index 8a22b37..9784c96 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
@@ -189,37 +189,37 @@ object ScalaReflection extends ScalaReflection {
   case t if t <:< localTypeOf[java.lang.Integer] =>
 val boxedType = 

spark git commit: [SPARK-10359] Enumerate dependencies in a file and diff against it for new pull requests

2015-12-30 Thread joshrosen
Repository: spark
Updated Branches:
  refs/heads/master d1ca634db -> 27a42c710


[SPARK-10359] Enumerate dependencies in a file and diff against it for new pull 
requests

This patch adds a new build check which enumerates Spark's resolved runtime classpath and saves it to a file, then diffs against that file to detect whether pull requests have introduced dependency changes. The aim of this check is to make it simpler to reason about whether pull requests which modify the build have introduced new dependencies or changed transitive dependencies in a way that affects the final classpath.

This supplants the checks added in SPARK-4123 / #5093, which are currently 
disabled due to bugs.

This patch is based on pwendell's work in #8531.
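
A hedged sketch of the idea behind the check (the file paths and helper below are hypothetical, not the actual dev/test-dependencies.sh logic): write the resolved classpath one jar per line and diff it against the checked-in manifest.

```
// Illustrative only: compare a freshly generated dependency list against a
// committed manifest such as dev/deps/spark-deps-hadoop-2.3.
import scala.io.Source

def readJarList(path: String): Set[String] =
  Source.fromFile(path).getLines().map(_.trim).filter(_.nonEmpty).toSet

val expected = readJarList("dev/deps/spark-deps-hadoop-2.3")  // committed manifest
val actual   = readJarList("/tmp/resolved-classpath.txt")     // hypothetical build output

val added   = actual -- expected
val removed = expected -- actual
if (added.nonEmpty || removed.nonEmpty) {
  added.foreach(jar => println(s"New dependency: $jar"))
  removed.foreach(jar => println(s"Removed dependency: $jar"))
  sys.error("Dependency manifest changed; update dev/deps/ if this is intended.")
}
```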

Closes #8531.

Author: Josh Rosen 
Author: Patrick Wendell 

Closes #10461 from JoshRosen/SPARK-10359.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/27a42c71
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/27a42c71
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/27a42c71

Branch: refs/heads/master
Commit: 27a42c7108ced48a7f558990de2e4fc7ed340119
Parents: d1ca634
Author: Josh Rosen 
Authored: Wed Dec 30 12:47:42 2015 -0800
Committer: Josh Rosen 
Committed: Wed Dec 30 12:47:42 2015 -0800

--
 .rat-excludes|   1 +
 dev/deps/spark-deps-hadoop-2.3   | 184 +
 dev/deps/spark-deps-hadoop-2.4   | 185 ++
 dev/run-tests-jenkins.py |   2 +-
 dev/run-tests.py |   8 ++
 dev/sparktestsupport/__init__.py |   1 +
 dev/sparktestsupport/modules.py  |  15 ++-
 dev/test-dependencies.sh | 102 +++
 dev/tests/pr_new_dependencies.sh | 117 -
 pom.xml  |  17 
 10 files changed, 512 insertions(+), 120 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/27a42c71/.rat-excludes
--
diff --git a/.rat-excludes b/.rat-excludes
index 3544c0f..bf071eb 100644
--- a/.rat-excludes
+++ b/.rat-excludes
@@ -85,3 +85,4 @@ org.apache.spark.sql.sources.DataSourceRegister
 org.apache.spark.scheduler.SparkHistoryListenerFactory
 .*parquet
 LZ4BlockInputStream.java
+spark-deps-.*

http://git-wip-us.apache.org/repos/asf/spark/blob/27a42c71/dev/deps/spark-deps-hadoop-2.3
--
diff --git a/dev/deps/spark-deps-hadoop-2.3 b/dev/deps/spark-deps-hadoop-2.3
new file mode 100644
index 000..6014d50
--- /dev/null
+++ b/dev/deps/spark-deps-hadoop-2.3
@@ -0,0 +1,184 @@
+JavaEWAH-0.3.2.jar
+RoaringBitmap-0.5.11.jar
+ST4-4.0.4.jar
+activation-1.1.1.jar
+akka-actor_2.10-2.3.11.jar
+akka-remote_2.10-2.3.11.jar
+akka-slf4j_2.10-2.3.11.jar
+antlr-2.7.7.jar
+antlr-runtime-3.4.jar
+aopalliance-1.0.jar
+apache-log4j-extras-1.2.17.jar
+arpack_combined_all-0.1.jar
+asm-3.1.jar
+asm-commons-3.1.jar
+asm-tree-3.1.jar
+avro-1.7.7.jar
+avro-ipc-1.7.7-tests.jar
+avro-ipc-1.7.7.jar
+avro-mapred-1.7.7-hadoop2.jar
+base64-2.3.8.jar
+bcprov-jdk15on-1.51.jar
+bonecp-0.8.0.RELEASE.jar
+breeze-macros_2.10-0.11.2.jar
+breeze_2.10-0.11.2.jar
+calcite-avatica-1.2.0-incubating.jar
+calcite-core-1.2.0-incubating.jar
+calcite-linq4j-1.2.0-incubating.jar
+chill-java-0.5.0.jar
+chill_2.10-0.5.0.jar
+commons-beanutils-1.7.0.jar
+commons-beanutils-core-1.8.0.jar
+commons-cli-1.2.jar
+commons-codec-1.10.jar
+commons-collections-3.2.2.jar
+commons-compiler-2.7.6.jar
+commons-compress-1.4.1.jar
+commons-configuration-1.6.jar
+commons-dbcp-1.4.jar
+commons-digester-1.8.jar
+commons-httpclient-3.1.jar
+commons-io-2.4.jar
+commons-lang-2.6.jar
+commons-lang3-3.3.2.jar
+commons-logging-1.1.3.jar
+commons-math3-3.4.1.jar
+commons-net-2.2.jar
+commons-pool-1.5.4.jar
+compress-lzf-1.0.3.jar
+config-1.2.1.jar
+core-1.1.2.jar
+curator-client-2.4.0.jar
+curator-framework-2.4.0.jar
+curator-recipes-2.4.0.jar
+datanucleus-api-jdo-3.2.6.jar
+datanucleus-core-3.2.10.jar
+datanucleus-rdbms-3.2.9.jar
+derby-10.10.1.1.jar
+eigenbase-properties-1.1.5.jar
+geronimo-annotation_1.0_spec-1.1.1.jar
+geronimo-jaspic_1.0_spec-1.0.jar
+geronimo-jta_1.1_spec-1.1.1.jar
+groovy-all-2.1.6.jar
+guice-3.0.jar
+guice-servlet-3.0.jar
+hadoop-annotations-2.3.0.jar
+hadoop-auth-2.3.0.jar
+hadoop-client-2.3.0.jar
+hadoop-common-2.3.0.jar
+hadoop-hdfs-2.3.0.jar
+hadoop-mapreduce-client-app-2.3.0.jar
+hadoop-mapreduce-client-common-2.3.0.jar
+hadoop-mapreduce-client-core-2.3.0.jar
+hadoop-mapreduce-client-jobclient-2.3.0.jar
+hadoop-mapreduce-client-shuffle-2.3.0.jar
+hadoop-yarn-api-2.3.0.jar
+hadoop-yarn-client-2.3.0.jar
+hadoop-yarn-common-2.3.0.jar

spark git commit: [SPARK-12409][SPARK-12387][SPARK-12391][SQL] Support AND/OR/IN/LIKE push-down filters for JDBC

2015-12-30 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/master 27a42c710 -> 5c2682b0c


[SPARK-12409][SPARK-12387][SPARK-12391][SQL] Support AND/OR/IN/LIKE push-down 
filters for JDBC

This is a rework of #10386 and adds more tests and LIKE push-down support.
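
A hedged sketch, modeled on the compileFilter cases added below but simplified (it quotes every value, unlike the real JDBCRDD code), of how the new source filters map to SQL WHERE fragments:

```
// Simplified filter-to-SQL mapping; illustrative only, not the JDBCRDD implementation.
import org.apache.spark.sql.sources._

def compile(f: Filter): String = f match {
  case EqualTo(attr, value) => s"$attr = '$value'"
  case In(attr, values) =>
    val list = values.map(v => s"'$v'").mkString(", ")
    s"$attr IN ($list)"
  case StringStartsWith(attr, prefix) => s"$attr LIKE '$prefix%'"
  case Not(inner) => s"(NOT (${compile(inner)}))"
  case Or(f1, f2) => s"(${compile(f1)}) OR (${compile(f2)})"
  case And(f1, f2) => s"(${compile(f1)}) AND (${compile(f2)})"
  case _ => null
}

// compile(Or(EqualTo("THEID", 1), In("NAME", Array("mary", "fred"))))
//   => "(THEID = '1') OR (NAME IN ('mary', 'fred'))"
```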

Author: Takeshi YAMAMURO 

Closes #10468 from maropu/SupportMorePushdownInJdbc.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5c2682b0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5c2682b0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5c2682b0

Branch: refs/heads/master
Commit: 5c2682b0c8fd2aeae2af1adb716ee0d5f8b85135
Parents: 27a42c7
Author: Takeshi YAMAMURO 
Authored: Wed Dec 30 13:34:37 2015 -0800
Committer: Reynold Xin 
Committed: Wed Dec 30 13:34:37 2015 -0800

--
 .../execution/datasources/jdbc/JDBCRDD.scala|  9 ++-
 .../org/apache/spark/sql/jdbc/JDBCSuite.scala   | 28 +++-
 2 files changed, 35 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/5c2682b0/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
index 4e2f505..7072ee4 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
@@ -179,6 +179,7 @@ private[sql] object JDBCRDD extends Logging {
 case stringValue: String => s"'${escapeSql(stringValue)}'"
 case timestampValue: Timestamp => "'" + timestampValue + "'"
 case dateValue: Date => "'" + dateValue + "'"
+case arrayValue: Array[Object] => arrayValue.map(compileValue).mkString(", 
")
 case _ => value
   }
 
@@ -191,13 +192,19 @@ private[sql] object JDBCRDD extends Logging {
*/
   private def compileFilter(f: Filter): String = f match {
 case EqualTo(attr, value) => s"$attr = ${compileValue(value)}"
-case Not(EqualTo(attr, value)) => s"$attr != ${compileValue(value)}"
+case Not(f) => s"(NOT (${compileFilter(f)}))"
 case LessThan(attr, value) => s"$attr < ${compileValue(value)}"
 case GreaterThan(attr, value) => s"$attr > ${compileValue(value)}"
 case LessThanOrEqual(attr, value) => s"$attr <= ${compileValue(value)}"
 case GreaterThanOrEqual(attr, value) => s"$attr >= ${compileValue(value)}"
+case StringStartsWith(attr, value) => s"${attr} LIKE '${value}%'"
+case StringEndsWith(attr, value) => s"${attr} LIKE '%${value}'"
+case StringContains(attr, value) => s"${attr} LIKE '%${value}%'"
 case IsNull(attr) => s"$attr IS NULL"
 case IsNotNull(attr) => s"$attr IS NOT NULL"
+case In(attr, value) => s"$attr IN (${compileValue(value)})"
+case Or(f1, f2) => s"(${compileFilter(f1)}) OR (${compileFilter(f2)})"
+case And(f1, f2) => s"(${compileFilter(f1)}) AND (${compileFilter(f2)})"
 case _ => null
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/5c2682b0/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
index 4044a10..00e37f1 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
@@ -26,6 +26,7 @@ import org.scalatest.BeforeAndAfter
 import org.scalatest.PrivateMethodTester
 
 import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.Row
 import org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD
 import org.apache.spark.sql.test.SharedSQLContext
 import org.apache.spark.sql.types._
@@ -186,8 +187,26 @@ class JDBCSuite extends SparkFunSuite
 assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME = 
'fred'")).collect().size == 1)
 assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME > 
'fred'")).collect().size == 2)
 assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME != 
'fred'")).collect().size == 2)
+assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME IN ('mary', 
'fred')"))
+  .collect().size == 2)
+assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME NOT IN 
('fred')"))
+  .collect().size === 2)
+assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID = 1 OR NAME 
= 'mary'"))
+  .collect().size == 2)
+assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE 

spark git commit: [SPARK-12399] Display correct error message when accessing REST API with an unknown app Id

2015-12-30 Thread vanzin
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 8dc654971 -> cd86075b5


[SPARK-12399] Display correct error message when accessing REST API with an 
unknown app Id

I got an exception when accessing the REST API below with an unknown application Id.
`http://:18080/api/v1/applications/xxx/jobs`
Instead of an exception, I expect an error message "no such app: xxx", similar to the error message I get when I access `/api/v1/applications/xxx`.
```
org.spark-project.guava.util.concurrent.UncheckedExecutionException: java.util.NoSuchElementException: no app with key xxx
    at org.spark-project.guava.cache.LocalCache$Segment.get(LocalCache.java:2263)
    at org.spark-project.guava.cache.LocalCache.get(LocalCache.java:4000)
    at org.spark-project.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004)
    at org.spark-project.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874)
    at org.apache.spark.deploy.history.HistoryServer.getSparkUI(HistoryServer.scala:116)
    at org.apache.spark.status.api.v1.UIRoot$class.withSparkUI(ApiRootResource.scala:226)
    at org.apache.spark.deploy.history.HistoryServer.withSparkUI(HistoryServer.scala:46)
    at org.apache.spark.status.api.v1.ApiRootResource.getJobs(ApiRootResource.scala:66)
```
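
A hedged, standalone sketch of the pattern the fix uses, written against plain Guava rather than Spark's shaded copy and with a hypothetical loader: the cache wraps the loader's unchecked exception, so the handler unwraps getCause and turns the "no such app" case into None.

```
// Illustrative only, not HistoryServer code: a Guava LoadingCache wraps loader
// failures, and the cause can be unwrapped into an Option.
import java.util.NoSuchElementException
import scala.util.control.NonFatal
import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache}

val cache: LoadingCache[String, String] = CacheBuilder.newBuilder().build(
  new CacheLoader[String, String] {
    override def load(appKey: String): String =
      throw new NoSuchElementException(s"no app with key $appKey")  // hypothetical loader
  })

def lookup(appKey: String): Option[String] =
  try {
    Some(cache.get(appKey))
  } catch {
    case NonFatal(e) => e.getCause match {
      case _: NoSuchElementException => None  // unknown app id becomes a clean "not found"
      case cause: Exception => throw cause
    }
  }

// lookup("xxx") returns None instead of surfacing UncheckedExecutionException.
```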

Author: Carson Wang 

Closes #10352 from carsonwang/unknownAppFix.

(cherry picked from commit b244297966be1d09f8e861cfe2d8e69f7bed84da)
Signed-off-by: Marcelo Vanzin 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cd86075b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cd86075b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cd86075b

Branch: refs/heads/branch-1.6
Commit: cd86075b52d6363f674dffc3eb71d90449563879
Parents: 8dc6549
Author: Carson Wang 
Authored: Wed Dec 30 13:49:10 2015 -0800
Committer: Marcelo Vanzin 
Committed: Wed Dec 30 13:49:28 2015 -0800

--
 .../apache/spark/deploy/history/HistoryServer.scala | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/cd86075b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala 
b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
index f31fef0..d11c9b7 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
@@ -21,6 +21,8 @@ import java.util.NoSuchElementException
 import java.util.zip.ZipOutputStream
 import javax.servlet.http.{HttpServlet, HttpServletRequest, 
HttpServletResponse}
 
+import scala.util.control.NonFatal
+
 import com.google.common.cache._
 import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder}
 
@@ -115,7 +117,17 @@ class HistoryServer(
   }
 
   def getSparkUI(appKey: String): Option[SparkUI] = {
-Option(appCache.get(appKey))
+try {
+  val ui = appCache.get(appKey)
+  Some(ui)
+} catch {
+  case NonFatal(e) => e.getCause() match {
+case nsee: NoSuchElementException =>
+  None
+
+case cause: Exception => throw cause
+  }
+}
   }
 
   initialize()
@@ -195,7 +207,7 @@ class HistoryServer(
   appCache.get(appId + attemptId.map { id => s"/$id" }.getOrElse(""))
   true
 } catch {
-  case e: Exception => e.getCause() match {
+  case NonFatal(e) => e.getCause() match {
 case nsee: NoSuchElementException =>
   false
 





spark git commit: [SPARK-12399] Display correct error message when accessing REST API with an unknown app Id

2015-12-30 Thread vanzin
Repository: spark
Updated Branches:
  refs/heads/master 5c2682b0c -> b24429796


[SPARK-12399] Display correct error message when accessing REST API with an 
unknown app Id

I got an exception when accessing the REST API below with an unknown application Id.
`http://:18080/api/v1/applications/xxx/jobs`
Instead of an exception, I expect an error message "no such app: xxx", similar to the error message I get when I access `/api/v1/applications/xxx`.
```
org.spark-project.guava.util.concurrent.UncheckedExecutionException: java.util.NoSuchElementException: no app with key xxx
    at org.spark-project.guava.cache.LocalCache$Segment.get(LocalCache.java:2263)
    at org.spark-project.guava.cache.LocalCache.get(LocalCache.java:4000)
    at org.spark-project.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004)
    at org.spark-project.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874)
    at org.apache.spark.deploy.history.HistoryServer.getSparkUI(HistoryServer.scala:116)
    at org.apache.spark.status.api.v1.UIRoot$class.withSparkUI(ApiRootResource.scala:226)
    at org.apache.spark.deploy.history.HistoryServer.withSparkUI(HistoryServer.scala:46)
    at org.apache.spark.status.api.v1.ApiRootResource.getJobs(ApiRootResource.scala:66)
```

Author: Carson Wang 

Closes #10352 from carsonwang/unknownAppFix.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b2442979
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b2442979
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b2442979

Branch: refs/heads/master
Commit: b244297966be1d09f8e861cfe2d8e69f7bed84da
Parents: 5c2682b
Author: Carson Wang 
Authored: Wed Dec 30 13:49:10 2015 -0800
Committer: Marcelo Vanzin 
Committed: Wed Dec 30 13:49:10 2015 -0800

--
 .../apache/spark/deploy/history/HistoryServer.scala | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/b2442979/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala 
b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
index 0bc0cb1..6143a33 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
@@ -21,6 +21,8 @@ import java.util.NoSuchElementException
 import java.util.zip.ZipOutputStream
 import javax.servlet.http.{HttpServlet, HttpServletRequest, 
HttpServletResponse}
 
+import scala.util.control.NonFatal
+
 import com.google.common.cache._
 import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder}
 import org.apache.spark.{Logging, SecurityManager, SparkConf}
@@ -113,7 +115,17 @@ class HistoryServer(
   }
 
   def getSparkUI(appKey: String): Option[SparkUI] = {
-Option(appCache.get(appKey))
+try {
+  val ui = appCache.get(appKey)
+  Some(ui)
+} catch {
+  case NonFatal(e) => e.getCause() match {
+case nsee: NoSuchElementException =>
+  None
+
+case cause: Exception => throw cause
+  }
+}
   }
 
   initialize()
@@ -193,7 +205,7 @@ class HistoryServer(
   appCache.get(appId + attemptId.map { id => s"/$id" }.getOrElse(""))
   true
 } catch {
-  case e: Exception => e.getCause() match {
+  case NonFatal(e) => e.getCause() match {
 case nsee: NoSuchElementException =>
   false
 





spark git commit: [SPARK-8641][SPARK-12455][SQL] Native Spark Window functions - Follow-up (docs & tests)

2015-12-30 Thread yhuai
Repository: spark
Updated Branches:
  refs/heads/master b24429796 -> f76ee109d


[SPARK-8641][SPARK-12455][SQL] Native Spark Window functions - Follow-up (docs 
& tests)

This PR is a follow-up for PR https://github.com/apache/spark/pull/9819. It 
adds documentation for the window functions and a couple of NULL tests.

The documentation was largely based on the documentation in (the source of)  
Hive and Presto:
* https://prestodb.io/docs/current/functions/window.html
* 
https://cwiki.apache.org/confluence/display/Hive/LanguageManual+WindowingAndAnalytics

I am not sure if we need to add the licenses of these two projects to the 
licenses directory. They are both under the ASL. srowen any thoughts?

cc yhuai

Author: Herman van Hovell 

Closes #10402 from hvanhovell/SPARK-8641-docs.
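
A hedged usage sketch of the lead/lag window functions this documentation covers (the data and column names are made up, and an existing SQLContext named sqlContext is assumed):

```
// Illustrative only: lead/lag with a default value used when the offset
// falls outside the window partition.
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.{lag, lead}
import sqlContext.implicits._

val df = sqlContext.createDataFrame(Seq(
  ("a", 1), ("a", 2), ("a", 3), ("b", 10)
)).toDF("category", "value")

val w = Window.partitionBy("category").orderBy("value")

df.select(
  $"category",
  $"value",
  lead($"value", 1, 0).over(w).as("next_value"),  // default 0 when there is no next row
  lag($"value", 1, 0).over(w).as("prev_value")    // default 0 when there is no previous row
).show()
```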


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f76ee109
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f76ee109
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f76ee109

Branch: refs/heads/master
Commit: f76ee109d87e727710d2721e4be47fdabc21582c
Parents: b244297
Author: Herman van Hovell 
Authored: Wed Dec 30 16:51:07 2015 -0800
Committer: Yin Huai 
Committed: Wed Dec 30 16:51:07 2015 -0800

--
 .../expressions/windowExpressions.scala | 130 ++-
 .../apache/spark/sql/DataFrameWindowSuite.scala |  20 +++
 .../sql/hive/execution/WindowQuerySuite.scala   |  15 +++
 3 files changed, 162 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/f76ee109/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
index 91f169e..f1a333b 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
@@ -314,8 +314,8 @@ abstract class OffsetWindowFunction
   val offset: Expression
 
   /**
-   * Direction (above = 1/below = -1) of the number of rows between the 
current row and the row
-   * where the input expression is evaluated.
+   * Direction of the number of rows between the current row and the row where 
the input expression
+   * is evaluated.
*/
   val direction: SortDirection
 
@@ -327,7 +327,7 @@ abstract class OffsetWindowFunction
* both the input and the default expression are foldable, the result is 
still not foldable due to
* the frame.
*/
-  override def foldable: Boolean = input.foldable && (default == null || 
default.foldable)
+  override def foldable: Boolean = false
 
   override def nullable: Boolean = default == null || default.nullable
 
@@ -353,6 +353,21 @@ abstract class OffsetWindowFunction
   override def toString: String = s"$prettyName($input, $offset, $default)"
 }
 
+/**
+ * The Lead function returns the value of 'x' at 'offset' rows after the 
current row in the window.
+ * Offsets start at 0, which is the current row. The offset must be constant 
integer value. The
+ * default offset is 1. When the value of 'x' is null at the offset, or when 
the offset is larger
+ * than the window, the default expression is evaluated.
+ *
+ * This documentation has been based upon similar documentation for the Hive 
and Presto projects.
+ *
+ * @param input expression to evaluate 'offset' rows after the current row.
+ * @param offset rows to jump ahead in the partition.
+ * @param default to use when the input value is null or when the offset is 
larger than the window.
+ */
+@ExpressionDescription(usage =
+  """_FUNC_(input, offset, default) - LEAD returns the value of 'x' at 
'offset' rows after the
+ current row in the window""")
 case class Lead(input: Expression, offset: Expression, default: Expression)
 extends OffsetWindowFunction {
 
@@ -365,6 +380,21 @@ case class Lead(input: Expression, offset: Expression, 
default: Expression)
   override val direction = Ascending
 }
 
+/**
+ * The Lag function returns the value of 'x' at 'offset' rows before the 
current row in the window.
+ * Offsets start at 0, which is the current row. The offset must be constant 
integer value. The
+ * default offset is 1. When the value of 'x' is null at the offset, or when 
the offset is smaller
+ * than the window, the default expression is evaluated.
+ *
+ * This documentation has been based upon similar documentation for the Hive 
and Presto projects.
+ *
+ * @param input expression to evaluate 'offset' rows before 

spark git commit: [SPARK-12588] Remove HttpBroadcast in Spark 2.0.

2015-12-30 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/master f76ee109d -> ee8f8d318


[SPARK-12588] Remove HttpBroadcast in Spark 2.0.

We switched to TorrentBroadcast in Spark 1.1, and HttpBroadcast has been 
undocumented since then. It's time to remove it in Spark 2.0.
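
For context, a hedged sketch of the user-facing broadcast API, which this removal does not change since SparkContext.broadcast now always goes through TorrentBroadcast (the values below are examples):

```
// Illustrative only: user code never names a broadcast factory, so removing
// HttpBroadcast leaves this unchanged.
import org.apache.spark.{SparkConf, SparkContext}

val sc = new SparkContext(
  new SparkConf().setAppName("broadcast-example").setMaster("local[*]"))

val lookup = sc.broadcast(Map(1 -> "one", 2 -> "two"))  // distributed via TorrentBroadcast

val described = sc.parallelize(Seq(1, 2, 3)).map { k =>
  lookup.value.getOrElse(k, "unknown")                  // read the broadcast value on executors
}.collect()

sc.stop()
```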

Author: Reynold Xin 

Closes #10531 from rxin/SPARK-12588.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ee8f8d31
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ee8f8d31
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ee8f8d31

Branch: refs/heads/master
Commit: ee8f8d318417c514fbb26e57157483d466ddbfae
Parents: f76ee10
Author: Reynold Xin 
Authored: Wed Dec 30 18:07:07 2015 -0800
Committer: Reynold Xin 
Committed: Wed Dec 30 18:07:07 2015 -0800

--
 .../spark/broadcast/BroadcastFactory.scala  |   4 +-
 .../spark/broadcast/BroadcastManager.scala  |  13 +-
 .../apache/spark/broadcast/HttpBroadcast.scala  | 269 ---
 .../spark/broadcast/HttpBroadcastFactory.scala  |  47 
 .../spark/broadcast/TorrentBroadcast.scala  |   2 +-
 .../broadcast/TorrentBroadcastFactory.scala |   2 +-
 .../spark/serializer/KryoSerializer.scala   |   2 -
 .../apache/spark/broadcast/BroadcastSuite.scala | 131 +
 docs/configuration.md   |  19 +-
 docs/security.md|  13 +-
 .../apache/spark/examples/BroadcastTest.scala   |   8 +-
 project/MimaExcludes.scala  |   3 +-
 12 files changed, 22 insertions(+), 491 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ee8f8d31/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala 
b/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala
index 6a187b4..7f35ac4 100644
--- a/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala
+++ b/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala
@@ -24,14 +24,12 @@ import org.apache.spark.SparkConf
 import org.apache.spark.annotation.DeveloperApi
 
 /**
- * :: DeveloperApi ::
  * An interface for all the broadcast implementations in Spark (to allow
  * multiple broadcast implementations). SparkContext uses a user-specified
  * BroadcastFactory implementation to instantiate a particular broadcast for 
the
  * entire Spark job.
  */
-@DeveloperApi
-trait BroadcastFactory {
+private[spark] trait BroadcastFactory {
 
   def initialize(isDriver: Boolean, conf: SparkConf, securityMgr: 
SecurityManager): Unit
 

http://git-wip-us.apache.org/repos/asf/spark/blob/ee8f8d31/core/src/main/scala/org/apache/spark/broadcast/BroadcastManager.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/broadcast/BroadcastManager.scala 
b/core/src/main/scala/org/apache/spark/broadcast/BroadcastManager.scala
index fac..6134360 100644
--- a/core/src/main/scala/org/apache/spark/broadcast/BroadcastManager.scala
+++ b/core/src/main/scala/org/apache/spark/broadcast/BroadcastManager.scala
@@ -21,8 +21,8 @@ import java.util.concurrent.atomic.AtomicLong
 
 import scala.reflect.ClassTag
 
-import org.apache.spark._
-import org.apache.spark.util.Utils
+import org.apache.spark.{Logging, SparkConf, SecurityManager}
+
 
 private[spark] class BroadcastManager(
 val isDriver: Boolean,
@@ -39,15 +39,8 @@ private[spark] class BroadcastManager(
   private def initialize() {
 synchronized {
   if (!initialized) {
-val broadcastFactoryClass =
-  conf.get("spark.broadcast.factory", 
"org.apache.spark.broadcast.TorrentBroadcastFactory")
-
-broadcastFactory =
-  
Utils.classForName(broadcastFactoryClass).newInstance.asInstanceOf[BroadcastFactory]
-
-// Initialize appropriate BroadcastFactory and BroadcastObject
+broadcastFactory = new TorrentBroadcastFactory
 broadcastFactory.initialize(isDriver, conf, securityManager)
-
 initialized = true
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/ee8f8d31/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala
--
diff --git a/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala 
b/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala
deleted file mode 100644
index b69af63..000
--- a/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala
+++ /dev/null
@@ -1,269 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  

spark git commit: [SPARK-3873][GRAPHX] Import order fixes.

2015-12-30 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/master fd313 -> 9140d9074


[SPARK-3873][GRAPHX] Import order fixes.

There's one warning left, caused by a bug in the checker.

Author: Marcelo Vanzin 

Closes #10537 from vanzin/SPARK-3873-graphx.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9140d907
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9140d907
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9140d907

Branch: refs/heads/master
Commit: 9140d9074379055a0b4b2f5c381362b31c141941
Parents: fd3
Author: Marcelo Vanzin 
Authored: Wed Dec 30 18:26:08 2015 -0800
Committer: Reynold Xin 
Committed: Wed Dec 30 18:26:08 2015 -0800

--
 graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala  | 5 ++---
 .../scala/org/apache/spark/graphx/GraphKryoRegistrator.scala | 5 ++---
 .../src/main/scala/org/apache/spark/graphx/GraphLoader.scala | 2 +-
 graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala | 3 +--
 .../src/main/scala/org/apache/spark/graphx/GraphXUtils.scala | 4 +---
 graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala   | 2 +-
 .../src/main/scala/org/apache/spark/graphx/VertexRDD.scala   | 5 ++---
 .../org/apache/spark/graphx/impl/EdgePartitionBuilder.scala  | 2 +-
 .../scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala | 5 ++---
 .../main/scala/org/apache/spark/graphx/impl/GraphImpl.scala  | 5 ++---
 .../org/apache/spark/graphx/impl/ReplicatedVertexView.scala  | 3 +--
 .../org/apache/spark/graphx/impl/RoutingTablePartition.scala | 8 +++-
 .../apache/spark/graphx/impl/ShippableVertexPartition.scala  | 3 +--
 .../scala/org/apache/spark/graphx/impl/VertexPartition.scala | 3 +--
 .../org/apache/spark/graphx/impl/VertexPartitionBase.scala   | 3 +--
 .../apache/spark/graphx/impl/VertexPartitionBaseOps.scala| 3 +--
 .../scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala   | 3 +--
 .../scala/org/apache/spark/graphx/lib/LabelPropagation.scala | 1 +
 .../main/scala/org/apache/spark/graphx/lib/PageRank.scala| 2 +-
 .../main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala | 2 +-
 .../scala/org/apache/spark/graphx/lib/ShortestPaths.scala| 3 ++-
 .../scala/org/apache/spark/graphx/util/GraphGenerators.scala | 7 ++-
 .../util/collection/GraphXPrimitiveKeyOpenHashMap.scala  | 4 ++--
 23 files changed, 33 insertions(+), 50 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/9140d907/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala
--
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala 
b/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala
index ee7302a..45526bf 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala
@@ -24,12 +24,11 @@ import org.apache.spark.Dependency
 import org.apache.spark.Partition
 import org.apache.spark.SparkContext
 import org.apache.spark.TaskContext
-import org.apache.spark.rdd.RDD
-import org.apache.spark.storage.StorageLevel
-
 import org.apache.spark.graphx.impl.EdgePartition
 import org.apache.spark.graphx.impl.EdgePartitionBuilder
 import org.apache.spark.graphx.impl.EdgeRDDImpl
+import org.apache.spark.rdd.RDD
+import org.apache.spark.storage.StorageLevel
 
 /**
  * `EdgeRDD[ED, VD]` extends `RDD[Edge[ED]]` by storing the edges in columnar 
format on each

http://git-wip-us.apache.org/repos/asf/spark/blob/9140d907/graphx/src/main/scala/org/apache/spark/graphx/GraphKryoRegistrator.scala
--
diff --git 
a/graphx/src/main/scala/org/apache/spark/graphx/GraphKryoRegistrator.scala 
b/graphx/src/main/scala/org/apache/spark/graphx/GraphKryoRegistrator.scala
index 563c948..eaa71da 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/GraphKryoRegistrator.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphKryoRegistrator.scala
@@ -19,12 +19,11 @@ package org.apache.spark.graphx
 
 import com.esotericsoftware.kryo.Kryo
 
+import org.apache.spark.graphx.impl._
+import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap
 import org.apache.spark.serializer.KryoRegistrator
 import org.apache.spark.util.BoundedPriorityQueue
 import org.apache.spark.util.collection.BitSet
-
-import org.apache.spark.graphx.impl._
-import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap
 import org.apache.spark.util.collection.OpenHashSet
 
 /**

http://git-wip-us.apache.org/repos/asf/spark/blob/9140d907/graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala
--
diff --git 

spark git commit: [SPARK-12263][DOCS] IllegalStateException: Memory can't be 0 for SPARK_WORKER_MEMORY without unit

2015-12-30 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 27af6157f -> 932cf4424


[SPARK-12263][DOCS] IllegalStateException: Memory can't be 0 for 
SPARK_WORKER_MEMORY without unit

Updated the worker's IllegalStateException message to say that the memory value is below 1 MB, rather than only that it can't be 0, to help diagnose a SPARK_WORKER_MEMORY set without a unit.
Requesting review.
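
For context on why an unsuffixed value trips this check: the worker converts SPARK_WORKER_MEMORY to megabytes, and a bare number is treated as bytes, so anything under about 1,048,576 rounds down to 0 MB and fails the memory <= 0 test. The sketch below is a rough, self-contained approximation of that conversion, written for illustration only; it is not the actual Spark utility, whose suffix handling is richer:

  object MemoryStringSketch {
    // Stand-in for the worker's memory parsing: an unsuffixed value is bytes.
    def memoryStringToMb(str: String): Int = {
      val s = str.trim.toLowerCase
      val (digits, bytesPerUnit) = s.last match {
        case 'k' => (s.dropRight(1), 1L << 10)
        case 'm' => (s.dropRight(1), 1L << 20)
        case 'g' => (s.dropRight(1), 1L << 30)
        case _   => (s, 1L)                   // no suffix: interpreted as bytes
      }
      ((digits.toLong * bytesPerUnit) >> 20).toInt
    }

    def main(args: Array[String]): Unit = {
      println(memoryStringToMb("2g"))    // 2048 -> passes checkWorkerMemory
      println(memoryStringToMb("1024"))  // 0    -> hits the IllegalStateException
    }
  }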

Author: Neelesh Srinivas Salian 

Closes #10483 from nssalian/SPARK-12263.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/932cf442
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/932cf442
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/932cf442

Branch: refs/heads/master
Commit: 932cf44248e067ee7cae6fef79ddf2ab9b1c36d8
Parents: 27af615
Author: Neelesh Srinivas Salian 
Authored: Wed Dec 30 11:14:13 2015 +
Committer: Sean Owen 
Committed: Wed Dec 30 11:14:13 2015 +

--
 .../scala/org/apache/spark/deploy/worker/WorkerArguments.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/932cf442/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala 
b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala
index 5181142..de3c7cd 100644
--- a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala
@@ -175,7 +175,7 @@ private[worker] class WorkerArguments(args: Array[String], 
conf: SparkConf) {
 
   def checkWorkerMemory(): Unit = {
 if (memory <= 0) {
-  val message = "Memory can't be 0, missing a M or G on the end of the 
memory specification?"
+  val message = "Memory is below 1MB, or missing a M/G at the end of the 
memory specification?"
   throw new IllegalStateException(message)
 }
   }





[3/3] spark git commit: Revert "[SPARK-12362][SQL][WIP] Inline Hive Parser"

2015-12-30 Thread rxin
Revert "[SPARK-12362][SQL][WIP] Inline Hive Parser"

This reverts commit b600bccf41a7b1958e33d8301a19214e6517e388 due to 
non-deterministic build breaks.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/27af6157
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/27af6157
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/27af6157

Branch: refs/heads/master
Commit: 27af6157f9cceeb9aa74eec54c8898d3e0749ed0
Parents: 4f75f78
Author: Reynold Xin 
Authored: Wed Dec 30 00:08:44 2015 -0800
Committer: Reynold Xin 
Committed: Wed Dec 30 00:08:44 2015 -0800

--
 pom.xml |5 -
 project/SparkBuild.scala|2 +-
 project/plugins.sbt |4 -
 .../hive/execution/HiveCompatibilitySuite.scala |   10 +-
 sql/hive/pom.xml|   22 -
 .../apache/spark/sql/parser/FromClauseParser.g  |  330 ---
 .../apache/spark/sql/parser/IdentifiersParser.g |  697 -
 .../spark/sql/parser/SelectClauseParser.g   |  226 --
 .../org/apache/spark/sql/parser/SparkSqlLexer.g |  474 
 .../apache/spark/sql/parser/SparkSqlParser.g| 2457 --
 .../apache/spark/sql/parser/ASTErrorNode.java   |   49 -
 .../org/apache/spark/sql/parser/ASTNode.java|  245 --
 .../apache/spark/sql/parser/ParseDriver.java|  213 --
 .../org/apache/spark/sql/parser/ParseError.java |   54 -
 .../apache/spark/sql/parser/ParseException.java |   51 -
 .../org/apache/spark/sql/parser/ParseUtils.java |   96 -
 .../spark/sql/parser/SemanticAnalyzer.java  |  406 ---
 .../org/apache/spark/sql/hive/HiveQl.scala  |  133 +-
 18 files changed, 72 insertions(+), 5402 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/27af6157/pom.xml
--
diff --git a/pom.xml b/pom.xml
index 73ba8d5..284c219 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1951,11 +1951,6 @@
 
   
 
-
-  org.antlr
-  antlr3-maven-plugin
-  3.5.2
-
 
 
   org.apache.maven.plugins

http://git-wip-us.apache.org/repos/asf/spark/blob/27af6157/project/SparkBuild.scala
--
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index df21d3e..c3d53f8 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -415,7 +415,7 @@ object Hive {
 // in order to generate golden files.  This is only required for 
developers who are adding new
 // new query tests.
 fullClasspath in Test := (fullClasspath in Test).value.filterNot { f => 
f.toString.contains("jcl-over") }
-  ) ++ sbtantlr.SbtAntlrPlugin.antlrSettings
+  )
 
 }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/27af6157/project/plugins.sbt
--
diff --git a/project/plugins.sbt b/project/plugins.sbt
index f172dc9..5e23224 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -4,8 +4,6 @@ resolvers += "Typesafe Repository" at 
"http://repo.typesafe.com/typesafe/release
 
 resolvers += "sonatype-releases" at 
"https://oss.sonatype.org/content/repositories/releases/;
 
-resolvers += "stefri" at "http://stefri.github.io/repo/releases;
-
 addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.11.2")
 
 addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "2.2.0")
@@ -26,8 +24,6 @@ addSbtPlugin("com.cavorite" % "sbt-avro" % "0.3.2")
 
 addSbtPlugin("io.spray" % "sbt-revolver" % "0.7.2")
 
-addSbtPlugin("com.github.stefri" % "sbt-antlr" % "0.5.3")
-
 libraryDependencies += "org.ow2.asm"  % "asm" % "5.0.3"
 
 libraryDependencies += "org.ow2.asm"  % "asm-commons" % "5.0.3"

http://git-wip-us.apache.org/repos/asf/spark/blob/27af6157/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
--
diff --git 
a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
 
b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index 2b0e48d..2d0d7b8 100644
--- 
a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ 
b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -308,12 +308,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest 
with BeforeAndAfter {
 
 // The difference between the double numbers generated by Hive and Spark
 // can be ignored (e.g., 0.6633880657639323 and 0.6633880657639322)
-

[1/3] spark git commit: Revert "[SPARK-12362][SQL][WIP] Inline Hive Parser"

2015-12-30 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/master 4f75f785d -> 27af6157f


http://git-wip-us.apache.org/repos/asf/spark/blob/27af6157/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseDriver.java
--
diff --git 
a/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseDriver.java 
b/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseDriver.java
deleted file mode 100644
index c77198b..000
--- a/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseDriver.java
+++ /dev/null
@@ -1,213 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.parser;
-
-import java.util.ArrayList;
-import org.antlr.runtime.ANTLRStringStream;
-import org.antlr.runtime.CharStream;
-import org.antlr.runtime.NoViableAltException;
-import org.antlr.runtime.RecognitionException;
-import org.antlr.runtime.Token;
-import org.antlr.runtime.TokenRewriteStream;
-import org.antlr.runtime.TokenStream;
-import org.antlr.runtime.tree.CommonTree;
-import org.antlr.runtime.tree.CommonTreeAdaptor;
-import org.antlr.runtime.tree.TreeAdaptor;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.apache.hadoop.hive.ql.Context;
-
-/**
- * ParseDriver.
- *
- */
-public class ParseDriver {
-
-  private static final Logger LOG = 
LoggerFactory.getLogger("hive.ql.parse.ParseDriver");
-
-  /**
-   * ANTLRNoCaseStringStream.
-   *
-   */
-  //This class provides and implementation for a case insensitive token checker
-  //for the lexical analysis part of antlr. By converting the token stream into
-  //upper case at the time when lexical rules are checked, this class ensures 
that the
-  //lexical rules need to just match the token with upper case letters as 
opposed to
-  //combination of upper case and lower case characters. This is purely used 
for matching lexical
-  //rules. The actual token text is stored in the same way as the user input 
without
-  //actually converting it into an upper case. The token values are generated 
by the consume()
-  //function of the super class ANTLRStringStream. The LA() function is the 
lookahead function
-  //and is purely used for matching lexical rules. This also means that the 
grammar will only
-  //accept capitalized tokens in case it is run from other tools like 
antlrworks which
-  //do not have the ANTLRNoCaseStringStream implementation.
-  public class ANTLRNoCaseStringStream extends ANTLRStringStream {
-
-public ANTLRNoCaseStringStream(String input) {
-  super(input);
-}
-
-@Override
-public int LA(int i) {
-
-  int returnChar = super.LA(i);
-  if (returnChar == CharStream.EOF) {
-return returnChar;
-  } else if (returnChar == 0) {
-return returnChar;
-  }
-
-  return Character.toUpperCase((char) returnChar);
-}
-  }
-
-  /**
-   * HiveLexerX.
-   *
-   */
-  public class HiveLexerX extends SparkSqlLexer {
-
-private final ArrayList<ParseError> errors;
-
-public HiveLexerX(CharStream input) {
-  super(input);
-  errors = new ArrayList<ParseError>();
-}
-
-@Override
-public void displayRecognitionError(String[] tokenNames, 
RecognitionException e) {
-  errors.add(new ParseError(this, e, tokenNames));
-}
-
-@Override
-public String getErrorMessage(RecognitionException e, String[] tokenNames) 
{
-  String msg = null;
-
-  if (e instanceof NoViableAltException) {
-// @SuppressWarnings("unused")
-// NoViableAltException nvae = (NoViableAltException) e;
-// for development, can add
-// "decision=<<"+nvae.grammarDecisionDescription+">>"
-// and "(decision="+nvae.decisionNumber+") and
-// "state "+nvae.stateNumber
-msg = "character " + getCharErrorDisplay(e.c) + " not supported here";
-  } else {
-msg = super.getErrorMessage(e, tokenNames);
-  }
-
-  return msg;
-}
-
-public ArrayList<ParseError> getErrors() {
-  return errors;
-}
-
-  }
-
-  /**
-   * Tree adaptor for making antlr return ASTNodes instead of CommonTree nodes
-   * so that the graph walking algorithms and the rules framework defined in
-   * ql.lib can be used with the AST Nodes.
-   */

[2/3] spark git commit: Revert "[SPARK-12362][SQL][WIP] Inline Hive Parser"

2015-12-30 Thread rxin
http://git-wip-us.apache.org/repos/asf/spark/blob/27af6157/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlParser.g
--
diff --git 
a/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlParser.g 
b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlParser.g
deleted file mode 100644
index 69574d7..000
--- a/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlParser.g
+++ /dev/null
@@ -1,2457 +0,0 @@
-/**
-   Licensed to the Apache Software Foundation (ASF) under one or more 
-   contributor license agreements.  See the NOTICE file distributed with 
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with 
-   the License.  You may obtain a copy of the License at
-
-   http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-*/
-parser grammar SparkSqlParser;
-
-options
-{
-tokenVocab=SparkSqlLexer;
-output=AST;
-ASTLabelType=CommonTree;
-backtrack=false;
-k=3;
-}
-import SelectClauseParser, FromClauseParser, IdentifiersParser;
-
-tokens {
-TOK_INSERT;
-TOK_QUERY;
-TOK_SELECT;
-TOK_SELECTDI;
-TOK_SELEXPR;
-TOK_FROM;
-TOK_TAB;
-TOK_PARTSPEC;
-TOK_PARTVAL;
-TOK_DIR;
-TOK_TABREF;
-TOK_SUBQUERY;
-TOK_INSERT_INTO;
-TOK_DESTINATION;
-TOK_ALLCOLREF;
-TOK_TABLE_OR_COL;
-TOK_FUNCTION;
-TOK_FUNCTIONDI;
-TOK_FUNCTIONSTAR;
-TOK_WHERE;
-TOK_OP_EQ;
-TOK_OP_NE;
-TOK_OP_LE;
-TOK_OP_LT;
-TOK_OP_GE;
-TOK_OP_GT;
-TOK_OP_DIV;
-TOK_OP_ADD;
-TOK_OP_SUB;
-TOK_OP_MUL;
-TOK_OP_MOD;
-TOK_OP_BITAND;
-TOK_OP_BITNOT;
-TOK_OP_BITOR;
-TOK_OP_BITXOR;
-TOK_OP_AND;
-TOK_OP_OR;
-TOK_OP_NOT;
-TOK_OP_LIKE;
-TOK_TRUE;
-TOK_FALSE;
-TOK_TRANSFORM;
-TOK_SERDE;
-TOK_SERDENAME;
-TOK_SERDEPROPS;
-TOK_EXPLIST;
-TOK_ALIASLIST;
-TOK_GROUPBY;
-TOK_ROLLUP_GROUPBY;
-TOK_CUBE_GROUPBY;
-TOK_GROUPING_SETS;
-TOK_GROUPING_SETS_EXPRESSION;
-TOK_HAVING;
-TOK_ORDERBY;
-TOK_CLUSTERBY;
-TOK_DISTRIBUTEBY;
-TOK_SORTBY;
-TOK_UNIONALL;
-TOK_UNIONDISTINCT;
-TOK_JOIN;
-TOK_LEFTOUTERJOIN;
-TOK_RIGHTOUTERJOIN;
-TOK_FULLOUTERJOIN;
-TOK_UNIQUEJOIN;
-TOK_CROSSJOIN;
-TOK_LOAD;
-TOK_EXPORT;
-TOK_IMPORT;
-TOK_REPLICATION;
-TOK_METADATA;
-TOK_NULL;
-TOK_ISNULL;
-TOK_ISNOTNULL;
-TOK_TINYINT;
-TOK_SMALLINT;
-TOK_INT;
-TOK_BIGINT;
-TOK_BOOLEAN;
-TOK_FLOAT;
-TOK_DOUBLE;
-TOK_DATE;
-TOK_DATELITERAL;
-TOK_DATETIME;
-TOK_TIMESTAMP;
-TOK_TIMESTAMPLITERAL;
-TOK_INTERVAL_YEAR_MONTH;
-TOK_INTERVAL_YEAR_MONTH_LITERAL;
-TOK_INTERVAL_DAY_TIME;
-TOK_INTERVAL_DAY_TIME_LITERAL;
-TOK_INTERVAL_YEAR_LITERAL;
-TOK_INTERVAL_MONTH_LITERAL;
-TOK_INTERVAL_DAY_LITERAL;
-TOK_INTERVAL_HOUR_LITERAL;
-TOK_INTERVAL_MINUTE_LITERAL;
-TOK_INTERVAL_SECOND_LITERAL;
-TOK_STRING;
-TOK_CHAR;
-TOK_VARCHAR;
-TOK_BINARY;
-TOK_DECIMAL;
-TOK_LIST;
-TOK_STRUCT;
-TOK_MAP;
-TOK_UNIONTYPE;
-TOK_COLTYPELIST;
-TOK_CREATEDATABASE;
-TOK_CREATETABLE;
-TOK_TRUNCATETABLE;
-TOK_CREATEINDEX;
-TOK_CREATEINDEX_INDEXTBLNAME;
-TOK_DEFERRED_REBUILDINDEX;
-TOK_DROPINDEX;
-TOK_LIKETABLE;
-TOK_DESCTABLE;
-TOK_DESCFUNCTION;
-TOK_ALTERTABLE;
-TOK_ALTERTABLE_RENAME;
-TOK_ALTERTABLE_ADDCOLS;
-TOK_ALTERTABLE_RENAMECOL;
-TOK_ALTERTABLE_RENAMEPART;
-TOK_ALTERTABLE_REPLACECOLS;
-TOK_ALTERTABLE_ADDPARTS;
-TOK_ALTERTABLE_DROPPARTS;
-TOK_ALTERTABLE_PARTCOLTYPE;
-TOK_ALTERTABLE_MERGEFILES;
-TOK_ALTERTABLE_TOUCH;
-TOK_ALTERTABLE_ARCHIVE;
-TOK_ALTERTABLE_UNARCHIVE;
-TOK_ALTERTABLE_SERDEPROPERTIES;
-TOK_ALTERTABLE_SERIALIZER;
-TOK_ALTERTABLE_UPDATECOLSTATS;
-TOK_TABLE_PARTITION;
-TOK_ALTERTABLE_FILEFORMAT;
-TOK_ALTERTABLE_LOCATION;
-TOK_ALTERTABLE_PROPERTIES;
-TOK_ALTERTABLE_CHANGECOL_AFTER_POSITION;
-TOK_ALTERTABLE_DROPPROPERTIES;
-TOK_ALTERTABLE_SKEWED;
-TOK_ALTERTABLE_EXCHANGEPARTITION;
-TOK_ALTERTABLE_SKEWED_LOCATION;
-TOK_ALTERTABLE_BUCKETS;
-TOK_ALTERTABLE_CLUSTER_SORT;
-TOK_ALTERTABLE_COMPACT;
-TOK_ALTERINDEX_REBUILD;
-TOK_ALTERINDEX_PROPERTIES;
-TOK_MSCK;
-TOK_SHOWDATABASES;
-TOK_SHOWTABLES;
-TOK_SHOWCOLUMNS;
-TOK_SHOWFUNCTIONS;
-TOK_SHOWPARTITIONS;
-TOK_SHOW_CREATEDATABASE;
-TOK_SHOW_CREATETABLE;
-TOK_SHOW_TABLESTATUS;
-TOK_SHOW_TBLPROPERTIES;
-TOK_SHOWLOCKS;
-TOK_SHOWCONF;
-TOK_LOCKTABLE;
-TOK_UNLOCKTABLE;
-TOK_LOCKDB;
-TOK_UNLOCKDB;
-TOK_SWITCHDATABASE;
-TOK_DROPDATABASE;
-TOK_DROPTABLE;
-TOK_DATABASECOMMENT;
-TOK_TABCOLLIST;
-TOK_TABCOL;
-TOK_TABLECOMMENT;
-TOK_TABLEPARTCOLS;
-TOK_TABLEROWFORMAT;
-TOK_TABLEROWFORMATFIELD;
-TOK_TABLEROWFORMATCOLLITEMS;
-TOK_TABLEROWFORMATMAPKEYS;
-TOK_TABLEROWFORMATLINES;
-TOK_TABLEROWFORMATNULL;
-TOK_TABLEFILEFORMAT;
-TOK_FILEFORMAT_GENERIC;
-TOK_OFFLINE;
-TOK_ENABLE;
-TOK_DISABLE;
-TOK_READONLY;
-TOK_NO_DROP;
-TOK_STORAGEHANDLER;