spark git commit: [SPARK-3873][YARN] Fix import ordering.
Repository: spark Updated Branches: refs/heads/master ee8f8d318 -> fd313 [SPARK-3873][YARN] Fix import ordering. Author: Marcelo VanzinCloses #10536 from vanzin/SPARK-3873-yarn. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fd31 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fd31 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fd31 Branch: refs/heads/master Commit: fd313864e21e3a9c95577723c931357d1f16 Parents: ee8f8d3 Author: Marcelo Vanzin Authored: Wed Dec 30 18:20:27 2015 -0800 Committer: Reynold Xin Committed: Wed Dec 30 18:20:27 2015 -0800 -- .../spark/deploy/yarn/AMDelegationTokenRenewer.scala| 2 +- .../apache/spark/deploy/yarn/ApplicationMaster.scala| 6 +++--- .../spark/deploy/yarn/ApplicationMasterArguments.scala | 5 +++-- .../scala/org/apache/spark/deploy/yarn/Client.scala | 12 +--- .../deploy/yarn/ClientDistributedCacheManager.scala | 2 +- .../deploy/yarn/ExecutorDelegationTokenUpdater.scala| 6 +++--- .../org/apache/spark/deploy/yarn/ExecutorRunnable.scala | 4 ++-- .../org/apache/spark/deploy/yarn/YarnAllocator.scala| 1 - .../org/apache/spark/deploy/yarn/YarnRMClient.scala | 2 +- .../apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala | 6 +++--- .../scheduler/cluster/YarnClientSchedulerBackend.scala | 2 +- .../apache/spark/scheduler/cluster/YarnScheduler.scala | 1 - 12 files changed, 23 insertions(+), 26 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/fd31/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala -- diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala index 56e4741..b8daa50 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala @@ -24,9 +24,9 @@ import scala.language.postfixOps import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.security.UserGroupInformation -import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.{Logging, SparkConf} +import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.util.ThreadUtils /* http://git-wip-us.apache.org/repos/asf/spark/blob/fd31/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala -- diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index fc742df..a01bb26 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -17,23 +17,23 @@ package org.apache.spark.deploy.yarn -import scala.util.control.NonFatal - import java.io.{File, IOException} import java.lang.reflect.InvocationTargetException import java.net.{Socket, URL} import java.util.concurrent.atomic.AtomicReference +import scala.util.control.NonFatal + import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.yarn.api._ import org.apache.hadoop.yarn.api.records._ import org.apache.hadoop.yarn.conf.YarnConfiguration -import org.apache.spark.rpc._ import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkContext, SparkEnv, SparkException, SparkUserAppException} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.history.HistoryServer 
+import org.apache.spark.rpc._ import org.apache.spark.scheduler.cluster.{CoarseGrainedSchedulerBackend, YarnSchedulerBackend} import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._ import org.apache.spark.util._ http://git-wip-us.apache.org/repos/asf/spark/blob/fd31/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala -- diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala index 17d9943..5af3941 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala @@ -17,9 +17,10 @@ package org.apache.spark.deploy.yarn -import org.apache.spark.util.{MemoryParam, IntParam}
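[Editor's note] Every hunk in this SPARK-3873 change converges on the same grouping: java/javax imports first, then scala, then third-party libraries, then org.apache.spark, with each group separated by a blank line and sorted alphabetically. A minimal sketch of a conforming import block — the file and object are hypothetical; the classes are ones appearing in the diffs above:

```
// Hypothetical file illustrating the ordering the style checker expects:
// 1. java/javax   2. scala   3. third-party   4. org.apache.spark,
// each group blank-line separated and alphabetized within the group.
package org.apache.spark.deploy.yarn

import java.io.File
import java.util.concurrent.atomic.AtomicReference

import scala.util.control.NonFatal

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.security.UserGroupInformation

import org.apache.spark.{Logging, SparkConf}
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.util.ThreadUtils

private object ImportOrderingSketch // placeholder member so the file compiles
```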
spark git commit: [SPARK-12561] Remove JobLogger in Spark 2.0.
Repository: spark Updated Branches: refs/heads/master 9140d9074 -> be33a0cd3 [SPARK-12561] Remove JobLogger in Spark 2.0. It was research code and has been deprecated since 1.0.0. No one really uses it since they can just use event logging. Author: Reynold XinCloses #10530 from rxin/SPARK-12561. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/be33a0cd Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/be33a0cd Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/be33a0cd Branch: refs/heads/master Commit: be33a0cd3def86e0aa64dab411e504abbbdfb03c Parents: 9140d90 Author: Reynold Xin Authored: Wed Dec 30 18:28:08 2015 -0800 Committer: Reynold Xin Committed: Wed Dec 30 18:28:08 2015 -0800 -- .../org/apache/spark/scheduler/JobLogger.scala | 277 --- 1 file changed, 277 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/be33a0cd/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala b/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala deleted file mode 100644 index f96eb8c..000 --- a/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala +++ /dev/null @@ -1,277 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - *http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.scheduler - -import java.io.{File, FileNotFoundException, IOException, PrintWriter} -import java.text.SimpleDateFormat -import java.util.{Date, Properties} - -import scala.collection.mutable.HashMap - -import org.apache.spark._ -import org.apache.spark.annotation.DeveloperApi -import org.apache.spark.executor.TaskMetrics - -/** - * :: DeveloperApi :: - * A logger class to record runtime information for jobs in Spark. This class outputs one log file - * for each Spark job, containing tasks start/stop and shuffle information. JobLogger is a subclass - * of SparkListener, use addSparkListener to add JobLogger to a SparkContext after the SparkContext - * is created. Note that each JobLogger only works for one SparkContext - * - * NOTE: The functionality of this class is heavily stripped down to accommodate for a general - * refactor of the SparkListener interface. In its place, the EventLoggingListener is introduced - * to log application information as SparkListenerEvents. To enable this functionality, set - * spark.eventLog.enabled to true. 
- */ -@DeveloperApi -@deprecated("Log application information by setting spark.eventLog.enabled.", "1.0.0") -class JobLogger(val user: String, val logDirName: String) extends SparkListener with Logging { - - def this() = this(System.getProperty("user.name", ""), -String.valueOf(System.currentTimeMillis())) - - private val logDir = -if (System.getenv("SPARK_LOG_DIR") != null) { - System.getenv("SPARK_LOG_DIR") -} else { - "/tmp/spark-%s".format(user) -} - - private val jobIdToPrintWriter = new HashMap[Int, PrintWriter] - private val stageIdToJobId = new HashMap[Int, Int] - private val jobIdToStageIds = new HashMap[Int, Seq[Int]] - private val dateFormat = new ThreadLocal[SimpleDateFormat]() { -override def initialValue(): SimpleDateFormat = new SimpleDateFormat("/MM/dd HH:mm:ss") - } - - createLogDir() - - /** Create a folder for log files, the folder's name is the creation time of jobLogger */ - protected def createLogDir() { -val dir = new File(logDir + "/" + logDirName + "/") -if (dir.exists()) { - return -} -if (!dir.mkdirs()) { - // JobLogger should throw a exception rather than continue to construct this object. - throw new IOException("create log directory error:" + logDir + "/" + logDirName + "/") -} - } - - /** - * Create a log file for one job - * @param jobId ID of the job - * @throws FileNotFoundException Fail to create log file - */ - protected def createLogWriter(jobId:
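[Editor's note] The scaladoc of the removed class points users at event logging as the replacement. A minimal sketch of enabling it when building a context — the master and log directory are placeholder values, not part of this commit:

```
import org.apache.spark.{SparkConf, SparkContext}

// Minimal sketch of the replacement named in the removed class's docs.
val conf = new SparkConf()
  .setMaster("local[2]")                           // placeholder master
  .setAppName("event-log-example")
  .set("spark.eventLog.enabled", "true")           // what JobLogger users migrate to
  .set("spark.eventLog.dir", "/tmp/spark-events")  // hypothetical path; must exist
val sc = new SparkContext(conf)
```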
spark git commit: [SPARK-12585] [SQL] move numFields to constructor of UnsafeRow
Repository: spark Updated Branches: refs/heads/master 93b52abca -> e6c77874b [SPARK-12585] [SQL] move numFields to constructor of UnsafeRow Right now, numFields will be passed in by pointTo(), then bitSetWidthInBytes is calculated, making pointTo() a little bit heavy. It should be part of constructor of UnsafeRow. Author: Davies LiuCloses #10528 from davies/numFields. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e6c77874 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e6c77874 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e6c77874 Branch: refs/heads/master Commit: e6c77874b915691dead91e8d96ad9f58ba3a73db Parents: 93b52ab Author: Davies Liu Authored: Wed Dec 30 22:16:37 2015 -0800 Committer: Davies Liu Committed: Wed Dec 30 22:16:37 2015 -0800 -- .../catalyst/expressions/UnsafeArrayData.java | 4 +- .../sql/catalyst/expressions/UnsafeRow.java | 88 ++-- .../sql/execution/UnsafeExternalRowSorter.java | 16 ++-- .../codegen/GenerateUnsafeProjection.scala | 4 +- .../codegen/GenerateUnsafeRowJoiner.scala | 4 +- .../GenerateUnsafeRowJoinerBitsetSuite.scala| 4 +- .../UnsafeFixedWidthAggregationMap.java | 10 +-- .../sql/execution/UnsafeKVExternalSorter.java | 24 +++--- .../parquet/UnsafeRowParquetRecordReader.java | 32 +++ .../sql/execution/UnsafeRowSerializer.scala | 6 +- .../sql/execution/columnar/ColumnType.scala | 3 +- .../columnar/GenerateColumnAccessor.scala | 4 +- .../datasources/text/DefaultSource.scala| 4 +- .../sql/execution/joins/CartesianProduct.scala | 5 +- .../sql/execution/joins/HashedRelation.scala| 4 +- .../org/apache/spark/sql/UnsafeRowSuite.scala | 11 ++- 16 files changed, 86 insertions(+), 137 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e6c77874/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java -- diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java index 3513960..3d80df2 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java @@ -270,8 +270,8 @@ public class UnsafeArrayData extends ArrayData { final int offset = getElementOffset(ordinal); if (offset < 0) return null; final int size = getElementSize(offset, ordinal); -final UnsafeRow row = new UnsafeRow(); -row.pointTo(baseObject, baseOffset + offset, numFields, size); +final UnsafeRow row = new UnsafeRow(numFields); +row.pointTo(baseObject, baseOffset + offset, size); return row; } http://git-wip-us.apache.org/repos/asf/spark/blob/e6c77874/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java -- diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java index b6979d0..7492b88 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java @@ -17,11 +17,7 @@ package org.apache.spark.sql.catalyst.expressions; -import java.io.Externalizable; -import java.io.IOException; -import java.io.ObjectInput; -import java.io.ObjectOutput; -import java.io.OutputStream; +import java.io.*; import java.math.BigDecimal; import 
java.math.BigInteger; import java.nio.ByteBuffer; @@ -30,26 +26,12 @@ import java.util.Collections; import java.util.HashSet; import java.util.Set; -import org.apache.spark.sql.types.ArrayType; -import org.apache.spark.sql.types.BinaryType; -import org.apache.spark.sql.types.BooleanType; -import org.apache.spark.sql.types.ByteType; -import org.apache.spark.sql.types.CalendarIntervalType; -import org.apache.spark.sql.types.DataType; -import org.apache.spark.sql.types.DateType; -import org.apache.spark.sql.types.Decimal; -import org.apache.spark.sql.types.DecimalType; -import org.apache.spark.sql.types.DoubleType; -import org.apache.spark.sql.types.FloatType; -import org.apache.spark.sql.types.IntegerType; -import org.apache.spark.sql.types.LongType; -import org.apache.spark.sql.types.MapType; -import org.apache.spark.sql.types.NullType; -import org.apache.spark.sql.types.ShortType; -import
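[Editor's note] To make the API change concrete, here is a sketch of the new calling pattern visible in the hunks above. It assumes the row's fixed-width layout (one 64-bit null-tracking word covering up to 64 fields, plus 8 bytes per field); the on-heap buffer is a hypothetical backing array, not Spark-managed memory.

```
import org.apache.spark.sql.catalyst.expressions.UnsafeRow
import org.apache.spark.unsafe.Platform

val numFields = 2
// One 8-byte null-bitset word (covers up to 64 fields) + 8 bytes per field.
val buf = new Array[Byte](8 + 8 * numFields)

// Before: new UnsafeRow(); row.pointTo(base, offset, numFields, size)
// After:  numFields is fixed at construction, so bitSetWidthInBytes is
// computed once and pointTo() only re-targets the backing memory.
val row = new UnsafeRow(numFields)
row.pointTo(buf, Platform.BYTE_ARRAY_OFFSET, buf.length)
row.setLong(0, 42L)
row.setInt(1, 7)
assert(row.getLong(0) == 42L && row.getInt(1) == 7)
```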
spark git commit: House cleaning: close open pull requests created before June 1st, 2015
Repository: spark Updated Branches: refs/heads/master be33a0cd3 -> 7b4452ba9

House cleaning: close open pull requests created before June 1st, 2015

Closes #5358 Closes #3744 Closes #3677 Closes #3536 Closes #3249 Closes #3221 Closes #2446 Closes #3794 Closes #3815 Closes #3816 Closes #3866 Closes #4286 Closes #5184 Closes #5170 Closes #5142 Closes #5025 Closes #5005 Closes #4897 Closes #4887 Closes #4849 Closes #4632 Closes #4622 Closes #4456 Closes #4449 Closes #4417 Closes #5483 Closes #5325 Closes #6545 Closes #6449 Closes #6433 Closes #6416 Closes #6403 Closes #6386 Closes #6263 Closes #6245 Closes #6213 Closes #6155 Closes #6133 Closes #6018 Closes #5978 Closes #5869 Closes #5852 Closes #5848 Closes #5754 Closes #5598 Closes #5503 Closes #4380

Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7b4452ba Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7b4452ba Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7b4452ba Branch: refs/heads/master Commit: 7b4452ba98d53ed646a2e744bb701702fc5371a7 Parents: be33a0c Author: Reynold Xin Authored: Wed Dec 30 18:49:17 2015 -0800 Committer: Reynold Xin Committed: Wed Dec 30 18:49:17 2015 -0800
spark git commit: Closes #10386 since it was superseded by #10468.
Repository: spark Updated Branches: refs/heads/master 7b4452ba9 -> c642c3a21

Closes #10386 since it was superseded by #10468.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c642c3a2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c642c3a2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c642c3a2 Branch: refs/heads/master Commit: c642c3a2102fd016deabcb78bd0292bbf3dd7acf Parents: 7b4452b Author: Reynold Xin Authored: Wed Dec 30 18:50:30 2015 -0800 Committer: Reynold Xin Committed: Wed Dec 30 18:50:30 2015 -0800
spark git commit: [SPARK-12300] [SQL] [PYSPARK] fix schema inference on local collections
Repository: spark Updated Branches: refs/heads/branch-1.6 c069ffc2b -> 8dc654971

[SPARK-12300] [SQL] [PYSPARK] fix schema inference on local collections

Current schema inference for local python collections halts as soon as there are no NullTypes. This is different from what happens when we specify a sampling ratio of 1.0 on a distributed collection, and it could result in incomplete schema information.

Author: Holden Karau. Closes #10275 from holdenk/SPARK-12300-fix-schmea-inferance-on-local-collections. (cherry picked from commit d1ca634db4ca9db7f0ba7ca38a0e03bcbfec23c9) Signed-off-by: Davies Liu

Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8dc65497 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8dc65497 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8dc65497 Branch: refs/heads/branch-1.6 Commit: 8dc65497152f2c8949b08fddad853d31c4bd9ae5 Parents: c069ffc Author: Holden Karau Authored: Wed Dec 30 11:14:47 2015 -0800 Committer: Davies Liu Committed: Wed Dec 30 11:15:12 2015 -0800

-- python/pyspark/sql/context.py | 10 +++--- python/pyspark/sql/tests.py | 11 +++ 2 files changed, 14 insertions(+), 7 deletions(-) --

http://git-wip-us.apache.org/repos/asf/spark/blob/8dc65497/python/pyspark/sql/context.py -- diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index b05aa2f..ba6915a 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -18,6 +18,7 @@ import sys import warnings import json +from functools import reduce if sys.version >= '3': basestring = unicode = str @@ -236,14 +237,9 @@ class SQLContext(object): if type(first) is dict: warnings.warn("inferring schema from dict is deprecated," "please use pyspark.sql.Row instead") -schema = _infer_schema(first) +schema = reduce(_merge_type, map(_infer_schema, data)) if _has_nulltype(schema): -for r in data: -schema = _merge_type(schema, _infer_schema(r)) -if not _has_nulltype(schema): -break -else: -raise ValueError("Some of types cannot be determined after inferring") +raise ValueError("Some of types cannot be determined after inferring") return schema def _inferSchema(self, rdd, samplingRatio=None):

http://git-wip-us.apache.org/repos/asf/spark/blob/8dc65497/python/pyspark/sql/tests.py -- diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 9f5f7cf..10b9917 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -353,6 +353,17 @@ class SQLTests(ReusedPySparkTestCase): df3 = self.sqlCtx.createDataFrame(rdd, df.schema) self.assertEqual(10, df3.count()) +def test_infer_schema_to_local(self): +input = [{"a": 1}, {"b": "coffee"}] +rdd = self.sc.parallelize(input) +df = self.sqlCtx.createDataFrame(input) +df2 = self.sqlCtx.createDataFrame(rdd, samplingRatio=1.0) +self.assertEqual(df.schema, df2.schema) + +rdd = self.sc.parallelize(range(10)).map(lambda x: Row(a=x)) +df3 = self.sqlCtx.createDataFrame(rdd, df.schema) +self.assertEqual(10, df3.count()) + def test_serialize_nested_array_and_map(self): d = [Row(l=[Row(a=1, b='s')], d={"key": Row(c=1.0, d="2")})] rdd = self.sc.parallelize(d)
spark git commit: [SPARK-12300] [SQL] [PYSPARK] fix schema inference on local collections
Repository: spark Updated Branches: refs/heads/master aa48164a4 -> d1ca634db

[SPARK-12300] [SQL] [PYSPARK] fix schema inference on local collections

Current schema inference for local python collections halts as soon as there are no NullTypes. This is different from what happens when we specify a sampling ratio of 1.0 on a distributed collection, and it could result in incomplete schema information.

Author: Holden Karau. Closes #10275 from holdenk/SPARK-12300-fix-schmea-inferance-on-local-collections.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d1ca634d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d1ca634d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d1ca634d Branch: refs/heads/master Commit: d1ca634db4ca9db7f0ba7ca38a0e03bcbfec23c9 Parents: aa48164 Author: Holden Karau Authored: Wed Dec 30 11:14:47 2015 -0800 Committer: Davies Liu Committed: Wed Dec 30 11:14:47 2015 -0800

-- python/pyspark/sql/context.py | 10 +++--- python/pyspark/sql/tests.py | 11 +++ 2 files changed, 14 insertions(+), 7 deletions(-) --

http://git-wip-us.apache.org/repos/asf/spark/blob/d1ca634d/python/pyspark/sql/context.py -- diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index b05aa2f..ba6915a 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -18,6 +18,7 @@ import sys import warnings import json +from functools import reduce if sys.version >= '3': basestring = unicode = str @@ -236,14 +237,9 @@ class SQLContext(object): if type(first) is dict: warnings.warn("inferring schema from dict is deprecated," "please use pyspark.sql.Row instead") -schema = _infer_schema(first) +schema = reduce(_merge_type, map(_infer_schema, data)) if _has_nulltype(schema): -for r in data: -schema = _merge_type(schema, _infer_schema(r)) -if not _has_nulltype(schema): -break -else: -raise ValueError("Some of types cannot be determined after inferring") +raise ValueError("Some of types cannot be determined after inferring") return schema def _inferSchema(self, rdd, samplingRatio=None):

http://git-wip-us.apache.org/repos/asf/spark/blob/d1ca634d/python/pyspark/sql/tests.py -- diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 9f5f7cf..10b9917 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -353,6 +353,17 @@ class SQLTests(ReusedPySparkTestCase): df3 = self.sqlCtx.createDataFrame(rdd, df.schema) self.assertEqual(10, df3.count()) +def test_infer_schema_to_local(self): +input = [{"a": 1}, {"b": "coffee"}] +rdd = self.sc.parallelize(input) +df = self.sqlCtx.createDataFrame(input) +df2 = self.sqlCtx.createDataFrame(rdd, samplingRatio=1.0) +self.assertEqual(df.schema, df2.schema) + +rdd = self.sc.parallelize(range(10)).map(lambda x: Row(a=x)) +df3 = self.sqlCtx.createDataFrame(rdd, df.schema) +self.assertEqual(10, df3.count()) + def test_serialize_nested_array_and_map(self): d = [Row(l=[Row(a=1, b='s')], d={"key": Row(c=1.0, d="2")})] rdd = self.sc.parallelize(d)
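[Editor's note] The heart of the patch is the switch from an early-exit loop to a fold over every row's inferred schema. A toy model of that folding logic in Scala — the types below are hypothetical stand-ins for pyspark.sql.types, not Spark API; only the reduce-then-merge shape mirrors the patch:

```
sealed trait ToyType
case object NullT extends ToyType
case object LongT extends ToyType
case object StringT extends ToyType
case class StructT(fields: Map[String, ToyType]) extends ToyType

def inferRow(row: Map[String, Any]): ToyType =
  StructT(row.map { case (k, v) =>
    k -> (v match {
      case null      => NullT
      case _: Int    => LongT // Python ints surface as LongType in pyspark
      case _: Long   => LongT
      case _: String => StringT
      case other     => sys.error(s"toy model: unsupported value $other")
    })
  })

def merge(a: ToyType, b: ToyType): ToyType = (a, b) match {
  case (NullT, t) => t
  case (t, NullT) => t
  case (StructT(fa), StructT(fb)) =>
    // Fields seen in only one row default to NullT, so nothing is dropped.
    val keys = fa.keySet ++ fb.keySet
    StructT(keys.map(k => k -> merge(fa.getOrElse(k, NullT), fb.getOrElse(k, NullT))).toMap)
  case (t1, t2) if t1 == t2 => t1
  case _ => sys.error("incompatible types")
}

val data = Seq(Map[String, Any]("a" -> 1), Map[String, Any]("b" -> "coffee"))
// The fix in one line: schema = reduce(_merge_type, map(_infer_schema, data))
val schema = data.map(inferRow).reduce(merge)
assert(schema == StructT(Map("a" -> LongT, "b" -> StringT)))
```

Folding over all rows is what makes the `[{"a": 1}, {"b": "coffee"}]` test above pass: the first row alone has no NullTypes, so the old code stopped there and never saw field "b".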
spark git commit: [SPARK-12495][SQL] use true as default value for propagateNull in NewInstance
Repository: spark Updated Branches: refs/heads/master 932cf4424 -> aa48164a4 [SPARK-12495][SQL] use true as default value for propagateNull in NewInstance Most of cases we should propagate null when call `NewInstance`, and so far there is only one case we should stop null propagation: create product/java bean. So I think it makes more sense to propagate null by dafault. This also fixes a bug when encode null array/map, which is firstly discovered in https://github.com/apache/spark/pull/10401 Author: Wenchen FanCloses #10443 from cloud-fan/encoder. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/aa48164a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/aa48164a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/aa48164a Branch: refs/heads/master Commit: aa48164a43bd9ed9eab53fcacbed92819e84eaf7 Parents: 932cf44 Author: Wenchen Fan Authored: Wed Dec 30 10:56:08 2015 -0800 Committer: Michael Armbrust Committed: Wed Dec 30 10:56:08 2015 -0800 -- .../spark/sql/catalyst/JavaTypeInference.scala | 16 ++--- .../spark/sql/catalyst/ScalaReflection.scala| 16 ++--- .../catalyst/encoders/ExpressionEncoder.scala | 2 +- .../sql/catalyst/encoders/RowEncoder.scala | 2 -- .../sql/catalyst/expressions/objects.scala | 12 +- .../encoders/EncoderResolutionSuite.scala | 24 ++-- .../encoders/ExpressionEncoderSuite.scala | 3 +++ 7 files changed, 38 insertions(+), 37 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/aa48164a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala index a1500cb..ed153d1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala @@ -178,19 +178,19 @@ object JavaTypeInference { case c if !inferExternalType(c).isInstanceOf[ObjectType] => getPath case c if c == classOf[java.lang.Short] => -NewInstance(c, getPath :: Nil, propagateNull = true, ObjectType(c)) +NewInstance(c, getPath :: Nil, ObjectType(c)) case c if c == classOf[java.lang.Integer] => -NewInstance(c, getPath :: Nil, propagateNull = true, ObjectType(c)) +NewInstance(c, getPath :: Nil, ObjectType(c)) case c if c == classOf[java.lang.Long] => -NewInstance(c, getPath :: Nil, propagateNull = true, ObjectType(c)) +NewInstance(c, getPath :: Nil, ObjectType(c)) case c if c == classOf[java.lang.Double] => -NewInstance(c, getPath :: Nil, propagateNull = true, ObjectType(c)) +NewInstance(c, getPath :: Nil, ObjectType(c)) case c if c == classOf[java.lang.Byte] => -NewInstance(c, getPath :: Nil, propagateNull = true, ObjectType(c)) +NewInstance(c, getPath :: Nil, ObjectType(c)) case c if c == classOf[java.lang.Float] => -NewInstance(c, getPath :: Nil, propagateNull = true, ObjectType(c)) +NewInstance(c, getPath :: Nil, ObjectType(c)) case c if c == classOf[java.lang.Boolean] => -NewInstance(c, getPath :: Nil, propagateNull = true, ObjectType(c)) +NewInstance(c, getPath :: Nil, ObjectType(c)) case c if c == classOf[java.sql.Date] => StaticInvoke( @@ -298,7 +298,7 @@ object JavaTypeInference { p.getWriteMethod.getName -> setter }.toMap -val newInstance = NewInstance(other, Nil, propagateNull = false, ObjectType(other)) +val newInstance = NewInstance(other, Nil, ObjectType(other), propagateNull = false) val result = 
InitializeJavaBean(newInstance, setters) if (path.nonEmpty) { http://git-wip-us.apache.org/repos/asf/spark/blob/aa48164a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index 8a22b37..9784c96 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -189,37 +189,37 @@ object ScalaReflection extends ScalaReflection { case t if t <:< localTypeOf[java.lang.Integer] => val boxedType =
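[Editor's note] For readers skimming the diff, the flag being defaulted is easiest to see in a toy model. This is plain Scala, not the catalyst NewInstance expression itself: with propagateNull = true a null argument short-circuits to a null result, which is what boxing conversions such as java.lang.Integer need, while bean construction opts out because the object must exist before its setters run.

```
// Toy model only; the real NewInstance is a catalyst expression.
def newInstance[T >: Null](args: Seq[Any], propagateNull: Boolean = true)(
    construct: Seq[Any] => T): T =
  if (propagateNull && args.contains(null)) null else construct(args)

// Boxing case: a null input stays null rather than hitting the constructor.
val boxed = newInstance(Seq[Any](null)) { args =>
  java.lang.Integer.valueOf(args.head.asInstanceOf[Int])
}
assert(boxed == null)

// Bean case: construct first, populate afterwards, so nulls must not propagate
// (mirrors NewInstance(other, Nil, ObjectType(other), propagateNull = false)).
class Bean { var name: String = _ }
val bean = newInstance(Nil, propagateNull = false)(_ => new Bean)
bean.name = "filled in by InitializeJavaBean"
```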
spark git commit: [SPARK-10359] Enumerate dependencies in a file and diff against it for new pull requests
Repository: spark Updated Branches: refs/heads/master d1ca634db -> 27a42c710 [SPARK-10359] Enumerate dependencies in a file and diff against it for new pull requests This patch adds a new build check which enumerates Spark's resolved runtime classpath and saves it to a file, then diffs against that file to detect whether pull requests have introduced dependency changes. The aim of this check is to make it simpler to reason about whether pull request which modify the build have introduced new dependencies or changed transitive dependencies in a way that affects the final classpath. This supplants the checks added in SPARK-4123 / #5093, which are currently disabled due to bugs. This patch is based on pwendell's work in #8531. Closes #8531. Author: Josh RosenAuthor: Patrick Wendell Closes #10461 from JoshRosen/SPARK-10359. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/27a42c71 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/27a42c71 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/27a42c71 Branch: refs/heads/master Commit: 27a42c7108ced48a7f558990de2e4fc7ed340119 Parents: d1ca634 Author: Josh Rosen Authored: Wed Dec 30 12:47:42 2015 -0800 Committer: Josh Rosen Committed: Wed Dec 30 12:47:42 2015 -0800 -- .rat-excludes| 1 + dev/deps/spark-deps-hadoop-2.3 | 184 + dev/deps/spark-deps-hadoop-2.4 | 185 ++ dev/run-tests-jenkins.py | 2 +- dev/run-tests.py | 8 ++ dev/sparktestsupport/__init__.py | 1 + dev/sparktestsupport/modules.py | 15 ++- dev/test-dependencies.sh | 102 +++ dev/tests/pr_new_dependencies.sh | 117 - pom.xml | 17 10 files changed, 512 insertions(+), 120 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/27a42c71/.rat-excludes -- diff --git a/.rat-excludes b/.rat-excludes index 3544c0f..bf071eb 100644 --- a/.rat-excludes +++ b/.rat-excludes @@ -85,3 +85,4 @@ org.apache.spark.sql.sources.DataSourceRegister org.apache.spark.scheduler.SparkHistoryListenerFactory .*parquet LZ4BlockInputStream.java +spark-deps-.* http://git-wip-us.apache.org/repos/asf/spark/blob/27a42c71/dev/deps/spark-deps-hadoop-2.3 -- diff --git a/dev/deps/spark-deps-hadoop-2.3 b/dev/deps/spark-deps-hadoop-2.3 new file mode 100644 index 000..6014d50 --- /dev/null +++ b/dev/deps/spark-deps-hadoop-2.3 @@ -0,0 +1,184 @@ +JavaEWAH-0.3.2.jar +RoaringBitmap-0.5.11.jar +ST4-4.0.4.jar +activation-1.1.1.jar +akka-actor_2.10-2.3.11.jar +akka-remote_2.10-2.3.11.jar +akka-slf4j_2.10-2.3.11.jar +antlr-2.7.7.jar +antlr-runtime-3.4.jar +aopalliance-1.0.jar +apache-log4j-extras-1.2.17.jar +arpack_combined_all-0.1.jar +asm-3.1.jar +asm-commons-3.1.jar +asm-tree-3.1.jar +avro-1.7.7.jar +avro-ipc-1.7.7-tests.jar +avro-ipc-1.7.7.jar +avro-mapred-1.7.7-hadoop2.jar +base64-2.3.8.jar +bcprov-jdk15on-1.51.jar +bonecp-0.8.0.RELEASE.jar +breeze-macros_2.10-0.11.2.jar +breeze_2.10-0.11.2.jar +calcite-avatica-1.2.0-incubating.jar +calcite-core-1.2.0-incubating.jar +calcite-linq4j-1.2.0-incubating.jar +chill-java-0.5.0.jar +chill_2.10-0.5.0.jar +commons-beanutils-1.7.0.jar +commons-beanutils-core-1.8.0.jar +commons-cli-1.2.jar +commons-codec-1.10.jar +commons-collections-3.2.2.jar +commons-compiler-2.7.6.jar +commons-compress-1.4.1.jar +commons-configuration-1.6.jar +commons-dbcp-1.4.jar +commons-digester-1.8.jar +commons-httpclient-3.1.jar +commons-io-2.4.jar +commons-lang-2.6.jar +commons-lang3-3.3.2.jar +commons-logging-1.1.3.jar +commons-math3-3.4.1.jar +commons-net-2.2.jar +commons-pool-1.5.4.jar +compress-lzf-1.0.3.jar +config-1.2.1.jar 
+core-1.1.2.jar +curator-client-2.4.0.jar +curator-framework-2.4.0.jar +curator-recipes-2.4.0.jar +datanucleus-api-jdo-3.2.6.jar +datanucleus-core-3.2.10.jar +datanucleus-rdbms-3.2.9.jar +derby-10.10.1.1.jar +eigenbase-properties-1.1.5.jar +geronimo-annotation_1.0_spec-1.1.1.jar +geronimo-jaspic_1.0_spec-1.0.jar +geronimo-jta_1.1_spec-1.1.1.jar +groovy-all-2.1.6.jar +guice-3.0.jar +guice-servlet-3.0.jar +hadoop-annotations-2.3.0.jar +hadoop-auth-2.3.0.jar +hadoop-client-2.3.0.jar +hadoop-common-2.3.0.jar +hadoop-hdfs-2.3.0.jar +hadoop-mapreduce-client-app-2.3.0.jar +hadoop-mapreduce-client-common-2.3.0.jar +hadoop-mapreduce-client-core-2.3.0.jar +hadoop-mapreduce-client-jobclient-2.3.0.jar +hadoop-mapreduce-client-shuffle-2.3.0.jar +hadoop-yarn-api-2.3.0.jar +hadoop-yarn-client-2.3.0.jar +hadoop-yarn-common-2.3.0.jar
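[Editor's note] The mechanics of the check are simple enough to sketch: resolve the runtime classpath, normalize it to one artifact per line, and fail the build if it drifts from the checked-in manifest. A toy Scala rendition of the diff step — the manifest path is the file added by this patch; resolving the live classpath is stubbed out with placeholder values:

```
import scala.io.Source

// Stub: the real check (dev/test-dependencies.sh) asks the Maven build for
// the resolved runtime classpath; these jar names are placeholders.
def resolveClasspath(): Set[String] = Set("asm-3.1.jar", "avro-1.7.7.jar")

def readManifest(path: String): Set[String] =
  Source.fromFile(path).getLines().map(_.trim).filter(_.nonEmpty).toSet

val expected = readManifest("dev/deps/spark-deps-hadoop-2.3")
val actual   = resolveClasspath()

val added   = actual -- expected
val removed = expected -- actual
if (added.nonEmpty || removed.nonEmpty) {
  Console.err.println(s"Dependency drift: added=$added removed=$removed")
  sys.exit(1) // same outcome as the Jenkins check: the build goes red
}
```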
spark git commit: [SPARK-12409][SPARK-12387][SPARK-12391][SQL] Support AND/OR/IN/LIKE push-down filters for JDBC
Repository: spark Updated Branches: refs/heads/master 27a42c710 -> 5c2682b0c [SPARK-12409][SPARK-12387][SPARK-12391][SQL] Support AND/OR/IN/LIKE push-down filters for JDBC This is rework from #10386 and add more tests and LIKE push-down support. Author: Takeshi YAMAMUROCloses #10468 from maropu/SupportMorePushdownInJdbc. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5c2682b0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5c2682b0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5c2682b0 Branch: refs/heads/master Commit: 5c2682b0c8fd2aeae2af1adb716ee0d5f8b85135 Parents: 27a42c7 Author: Takeshi YAMAMURO Authored: Wed Dec 30 13:34:37 2015 -0800 Committer: Reynold Xin Committed: Wed Dec 30 13:34:37 2015 -0800 -- .../execution/datasources/jdbc/JDBCRDD.scala| 9 ++- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 28 +++- 2 files changed, 35 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5c2682b0/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala index 4e2f505..7072ee4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala @@ -179,6 +179,7 @@ private[sql] object JDBCRDD extends Logging { case stringValue: String => s"'${escapeSql(stringValue)}'" case timestampValue: Timestamp => "'" + timestampValue + "'" case dateValue: Date => "'" + dateValue + "'" +case arrayValue: Array[Object] => arrayValue.map(compileValue).mkString(", ") case _ => value } @@ -191,13 +192,19 @@ private[sql] object JDBCRDD extends Logging { */ private def compileFilter(f: Filter): String = f match { case EqualTo(attr, value) => s"$attr = ${compileValue(value)}" -case Not(EqualTo(attr, value)) => s"$attr != ${compileValue(value)}" +case Not(f) => s"(NOT (${compileFilter(f)}))" case LessThan(attr, value) => s"$attr < ${compileValue(value)}" case GreaterThan(attr, value) => s"$attr > ${compileValue(value)}" case LessThanOrEqual(attr, value) => s"$attr <= ${compileValue(value)}" case GreaterThanOrEqual(attr, value) => s"$attr >= ${compileValue(value)}" +case StringStartsWith(attr, value) => s"${attr} LIKE '${value}%'" +case StringEndsWith(attr, value) => s"${attr} LIKE '%${value}'" +case StringContains(attr, value) => s"${attr} LIKE '%${value}%'" case IsNull(attr) => s"$attr IS NULL" case IsNotNull(attr) => s"$attr IS NOT NULL" +case In(attr, value) => s"$attr IN (${compileValue(value)})" +case Or(f1, f2) => s"(${compileFilter(f1)}) OR (${compileFilter(f2)})" +case And(f1, f2) => s"(${compileFilter(f1)}) AND (${compileFilter(f2)})" case _ => null } http://git-wip-us.apache.org/repos/asf/spark/blob/5c2682b0/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index 4044a10..00e37f1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -26,6 +26,7 @@ import org.scalatest.BeforeAndAfter import org.scalatest.PrivateMethodTester import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.Row 
import org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types._ @@ -186,8 +187,26 @@ class JDBCSuite extends SparkFunSuite assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME = 'fred'")).collect().size == 1) assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME > 'fred'")).collect().size == 2) assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME != 'fred'")).collect().size == 2) +assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME IN ('mary', 'fred')")) + .collect().size == 2) +assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME NOT IN ('fred')")) + .collect().size === 2) +assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID = 1 OR NAME = 'mary'")) + .collect().size == 2) +assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE
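[Editor's note] Condensed, the new compilation logic looks like the sketch below — a standalone rendition of JDBCRDD.compileFilter with SQL escaping and some cases elided, using the public org.apache.spark.sql.sources filter classes:

```
import org.apache.spark.sql.sources._

// Simplified stand-in for JDBCRDD.compileValue; string escaping is elided.
def compileValue(value: Any): String = value match {
  case s: String => s"'$s'"
  case v         => v.toString
}

// The recursion is the point: AND/OR/NOT/IN/LIKE trees now compile to SQL
// pushed to the database instead of being evaluated on the Spark side.
def compile(f: Filter): String = f match {
  case EqualTo(attr, value)      => s"$attr = ${compileValue(value)}"
  case Not(child)                => s"(NOT (${compile(child)}))"
  case In(attr, values)          => s"$attr IN (${values.map(compileValue).mkString(", ")})"
  case Or(left, right)           => s"(${compile(left)}) OR (${compile(right)})"
  case And(left, right)          => s"(${compile(left)}) AND (${compile(right)})"
  case StringStartsWith(attr, v) => s"$attr LIKE '$v%'"
  case StringContains(attr, v)   => s"$attr LIKE '%$v%'"
  case _                         => null // unsupported filters stay in Spark
}

// compile(Or(EqualTo("THEID", 1), EqualTo("NAME", "mary")))
//   ==> "(THEID = 1) OR (NAME = 'mary')"
```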
spark git commit: [SPARK-12399] Display correct error message when accessing REST API with an unknown app Id
Repository: spark Updated Branches: refs/heads/branch-1.6 8dc654971 -> cd86075b5

[SPARK-12399] Display correct error message when accessing REST API with an unknown app Id

I got an exception when accessing the below REST API with an unknown application Id. `http://:18080/api/v1/applications/xxx/jobs` Instead of an exception, I expect an error message "no such app: xxx", similar to the one returned when I access `/api/v1/applications/xxx`.

```
org.spark-project.guava.util.concurrent.UncheckedExecutionException: java.util.NoSuchElementException: no app with key xxx
at org.spark-project.guava.cache.LocalCache$Segment.get(LocalCache.java:2263)
at org.spark-project.guava.cache.LocalCache.get(LocalCache.java:4000)
at org.spark-project.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004)
at org.spark-project.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874)
at org.apache.spark.deploy.history.HistoryServer.getSparkUI(HistoryServer.scala:116)
at org.apache.spark.status.api.v1.UIRoot$class.withSparkUI(ApiRootResource.scala:226)
at org.apache.spark.deploy.history.HistoryServer.withSparkUI(HistoryServer.scala:46)
at org.apache.spark.status.api.v1.ApiRootResource.getJobs(ApiRootResource.scala:66)
```

Author: Carson Wang. Closes #10352 from carsonwang/unknownAppFix. (cherry picked from commit b244297966be1d09f8e861cfe2d8e69f7bed84da) Signed-off-by: Marcelo Vanzin

Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cd86075b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cd86075b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cd86075b Branch: refs/heads/branch-1.6 Commit: cd86075b52d6363f674dffc3eb71d90449563879 Parents: 8dc6549 Author: Carson Wang Authored: Wed Dec 30 13:49:10 2015 -0800 Committer: Marcelo Vanzin Committed: Wed Dec 30 13:49:28 2015 -0800

-- .../apache/spark/deploy/history/HistoryServer.scala | 16 ++-- 1 file changed, 14 insertions(+), 2 deletions(-) --

http://git-wip-us.apache.org/repos/asf/spark/blob/cd86075b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala index f31fef0..d11c9b7 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala @@ -21,6 +21,8 @@ import java.util.NoSuchElementException import java.util.zip.ZipOutputStream import javax.servlet.http.{HttpServlet, HttpServletRequest, HttpServletResponse} +import scala.util.control.NonFatal + import com.google.common.cache._ import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder} @@ -115,7 +117,17 @@ class HistoryServer( } def getSparkUI(appKey: String): Option[SparkUI] = { -Option(appCache.get(appKey)) +try { + val ui = appCache.get(appKey) + Some(ui) +} catch { + case NonFatal(e) => e.getCause() match { +case nsee: NoSuchElementException => + None + +case cause: Exception => throw cause + } +} } initialize() @@ -195,7 +207,7 @@ class HistoryServer( appCache.get(appId + attemptId.map { id => s"/$id" }.getOrElse("")) true } catch { - case e: Exception => e.getCause() match { + case NonFatal(e) => e.getCause() match { case nsee: NoSuchElementException => false
spark git commit: [SPARK-12399] Display correct error message when accessing REST API with an unknown app Id
Repository: spark Updated Branches: refs/heads/master 5c2682b0c -> b24429796

[SPARK-12399] Display correct error message when accessing REST API with an unknown app Id

I got an exception when accessing the below REST API with an unknown application Id. `http://:18080/api/v1/applications/xxx/jobs` Instead of an exception, I expect an error message "no such app: xxx", similar to the one returned when I access `/api/v1/applications/xxx`.

```
org.spark-project.guava.util.concurrent.UncheckedExecutionException: java.util.NoSuchElementException: no app with key xxx
at org.spark-project.guava.cache.LocalCache$Segment.get(LocalCache.java:2263)
at org.spark-project.guava.cache.LocalCache.get(LocalCache.java:4000)
at org.spark-project.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004)
at org.spark-project.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874)
at org.apache.spark.deploy.history.HistoryServer.getSparkUI(HistoryServer.scala:116)
at org.apache.spark.status.api.v1.UIRoot$class.withSparkUI(ApiRootResource.scala:226)
at org.apache.spark.deploy.history.HistoryServer.withSparkUI(HistoryServer.scala:46)
at org.apache.spark.status.api.v1.ApiRootResource.getJobs(ApiRootResource.scala:66)
```

Author: Carson Wang. Closes #10352 from carsonwang/unknownAppFix.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b2442979 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b2442979 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b2442979 Branch: refs/heads/master Commit: b244297966be1d09f8e861cfe2d8e69f7bed84da Parents: 5c2682b Author: Carson Wang Authored: Wed Dec 30 13:49:10 2015 -0800 Committer: Marcelo Vanzin Committed: Wed Dec 30 13:49:10 2015 -0800

-- .../apache/spark/deploy/history/HistoryServer.scala | 16 ++-- 1 file changed, 14 insertions(+), 2 deletions(-) --

http://git-wip-us.apache.org/repos/asf/spark/blob/b2442979/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala index 0bc0cb1..6143a33 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala @@ -21,6 +21,8 @@ import java.util.NoSuchElementException import java.util.zip.ZipOutputStream import javax.servlet.http.{HttpServlet, HttpServletRequest, HttpServletResponse} +import scala.util.control.NonFatal + import com.google.common.cache._ import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder} import org.apache.spark.{Logging, SecurityManager, SparkConf} @@ -113,7 +115,17 @@ class HistoryServer( } def getSparkUI(appKey: String): Option[SparkUI] = { -Option(appCache.get(appKey)) +try { + val ui = appCache.get(appKey) + Some(ui) +} catch { + case NonFatal(e) => e.getCause() match { +case nsee: NoSuchElementException => + None + +case cause: Exception => throw cause + } +} } initialize() @@ -193,7 +205,7 @@ class HistoryServer( appCache.get(appId + attemptId.map { id => s"/$id" }.getOrElse("")) true } catch { - case e: Exception => e.getCause() match { + case NonFatal(e) => e.getCause() match { case nsee: NoSuchElementException => false
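[Editor's note] The fix is an instance of a general pattern worth spelling out: Guava's LoadingCache wraps whatever the loader throws, so callers must unwrap getCause() before classifying the failure. A self-contained sketch of the same pattern — the cache contents are toy values, not HistoryServer code:

```
import java.util.NoSuchElementException
import scala.util.control.NonFatal
import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache}

// LoadingCache wraps loader failures (e.g. in UncheckedExecutionException),
// so the real cause has to be unwrapped before deciding between "no such
// app" (None) and a genuine error (rethrow).
val cache: LoadingCache[String, String] = CacheBuilder.newBuilder().build(
  new CacheLoader[String, String] {
    override def load(appId: String): String =
      if (appId == "known") "ui"
      else throw new NoSuchElementException(s"no app with key $appId")
  })

def lookup(appId: String): Option[String] =
  try {
    Some(cache.get(appId))
  } catch {
    case NonFatal(e) => e.getCause match {
      case _: NoSuchElementException => None // unknown app id -> clean 404
      case cause: Exception          => throw cause
    }
  }

assert(lookup("known") == Some("ui"))
assert(lookup("xxx").isEmpty) // previously leaked as UncheckedExecutionException
```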
spark git commit: [SPARK-8641][SPARK-12455][SQL] Native Spark Window functions - Follow-up (docs & tests)
Repository: spark Updated Branches: refs/heads/master b24429796 -> f76ee109d [SPARK-8641][SPARK-12455][SQL] Native Spark Window functions - Follow-up (docs & tests) This PR is a follow-up for PR https://github.com/apache/spark/pull/9819. It adds documentation for the window functions and a couple of NULL tests. The documentation was largely based on the documentation in (the source of) Hive and Presto: * https://prestodb.io/docs/current/functions/window.html * https://cwiki.apache.org/confluence/display/Hive/LanguageManual+WindowingAndAnalytics I am not sure if we need to add the licenses of these two projects to the licenses directory. They are both under the ASL. srowen any thoughts? cc yhuai Author: Herman van HovellCloses #10402 from hvanhovell/SPARK-8641-docs. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f76ee109 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f76ee109 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f76ee109 Branch: refs/heads/master Commit: f76ee109d87e727710d2721e4be47fdabc21582c Parents: b244297 Author: Herman van Hovell Authored: Wed Dec 30 16:51:07 2015 -0800 Committer: Yin Huai Committed: Wed Dec 30 16:51:07 2015 -0800 -- .../expressions/windowExpressions.scala | 130 ++- .../apache/spark/sql/DataFrameWindowSuite.scala | 20 +++ .../sql/hive/execution/WindowQuerySuite.scala | 15 +++ 3 files changed, 162 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f76ee109/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index 91f169e..f1a333b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -314,8 +314,8 @@ abstract class OffsetWindowFunction val offset: Expression /** - * Direction (above = 1/below = -1) of the number of rows between the current row and the row - * where the input expression is evaluated. + * Direction of the number of rows between the current row and the row where the input expression + * is evaluated. */ val direction: SortDirection @@ -327,7 +327,7 @@ abstract class OffsetWindowFunction * both the input and the default expression are foldable, the result is still not foldable due to * the frame. */ - override def foldable: Boolean = input.foldable && (default == null || default.foldable) + override def foldable: Boolean = false override def nullable: Boolean = default == null || default.nullable @@ -353,6 +353,21 @@ abstract class OffsetWindowFunction override def toString: String = s"$prettyName($input, $offset, $default)" } +/** + * The Lead function returns the value of 'x' at 'offset' rows after the current row in the window. + * Offsets start at 0, which is the current row. The offset must be constant integer value. The + * default offset is 1. When the value of 'x' is null at the offset, or when the offset is larger + * than the window, the default expression is evaluated. + * + * This documentation has been based upon similar documentation for the Hive and Presto projects. + * + * @param input expression to evaluate 'offset' rows after the current row. + * @param offset rows to jump ahead in the partition. 
+ * @param default to use when the input value is null or when the offset is larger than the window. + */ +@ExpressionDescription(usage = + """_FUNC_(input, offset, default) - LEAD returns the value of 'x' at 'offset' rows after the + current row in the window""") case class Lead(input: Expression, offset: Expression, default: Expression) extends OffsetWindowFunction { @@ -365,6 +380,21 @@ case class Lead(input: Expression, offset: Expression, default: Expression) override val direction = Ascending } +/** + * The Lag function returns the value of 'x' at 'offset' rows before the current row in the window. + * Offsets start at 0, which is the current row. The offset must be constant integer value. The + * default offset is 1. When the value of 'x' is null at the offset, or when the offset is smaller + * than the window, the default expression is evaluated. + * + * This documentation has been based upon similar documentation for the Hive and Presto projects. + * + * @param input expression to evaluate 'offset' rows before
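[Editor's note] Since the new docs describe lead/lag semantics in prose, a short usage sketch may help. It assumes a Spark 1.6-style SQLContext named sqlContext with its implicits imported; the column names and data are made up:

```
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.{lag, lead}
import sqlContext.implicits._ // assumes a SQLContext named sqlContext in scope

val w  = Window.partitionBy($"grp").orderBy($"x")
val df = Seq(("a", 1), ("a", 2), ("a", 3)).toDF("grp", "x")

df.select(
  $"x",
  lead($"x", 1).over(w).as("next"),   // null on the partition's last row
  lag($"x", 1, -1).over(w).as("prev") // explicit default (-1) instead of null
).show()
```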
spark git commit: [SPARK-12588] Remove HttpBroadcast in Spark 2.0.
Repository: spark Updated Branches: refs/heads/master f76ee109d -> ee8f8d318 [SPARK-12588] Remove HttpBroadcast in Spark 2.0. We switched to TorrentBroadcast in Spark 1.1, and HttpBroadcast has been undocumented since then. It's time to remove it in Spark 2.0. Author: Reynold XinCloses #10531 from rxin/SPARK-12588. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ee8f8d31 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ee8f8d31 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ee8f8d31 Branch: refs/heads/master Commit: ee8f8d318417c514fbb26e57157483d466ddbfae Parents: f76ee10 Author: Reynold Xin Authored: Wed Dec 30 18:07:07 2015 -0800 Committer: Reynold Xin Committed: Wed Dec 30 18:07:07 2015 -0800 -- .../spark/broadcast/BroadcastFactory.scala | 4 +- .../spark/broadcast/BroadcastManager.scala | 13 +- .../apache/spark/broadcast/HttpBroadcast.scala | 269 --- .../spark/broadcast/HttpBroadcastFactory.scala | 47 .../spark/broadcast/TorrentBroadcast.scala | 2 +- .../broadcast/TorrentBroadcastFactory.scala | 2 +- .../spark/serializer/KryoSerializer.scala | 2 - .../apache/spark/broadcast/BroadcastSuite.scala | 131 + docs/configuration.md | 19 +- docs/security.md| 13 +- .../apache/spark/examples/BroadcastTest.scala | 8 +- project/MimaExcludes.scala | 3 +- 12 files changed, 22 insertions(+), 491 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ee8f8d31/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala -- diff --git a/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala b/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala index 6a187b4..7f35ac4 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala @@ -24,14 +24,12 @@ import org.apache.spark.SparkConf import org.apache.spark.annotation.DeveloperApi /** - * :: DeveloperApi :: * An interface for all the broadcast implementations in Spark (to allow * multiple broadcast implementations). SparkContext uses a user-specified * BroadcastFactory implementation to instantiate a particular broadcast for the * entire Spark job. 
*/ -@DeveloperApi -trait BroadcastFactory { +private[spark] trait BroadcastFactory { def initialize(isDriver: Boolean, conf: SparkConf, securityMgr: SecurityManager): Unit http://git-wip-us.apache.org/repos/asf/spark/blob/ee8f8d31/core/src/main/scala/org/apache/spark/broadcast/BroadcastManager.scala -- diff --git a/core/src/main/scala/org/apache/spark/broadcast/BroadcastManager.scala b/core/src/main/scala/org/apache/spark/broadcast/BroadcastManager.scala index fac..6134360 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/BroadcastManager.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/BroadcastManager.scala @@ -21,8 +21,8 @@ import java.util.concurrent.atomic.AtomicLong import scala.reflect.ClassTag -import org.apache.spark._ -import org.apache.spark.util.Utils +import org.apache.spark.{Logging, SparkConf, SecurityManager} + private[spark] class BroadcastManager( val isDriver: Boolean, @@ -39,15 +39,8 @@ private[spark] class BroadcastManager( private def initialize() { synchronized { if (!initialized) { -val broadcastFactoryClass = - conf.get("spark.broadcast.factory", "org.apache.spark.broadcast.TorrentBroadcastFactory") - -broadcastFactory = - Utils.classForName(broadcastFactoryClass).newInstance.asInstanceOf[BroadcastFactory] - -// Initialize appropriate BroadcastFactory and BroadcastObject +broadcastFactory = new TorrentBroadcastFactory broadcastFactory.initialize(isDriver, conf, securityManager) - initialized = true } } http://git-wip-us.apache.org/repos/asf/spark/blob/ee8f8d31/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala -- diff --git a/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala deleted file mode 100644 index b69af63..000 --- a/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements.
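[Editor's note] The user-facing broadcast API is untouched by this removal; only the factory indirection is gone, with TorrentBroadcastFactory hard-wired in BroadcastManager as shown above. A minimal usage sketch, where the master and data are placeholders (compare examples/BroadcastTest.scala in the diffstat):

```
import org.apache.spark.{SparkConf, SparkContext}

val sc = new SparkContext(
  new SparkConf().setMaster("local[2]").setAppName("broadcast-example"))

// Same API as before; the value now always ships via TorrentBroadcast.
val lookup = sc.broadcast(Map(1 -> "one", 2 -> "two"))
val names  = sc.parallelize(Seq(1, 2)).map(lookup.value)
println(names.collect().mkString(", ")) // one, two
sc.stop()
```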
spark git commit: [SPARK-3873][GRAPHX] Import order fixes.
Repository: spark Updated Branches: refs/heads/master fd313 -> 9140d9074 [SPARK-3873][GRAPHX] Import order fixes. There's one warning left, caused by a bug in the checker. Author: Marcelo VanzinCloses #10537 from vanzin/SPARK-3873-graphx. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9140d907 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9140d907 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9140d907 Branch: refs/heads/master Commit: 9140d9074379055a0b4b2f5c381362b31c141941 Parents: fd3 Author: Marcelo Vanzin Authored: Wed Dec 30 18:26:08 2015 -0800 Committer: Reynold Xin Committed: Wed Dec 30 18:26:08 2015 -0800 -- graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala | 5 ++--- .../scala/org/apache/spark/graphx/GraphKryoRegistrator.scala | 5 ++--- .../src/main/scala/org/apache/spark/graphx/GraphLoader.scala | 2 +- graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala | 3 +-- .../src/main/scala/org/apache/spark/graphx/GraphXUtils.scala | 4 +--- graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala | 2 +- .../src/main/scala/org/apache/spark/graphx/VertexRDD.scala | 5 ++--- .../org/apache/spark/graphx/impl/EdgePartitionBuilder.scala | 2 +- .../scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala | 5 ++--- .../main/scala/org/apache/spark/graphx/impl/GraphImpl.scala | 5 ++--- .../org/apache/spark/graphx/impl/ReplicatedVertexView.scala | 3 +-- .../org/apache/spark/graphx/impl/RoutingTablePartition.scala | 8 +++- .../apache/spark/graphx/impl/ShippableVertexPartition.scala | 3 +-- .../scala/org/apache/spark/graphx/impl/VertexPartition.scala | 3 +-- .../org/apache/spark/graphx/impl/VertexPartitionBase.scala | 3 +-- .../apache/spark/graphx/impl/VertexPartitionBaseOps.scala| 3 +-- .../scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala | 3 +-- .../scala/org/apache/spark/graphx/lib/LabelPropagation.scala | 1 + .../main/scala/org/apache/spark/graphx/lib/PageRank.scala| 2 +- .../main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala | 2 +- .../scala/org/apache/spark/graphx/lib/ShortestPaths.scala| 3 ++- .../scala/org/apache/spark/graphx/util/GraphGenerators.scala | 7 ++- .../util/collection/GraphXPrimitiveKeyOpenHashMap.scala | 4 ++-- 23 files changed, 33 insertions(+), 50 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9140d907/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala -- diff --git a/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala b/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala index ee7302a..45526bf 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala @@ -24,12 +24,11 @@ import org.apache.spark.Dependency import org.apache.spark.Partition import org.apache.spark.SparkContext import org.apache.spark.TaskContext -import org.apache.spark.rdd.RDD -import org.apache.spark.storage.StorageLevel - import org.apache.spark.graphx.impl.EdgePartition import org.apache.spark.graphx.impl.EdgePartitionBuilder import org.apache.spark.graphx.impl.EdgeRDDImpl +import org.apache.spark.rdd.RDD +import org.apache.spark.storage.StorageLevel /** * `EdgeRDD[ED, VD]` extends `RDD[Edge[ED]]` by storing the edges in columnar format on each http://git-wip-us.apache.org/repos/asf/spark/blob/9140d907/graphx/src/main/scala/org/apache/spark/graphx/GraphKryoRegistrator.scala -- diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphKryoRegistrator.scala 
b/graphx/src/main/scala/org/apache/spark/graphx/GraphKryoRegistrator.scala index 563c948..eaa71da 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/GraphKryoRegistrator.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphKryoRegistrator.scala @@ -19,12 +19,11 @@ package org.apache.spark.graphx import com.esotericsoftware.kryo.Kryo +import org.apache.spark.graphx.impl._ +import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap import org.apache.spark.serializer.KryoRegistrator import org.apache.spark.util.BoundedPriorityQueue import org.apache.spark.util.collection.BitSet - -import org.apache.spark.graphx.impl._ -import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap import org.apache.spark.util.collection.OpenHashSet /** http://git-wip-us.apache.org/repos/asf/spark/blob/9140d907/graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala -- diff --git
spark git commit: [SPARK-12263][DOCS] IllegalStateException: Memory can't be 0 for SPARK_WORKER_MEMORY without unit
Repository: spark Updated Branches: refs/heads/master 27af6157f -> 932cf4424

[SPARK-12263][DOCS] IllegalStateException: Memory can't be 0 for SPARK_WORKER_MEMORY without unit

Updated the worker's IllegalStateException message to say that values below 1MB are rejected, not just 0, to help diagnose this. Requesting review.

Author: Neelesh Srinivas Salian. Closes #10483 from nssalian/SPARK-12263.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/932cf442 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/932cf442 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/932cf442 Branch: refs/heads/master Commit: 932cf44248e067ee7cae6fef79ddf2ab9b1c36d8 Parents: 27af615 Author: Neelesh Srinivas Salian Authored: Wed Dec 30 11:14:13 2015 + Committer: Sean Owen Committed: Wed Dec 30 11:14:13 2015 +

-- .../scala/org/apache/spark/deploy/worker/WorkerArguments.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --

http://git-wip-us.apache.org/repos/asf/spark/blob/932cf442/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala index 5181142..de3c7cd 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala @@ -175,7 +175,7 @@ private[worker] class WorkerArguments(args: Array[String], conf: SparkConf) { def checkWorkerMemory(): Unit = { if (memory <= 0) { - val message = "Memory can't be 0, missing a M or G on the end of the memory specification?" + val message = "Memory is below 1MB, or missing a M/G at the end of the memory specification?" throw new IllegalStateException(message) } } -
[3/3] spark git commit: Revert "[SPARK-12362][SQL][WIP] Inline Hive Parser"
Revert "[SPARK-12362][SQL][WIP] Inline Hive Parser"

This reverts commit b600bccf41a7b1958e33d8301a19214e6517e388 due to non-deterministic build breaks.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/27af6157
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/27af6157
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/27af6157

Branch: refs/heads/master
Commit: 27af6157f9cceeb9aa74eec54c8898d3e0749ed0
Parents: 4f75f78
Author: Reynold Xin
Authored: Wed Dec 30 00:08:44 2015 -0800
Committer: Reynold Xin
Committed: Wed Dec 30 00:08:44 2015 -0800

--
 pom.xml                                         |    5 -
 project/SparkBuild.scala                        |    2 +-
 project/plugins.sbt                             |    4 -
 .../hive/execution/HiveCompatibilitySuite.scala |   10 +-
 sql/hive/pom.xml                                |   22 -
 .../apache/spark/sql/parser/FromClauseParser.g  |  330 ---
 .../apache/spark/sql/parser/IdentifiersParser.g |  697 -
 .../spark/sql/parser/SelectClauseParser.g       |  226 --
 .../org/apache/spark/sql/parser/SparkSqlLexer.g |  474
 .../apache/spark/sql/parser/SparkSqlParser.g    | 2457 --
 .../apache/spark/sql/parser/ASTErrorNode.java   |   49 -
 .../org/apache/spark/sql/parser/ASTNode.java    |  245 --
 .../apache/spark/sql/parser/ParseDriver.java    |  213 --
 .../org/apache/spark/sql/parser/ParseError.java |   54 -
 .../apache/spark/sql/parser/ParseException.java |   51 -
 .../org/apache/spark/sql/parser/ParseUtils.java |   96 -
 .../spark/sql/parser/SemanticAnalyzer.java      |  406 ---
 .../org/apache/spark/sql/hive/HiveQl.scala      |  133 +-
 18 files changed, 72 insertions(+), 5402 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/27af6157/pom.xml
--
diff --git a/pom.xml b/pom.xml
index 73ba8d5..284c219 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1951,11 +1951,6 @@
         </plugin>
-        <plugin>
-          <groupId>org.antlr</groupId>
-          <artifactId>antlr3-maven-plugin</artifactId>
-          <version>3.5.2</version>
-        </plugin>
         <plugin>
           <groupId>org.apache.maven.plugins</groupId>

http://git-wip-us.apache.org/repos/asf/spark/blob/27af6157/project/SparkBuild.scala
--
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index df21d3e..c3d53f8 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -415,7 +415,7 @@ object Hive {
   // in order to generate golden files. This is only required for developers who are adding new
   // new query tests.
   fullClasspath in Test := (fullClasspath in Test).value.filterNot { f => f.toString.contains("jcl-over") }
-  ) ++ sbtantlr.SbtAntlrPlugin.antlrSettings
+  )
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/27af6157/project/plugins.sbt
--
diff --git a/project/plugins.sbt b/project/plugins.sbt
index f172dc9..5e23224 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -4,8 +4,6 @@ resolvers += "Typesafe Repository" at "http://repo.typesafe.com/typesafe/releases/"

 resolvers += "sonatype-releases" at "https://oss.sonatype.org/content/repositories/releases/"

-resolvers += "stefri" at "http://stefri.github.io/repo/releases"
-
 addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.11.2")

 addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "2.2.0")

@@ -26,8 +24,6 @@ addSbtPlugin("com.cavorite" % "sbt-avro" % "0.3.2")

 addSbtPlugin("io.spray" % "sbt-revolver" % "0.7.2")

-addSbtPlugin("com.github.stefri" % "sbt-antlr" % "0.5.3")
-
 libraryDependencies += "org.ow2.asm" % "asm" % "5.0.3"

 libraryDependencies += "org.ow2.asm" % "asm-commons" % "5.0.3"

http://git-wip-us.apache.org/repos/asf/spark/blob/27af6157/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
--
diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index 2b0e48d..2d0d7b8 100644
--- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -308,12 +308,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {

     // The difference between the double numbers generated by Hive and Spark
     // can be ignored (e.g., 0.6633880657639323 and 0.6633880657639322)
-
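The context lines above point at why the compatibility suite normalizes doubles: Hive and Spark can legitimately differ in the last printed digit of a double. A relative-tolerance comparison along these lines is enough to absorb that drift (a sketch of the idea only, not the suite's actual answer-checking code):

  // Relative-tolerance comparison; absorbs last-digit drift between two doubles.
  def approxEqual(a: Double, b: Double, relTol: Double = 1e-14): Boolean =
    math.abs(a - b) <= relTol * math.max(math.abs(a), math.abs(b))

  approxEqual(0.6633880657639323, 0.6633880657639322) // true: last-digit drift
  approxEqual(0.6633, 0.6634)                         // false: a real mismatch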
[1/3] spark git commit: Revert "[SPARK-12362][SQL][WIP] Inline Hive Parser"
Repository: spark
Updated Branches:
  refs/heads/master 4f75f785d -> 27af6157f

http://git-wip-us.apache.org/repos/asf/spark/blob/27af6157/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseDriver.java
--
diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseDriver.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseDriver.java
deleted file mode 100644
index c77198b..0000000
--- a/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseDriver.java
+++ /dev/null
@@ -1,213 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.parser;
-
-import java.util.ArrayList;
-import org.antlr.runtime.ANTLRStringStream;
-import org.antlr.runtime.CharStream;
-import org.antlr.runtime.NoViableAltException;
-import org.antlr.runtime.RecognitionException;
-import org.antlr.runtime.Token;
-import org.antlr.runtime.TokenRewriteStream;
-import org.antlr.runtime.TokenStream;
-import org.antlr.runtime.tree.CommonTree;
-import org.antlr.runtime.tree.CommonTreeAdaptor;
-import org.antlr.runtime.tree.TreeAdaptor;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.apache.hadoop.hive.ql.Context;
-
-/**
- * ParseDriver.
- *
- */
-public class ParseDriver {
-
-  private static final Logger LOG = LoggerFactory.getLogger("hive.ql.parse.ParseDriver");
-
-  /**
-   * ANTLRNoCaseStringStream.
-   *
-   */
-  //This class provides an implementation for a case-insensitive token checker
-  //for the lexical analysis part of antlr. By converting the token stream into
-  //upper case at the time when lexical rules are checked, this class ensures that the
-  //lexical rules need to just match the token with upper case letters as opposed to
-  //combination of upper case and lower case characters. This is purely used for matching lexical
-  //rules. The actual token text is stored in the same way as the user input without
-  //actually converting it into an upper case. The token values are generated by the consume()
-  //function of the super class ANTLRStringStream. The LA() function is the lookahead function
-  //and is purely used for matching lexical rules. This also means that the grammar will only
-  //accept capitalized tokens in case it is run from other tools like antlrworks which
-  //do not have the ANTLRNoCaseStringStream implementation.
-  public class ANTLRNoCaseStringStream extends ANTLRStringStream {
-
-    public ANTLRNoCaseStringStream(String input) {
-      super(input);
-    }
-
-    @Override
-    public int LA(int i) {
-
-      int returnChar = super.LA(i);
-      if (returnChar == CharStream.EOF) {
-        return returnChar;
-      } else if (returnChar == 0) {
-        return returnChar;
-      }
-
-      return Character.toUpperCase((char) returnChar);
-    }
-  }
-
-  /**
-   * HiveLexerX.
-   *
-   */
-  public class HiveLexerX extends SparkSqlLexer {
-
-    private final ArrayList<ParseError> errors;
-
-    public HiveLexerX(CharStream input) {
-      super(input);
-      errors = new ArrayList<ParseError>();
-    }
-
-    @Override
-    public void displayRecognitionError(String[] tokenNames, RecognitionException e) {
-      errors.add(new ParseError(this, e, tokenNames));
-    }
-
-    @Override
-    public String getErrorMessage(RecognitionException e, String[] tokenNames) {
-      String msg = null;
-
-      if (e instanceof NoViableAltException) {
-        // @SuppressWarnings("unused")
-        // NoViableAltException nvae = (NoViableAltException) e;
-        // for development, can add
-        // "decision=<<"+nvae.grammarDecisionDescription+">>"
-        // and "(decision="+nvae.decisionNumber+") and
-        // "state "+nvae.stateNumber
-        msg = "character " + getCharErrorDisplay(e.c) + " not supported here";
-      } else {
-        msg = super.getErrorMessage(e, tokenNames);
-      }
-
-      return msg;
-    }
-
-    public ArrayList<ParseError> getErrors() {
-      return errors;
-    }
-
-  }
-
-  /**
-   * Tree adaptor for making antlr return ASTNodes instead of CommonTree nodes
-   * so that the graph walking algorithms and the rules framework defined in
-   * ql.lib can be used with the AST Nodes.
-   */
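The ANTLRNoCaseStringStream comment above is worth spelling out: lexer rules match against uppercased lookahead, while the consumed token text keeps the user's original casing. A small Scala sketch of the same idea (a hypothetical stand-alone stream, not the real ANTLR runtime API):

  // Sketch of case-insensitive matching with case-preserving token text.
  class NoCaseStream(input: String) {
    private var pos = 0
    // Lookahead used when matching lexer rules: always uppercased.
    def LA(i: Int): Int = {
      val idx = pos + i - 1
      if (idx >= input.length) -1 else Character.toUpperCase(input.charAt(idx))
    }
    // Consumed text keeps the original case for the token's payload.
    def consume(n: Int): String = { val s = input.substring(pos, pos + n); pos += n; s }
  }

  val s = new NoCaseStream("select")
  (1 to 6).map(i => s.LA(i).toChar).mkString // "SELECT" -- what the rules match
  s.consume(6)                               // "select" -- what the token stores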
[2/3] spark git commit: Revert "[SPARK-12362][SQL][WIP] Inline Hive Parser"
http://git-wip-us.apache.org/repos/asf/spark/blob/27af6157/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlParser.g
--
diff --git a/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlParser.g b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlParser.g
deleted file mode 100644
index 69574d7..0000000
--- a/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlParser.g
+++ /dev/null
@@ -1,2457 +0,0 @@
-/**
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-*/
-parser grammar SparkSqlParser;
-
-options
-{
-tokenVocab=SparkSqlLexer;
-output=AST;
-ASTLabelType=CommonTree;
-backtrack=false;
-k=3;
-}
-import SelectClauseParser, FromClauseParser, IdentifiersParser;
-
-tokens {
-TOK_INSERT;
-TOK_QUERY;
-TOK_SELECT;
-TOK_SELECTDI;
-TOK_SELEXPR;
-TOK_FROM;
-TOK_TAB;
-TOK_PARTSPEC;
-TOK_PARTVAL;
-TOK_DIR;
-TOK_TABREF;
-TOK_SUBQUERY;
-TOK_INSERT_INTO;
-TOK_DESTINATION;
-TOK_ALLCOLREF;
-TOK_TABLE_OR_COL;
-TOK_FUNCTION;
-TOK_FUNCTIONDI;
-TOK_FUNCTIONSTAR;
-TOK_WHERE;
-TOK_OP_EQ;
-TOK_OP_NE;
-TOK_OP_LE;
-TOK_OP_LT;
-TOK_OP_GE;
-TOK_OP_GT;
-TOK_OP_DIV;
-TOK_OP_ADD;
-TOK_OP_SUB;
-TOK_OP_MUL;
-TOK_OP_MOD;
-TOK_OP_BITAND;
-TOK_OP_BITNOT;
-TOK_OP_BITOR;
-TOK_OP_BITXOR;
-TOK_OP_AND;
-TOK_OP_OR;
-TOK_OP_NOT;
-TOK_OP_LIKE;
-TOK_TRUE;
-TOK_FALSE;
-TOK_TRANSFORM;
-TOK_SERDE;
-TOK_SERDENAME;
-TOK_SERDEPROPS;
-TOK_EXPLIST;
-TOK_ALIASLIST;
-TOK_GROUPBY;
-TOK_ROLLUP_GROUPBY;
-TOK_CUBE_GROUPBY;
-TOK_GROUPING_SETS;
-TOK_GROUPING_SETS_EXPRESSION;
-TOK_HAVING;
-TOK_ORDERBY;
-TOK_CLUSTERBY;
-TOK_DISTRIBUTEBY;
-TOK_SORTBY;
-TOK_UNIONALL;
-TOK_UNIONDISTINCT;
-TOK_JOIN;
-TOK_LEFTOUTERJOIN;
-TOK_RIGHTOUTERJOIN;
-TOK_FULLOUTERJOIN;
-TOK_UNIQUEJOIN;
-TOK_CROSSJOIN;
-TOK_LOAD;
-TOK_EXPORT;
-TOK_IMPORT;
-TOK_REPLICATION;
-TOK_METADATA;
-TOK_NULL;
-TOK_ISNULL;
-TOK_ISNOTNULL;
-TOK_TINYINT;
-TOK_SMALLINT;
-TOK_INT;
-TOK_BIGINT;
-TOK_BOOLEAN;
-TOK_FLOAT;
-TOK_DOUBLE;
-TOK_DATE;
-TOK_DATELITERAL;
-TOK_DATETIME;
-TOK_TIMESTAMP;
-TOK_TIMESTAMPLITERAL;
-TOK_INTERVAL_YEAR_MONTH;
-TOK_INTERVAL_YEAR_MONTH_LITERAL;
-TOK_INTERVAL_DAY_TIME;
-TOK_INTERVAL_DAY_TIME_LITERAL;
-TOK_INTERVAL_YEAR_LITERAL;
-TOK_INTERVAL_MONTH_LITERAL;
-TOK_INTERVAL_DAY_LITERAL;
-TOK_INTERVAL_HOUR_LITERAL;
-TOK_INTERVAL_MINUTE_LITERAL;
-TOK_INTERVAL_SECOND_LITERAL;
-TOK_STRING;
-TOK_CHAR;
-TOK_VARCHAR;
-TOK_BINARY;
-TOK_DECIMAL;
-TOK_LIST;
-TOK_STRUCT;
-TOK_MAP;
-TOK_UNIONTYPE;
-TOK_COLTYPELIST;
-TOK_CREATEDATABASE;
-TOK_CREATETABLE;
-TOK_TRUNCATETABLE;
-TOK_CREATEINDEX;
-TOK_CREATEINDEX_INDEXTBLNAME;
-TOK_DEFERRED_REBUILDINDEX;
-TOK_DROPINDEX;
-TOK_LIKETABLE;
-TOK_DESCTABLE;
-TOK_DESCFUNCTION;
-TOK_ALTERTABLE;
-TOK_ALTERTABLE_RENAME;
-TOK_ALTERTABLE_ADDCOLS;
-TOK_ALTERTABLE_RENAMECOL;
-TOK_ALTERTABLE_RENAMEPART;
-TOK_ALTERTABLE_REPLACECOLS;
-TOK_ALTERTABLE_ADDPARTS;
-TOK_ALTERTABLE_DROPPARTS;
-TOK_ALTERTABLE_PARTCOLTYPE;
-TOK_ALTERTABLE_MERGEFILES;
-TOK_ALTERTABLE_TOUCH;
-TOK_ALTERTABLE_ARCHIVE;
-TOK_ALTERTABLE_UNARCHIVE;
-TOK_ALTERTABLE_SERDEPROPERTIES;
-TOK_ALTERTABLE_SERIALIZER;
-TOK_ALTERTABLE_UPDATECOLSTATS;
-TOK_TABLE_PARTITION;
-TOK_ALTERTABLE_FILEFORMAT;
-TOK_ALTERTABLE_LOCATION;
-TOK_ALTERTABLE_PROPERTIES;
-TOK_ALTERTABLE_CHANGECOL_AFTER_POSITION;
-TOK_ALTERTABLE_DROPPROPERTIES;
-TOK_ALTERTABLE_SKEWED;
-TOK_ALTERTABLE_EXCHANGEPARTITION;
-TOK_ALTERTABLE_SKEWED_LOCATION;
-TOK_ALTERTABLE_BUCKETS;
-TOK_ALTERTABLE_CLUSTER_SORT;
-TOK_ALTERTABLE_COMPACT;
-TOK_ALTERINDEX_REBUILD;
-TOK_ALTERINDEX_PROPERTIES;
-TOK_MSCK;
-TOK_SHOWDATABASES;
-TOK_SHOWTABLES;
-TOK_SHOWCOLUMNS;
-TOK_SHOWFUNCTIONS;
-TOK_SHOWPARTITIONS;
-TOK_SHOW_CREATEDATABASE;
-TOK_SHOW_CREATETABLE;
-TOK_SHOW_TABLESTATUS;
-TOK_SHOW_TBLPROPERTIES;
-TOK_SHOWLOCKS;
-TOK_SHOWCONF;
-TOK_LOCKTABLE;
-TOK_UNLOCKTABLE;
-TOK_LOCKDB;
-TOK_UNLOCKDB;
-TOK_SWITCHDATABASE;
-TOK_DROPDATABASE;
-TOK_DROPTABLE;
-TOK_DATABASECOMMENT;
-TOK_TABCOLLIST;
-TOK_TABCOL;
-TOK_TABLECOMMENT;
-TOK_TABLEPARTCOLS;
-TOK_TABLEROWFORMAT;
-TOK_TABLEROWFORMATFIELD;
-TOK_TABLEROWFORMATCOLLITEMS;
-TOK_TABLEROWFORMATMAPKEYS;
-TOK_TABLEROWFORMATLINES;
-TOK_TABLEROWFORMATNULL;
-TOK_TABLEFILEFORMAT;
-TOK_FILEFORMAT_GENERIC;
-TOK_OFFLINE;
-TOK_ENABLE;
-TOK_DISABLE;
-TOK_READONLY;
-TOK_NO_DROP;
-TOK_STORAGEHANDLER;