[spark] branch master updated: [SPARK-27125][SQL][TEST] Add test suite for sql execution page
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 1853db3 [SPARK-27125][SQL][TEST] Add test suite for sql execution page 1853db3 is described below commit 1853db3186de3ed63fda050ac90bf85efafea5bf Author: Shahid AuthorDate: Tue Mar 12 10:15:28 2019 -0500 [SPARK-27125][SQL][TEST] Add test suite for sql execution page ## What changes were proposed in this pull request? Added test suite for AllExecutionsPage class. Checked the scenarios for SPARK-27019 and SPARK-27075. ## How was this patch tested? Added UT, manually tested Closes #24052 from shahidki31/SPARK-27125. Authored-by: Shahid Signed-off-by: Sean Owen --- .../sql/execution/ui/AllExecutionsPageSuite.scala | 127 + 1 file changed, 127 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala new file mode 100644 index 000..5a3a923 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.ui + +import java.util +import java.util.{Locale, Properties} +import javax.servlet.http.HttpServletRequest + +import scala.xml.Node + +import org.mockito.Mockito.{mock, when, RETURNS_SMART_NULLS} + +import org.apache.spark.scheduler.{JobFailed, SparkListenerJobEnd, SparkListenerJobStart} +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.execution.{SparkPlanInfo, SQLExecution} +import org.apache.spark.sql.test.SharedSQLContext +import org.apache.spark.status.ElementTrackingStore +import org.apache.spark.util.kvstore.InMemoryStore + +class AllExecutionsPageSuite extends SharedSQLContext { + + import testImplicits._ + + test("SPARK-27019: correctly display SQL page when event reordering happens") { +val statusStore = createStatusStore +val tab = mock(classOf[SQLTab], RETURNS_SMART_NULLS) +when(tab.sqlStore).thenReturn(statusStore) + +val request = mock(classOf[HttpServletRequest]) +when(tab.appName).thenReturn("testing") +when(tab.headerTabs).thenReturn(Seq.empty) + +val html = renderSQLPage(request, tab, statusStore).toString().toLowerCase(Locale.ROOT) +assert(html.contains("failed queries")) +assert(!html.contains("1970")) + } + + test("sorting should be successful") { +val statusStore = createStatusStore +val tab = mock(classOf[SQLTab], RETURNS_SMART_NULLS) +val request = mock(classOf[HttpServletRequest]) + +when(tab.sqlStore).thenReturn(statusStore) +when(tab.appName).thenReturn("testing") +when(tab.headerTabs).thenReturn(Seq.empty) +when(request.getParameter("failed.sort")).thenReturn("Duration") +val map = new util.HashMap[String, Array[String]]() +map.put("failed.sort", Array("duration")) +when(request.getParameterMap()).thenReturn(map) +val html = renderSQLPage(request, tab, statusStore).toString().toLowerCase(Locale.ROOT) +assert(!html.contains("IllegalArgumentException")) 
+assert(html.contains("duration")) + } + + + private def createStatusStore: SQLAppStatusStore = { +val conf = sparkContext.conf +val store = new ElementTrackingStore(new InMemoryStore, conf) +val listener = new SQLAppStatusListener(conf, store, live = true) +new SQLAppStatusStore(store, Some(listener)) + } + + private def createTestDataFrame: DataFrame = { +Seq( + (1, 1), + (2, 2) +).toDF().filter("_1 > 1") + } + + /** + * Render a stage page started with the given conf and return the HTML. + * This also runs a dummy execution page to populate the page with useful content. + */ + private def renderSQLPage( +request: HttpServletRequest, +tab: SQLTab, +statusStore: SQLAppStatusStore): Seq[Node] = { + +val listener = statusStore.listener.get + +val page = new AllExecutionsPage(tab) +
[spark] branch master updated: [SPARK-27041][PYSPARK] Use imap() for python 2.x to resolve oom issue
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 60a899b [SPARK-27041][PYSPARK] Use imap() for python 2.x to resolve oom issue 60a899b is described below commit 60a899b8c33e1e36bb80ab9fa054ba40bee9f4be Author: TigerYang414 <39265202+tigeryang...@users.noreply.github.com> AuthorDate: Tue Mar 12 10:23:26 2019 -0500 [SPARK-27041][PYSPARK] Use imap() for python 2.x to resolve oom issue ## What changes were proposed in this pull request? With large partitions, pyspark may exceed the executor memory limit and trigger an out-of-memory error on python 2.7. This is because map() is used. Unlike in python3.x, python 2.7 map() generates a list and needs to read all data into memory. The proposed fix uses imap in python 2.7, and it has been verified. ## How was this patch tested? Manual test. (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Please review http://spark.apache.org/contributing.html before opening a pull request. Closes #23954 from TigerYang414/patch-1. 
Lead-authored-by: TigerYang414 <39265202+tigeryang...@users.noreply.github.com> Co-authored-by: Hyukjin Kwon Signed-off-by: Sean Owen --- python/pyspark/sql/tests/test_udf.py | 7 +++ python/pyspark/worker.py | 2 ++ 2 files changed, 9 insertions(+) diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py index 0a56ba8..ba00bba 100644 --- a/python/pyspark/sql/tests/test_udf.py +++ b/python/pyspark/sql/tests/test_udf.py @@ -600,6 +600,13 @@ class UDFTests(ReusedSQLTestCase): result = sql("select i from values(0L) as data(i) where i in (select id from v)") self.assertEqual(result.collect(), [Row(i=0)]) +def test_udf_globals_not_overwritten(self): +@udf('string') +def f(): +assert "itertools" not in str(map) + +self.spark.range(1).select(f()).collect() + class UDFInitializationTests(unittest.TestCase): def tearDown(self): diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index 0e9b6d6..7811012 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -45,6 +45,8 @@ from pyspark import shuffle if sys.version >= '3': basestring = str +else: +from itertools import imap as map # use iterator map by default pickleSer = PickleSerializer() utf8_deserializer = UTF8Deserializer() - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-27010][SQL] Find out the actual port number when hive.server2.thrift.port=0
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 3f9247d [SPARK-27010][SQL] Find out the actual port number when hive.server2.thrift.port=0 3f9247d is described below commit 3f9247de1e0395a40071e80cd989e06eb1a85446 Author: zuotingbing AuthorDate: Tue Mar 12 13:38:41 2019 -0500 [SPARK-27010][SQL] Find out the actual port number when hive.server2.thrift.port=0 ## What changes were proposed in this pull request? Currently, if we set hive.server2.thrift.port=0, it is hard to find out the actual port number that we should use when connecting with beeline. before: ![2019-02-28_170942](https://user-images.githubusercontent.com/24823338/53557240-779ad800-3b80-11e9-9567-175f28aa61da.png) after: ![2019-02-28_170904](https://user-images.githubusercontent.com/24823338/53557255-7f5a7c80-3b80-11e9-8ba6-9764c03e5407.png) connecting with beeline succeeds: ![2019-02-28_170844](https://user-images.githubusercontent.com/24823338/53557267-85e8f400-3b80-11e9-90a5-f7f53a51cc32.png) ## How was this patch tested? manual tests Closes #23917 from zuotingbing/SPARK-27010. 
Authored-by: zuotingbing Signed-off-by: Sean Owen --- .../java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java | 2 +- .../java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java index 6c9efba..21b8bf7 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java @@ -94,7 +94,7 @@ public class ThriftBinaryCLIService extends ThriftCLIService { server = new TThreadPoolServer(sargs); server.setServerEventHandler(serverEventHandler); String msg = "Starting " + ThriftBinaryCLIService.class.getSimpleName() + " on port " - + portNum + " with " + minWorkerThreads + "..." + maxWorkerThreads + " worker threads"; + + serverSocket.getServerSocket().getLocalPort() + " with " + minWorkerThreads + "..." + maxWorkerThreads + " worker threads"; LOG.info(msg); server.serve(); } catch (Throwable t) { diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java index a10245b..c3bcaf1 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java @@ -144,7 +144,7 @@ public class ThriftHttpCLIService extends ThriftCLIService { // Finally, start the server httpServer.start(); String msg = "Started " + ThriftHttpCLIService.class.getSimpleName() + " in " + schemeName - + " mode on port " + portNum + " path=" + httpPath + " with " + minWorkerThreads + "..." 
+ + " mode on port " + connector.getLocalPort()+ " path=" + httpPath + " with " + minWorkerThreads + "..." + maxWorkerThreads + " worker threads"; LOG.info(msg); httpServer.join(); - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-26089][CORE] Handle corruption in large shuffle blocks
This is an automated email from the ASF dual-hosted git repository. irashid pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 688b0c0 [SPARK-26089][CORE] Handle corruption in large shuffle blocks 688b0c0 is described below commit 688b0c01fac0db80f6473181673a89f1ce1be65b Author: ankurgupta AuthorDate: Tue Mar 12 14:27:44 2019 -0500 [SPARK-26089][CORE] Handle corruption in large shuffle blocks ## What changes were proposed in this pull request? SPARK-4105 added corruption detection in shuffle blocks but that was limited to blocks which are smaller than maxBytesInFlight/3. This commit builds upon that by adding a corruption check for large blocks. There are two changes/improvements that are made in this commit: 1. Large blocks are checked up to maxBytesInFlight/3 size in a similar way as smaller blocks, so if a large block is corrupt at the start, that block will be re-fetched and if that also fails, FetchFailedException will be thrown. 2. If large blocks are corrupt after size maxBytesInFlight/3, then any IOException thrown while reading the stream will be converted to FetchFailedException. This is slightly more aggressive than was originally intended but since the consumer of the stream may have already read some records and processed them, we can't just re-fetch the block, we need to fail the whole task. Additionally, we also thought about maybe adding a new type of TaskEndReason, which would retry the task a couple of times before failing the previous stage, but given the complexity involved in that solution we decided to not proceed in that direction. Thanks to squito for direction and support. ## How was this patch tested? Changed the junit test for big blocks to check for corruption. Closes #23453 from ankuriitg/ankurgupta/SPARK-26089. 
Authored-by: ankurgupta Signed-off-by: Imran Rashid --- .../org/apache/spark/internal/config/package.scala | 9 ++ .../spark/shuffle/BlockStoreShuffleReader.scala| 1 + .../storage/ShuffleBlockFetcherIterator.scala | 94 + .../main/scala/org/apache/spark/util/Utils.scala | 45 +++ .../storage/ShuffleBlockFetcherIteratorSuite.scala | 148 ++--- .../scala/org/apache/spark/util/UtilsSuite.scala | 54 +++- 6 files changed, 306 insertions(+), 45 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index d6a359d..850d684 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -928,6 +928,15 @@ package object config { .booleanConf .createWithDefault(true) + private[spark] val SHUFFLE_DETECT_CORRUPT_MEMORY = +ConfigBuilder("spark.shuffle.detectCorrupt.useExtraMemory") + .doc("If enabled, part of a compressed/encrypted stream will be de-compressed/de-crypted " + +"by using extra memory to detect early corruption. 
Any IOException thrown will cause " + +"the task to be retried once and if it fails again with same exception, then " + +"FetchFailedException will be thrown to retry previous stage") + .booleanConf + .createWithDefault(false) + private[spark] val SHUFFLE_SYNC = ConfigBuilder("spark.shuffle.sync") .doc("Whether to force outstanding writes to disk.") diff --git a/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala b/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala index c5eefc7..c784371 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala @@ -55,6 +55,7 @@ private[spark] class BlockStoreShuffleReader[K, C]( SparkEnv.get.conf.get(config.REDUCER_MAX_BLOCKS_IN_FLIGHT_PER_ADDRESS), SparkEnv.get.conf.get(config.MAX_REMOTE_BLOCK_SIZE_FETCH_TO_MEM), SparkEnv.get.conf.get(config.SHUFFLE_DETECT_CORRUPT), + SparkEnv.get.conf.get(config.SHUFFLE_DETECT_CORRUPT_MEMORY), readMetrics).toCompletionIterator val serializerInstance = dep.serializer.newInstance() diff --git a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala index c75b209..c89d5cc 100644 --- a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala +++ b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala @@ -17,7 +17,7 @@ package org.apache.spark.storage -import java.io.{InputStream, IOException} +import java.io.{InputStream, IOException, SequenceInputStream} import ja
[spark] branch master updated: [SPARK-26927][CORE] Ensure executor is active when processing events in dynamic allocation manager.
This is an automated email from the ASF dual-hosted git repository. vanzin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new d5cfe08 [SPARK-26927][CORE] Ensure executor is active when processing events in dynamic allocation manager. d5cfe08 is described below commit d5cfe08fdc7ad07e948f329c0bdeeca5c2574a18 Author: Liupengcheng AuthorDate: Tue Mar 12 13:53:42 2019 -0700 [SPARK-26927][CORE] Ensure executor is active when processing events in dynamic allocation manager. ## What changes were proposed in this pull request? There is a race condition in the `ExecutorAllocationManager` in which the `SparkListenerExecutorRemoved` event is posted before the `SparkListenerTaskStart` event, which causes an incorrect `executorIds` result. Then, when some executor idles, the real executors will be removed even if the actual executor number is equal to `minNumExecutors`, due to the incorrect computation of `newExecutorTotal` (which may be greater than `minNumExecutors`), and this may finally cause zero available executors bu [...] What's more, even the `SparkListenerTaskEnd` event cannot release the fake `executorIds`, because a later idle event for the fake executors cannot cause the real removal of these executors: they are already removed and do not exist in the `executorDataMap` of `CoarseGrainedSchedulerBackend`, so the `onExecutorRemoved` method will never be called again. For details see https://issues.apache.org/jira/browse/SPARK-26927 This PR is to fix this problem. ## How was this patch tested? Existing UTs and an added UT. Closes #23842 from liupc/Fix-race-condition-that-casues-dyanmic-allocation-not-working. 
Lead-authored-by: Liupengcheng Co-authored-by: liupengcheng Signed-off-by: Marcelo Vanzin --- .../apache/spark/ExecutorAllocationManager.scala | 13 +++ .../spark/ExecutorAllocationManagerSuite.scala | 26 +- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index 99f4d11..60d0404 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -725,10 +725,15 @@ private[spark] class ExecutorAllocationManager( if (stageIdToNumRunningTask.contains(stageId)) { stageIdToNumRunningTask(stageId) += 1 } -// This guards against the race condition in which the `SparkListenerTaskStart` -// event is posted before the `SparkListenerBlockManagerAdded` event, which is -// possible because these events are posted in different threads. (see SPARK-4951) -if (!allocationManager.executorIds.contains(executorId)) { +// This guards against the following race condition: +// 1. The `SparkListenerTaskStart` event is posted before the +// `SparkListenerExecutorAdded` event +// 2. The `SparkListenerExecutorRemoved` event is posted before the +// `SparkListenerTaskStart` event +// Above cases are possible because these events are posted in different threads. 
+// (see SPARK-4951 SPARK-26927) +if (!allocationManager.executorIds.contains(executorId) && +client.getExecutorIds().contains(executorId)) { allocationManager.onExecutorAdded(executorId) } diff --git a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala index 5500329..12c8a9d 100644 --- a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala @@ -421,6 +421,7 @@ class ExecutorAllocationManagerSuite // Remove when numExecutorsTarget is the same as the current number of executors assert(addExecutors(manager) === 1) assert(addExecutors(manager) === 2) +(1 to 8).foreach(execId => onExecutorAdded(manager, execId.toString)) (1 to 8).map { i => createTaskInfo(i, i, s"$i") }.foreach { info => post(sc.listenerBus, SparkListenerTaskStart(0, 0, info)) } assert(executorIds(manager).size === 8) @@ -834,7 +835,7 @@ class ExecutorAllocationManagerSuite assert(removeTimes(manager).size === 1) } - test("SPARK-4951: call onTaskStart before onBlockManagerAdded") { + test("SPARK-4951: call onTaskStart before onExecutorAdded") { sc = createSparkContext(2, 10, 2) val manager = sc.executorAllocationManager.get assert(executorIds(manager).isEmpty) @@ -1162,6 +1163,29 @@ class ExecutorAllocationManagerSuite assert(numExecutorsTarget(manager) === 1) } + test("
[spark] branch branch-2.4 updated: [SPARK-26927][CORE] Ensure executor is active when processing events in dynamic allocation manager.
This is an automated email from the ASF dual-hosted git repository. vanzin pushed a commit to branch branch-2.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-2.4 by this push: new 432ea69 [SPARK-26927][CORE] Ensure executor is active when processing events in dynamic allocation manager. 432ea69 is described below commit 432ea6924142c9688d8b6c64b46a531810691a8c Author: Liupengcheng AuthorDate: Tue Mar 12 13:53:42 2019 -0700 [SPARK-26927][CORE] Ensure executor is active when processing events in dynamic allocation manager. There is a race condition in the `ExecutorAllocationManager` in which the `SparkListenerExecutorRemoved` event is posted before the `SparkListenerTaskStart` event, which causes an incorrect `executorIds` result. Then, when some executor idles, the real executors will be removed even if the actual executor number is equal to `minNumExecutors`, due to the incorrect computation of `newExecutorTotal` (which may be greater than `minNumExecutors`), and this may finally cause zero available executors bu [...] What's more, even the `SparkListenerTaskEnd` event cannot release the fake `executorIds`, because a later idle event for the fake executors cannot cause the real removal of these executors: they are already removed and do not exist in the `executorDataMap` of `CoarseGrainedSchedulerBackend`, so the `onExecutorRemoved` method will never be called again. For details see https://issues.apache.org/jira/browse/SPARK-26927 This PR is to fix this problem. Existing UTs and an added UT. Closes #23842 from liupc/Fix-race-condition-that-casues-dyanmic-allocation-not-working. 
Lead-authored-by: Liupengcheng Co-authored-by: liupengcheng Signed-off-by: Marcelo Vanzin (cherry picked from commit d5cfe08fdc7ad07e948f329c0bdeeca5c2574a18) Signed-off-by: Marcelo Vanzin --- .../apache/spark/ExecutorAllocationManager.scala | 13 +++ .../spark/ExecutorAllocationManagerSuite.scala | 26 +- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index 49fa80c..36819aa 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -719,10 +719,15 @@ private[spark] class ExecutorAllocationManager( if (stageIdToNumRunningTask.contains(stageId)) { stageIdToNumRunningTask(stageId) += 1 } -// This guards against the race condition in which the `SparkListenerTaskStart` -// event is posted before the `SparkListenerBlockManagerAdded` event, which is -// possible because these events are posted in different threads. (see SPARK-4951) -if (!allocationManager.executorIds.contains(executorId)) { +// This guards against the following race condition: +// 1. The `SparkListenerTaskStart` event is posted before the +// `SparkListenerExecutorAdded` event +// 2. The `SparkListenerExecutorRemoved` event is posted before the +// `SparkListenerTaskStart` event +// Above cases are possible because these events are posted in different threads. 
+// (see SPARK-4951 SPARK-26927) +if (!allocationManager.executorIds.contains(executorId) && +client.getExecutorIds().contains(executorId)) { allocationManager.onExecutorAdded(executorId) } diff --git a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala index f50ad78..a69045f 100644 --- a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala @@ -420,6 +420,7 @@ class ExecutorAllocationManagerSuite // Remove when numExecutorsTarget is the same as the current number of executors assert(addExecutors(manager) === 1) assert(addExecutors(manager) === 2) +(1 to 8).foreach(execId => onExecutorAdded(manager, execId.toString)) (1 to 8).map { i => createTaskInfo(i, i, s"$i") }.foreach { info => post(sc.listenerBus, SparkListenerTaskStart(0, 0, info)) } assert(executorIds(manager).size === 8) @@ -833,7 +834,7 @@ class ExecutorAllocationManagerSuite assert(removeTimes(manager).size === 1) } - test("SPARK-4951: call onTaskStart before onBlockManagerAdded") { + test("SPARK-4951: call onTaskStart before onExecutorAdded") { sc = createSparkContext(2, 10, 2) val manager = sc.executorAllocationManager.get assert(executorIds(manager).isEmpty) @@ -1161,6 +1162,29 @@ class ExecutorAllocationManagerSuite assert(numExecutorsTarget(manager) === 1)
[spark] branch branch-2.3 updated: [SPARK-26927][CORE] Ensure executor is active when processing events in dynamic allocation manager.
This is an automated email from the ASF dual-hosted git repository. vanzin pushed a commit to branch branch-2.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-2.3 by this push: new d0290ea [SPARK-26927][CORE] Ensure executor is active when processing events in dynamic allocation manager. d0290ea is described below commit d0290eaf240cfddfb51e490721484afaaba43176 Author: Liupengcheng AuthorDate: Tue Mar 12 13:53:42 2019 -0700 [SPARK-26927][CORE] Ensure executor is active when processing events in dynamic allocation manager. There is a race condition in the `ExecutorAllocationManager` in which the `SparkListenerExecutorRemoved` event is posted before the `SparkListenerTaskStart` event, which causes an incorrect `executorIds` result. Then, when some executor idles, the real executors will be removed even if the actual executor number is equal to `minNumExecutors`, due to the incorrect computation of `newExecutorTotal` (which may be greater than `minNumExecutors`), and this may finally cause zero available executors bu [...] What's more, even the `SparkListenerTaskEnd` event cannot release the fake `executorIds`, because a later idle event for the fake executors cannot cause the real removal of these executors: they are already removed and do not exist in the `executorDataMap` of `CoarseGrainedSchedulerBackend`, so the `onExecutorRemoved` method will never be called again. For details see https://issues.apache.org/jira/browse/SPARK-26927 This PR is to fix this problem. Existing UTs and an added UT. Closes #23842 from liupc/Fix-race-condition-that-casues-dyanmic-allocation-not-working. 
Lead-authored-by: Liupengcheng Co-authored-by: liupengcheng Signed-off-by: Marcelo Vanzin (cherry picked from commit d5cfe08fdc7ad07e948f329c0bdeeca5c2574a18) Signed-off-by: Marcelo Vanzin --- .../apache/spark/ExecutorAllocationManager.scala | 13 +++ .../spark/ExecutorAllocationManagerSuite.scala | 26 +- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index 59d8826..28f7052 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -699,10 +699,15 @@ private[spark] class ExecutorAllocationManager( if (stageIdToNumRunningTask.contains(stageId)) { stageIdToNumRunningTask(stageId) += 1 } -// This guards against the race condition in which the `SparkListenerTaskStart` -// event is posted before the `SparkListenerBlockManagerAdded` event, which is -// possible because these events are posted in different threads. (see SPARK-4951) -if (!allocationManager.executorIds.contains(executorId)) { +// This guards against the following race condition: +// 1. The `SparkListenerTaskStart` event is posted before the +// `SparkListenerExecutorAdded` event +// 2. The `SparkListenerExecutorRemoved` event is posted before the +// `SparkListenerTaskStart` event +// Above cases are possible because these events are posted in different threads. 
+// (see SPARK-4951 SPARK-26927) +if (!allocationManager.executorIds.contains(executorId) && +client.getExecutorIds().contains(executorId)) { allocationManager.onExecutorAdded(executorId) } diff --git a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala index 784beac..18d82b7 100644 --- a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala @@ -386,6 +386,7 @@ class ExecutorAllocationManagerSuite // Remove when numExecutorsTarget is the same as the current number of executors assert(addExecutors(manager) === 1) assert(addExecutors(manager) === 2) +(1 to 8).foreach(execId => onExecutorAdded(manager, execId.toString)) (1 to 8).map { i => createTaskInfo(i, i, s"$i") }.foreach { info => post(sc.listenerBus, SparkListenerTaskStart(0, 0, info)) } assert(executorIds(manager).size === 8) @@ -799,7 +800,7 @@ class ExecutorAllocationManagerSuite assert(removeTimes(manager).size === 1) } - test("SPARK-4951: call onTaskStart before onBlockManagerAdded") { + test("SPARK-4951: call onTaskStart before onExecutorAdded") { sc = createSparkContext(2, 10, 2) val manager = sc.executorAllocationManager.get assert(executorIds(manager).isEmpty) @@ -1127,6 +1128,29 @@ class ExecutorAllocationManagerSuite assert(numExecutorsTarget(manager) === 1)
[spark] branch master updated: [SPARK-27123][SQL] Improve CollapseProject to handle projects cross limit/repartition/sample
This is an automated email from the ASF dual-hosted git repository. dbtsai pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 78314af [SPARK-27123][SQL] Improve CollapseProject to handle projects cross limit/repartition/sample 78314af is described below commit 78314af580b38f773a148c6f035d2ddd79896b4c Author: Dongjoon Hyun AuthorDate: Tue Mar 12 21:45:40 2019 + [SPARK-27123][SQL] Improve CollapseProject to handle projects cross limit/repartition/sample ## What changes were proposed in this pull request? `CollapseProject` optimizer rule simplifies some plans by merging the adjacent projects and performing alias substitutions. ```scala scala> sql("SELECT b c FROM (SELECT a b FROM t)").explain == Physical Plan == *(1) Project [a#5 AS c#1] +- Scan hive default.t [a#5], HiveTableRelation `default`.`t`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [a#5] ``` We can do the same for more complex cases like the following. This PR aims to handle adjacent projects across limit/repartition/sample. Here, repartition means `Repartition`, not `RepartitionByExpression`. **BEFORE** ```scala scala> sql("SELECT b c FROM (SELECT /*+ REPARTITION(1) */ a b FROM t)").explain == Physical Plan == *(2) Project [b#0 AS c#1] +- Exchange RoundRobinPartitioning(1) +- *(1) Project [a#5 AS b#0] +- Scan hive default.t [a#5], HiveTableRelation `default`.`t`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [a#5] ``` **AFTER** ```scala scala> sql("SELECT b c FROM (SELECT /*+ REPARTITION(1) */ a b FROM t)").explain == Physical Plan == Exchange RoundRobinPartitioning(1) +- *(1) Project [a#11 AS c#7] +- Scan hive default.t [a#11], HiveTableRelation `default`.`t`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [a#11] ``` ## How was this patch tested? Pass the Jenkins with the newly added and updated test cases. Closes #24049 from dongjoon-hyun/SPARK-27123. 
Authored-by: Dongjoon Hyun Signed-off-by: DB Tsai --- .../spark/sql/catalyst/optimizer/Optimizer.scala | 26 + .../catalyst/optimizer/CollapseProjectSuite.scala | 34 +- .../catalyst/optimizer/ColumnPruningSuite.scala| 2 +- 3 files changed, 60 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 97a53f2..1b7ff02 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -699,6 +699,24 @@ object CollapseProject extends Rule[LogicalPlan] { agg.copy(aggregateExpressions = buildCleanedProjectList( p.projectList, agg.aggregateExpressions)) } +case p1 @ Project(_, g @ GlobalLimit(_, l @ LocalLimit(_, p2: Project))) => + if (haveCommonNonDeterministicOutput(p1.projectList, p2.projectList)) { +p1 + } else { +val newProjectList = buildCleanedProjectList(p1.projectList, p2.projectList) +g.copy(child = l.copy(child = p2.copy(projectList = newProjectList))) + } +case p1 @ Project(_, l @ LocalLimit(_, p2: Project)) => + if (haveCommonNonDeterministicOutput(p1.projectList, p2.projectList)) { +p1 + } else { +val newProjectList = buildCleanedProjectList(p1.projectList, p2.projectList) +l.copy(child = p2.copy(projectList = newProjectList)) + } +case Project(l1, r @ Repartition(_, _, p @ Project(l2, _))) if isRenaming(l1, l2) => + r.copy(child = p.copy(projectList = buildCleanedProjectList(l1, p.projectList))) +case Project(l1, s @ Sample(_, _, _, _, p2 @ Project(l2, _))) if isRenaming(l1, l2) => + s.copy(child = p2.copy(projectList = buildCleanedProjectList(l1, p2.projectList))) } private def collectAliases(projectList: Seq[NamedExpression]): AttributeMap[Alias] = { @@ -739,6 +757,14 @@ object CollapseProject extends Rule[LogicalPlan] { 
CleanupAliases.trimNonTopLevelAliases(p).asInstanceOf[NamedExpression] } } + + private def isRenaming(list1: Seq[NamedExpression], list2: Seq[NamedExpression]): Boolean = { +list1.length == list2.length && list1.zip(list2).forall { + case (e1, e2) if e1.semanticEquals(e2) => true + case (Alias(a: Attribute, _), b) if a.metadata == Metadata.empty && a.name == b.name => true + case _ => false +} + } } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseProjectSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseProjectSuite.scala index e7a5b
[spark] branch master updated: [SPARK-27034][SQL] Nested schema pruning for ORC
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new b0c2b3b [SPARK-27034][SQL] Nested schema pruning for ORC b0c2b3b is described below commit b0c2b3bfd9b43ab97b37532abfef22e14642125c Author: Liang-Chi Hsieh AuthorDate: Tue Mar 12 15:39:16 2019 -0700 [SPARK-27034][SQL] Nested schema pruning for ORC ## What changes were proposed in this pull request? We only supported nested schema pruning for Parquet previously. This proposes to support nested schema pruning for ORC too. Note: This only covers ORC v1. For ORC v2, the necessary change is at the schema pruning rule. We should deal with ORC v2 as a TODO item, in order to reduce review burden. ## How was this patch tested? Added tests. Closes #23943 from viirya/nested-schema-pruning-orc. Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/internal/SQLConf.scala| 4 +- .../OrcNestedSchemaPruningBenchmark-results.txt| 20 +- .../execution/datasources/orc/OrcFileFormat.scala | 9 +- .../sql/execution/datasources/orc/OrcUtils.scala | 35 +- .../datasources/parquet/ParquetSchemaPruning.scala | 4 +- .../v2/orc/OrcPartitionReaderFactory.scala | 23 +- ...PruningSuite.scala => SchemaPruningSuite.scala} | 64 ++-- .../datasources/orc/OrcSchemaPruningSuite.scala| 34 ++ .../parquet/ParquetSchemaPruningSuite.scala| 389 + 9 files changed, 142 insertions(+), 440 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 6f483a7..193d311 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1539,8 +1539,8 @@ object SQLConf { .internal() .doc("Prune nested fields from a logical relation's 
output which are unnecessary in " + "satisfying a query. This optimization allows columnar file format readers to avoid " + -"reading unnecessary nested column data. Currently Parquet is the only data source that " + -"implements this optimization.") +"reading unnecessary nested column data. Currently Parquet and ORC v1 are the " + +"data sources that implement this optimization.") .booleanConf .createWithDefault(false) diff --git a/sql/core/benchmarks/OrcNestedSchemaPruningBenchmark-results.txt b/sql/core/benchmarks/OrcNestedSchemaPruningBenchmark-results.txt index f738256..fdd35cd 100644 --- a/sql/core/benchmarks/OrcNestedSchemaPruningBenchmark-results.txt +++ b/sql/core/benchmarks/OrcNestedSchemaPruningBenchmark-results.txt @@ -6,35 +6,35 @@ Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.14.3 Intel(R) Core(TM) i7-8750H CPU @ 2.20GHz Selection:Best Time(ms) Avg Time(ms) Stdev(ms)Rate(M/s) Per Row(ns) Relative -Top-level column113196 89 8.8 113.0 1.0X -Nested column 1316 1639 240 0.81315.5 0.1X +Top-level column116151 36 8.6 116.3 1.0X +Nested column 544604 31 1.8 544.5 0.2X Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.14.3 Intel(R) Core(TM) i7-8750H CPU @ 2.20GHz Limiting: Best Time(ms) Avg Time(ms) Stdev(ms)Rate(M/s) Per Row(ns) Relative -Top-level column260474 211 3.8 260.4 1.0X -Nested column 2322 3312 701 0.42322.3 0.1X +Top-level column360397 32 2.8 360.4 1.0X +Nested column 3322 3503 166 0.33322.4 0.1X Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.14.3 Intel(R) Core(TM) i7-8750H CPU @ 2.20GHz Repartitioning: Best Time(ms) Avg Time(ms) Stdev(ms)Rate(M/s) Per Row(ns) Relative -
[spark] branch master updated: [MINOR][CORE] Use https for bintray spark-packages repository
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new f57af22 [MINOR][CORE] Use https for bintray spark-packages repository f57af22 is described below commit f57af2286f85bf67706e14fecfbfd9ef034c2927 Author: Jungtaek Lim (HeartSaVioR) AuthorDate: Tue Mar 12 18:01:16 2019 -0500 [MINOR][CORE] Use https for bintray spark-packages repository ## What changes were proposed in this pull request? This patch changes the scheme of the URL from http to https for bintray spark-packages repository. Looks like we already changed the scheme of the repository URL for pom.xml but missed inside the code. ## How was this patch tested? Manually ran the `--packages` via `./bin/spark-shell --verbose --packages "RedisLabs:spark-redis:0.3.2"` ``` ... Ivy Default Cache set to: /Users/jlim/.ivy2/cache The jars for the packages stored in: /Users/jlim/.ivy2/jars :: loading settings :: url = jar:file:/Users/jlim/WorkArea/ScalaProjects/spark/dist/jars/ivy-2.4.0.jar!/org/apache/ivy/core/settings/ivysettings.xml RedisLabs#spark-redis added as a dependency :: resolving dependencies :: org.apache.spark#spark-submit-parent-2fee2e18-7832-4a4d-9e97-7b3d0fef766d;1.0 confs: [default] found RedisLabs#spark-redis;0.3.2 in spark-packages found redis.clients#jedis;2.7.2 in central found org.apache.commons#commons-pool2;2.3 in central downloading https://dl.bintray.com/spark-packages/maven/RedisLabs/spark-redis/0.3.2/spark-redis-0.3.2.jar ... [SUCCESSFUL ] RedisLabs#spark-redis;0.3.2!spark-redis.jar (824ms) downloading https://repo1.maven.org/maven2/redis/clients/jedis/2.7.2/jedis-2.7.2.jar ... [SUCCESSFUL ] redis.clients#jedis;2.7.2!jedis.jar (576ms) downloading https://repo1.maven.org/maven2/org/apache/commons/commons-pool2/2.3/commons-pool2-2.3.jar ... 
[SUCCESSFUL ] org.apache.commons#commons-pool2;2.3!commons-pool2.jar (150ms) :: resolution report :: resolve 4586ms :: artifacts dl 1555ms :: modules in use: RedisLabs#spark-redis;0.3.2 from spark-packages in [default] org.apache.commons#commons-pool2;2.3 from central in [default] redis.clients#jedis;2.7.2 from central in [default] - | |modules|| artifacts | | conf | number| search|dwnlded|evicted|| number|dwnlded| - | default | 3 | 3 | 3 | 0 || 3 | 3 | - ``` Closes #24061 from HeartSaVioR/MINOR-use-https-to-bintray-repository. Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Sean Owen --- core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index dd687b6..493cad0 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -1087,7 +1087,7 @@ private[spark] object SparkSubmitUtils { val sp: IBiblioResolver = new IBiblioResolver sp.setM2compatible(true) sp.setUsepoms(true) -sp.setRoot("http://dl.bintray.com/spark-packages/maven";) +sp.setRoot("https://dl.bintray.com/spark-packages/maven";) sp.setName("spark-packages") cr.add(sp) cr - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-2.4 updated: [MINOR][CORE] Use https for bintray spark-packages repository
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch branch-2.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-2.4 by this push: new 7f5bdd7 [MINOR][CORE] Use https for bintray spark-packages repository 7f5bdd7 is described below commit 7f5bdd75cf310b23586301796d5dab5b01ec38fc Author: Jungtaek Lim (HeartSaVioR) AuthorDate: Tue Mar 12 18:01:16 2019 -0500 [MINOR][CORE] Use https for bintray spark-packages repository ## What changes were proposed in this pull request? This patch changes the scheme of the URL from http to https for bintray spark-packages repository. Looks like we already changed the scheme of the repository URL for pom.xml but missed inside the code. ## How was this patch tested? Manually ran the `--packages` via `./bin/spark-shell --verbose --packages "RedisLabs:spark-redis:0.3.2"` ``` ... Ivy Default Cache set to: /Users/jlim/.ivy2/cache The jars for the packages stored in: /Users/jlim/.ivy2/jars :: loading settings :: url = jar:file:/Users/jlim/WorkArea/ScalaProjects/spark/dist/jars/ivy-2.4.0.jar!/org/apache/ivy/core/settings/ivysettings.xml RedisLabs#spark-redis added as a dependency :: resolving dependencies :: org.apache.spark#spark-submit-parent-2fee2e18-7832-4a4d-9e97-7b3d0fef766d;1.0 confs: [default] found RedisLabs#spark-redis;0.3.2 in spark-packages found redis.clients#jedis;2.7.2 in central found org.apache.commons#commons-pool2;2.3 in central downloading https://dl.bintray.com/spark-packages/maven/RedisLabs/spark-redis/0.3.2/spark-redis-0.3.2.jar ... [SUCCESSFUL ] RedisLabs#spark-redis;0.3.2!spark-redis.jar (824ms) downloading https://repo1.maven.org/maven2/redis/clients/jedis/2.7.2/jedis-2.7.2.jar ... [SUCCESSFUL ] redis.clients#jedis;2.7.2!jedis.jar (576ms) downloading https://repo1.maven.org/maven2/org/apache/commons/commons-pool2/2.3/commons-pool2-2.3.jar ... 
[SUCCESSFUL ] org.apache.commons#commons-pool2;2.3!commons-pool2.jar (150ms) :: resolution report :: resolve 4586ms :: artifacts dl 1555ms :: modules in use: RedisLabs#spark-redis;0.3.2 from spark-packages in [default] org.apache.commons#commons-pool2;2.3 from central in [default] redis.clients#jedis;2.7.2 from central in [default] - | |modules|| artifacts | | conf | number| search|dwnlded|evicted|| number|dwnlded| - | default | 3 | 3 | 3 | 0 || 3 | 3 | - ``` Closes #24061 from HeartSaVioR/MINOR-use-https-to-bintray-repository. Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Sean Owen (cherry picked from commit f57af2286f85bf67706e14fecfbfd9ef034c2927) Signed-off-by: Sean Owen --- core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 8ba36c6..385e1c6 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -1077,7 +1077,7 @@ private[spark] object SparkSubmitUtils { val sp: IBiblioResolver = new IBiblioResolver sp.setM2compatible(true) sp.setUsepoms(true) -sp.setRoot("http://dl.bintray.com/spark-packages/maven";) +sp.setRoot("https://dl.bintray.com/spark-packages/maven";) sp.setName("spark-packages") cr.add(sp) cr - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-2.3 updated: [MINOR][CORE] Use https for bintray spark-packages repository
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch branch-2.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-2.3 by this push: new 959a7ec [MINOR][CORE] Use https for bintray spark-packages repository 959a7ec is described below commit 959a7ecb9a1d433bed18f82c4c3354806f1329c7 Author: Jungtaek Lim (HeartSaVioR) AuthorDate: Tue Mar 12 18:01:16 2019 -0500 [MINOR][CORE] Use https for bintray spark-packages repository ## What changes were proposed in this pull request? This patch changes the scheme of the URL from http to https for bintray spark-packages repository. Looks like we already changed the scheme of the repository URL for pom.xml but missed inside the code. ## How was this patch tested? Manually ran the `--packages` via `./bin/spark-shell --verbose --packages "RedisLabs:spark-redis:0.3.2"` ``` ... Ivy Default Cache set to: /Users/jlim/.ivy2/cache The jars for the packages stored in: /Users/jlim/.ivy2/jars :: loading settings :: url = jar:file:/Users/jlim/WorkArea/ScalaProjects/spark/dist/jars/ivy-2.4.0.jar!/org/apache/ivy/core/settings/ivysettings.xml RedisLabs#spark-redis added as a dependency :: resolving dependencies :: org.apache.spark#spark-submit-parent-2fee2e18-7832-4a4d-9e97-7b3d0fef766d;1.0 confs: [default] found RedisLabs#spark-redis;0.3.2 in spark-packages found redis.clients#jedis;2.7.2 in central found org.apache.commons#commons-pool2;2.3 in central downloading https://dl.bintray.com/spark-packages/maven/RedisLabs/spark-redis/0.3.2/spark-redis-0.3.2.jar ... [SUCCESSFUL ] RedisLabs#spark-redis;0.3.2!spark-redis.jar (824ms) downloading https://repo1.maven.org/maven2/redis/clients/jedis/2.7.2/jedis-2.7.2.jar ... [SUCCESSFUL ] redis.clients#jedis;2.7.2!jedis.jar (576ms) downloading https://repo1.maven.org/maven2/org/apache/commons/commons-pool2/2.3/commons-pool2-2.3.jar ... 
[SUCCESSFUL ] org.apache.commons#commons-pool2;2.3!commons-pool2.jar (150ms) :: resolution report :: resolve 4586ms :: artifacts dl 1555ms :: modules in use: RedisLabs#spark-redis;0.3.2 from spark-packages in [default] org.apache.commons#commons-pool2;2.3 from central in [default] redis.clients#jedis;2.7.2 from central in [default] - | |modules|| artifacts | | conf | number| search|dwnlded|evicted|| number|dwnlded| - | default | 3 | 3 | 3 | 0 || 3 | 3 | - ``` Closes #24061 from HeartSaVioR/MINOR-use-https-to-bintray-repository. Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Sean Owen (cherry picked from commit f57af2286f85bf67706e14fecfbfd9ef034c2927) Signed-off-by: Sean Owen --- core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 15df51b..1801268 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -1074,7 +1074,7 @@ private[spark] object SparkSubmitUtils { val sp: IBiblioResolver = new IBiblioResolver sp.setM2compatible(true) sp.setUsepoms(true) -sp.setRoot("http://dl.bintray.com/spark-packages/maven";) +sp.setRoot("https://dl.bintray.com/spark-packages/maven";) sp.setName("spark-packages") cr.add(sp) cr - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-27130][BUILD] Automatically select profile when executing sbt-checkstyle
This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new dccf661 [SPARK-27130][BUILD] Automatically select profile when executing sbt-checkstyle dccf661 is described below commit dccf6615c34c9347e937838742dec88456843f13 Author: Yuming Wang AuthorDate: Wed Mar 13 08:03:46 2019 +0900 [SPARK-27130][BUILD] Automatically select profile when executing sbt-checkstyle ## What changes were proposed in this pull request? This PR makes it automatically select profile when executing `sbt-checkstyle`. The reason for this is that `hadoop-2.7` and `hadoop-3.1` may have different `hive-thriftserver` module in the future. ## How was this patch tested? manual tests: ``` Update AbstractService.java file. export HADOOP_PROFILE=hadoop2.7 ./dev/run-tests ``` The result: ![image](https://user-images.githubusercontent.com/5399861/54197992-5337e780-4500-11e9-930c-722982cdcd45.png) Closes #24065 from wangyum/SPARK-27130. Authored-by: Yuming Wang Signed-off-by: Hyukjin Kwon --- dev/run-tests.py | 10 +++--- dev/sbt-checkstyle | 11 +++ 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index 122c5c6..aa106af 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -180,9 +180,13 @@ def run_scala_style_checks(): run_cmd([os.path.join(SPARK_HOME, "dev", "lint-scala")]) -def run_java_style_checks(): +def run_java_style_checks(build_profiles): set_title_and_block("Running Java style checks", "BLOCK_JAVA_STYLE") -run_cmd([os.path.join(SPARK_HOME, "dev", "sbt-checkstyle")]) +# The same profiles used for building are used to run Checkstyle by SBT as well because +# the previous build looks reused for Checkstyle and affecting Checkstyle. See SPARK-27130. 
+profiles = " ".join(build_profiles) +print("[info] Checking Java style using SBT with these profiles: ", profiles) +run_cmd([os.path.join(SPARK_HOME, "dev", "sbt-checkstyle"), profiles]) def run_python_style_checks(): @@ -333,7 +337,7 @@ def build_spark_assembly_sbt(hadoop_version, checkstyle=False): exec_sbt(profiles_and_goals) if checkstyle: -run_java_style_checks() +run_java_style_checks(build_profiles) build_spark_unidoc_sbt(hadoop_version) diff --git a/dev/sbt-checkstyle b/dev/sbt-checkstyle index 18f3bd8..1ecad59 100755 --- a/dev/sbt-checkstyle +++ b/dev/sbt-checkstyle @@ -17,17 +17,12 @@ # limitations under the License. # +profiles=${1:-"-Pkinesis-asl -Pmesos -Pkubernetes -Pyarn -Phive -Phive-thriftserver"} + # NOTE: echo "q" is needed because SBT prompts the user for input on encountering a build file # with failure (either resolution or compilation); the "q" makes SBT quit. ERRORS=$(echo -e "q\n" \ -| build/sbt \ --Pkinesis-asl \ --Pmesos \ --Pkubernetes \ --Pyarn \ --Phive \ --Phive-thriftserver \ -checkstyle test:checkstyle \ +| build/sbt ${profiles} checkstyle test:checkstyle \ | awk '{if($1~/error/)print}' \ ) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-27045][SQL] SQL tab in UI shows actual SQL instead of callsite in case of SparkSQLDriver
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new e60d8fc [SPARK-27045][SQL] SQL tab in UI shows actual SQL instead of callsite in case of SparkSQLDriver e60d8fc is described below commit e60d8fce0b0cf2a6d766ea2fc5f994546550570a Author: Ajith AuthorDate: Tue Mar 12 16:14:29 2019 -0700 [SPARK-27045][SQL] SQL tab in UI shows actual SQL instead of callsite in case of SparkSQLDriver ## What changes were proposed in this pull request? When we run sql in spark via SparkSQLDriver (thrift server, spark-sql), SQL string is set via ``setJobDescription``. The SparkUI SQL tab must show SQL instead of stacktrace in case ``setJobDescription`` is set which is more useful to end user. Instead it currently shows in description column the callsite shortform which is less useful ![image](https://user-images.githubusercontent.com/22072336/53734682-aaa7d900-3eaa-11e9-957b-0e5006db417e.png) ## How was this patch tested? Manually: ![image](https://user-images.githubusercontent.com/22072336/53734657-9f54ad80-3eaa-11e9-8dc5-2b38f6970f4e.png) Closes #23958 from ajithme/sqlui. 
Authored-by: Ajith Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/internal/StaticSQLConf.scala | 6 ++ .../org/apache/spark/sql/execution/SQLExecution.scala | 15 ++- .../apache/spark/sql/execution/ui/AllExecutionsPage.scala | 5 +++-- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala index fc07efb..e12f05b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala @@ -132,4 +132,10 @@ object StaticSQLConf { .intConf .createWithDefault(1000) + val SQL_EVENT_TRUNCATE_LENGTH = buildStaticConf("spark.sql.event.truncate.length") +.doc("Threshold of SQL length beyond which it will be truncated before adding to " + + "event. Defaults to no truncation. If set to 0, callsite will be logged instead.") +.intConf +.checkValue(_ >= 0, "Must be set greater or equal to zero") +.createWithDefault(Int.MaxValue) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala index 5b38fe5..ca66337 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala @@ -20,9 +20,12 @@ package org.apache.spark.sql.execution import java.util.concurrent.ConcurrentHashMap import java.util.concurrent.atomic.AtomicLong +import org.apache.spark.SparkContext import org.apache.spark.internal.config.Tests.IS_TESTING import org.apache.spark.sql.SparkSession import org.apache.spark.sql.execution.ui.{SparkListenerSQLExecutionEnd, SparkListenerSQLExecutionStart} +import org.apache.spark.sql.internal.StaticSQLConf.SQL_EVENT_TRUNCATE_LENGTH +import org.apache.spark.util.Utils object SQLExecution { @@ -71,13 +74,23 
@@ object SQLExecution { // streaming queries would give us call site like "run at :0" val callSite = sc.getCallSite() + val truncateLength = sc.conf.get(SQL_EVENT_TRUNCATE_LENGTH) + + val desc = Option(sc.getLocalProperty(SparkContext.SPARK_JOB_DESCRIPTION)) +.filter(_ => truncateLength > 0) +.map { sqlStr => + val redactedStr = Utils +.redact(sparkSession.sessionState.conf.stringRedactionPattern, sqlStr) + redactedStr.substring(0, Math.min(truncateLength, redactedStr.length)) +}.getOrElse(callSite.shortForm) + withSQLConfPropagated(sparkSession) { var ex: Option[Exception] = None val startTime = System.nanoTime() try { sc.listenerBus.post(SparkListenerSQLExecutionStart( executionId = executionId, -description = callSite.shortForm, +description = desc, details = callSite.longForm, physicalPlanDescription = queryExecution.toString, // `queryExecution.executedPlan` triggers query planning. If it fails, the exception diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala index 43ff1e1..824c094 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala @@ -388,14 +388,15 @@ pr
[spark] branch master updated: [MINOR][DOC] Fix the description of Pod Metadata's annotations
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 7beb464 [MINOR][DOC] Fix the description of Pod Metadata's annotations 7beb464 is described below commit 7beb464564a04004c638d6b0e1d8aafd9f038a0a Author: hehuiyuan AuthorDate: Tue Mar 12 19:29:32 2019 -0500 [MINOR][DOC] Fix the description of Pod Metadata's annotations ## What changes were proposed in this pull request? ![annotation](https://user-images.githubusercontent.com/18002496/54189638-2d551780-44ed-11e9-9efc-3691bec42130.jpg) Closes #24064 from hehuiyuan/hehuiyuan-patch-4. Authored-by: hehuiyuan Signed-off-by: Sean Owen --- docs/running-on-kubernetes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 78b7c51..40ff62c 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1055,7 +1055,7 @@ See the below table for the full list of pod specifications that will be overwri annotations Adds the annotations from spark.kubernetes.{driver,executor}.annotation.* -Spark will add additional labels specified by the spark configuration. +Spark will add additional annotations specified by the spark configuration. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r32894 - /dev/spark/v2.4.1-rc8-bin/
Author: dbtsai Date: Wed Mar 13 01:15:48 2019 New Revision: 32894 Log: Apache Spark v2.4.1-rc8 Added: dev/spark/v2.4.1-rc8-bin/ dev/spark/v2.4.1-rc8-bin/SparkR_2.4.1.tar.gz (with props) dev/spark/v2.4.1-rc8-bin/SparkR_2.4.1.tar.gz.asc dev/spark/v2.4.1-rc8-bin/SparkR_2.4.1.tar.gz.sha512 dev/spark/v2.4.1-rc8-bin/pyspark-2.4.1.tar.gz (with props) dev/spark/v2.4.1-rc8-bin/pyspark-2.4.1.tar.gz.asc dev/spark/v2.4.1-rc8-bin/pyspark-2.4.1.tar.gz.sha512 dev/spark/v2.4.1-rc8-bin/spark-2.4.1-bin-hadoop2.6.tgz (with props) dev/spark/v2.4.1-rc8-bin/spark-2.4.1-bin-hadoop2.6.tgz.asc dev/spark/v2.4.1-rc8-bin/spark-2.4.1-bin-hadoop2.6.tgz.sha512 dev/spark/v2.4.1-rc8-bin/spark-2.4.1-bin-hadoop2.7.tgz (with props) dev/spark/v2.4.1-rc8-bin/spark-2.4.1-bin-hadoop2.7.tgz.asc dev/spark/v2.4.1-rc8-bin/spark-2.4.1-bin-hadoop2.7.tgz.sha512 dev/spark/v2.4.1-rc8-bin/spark-2.4.1-bin-without-hadoop-scala-2.12.tgz (with props) dev/spark/v2.4.1-rc8-bin/spark-2.4.1-bin-without-hadoop-scala-2.12.tgz.asc dev/spark/v2.4.1-rc8-bin/spark-2.4.1-bin-without-hadoop-scala-2.12.tgz.sha512 dev/spark/v2.4.1-rc8-bin/spark-2.4.1-bin-without-hadoop.tgz (with props) dev/spark/v2.4.1-rc8-bin/spark-2.4.1-bin-without-hadoop.tgz.asc dev/spark/v2.4.1-rc8-bin/spark-2.4.1-bin-without-hadoop.tgz.sha512 dev/spark/v2.4.1-rc8-bin/spark-2.4.1.tgz (with props) dev/spark/v2.4.1-rc8-bin/spark-2.4.1.tgz.asc dev/spark/v2.4.1-rc8-bin/spark-2.4.1.tgz.sha512 Added: dev/spark/v2.4.1-rc8-bin/SparkR_2.4.1.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v2.4.1-rc8-bin/SparkR_2.4.1.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v2.4.1-rc8-bin/SparkR_2.4.1.tar.gz.asc == --- dev/spark/v2.4.1-rc8-bin/SparkR_2.4.1.tar.gz.asc (added) +++ dev/spark/v2.4.1-rc8-bin/SparkR_2.4.1.tar.gz.asc Wed Mar 13 01:15:48 2019 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- + +iQJGBAABCgAwFiEEVWa1AnAbOl9avVsfQuWyWo96gsEFAlyIKFASHGRidHNhaUBh +cGFjaGUub3JnAAoJEELlslqPeoLBSrgQAMThcJ5ZoydJr6ZimOgVI7rlhfow9wNh +D4XfuDMKcJVHxm82xubD5NePqdWjexpv0UyAcoyFLUsWa1NUa07tZuT+4Rg8Z0WZ +jzBj5npJ/qv7vhj+jU26AZOSIYqx/4VBQ1n7ug4dZ2QyfeJNecBPd9gQGRJsnKCG +C81Y/h/X862KC37RiyywU6SMLP4ndFo5LOLsudd599fLuK6QOCt6JF4behhhsQ07 +++xIj8zHxNKvmLOvpdalIJ6ypZhfC8NUsYk/JSU+RhEpWOwAY7KfmK7VXHCXGVGH +eJwLa1ANVY4jt1wG1t2dgWJ9D8YKBHvVOV8Rxq9S4EB9/yBcIxAXcHOd7VWpHD4M +t1tyo66yQeyepYruPTLGrcRNfv3kWdmuqVyhl8xvtTdK/+FsgOg/YvX5c2pVPjm0 +4NV1qv3Ili36Xdv98cQkQcN5/n/CixpZihqO86m1QO/qFYbKG9TTX9rpkLH8ZPtq +hYPHP42rk8sBKzWeQ4zzVfU/Frz/TqCHQLxmyVV7xlbLTlOTkMUbn7Ljcl6bIro3 +g/ZU61etcf9oYxNskL85nJCSgoaLIAR+gs/OodexxS/50JD5PSTqLtLIwi4RD/N2 +aGGQIKh0G+1WnY+9/b49+MDHz+N0lBv+zjGp5JtzI+UiBUbwBAo12xKneVZI8ECd +jxl2DtfzFDl4 +=O//b +-END PGP SIGNATURE- Added: dev/spark/v2.4.1-rc8-bin/SparkR_2.4.1.tar.gz.sha512 == --- dev/spark/v2.4.1-rc8-bin/SparkR_2.4.1.tar.gz.sha512 (added) +++ dev/spark/v2.4.1-rc8-bin/SparkR_2.4.1.tar.gz.sha512 Wed Mar 13 01:15:48 2019 @@ -0,0 +1,3 @@ +SparkR_2.4.1.tar.gz: 0B8E3134 4AAB44D1 817B862A 52730756 490B77E3 AAF0D26A + 36419927 41724B62 D37F96D6 322EE211 8C5BE8C4 011C228D + 33C1C44C 1E424D33 CED36630 DCB4A10D Added: dev/spark/v2.4.1-rc8-bin/pyspark-2.4.1.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v2.4.1-rc8-bin/pyspark-2.4.1.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v2.4.1-rc8-bin/pyspark-2.4.1.tar.gz.asc == --- dev/spark/v2.4.1-rc8-bin/pyspark-2.4.1.tar.gz.asc (added) +++ dev/spark/v2.4.1-rc8-bin/pyspark-2.4.1.tar.gz.asc Wed Mar 13 01:15:48 2019 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- + +iQJGBAABCgAwFiEEVWa1AnAbOl9avVsfQuWyWo96gsEFAlyIKFUSHGRidHNhaUBh +cGFjaGUub3JnAAoJEELlslqPeoLBzpQP/26NtzKTXojk/6nq7zsItId5yUVeclsG +qMXkVt5OuDnQRSEC9Fv4zE/b7uMWXfLDo4yB6yr5I6V4mNgx/uwcu0Or8hhRsfTe +/woU2G91vJ0m6JnaKZ+hy4VC8BHoBVpYkQ3SggliveDxUiLBh01a5h+75EeNb20t +BQ0WCOFnR2fzW/HJZbl/4iwA0eAHCV8jYi4M5xTN/bGJ+G/v0SgvcnckKW6EybwB +TONWgEhM04FGWjM62Xa7B1lY+78FUgNyh8X4UdfRHjJ7pdYgJYmLz0dgYCBfI0+L +HkrvDAluqXwjXMMgacWzdbkhsFLA+WUfcuJ/1rvpVpDnT4YHRQMGD9dyo5PyGYlw +LZq/nMtdbtOt/cJjUVN9yrxf6udpMkPG3n21njO+CQqNxpai/u4rFy84WALPJ5Uf +iQhOx1eaptEkpqGl1h205ZtYarktGE9wY2WdYIkAULHAsOZAoLFc5DjzKLZ5bupr +RgLHwmUSwQ5uBJykZaHPjYKCF+E2MvMQxA6sQO9rQ3Y/Yx+8JYXHWbrcK526cgq2 +3Nn8Y/dJO19008Xq2QWN5eE9FB7NFxMnvIMPmOLf1uaH39LaRmsoo9w30FwSbKLS +tta0LNPd+1h8jn9P4jK0Eme8WwnHQcDNp3tFHYYKoSbGRHcjic
[spark] branch master updated: [SPARK-26976][SQL] Forbid reserved keywords as identifiers when ANSI mode is on
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 1e9469b [SPARK-26976][SQL] Forbid reserved keywords as identifiers when ANSI mode is on 1e9469b is described below commit 1e9469bb7a71b06d610edaaebca933f4219a6eb3 Author: Takeshi Yamamuro AuthorDate: Wed Mar 13 11:20:27 2019 +0900 [SPARK-26976][SQL] Forbid reserved keywords as identifiers when ANSI mode is on ## What changes were proposed in this pull request? This pr added code to forbid reserved keywords as identifiers when ANSI mode is on. This is a follow-up of SPARK-26215(#23259). ## How was this patch tested? Added tests in `TableIdentifierParserSuite`. Closes #23880 from maropu/SPARK-26976. Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- .../apache/spark/sql/catalyst/parser/SqlBase.g4| 84 --- .../parser/TableIdentifierParserSuite.scala| 650 - 2 files changed, 649 insertions(+), 85 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index c61cda8..d11c28c 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -736,7 +736,6 @@ qualifiedName identifier : strictIdentifier -| {ansi}? ansiReserved | {!ansi}? defaultReserved ; @@ -761,89 +760,6 @@ number | MINUS? BIGDECIMAL_LITERAL #bigDecimalLiteral ; -// NOTE: You must follow a rule below when you add a new ANTLR token in this file: -// - All the ANTLR tokens = UNION(`ansiReserved`, `ansiNonReserved`) = UNION(`defaultReserved`, `nonReserved`) -// -// Let's say you add a new token `NEWTOKEN` and this is not reserved regardless of a `spark.sql.parser.ansi.enabled` -// value. 
In this case, you must add a token `NEWTOKEN` in both `ansiNonReserved` and `nonReserved`. -// -// It is recommended to list them in alphabetical order. - -// The list of the reserved keywords when `spark.sql.parser.ansi.enabled` is true. Currently, we only reserve -// the ANSI keywords that almost all the ANSI SQL standards (SQL-92, SQL-99, SQL-2003, SQL-2008, SQL-2011, -// and SQL-2016) and PostgreSQL reserve. -ansiReserved -: ALL -| AND -| ANTI -| ANY -| AS -| AUTHORIZATION -| BOTH -| CASE -| CAST -| CHECK -| COLLATE -| COLUMN -| CONSTRAINT -| CREATE -| CROSS -| CURRENT_DATE -| CURRENT_TIME -| CURRENT_TIMESTAMP -| CURRENT_USER -| DISTINCT -| ELSE -| END -| EXCEPT -| FALSE -| FETCH -| FOR -| FOREIGN -| FROM -| FULL -| GRANT -| GROUP -| HAVING -| IN -| INNER -| INTERSECT -| INTO -| IS -| JOIN -| LEADING -| LEFT -| NATURAL -| NOT -| NULL -| ON -| ONLY -| OR -| ORDER -| OUTER -| OVERLAPS -| PRIMARY -| REFERENCES -| RIGHT -| SELECT -| SEMI -| SESSION_USER -| SETMINUS -| SOME -| TABLE -| THEN -| TO -| TRAILING -| UNION -| UNIQUE -| USER -| USING -| WHEN -| WHERE -| WITH -; - - // The list of the non-reserved keywords when `spark.sql.parser.ansi.enabled` is true. 
ansiNonReserved : ADD diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala index 3d41c27..2725deb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala @@ -18,8 +18,10 @@ package org.apache.spark.sql.catalyst.parser import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.plans.SQLHelper +import org.apache.spark.sql.internal.SQLConf -class TableIdentifierParserSuite extends SparkFunSuite { +class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper { import CatalystSqlParser._ // Add "$elem$", "$value$" & "$key$" @@ -281,6 +283,635 @@ class TableIdentifierParserSuite extends SparkFunSuite { "where", "with") + // All the keywords in `docs/sql-reserved-and-non-reserved-key-words.md` are listed below: + val allCandidateKeywords = Set( +"abs", +"absolute", +"acos", +"action", +"add", +"after", +"all", +"allocate", +"alter", +"analyze", +"and", +"anti", +"any", +"archive", +"are", +"array", +"array_a
svn commit: r32897 - in /dev/spark/v2.4.1-rc8-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _site/api/java/org/apache/spark
Author: dbtsai Date: Wed Mar 13 02:47:08 2019 New Revision: 32897 Log: Apache Spark v2.4.1-rc8 docs [This commit notification would consist of 1478 parts, which exceeds the limit of 50 parts, so it was shortened to a summary.] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [MINOR][SQL] Refactor RowEncoder to use existing (De)serializerBuildHelper methods
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 1b06cda [MINOR][SQL] Refactor RowEncoder to use existing (De)serializerBuildHelper methods 1b06cda is described below commit 1b06cda532b74ed555f759bcb4f73759966b71bb Author: Jungtaek Lim (HeartSaVioR) AuthorDate: Wed Mar 13 10:54:47 2019 +0800 [MINOR][SQL] Refactor RowEncoder to use existing (De)serializerBuildHelper methods ## What changes were proposed in this pull request? This patch proposes to reuse existing methods in (De)serializerBuildHelper in RowEncoder to achieve deduplication as well as consistent creation of serialization/deserialization of same type. ## How was this patch tested? Existing UT. Closes #24014 from HeartSaVioR/SPARK-27092. Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/encoders/RowEncoder.scala | 149 - 1 file changed, 55 insertions(+), 94 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala index 97709bd..3a06f8d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala @@ -22,14 +22,15 @@ import scala.reflect.ClassTag import org.apache.spark.SparkException import org.apache.spark.sql.Row -import org.apache.spark.sql.catalyst.ScalaReflection +import org.apache.spark.sql.catalyst.{ScalaReflection, WalkedTypePath} +import org.apache.spark.sql.catalyst.DeserializerBuildHelper._ +import org.apache.spark.sql.catalyst.SerializerBuildHelper._ import org.apache.spark.sql.catalyst.analysis.GetColumnByOrdinal import org.apache.spark.sql.catalyst.expressions._ import 
org.apache.spark.sql.catalyst.expressions.objects._ -import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, ArrayData, DateTimeUtils} +import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, ArrayData} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.types.UTF8String /** * A factory for constructing encoders that convert external row to/from the Spark SQL @@ -93,37 +94,19 @@ object RowEncoder { dataType = ObjectType(udtClass), false) Invoke(obj, "serialize", udt, inputObject :: Nil, returnNullable = false) -case TimestampType if SQLConf.get.datetimeJava8ApiEnabled => - StaticInvoke( -DateTimeUtils.getClass, -TimestampType, -"instantToMicros", -inputObject :: Nil, -returnNullable = false) - case TimestampType => - StaticInvoke( -DateTimeUtils.getClass, -TimestampType, -"fromJavaTimestamp", -inputObject :: Nil, -returnNullable = false) - -case DateType if SQLConf.get.datetimeJava8ApiEnabled => - StaticInvoke( -DateTimeUtils.getClass, -DateType, -"localDateToDays", -inputObject :: Nil, -returnNullable = false) + if (SQLConf.get.datetimeJava8ApiEnabled) { +createSerializerForJavaInstant(inputObject) + } else { +createSerializerForSqlTimestamp(inputObject) + } case DateType => - StaticInvoke( -DateTimeUtils.getClass, -DateType, -"fromJavaDate", -inputObject :: Nil, -returnNullable = false) + if (SQLConf.get.datetimeJava8ApiEnabled) { +createSerializerForJavaLocalDate(inputObject) + } else { +createSerializerForSqlDate(inputObject) + } case d: DecimalType => CheckOverflow(StaticInvoke( @@ -133,13 +116,7 @@ object RowEncoder { inputObject :: Nil, returnNullable = false), d) -case StringType => - StaticInvoke( -classOf[UTF8String], -StringType, -"fromString", -inputObject :: Nil, -returnNullable = false) +case StringType => createSerializerForString(inputObject) case t @ ArrayType(et, containsNull) => et match { @@ -151,17 +128,14 @@ object RowEncoder { inputObject :: Nil, returnNullable = false) 
-case _ => MapObjects( - element => { -val value = serializerFor(ValidateExternalType(element, et), et) -if (!containsNull) { - AssertNotNull(value) -} else { - value -} - }, - inputObject, - ObjectType(classOf[Object])) +case _ => + createSerializerForMapObjects( +inputObject, +ObjectType(classOf[Object]), +element => { +
[spark] branch master updated: [MINOR][BUILD] Add Scala 2.12 profile back for branch-2.4 build
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 2b9ad25 [MINOR][BUILD] Add Scala 2.12 profile back for branch-2.4 build 2b9ad25 is described below commit 2b9ad2516efa18fe1a6102e657f185a7313a249f Author: DB Tsai AuthorDate: Tue Mar 12 20:08:52 2019 -0700 [MINOR][BUILD] Add Scala 2.12 profile back for branch-2.4 build Closes #24074 from dbtsai/scala-2.12. Authored-by: DB Tsai Signed-off-by: Dongjoon Hyun --- dev/create-release/release-build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index 52e50b6..35deadf 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -125,7 +125,7 @@ fi PUBLISH_SCALA_2_12=0 SCALA_2_12_PROFILES="-Pscala-2.12" if [[ $SPARK_VERSION < "3.0." ]]; then - SCALA_2_12_PROFILES="-Pflume" + SCALA_2_12_PROFILES="-Pscala-2.12 -Pflume" fi if [[ $SPARK_VERSION > "2.4" ]]; then PUBLISH_SCALA_2_12=1 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org