spark git commit: [SPARK-21338][SQL][FOLLOW-UP] Implement isCascadingTruncateTable() method in AggregatedDialect
Repository: spark
Updated Branches:
  refs/heads/master 4a8c9e29b -> 2274d84ef


[SPARK-21338][SQL][FOLLOW-UP] Implement isCascadingTruncateTable() method in AggregatedDialect

## What changes were proposed in this pull request?

The existing implementation of `isCascadingTruncateTable` in `AggregatedDialect` is wrong: when no dialect claims cascading truncate but at least one dialect reports an unknown result, the aggregated dialect should return unknown (`None`) instead of `Some(false)`.

## How was this patch tested?

Added test.

Author: Liang-Chi Hsieh

Closes #19286 from viirya/SPARK-21338-followup.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2274d84e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2274d84e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2274d84e

Branch: refs/heads/master
Commit: 2274d84efcc71957766c1973b80b7c3265b55b11
Parents: 4a8c9e2
Author: Liang-Chi Hsieh
Authored: Sat Sep 23 21:51:04 2017 -0700
Committer: gatorsmile
Committed: Sat Sep 23 21:51:04 2017 -0700

----------------------------------------------------------------------
 .../spark/sql/jdbc/AggregatedDialect.scala |  8 ++-
 .../org/apache/spark/sql/jdbc/JDBCSuite.scala  | 25
 2 files changed, 32 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/2274d84e/sql/core/src/main/scala/org/apache/spark/sql/jdbc/AggregatedDialect.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/AggregatedDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/AggregatedDialect.scala
index 7432a15..1419d69 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/AggregatedDialect.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/AggregatedDialect.scala
@@ -43,6 +43,12 @@ private class AggregatedDialect(dialects: List[JdbcDialect]) extends JdbcDialect
   }
 
   override def isCascadingTruncateTable(): Option[Boolean] = {
-    dialects.flatMap(_.isCascadingTruncateTable()).reduceOption(_ || _)
+    // If any dialect claims cascading truncate, this dialect is also cascading truncate.
+    // Otherwise, if any dialect has unknown cascading truncate, this dialect is also unknown.
+    dialects.flatMap(_.isCascadingTruncateTable()).reduceOption(_ || _) match {
+      case Some(true) => Some(true)
+      case _ if dialects.exists(_.isCascadingTruncateTable().isEmpty) => None
+      case _ => Some(false)
+    }
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/2274d84e/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
index fd12bb9..34205e0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
@@ -749,6 +749,31 @@ class JDBCSuite extends SparkFunSuite
     assert(agg.isCascadingTruncateTable() === Some(true))
   }
 
+  test("Aggregated dialects: isCascadingTruncateTable") {
+    def genDialect(cascadingTruncateTable: Option[Boolean]): JdbcDialect = new JdbcDialect {
+      override def canHandle(url: String): Boolean = true
+      override def getCatalystType(
+          sqlType: Int,
+          typeName: String,
+          size: Int,
+          md: MetadataBuilder): Option[DataType] = None
+      override def isCascadingTruncateTable(): Option[Boolean] = cascadingTruncateTable
+    }
+
+    def testDialects(cascadings: List[Option[Boolean]], expected: Option[Boolean]): Unit = {
+      val dialects = cascadings.map(genDialect(_))
+      val agg = new AggregatedDialect(dialects)
+      assert(agg.isCascadingTruncateTable() === expected)
+    }
+
+    testDialects(List(Some(true), Some(false), None), Some(true))
+    testDialects(List(Some(true), Some(true), None), Some(true))
+    testDialects(List(Some(false), Some(false), None), None)
+    testDialects(List(Some(true), Some(true)), Some(true))
+    testDialects(List(Some(false), Some(false)), Some(false))
+    testDialects(List(None, None), None)
+  }
+
   test("DB2Dialect type mapping") {
     val db2Dialect = JdbcDialects.get("jdbc:db2://127.0.0.1/db")
     assert(db2Dialect.getJDBCType(StringType).map(_.databaseTypeDefinition).get == "CLOB")
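Editorial note: the three-valued rule this patch encodes fits in a standalone sketch (plain Scala, not the Spark class itself; it assumes a non-empty dialect list, which `AggregatedDialect` expects):

```scala
// Aggregate per-dialect answers about cascading truncate:
//   Some(true)  if any dialect cascades,
//   None        if none cascades but at least one answer is unknown,
//   Some(false) only when every dialect answers Some(false).
def aggregateCascading(answers: Seq[Option[Boolean]]): Option[Boolean] =
  if (answers.contains(Some(true))) Some(true)
  else if (answers.contains(None)) None
  else Some(false)

// Mirrors the expectations in the new JDBCSuite test above.
assert(aggregateCascading(List(Some(true), Some(false), None)) == Some(true))
assert(aggregateCascading(List(Some(false), Some(false), None)) == None)
assert(aggregateCascading(List(Some(false), Some(false))) == Some(false))
assert(aggregateCascading(List(None, None)) == None)
```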
spark git commit: [SPARK-22109][SQL][BRANCH-2.2] Resolves type conflicts between strings and timestamps in partition column
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 1a829df94 -> 211d81beb


[SPARK-22109][SQL][BRANCH-2.2] Resolves type conflicts between strings and timestamps in partition column

## What changes were proposed in this pull request?

This PR backports https://github.com/apache/spark/commit/04975a68b583a6175f93da52374108e5d4754d9a into branch-2.2.

## How was this patch tested?

Unit tests in `ParquetPartitionDiscoverySuite`.

Author: hyukjinkwon

Closes #19333 from HyukjinKwon/SPARK-22109-backport-2.2.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/211d81be
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/211d81be
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/211d81be

Branch: refs/heads/branch-2.2
Commit: 211d81beb001a113e262d399fcacbd72f33ea0d9
Parents: 1a829df
Author: hyukjinkwon
Authored: Sun Sep 24 02:51:04 2017 +0900
Committer: Takuya UESHIN
Committed: Sun Sep 24 02:51:04 2017 +0900

----------------------------------------------------------------------
 .../sql/execution/datasources/PartitioningUtils.scala   | 11 ++-
 .../parquet/ParquetPartitionDiscoverySuite.scala        | 12
 2 files changed, 18 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/211d81be/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
index f61c673..6f74381 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
@@ -138,7 +138,7 @@ object PartitioningUtils {
         "root directory of the table. If there are multiple root directories, " +
           "please load them separately and then union them.")
 
-    val resolvedPartitionValues = resolvePartitions(pathsWithPartitionValues)
+    val resolvedPartitionValues = resolvePartitions(pathsWithPartitionValues, timeZone)
 
     // Creates the StructType which represents the partition columns.
     val fields = {
@@ -322,7 +322,8 @@ object PartitioningUtils {
    * }}}
    */
   def resolvePartitions(
-      pathsWithPartitionValues: Seq[(Path, PartitionValues)]): Seq[PartitionValues] = {
+      pathsWithPartitionValues: Seq[(Path, PartitionValues)],
+      timeZone: TimeZone): Seq[PartitionValues] = {
     if (pathsWithPartitionValues.isEmpty) {
       Seq.empty
     } else {
@@ -337,7 +338,7 @@
       val values = pathsWithPartitionValues.map(_._2)
       val columnCount = values.head.columnNames.size
       val resolvedValues = (0 until columnCount).map { i =>
-        resolveTypeConflicts(values.map(_.literals(i)))
+        resolveTypeConflicts(values.map(_.literals(i)), timeZone)
       }
 
       // Fills resolved literals back to each partition
@@ -474,7 +475,7 @@ object PartitioningUtils {
    * Given a collection of [[Literal]]s, resolves possible type conflicts by up-casting "lower"
    * types.
    */
-  private def resolveTypeConflicts(literals: Seq[Literal]): Seq[Literal] = {
+  private def resolveTypeConflicts(literals: Seq[Literal], timeZone: TimeZone): Seq[Literal] = {
     val desiredType = {
       val topType = literals.map(_.dataType).maxBy(upCastingOrder.indexOf(_))
       // Falls back to string if all values of this column are null or empty string
@@ -482,7 +483,7 @@
     }
 
     literals.map { case l @ Literal(_, dataType) =>
-      Literal.create(Cast(l, desiredType).eval(), desiredType)
+      Literal.create(Cast(l, desiredType, Some(timeZone.getID)).eval(), desiredType)
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/211d81be/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala
index b4f3de9..7225693 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala
@@ -1022,4 +1022,16 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha
     }
   }
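The heart of the fix is that casting a partition-value string to a timestamp is time-zone dependent, which is why `timeZone` is now threaded from `resolvePartitions` down to `Cast`. As a plain-JVM illustration (an editorial sketch, not Spark code), the same string yields different instants under different zones:

```scala
import java.text.SimpleDateFormat
import java.util.TimeZone

// Parse the same partition-value string under two time zones; the
// resulting epoch instants differ by exactly the zone offset.
def parseMillis(s: String, zone: String): Long = {
  val fmt = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
  fmt.setTimeZone(TimeZone.getTimeZone(zone))
  fmt.parse(s).getTime
}

val utc = parseMillis("2015-01-01 00:00:00", "UTC")
val la  = parseMillis("2015-01-01 00:00:00", "America/Los_Angeles")
assert(la - utc == 8L * 60 * 60 * 1000) // PST is UTC-8 on this date
```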
spark git commit: [SPARK-22110][SQL][DOCUMENTATION] Add usage and improve documentation with arguments and examples for trim function
Repository: spark
Updated Branches:
  refs/heads/master c792aff03 -> 4a8c9e29b


[SPARK-22110][SQL][DOCUMENTATION] Add usage and improve documentation with arguments and examples for trim function

## What changes were proposed in this pull request?

This PR proposes to enhance the documentation for the `trim` functions in the function description section:
- Add more `usage`, `arguments` and `examples` for the trim functions
- Adjust spacing in the `usage` section

After the changes, the trim function documentation will look like this:

- `trim`

```
trim(str) - Removes the leading and trailing space characters from str.

trim(BOTH trimStr FROM str) - Remove the leading and trailing trimStr characters from str

trim(LEADING trimStr FROM str) - Remove the leading trimStr characters from str

trim(TRAILING trimStr FROM str) - Remove the trailing trimStr characters from str

Arguments:
    str - a string expression
    trimStr - the trim string characters to trim, the default value is a single space
    BOTH, FROM - these are keywords to specify trimming string characters from both ends of the string
    LEADING, FROM - these are keywords to specify trimming string characters from the left end of the string
    TRAILING, FROM - these are keywords to specify trimming string characters from the right end of the string

Examples:
    > SELECT trim('    SparkSQL   ');
     SparkSQL
    > SELECT trim('SL', 'SSparkSQLS');
     parkSQ
    > SELECT trim(BOTH 'SL' FROM 'SSparkSQLS');
     parkSQ
    > SELECT trim(LEADING 'SL' FROM 'SSparkSQLS');
     parkSQLS
    > SELECT trim(TRAILING 'SL' FROM 'SSparkSQLS');
     SSparkSQ
```

- `ltrim`

```
ltrim(str) - Removes the leading space characters from str.

ltrim(trimStr, str) - Removes the leading string which contains the characters from the trim string

Arguments:
    str - a string expression
    trimStr - the trim string characters to trim, the default value is a single space

Examples:
    > SELECT ltrim('    SparkSQL   ');
     SparkSQL
    > SELECT ltrim('Sp', 'SSparkSQLS');
     arkSQLS
```

- `rtrim`

```
rtrim(str) - Removes the trailing space characters from str.

rtrim(trimStr, str) - Removes the trailing string which contains the characters from the trim string from the str

Arguments:
    str - a string expression
    trimStr - the trim string characters to trim, the default value is a single space

Examples:
    > SELECT rtrim('    SparkSQL   ');
     SparkSQL
    > SELECT rtrim('LQSa', 'SSparkSQLS');
     SSpark
```

The JIRA for the trim-characters function is [trim function](https://issues.apache.org/jira/browse/SPARK-14878).

## How was this patch tested?

Manually tested:

```
spark-sql> describe function extended trim;
17/09/22 17:03:04 INFO CodeGenerator: Code generated in 153.026533 ms
Function: trim
Class: org.apache.spark.sql.catalyst.expressions.StringTrim
Usage:
    trim(str) - Removes the leading and trailing space characters from `str`.

    trim(BOTH trimStr FROM str) - Remove the leading and trailing `trimStr` characters from `str`

    trim(LEADING trimStr FROM str) - Remove the leading `trimStr` characters from `str`

    trim(TRAILING trimStr FROM str) - Remove the trailing `trimStr` characters from `str`

Extended Usage:
    Arguments:
      * str - a string expression
      * trimStr - the trim string characters to trim, the default value is a single space
      * BOTH, FROM - these are keywords to specify trimming string characters from both ends of the string
      * LEADING, FROM - these are keywords to specify trimming string characters from the left end of the string
      * TRAILING, FROM - these are keywords to specify trimming string characters from the right end of the string

    Examples:
      > SELECT trim('    SparkSQL   ');
       SparkSQL
      > SELECT trim('SL', 'SSparkSQLS');
       parkSQ
      > SELECT trim(BOTH 'SL' FROM 'SSparkSQLS');
       parkSQ
      > SELECT trim(LEADING 'SL' FROM 'SSparkSQLS');
       parkSQLS
      > SELECT trim(TRAILING 'SL' FROM 'SSparkSQLS');
       SSparkSQ
```

```
spark-sql> describe function extended ltrim;
Function: ltrim
Class: org.apache.spark.sql.catalyst.expressions.StringTrimLeft
Usage:
    ltrim(str) - Removes the leading space characters from `str`.

    ltrim(trimStr, str) - Removes the leading string which contains the characters from the trim string

Extended Usage:
    Arguments:
      * str - a string expression
      * trimStr - the trim string characters to trim, the default value is a single space

    Examples:
      > SELECT ltrim('    SparkSQL   ');
       SparkSQL
      > SELECT ltrim('Sp', 'SSparkSQLS');
       arkSQLS
```

```
spark-sql> describe function extended rtrim;
Function: rtrim
Class: org.apache.spark.sql.catalyst.expressions.StringTrimRight
Usage:
    rtrim(str) - Removes the trailing space characters from `str`.

    rtrim(trimStr, str) - Removes the trailing string which contains the characters from the trim string from the `str`

Extended Usage:
    Arguments:
      * str - a string expression
      * trimStr
```
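A quick way to exercise the documented variants from Scala (an editorial usage sketch, assuming a running SparkSession named `spark`; the expected results are taken from the examples above):

```scala
// Each query mirrors an example from the new documentation.
spark.sql("SELECT trim(BOTH 'SL' FROM 'SSparkSQLS')").show()     // parkSQ
spark.sql("SELECT trim(LEADING 'SL' FROM 'SSparkSQLS')").show()  // parkSQLS
spark.sql("SELECT trim(TRAILING 'SL' FROM 'SSparkSQLS')").show() // SSparkSQ
spark.sql("SELECT ltrim('Sp', 'SSparkSQLS')").show()             // arkSQLS
spark.sql("SELECT rtrim('LQSa', 'SSparkSQLS')").show()           // SSpark
```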
spark git commit: [SPARK-22109][SQL] Resolves type conflicts between strings and timestamps in partition column
Repository: spark
Updated Branches:
  refs/heads/master 50ada2a4d -> 04975a68b


[SPARK-22109][SQL] Resolves type conflicts between strings and timestamps in partition column

## What changes were proposed in this pull request?

This PR proposes to resolve type conflicts between strings and timestamps in partition column values. It looks like we need to set the time zone, since the resolution requires a cast between strings and timestamps.

```scala
val df = Seq((1, "2015-01-01 00:00:00"), (2, "2014-01-01 00:00:00"), (3, "blah")).toDF("i", "str")
val path = "/tmp/test.parquet"
df.write.format("parquet").partitionBy("str").save(path)
spark.read.parquet(path).show()
```

**Before**

```
java.util.NoSuchElementException: None.get
  at scala.None$.get(Option.scala:347)
  at scala.None$.get(Option.scala:345)
  at org.apache.spark.sql.catalyst.expressions.TimeZoneAwareExpression$class.timeZone(datetimeExpressions.scala:46)
  at org.apache.spark.sql.catalyst.expressions.Cast.timeZone$lzycompute(Cast.scala:172)
  at org.apache.spark.sql.catalyst.expressions.Cast.timeZone(Cast.scala:172)
  at org.apache.spark.sql.catalyst.expressions.Cast$$anonfun$castToString$3$$anonfun$apply$16.apply(Cast.scala:208)
  at org.apache.spark.sql.catalyst.expressions.Cast$$anonfun$castToString$3$$anonfun$apply$16.apply(Cast.scala:208)
  at org.apache.spark.sql.catalyst.expressions.Cast.org$apache$spark$sql$catalyst$expressions$Cast$$buildCast(Cast.scala:201)
  at org.apache.spark.sql.catalyst.expressions.Cast$$anonfun$castToString$3.apply(Cast.scala:207)
  at org.apache.spark.sql.catalyst.expressions.Cast.nullSafeEval(Cast.scala:533)
  at org.apache.spark.sql.catalyst.expressions.UnaryExpression.eval(Expression.scala:331)
  at org.apache.spark.sql.execution.datasources.PartitioningUtils$$anonfun$org$apache$spark$sql$execution$datasources$PartitioningUtils$$resolveTypeConflicts$1.apply(PartitioningUtils.scala:481)
  at org.apache.spark.sql.execution.datasources.PartitioningUtils$$anonfun$org$apache$spark$sql$execution$datasources$PartitioningUtils$$resolveTypeConflicts$1.apply(PartitioningUtils.scala:480)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
```

**After**

```
+---+-------------------+
|  i|                str|
+---+-------------------+
|  2|2014-01-01 00:00:00|
|  1|2015-01-01 00:00:00|
|  3|               blah|
+---+-------------------+
```

## How was this patch tested?

Unit tests added in `ParquetPartitionDiscoverySuite` and manual tests.

Author: hyukjinkwon

Closes #19331 from HyukjinKwon/SPARK-22109.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/04975a68
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/04975a68
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/04975a68

Branch: refs/heads/master
Commit: 04975a68b583a6175f93da52374108e5d4754d9a
Parents: 50ada2a
Author: hyukjinkwon
Authored: Sun Sep 24 00:05:17 2017 +0900
Committer: Takuya UESHIN
Committed: Sun Sep 24 00:05:17 2017 +0900

----------------------------------------------------------------------
 .../sql/execution/datasources/PartitioningUtils.scala   | 11 ++-
 .../parquet/ParquetPartitionDiscoverySuite.scala        | 12
 2 files changed, 18 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/04975a68/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
index 92358da..1c00c9e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
@@ -139,7 +139,7 @@ object PartitioningUtils {
       "root directory of the table. If there are multiple root directories, " +
         "please load them separately and then union them.")
 
-    val resolvedPartitionValues = resolvePartitions(pathsWithPartitionValues)
+    val resolvedPartitionValues = resolvePartitions(pathsWithPartitionValues, timeZone)
 
     // Creates the StructType which represents the partition columns.
     val fields = {
@@ -318,7 +318,8 @@ object PartitioningUtils {
    * }}}
    */
   def resolvePartitions(
-      pathsWithPartitionValues: Seq[(Path, PartitionValues)]): Seq[PartitionValues] = {
+      pathsWithPartitionValues: Seq[(Path, PartitionValues)],
+
spark git commit: [SPARK-22033][CORE] BufferHolder, other size checks should account for the specific VM array size limitations
Repository: spark
Updated Branches:
  refs/heads/master 3920af7d1 -> 50ada2a4d


[SPARK-22033][CORE] BufferHolder, other size checks should account for the specific VM array size limitations

## What changes were proposed in this pull request?

Try to avoid allocating an array bigger than Integer.MAX_VALUE - 8, which is the actual maximum array size on some JVMs, in several places.

## How was this patch tested?

Existing tests.

Author: Sean Owen

Closes #19266 from srowen/SPARK-22033.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/50ada2a4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/50ada2a4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/50ada2a4

Branch: refs/heads/master
Commit: 50ada2a4d31609b6c828158cad8e128c2f605b8d
Parents: 3920af7
Author: Sean Owen
Authored: Sat Sep 23 15:40:59 2017 +0100
Committer: Sean Owen
Committed: Sat Sep 23 15:40:59 2017 +0100

----------------------------------------------------------------------
 .../apache/spark/unsafe/array/LongArray.java    |  2 +-
 .../spark/unsafe/map/HashMapGrowthStrategy.java |  8 +++-
 .../spark/util/collection/CompactBuffer.scala   | 20 ++--
 .../util/collection/PartitionedPairBuffer.scala |  8 +---
 .../expressions/codegen/BufferHolder.java       | 11 ---
 .../vectorized/WritableColumnVector.java        |  2 +-
 6 files changed, 32 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/50ada2a4/common/unsafe/src/main/java/org/apache/spark/unsafe/array/LongArray.java
----------------------------------------------------------------------
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/array/LongArray.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/array/LongArray.java
index 1a3cdff..2cd39bd 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/array/LongArray.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/array/LongArray.java
@@ -39,7 +39,7 @@ public final class LongArray {
   private final long length;
 
   public LongArray(MemoryBlock memory) {
-    assert memory.size() < (long) Integer.MAX_VALUE * 8: "Array size > 4 billion elements";
+    assert memory.size() < (long) Integer.MAX_VALUE * 8: "Array size >= Integer.MAX_VALUE elements";
     this.memory = memory;
     this.baseObj = memory.getBaseObject();
     this.baseOffset = memory.getBaseOffset();

http://git-wip-us.apache.org/repos/asf/spark/blob/50ada2a4/core/src/main/java/org/apache/spark/unsafe/map/HashMapGrowthStrategy.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/spark/unsafe/map/HashMapGrowthStrategy.java b/core/src/main/java/org/apache/spark/unsafe/map/HashMapGrowthStrategy.java
index 20654e4..b8c2294 100644
--- a/core/src/main/java/org/apache/spark/unsafe/map/HashMapGrowthStrategy.java
+++ b/core/src/main/java/org/apache/spark/unsafe/map/HashMapGrowthStrategy.java
@@ -30,11 +30,17 @@ public interface HashMapGrowthStrategy {
 
   HashMapGrowthStrategy DOUBLING = new Doubling();
 
   class Doubling implements HashMapGrowthStrategy {
+
+    // Some JVMs can't allocate arrays of length Integer.MAX_VALUE; actual max is somewhat
+    // smaller. Be conservative and lower the cap a little.
+    private static final int ARRAY_MAX = Integer.MAX_VALUE - 8;
+
     @Override
     public int nextCapacity(int currentCapacity) {
       assert (currentCapacity > 0);
+      int doubleCapacity = currentCapacity * 2;
       // Guard against overflow
-      return (currentCapacity * 2 > 0) ? (currentCapacity * 2) : Integer.MAX_VALUE;
+      return (doubleCapacity > 0 && doubleCapacity <= ARRAY_MAX) ?
+          doubleCapacity : ARRAY_MAX;
     }
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/50ada2a4/core/src/main/scala/org/apache/spark/util/collection/CompactBuffer.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/util/collection/CompactBuffer.scala b/core/src/main/scala/org/apache/spark/util/collection/CompactBuffer.scala
index 4d43d8d..f5d2fa1 100644
--- a/core/src/main/scala/org/apache/spark/util/collection/CompactBuffer.scala
+++ b/core/src/main/scala/org/apache/spark/util/collection/CompactBuffer.scala
@@ -126,22 +126,22 @@ private[spark] class CompactBuffer[T: ClassTag] extends Seq[T] with Serializable
 
   /** Increase our size to newSize and grow the backing array if needed. */
   private def growToSize(newSize: Int): Unit = {
-    if (newSize < 0) {
-      throw new UnsupportedOperationException("Can't grow buffer past Int.MaxValue elements")
+    // Some JVMs can't allocate arrays of length Integer.MAX_VALUE; actual max is somewhat
+    // smaller. Be conservative and lower the cap a little.
+    val
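As an editorial aside, the capped-doubling pattern the patch applies across these classes fits in a few lines of Scala (a sketch, not code from the commit):

```scala
// Grow by doubling, but clamp to Int.MaxValue - 8: the practical upper
// bound on array length for some JVMs, per the comments in the patch.
val ArrayMax: Int = Int.MaxValue - 8

def nextCapacity(current: Int): Int = {
  require(current > 0)
  val doubled = current * 2
  // A negative product means Int overflow; clamp in that case too.
  if (doubled > 0 && doubled <= ArrayMax) doubled else ArrayMax
}

assert(nextCapacity(64) == 128)
assert(nextCapacity(Int.MaxValue / 2 + 1) == ArrayMax) // doubling overflows
```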
spark git commit: [SPARK-22099] The 'job ids' list style needs to be changed in the SQL page.
Repository: spark
Updated Branches:
  refs/heads/master c11f24a94 -> 3920af7d1


[SPARK-22099] The 'job ids' list style needs to be changed in the SQL page.

## What changes were proposed in this pull request?

The 'job ids' list style needs to be changed in the SQL page, for two reasons:

1. If each job id is rendered on its own line and there are many job ids, the table row becomes very tall. As shown below:

![3](https://user-images.githubusercontent.com/26266482/30732242-2fb11442-9fa4-11e7-98ea-80a98f280243.png)

2. The style should be consistent with the 'JDBC / ODBC Server' page, so this change modifies it to match. As shown below:

![2](https://user-images.githubusercontent.com/26266482/30732257-3c550820-9fa4-11e7-9d8e-467d3011e0ac.png)

My changes are as follows:

![6](https://user-images.githubusercontent.com/26266482/30732318-8f61d8b8-9fa4-11e7-8af5-037ed12b13c9.png)

![5](https://user-images.githubusercontent.com/26266482/30732284-5b6a6c00-9fa4-11e7-8db9-3a2291f37ae6.png)

## How was this patch tested?

Manual tests.

Author: guoxiaolong

Closes #19320 from guoxiaolongzte/SPARK-22099.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3920af7d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3920af7d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3920af7d

Branch: refs/heads/master
Commit: 3920af7d1dcf1631d3930b1b780116562741b35d
Parents: c11f24a
Author: guoxiaolong
Authored: Sat Sep 23 15:39:53 2017 +0100
Committer: Sean Owen
Committed: Sat Sep 23 15:39:53 2017 +0100

----------------------------------------------------------------------
 .../spark/sql/execution/ui/AllExecutionsPage.scala | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/3920af7d/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala
index 64c9d90..41929ed 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala
@@ -88,13 +88,19 @@ private[ui] abstract class ExecutionTable(
     val duration = executionUIData.completionTime.getOrElse(currentTime) - submissionTime
 
     val runningJobs = executionUIData.runningJobs.map { jobId =>
-      <a href={jobURL(jobId)}>{jobId.toString}</a>
+      <a href={jobURL(jobId)}>
+        [{jobId.toString}]
+      </a>
     }
     val succeededJobs = executionUIData.succeededJobs.sorted.map { jobId =>
-      <a href={jobURL(jobId)}>{jobId.toString}</a>
+      <a href={jobURL(jobId)}>
+        [{jobId.toString}]
+      </a>
     }
     val failedJobs = executionUIData.failedJobs.sorted.map { jobId =>
-      <a href={jobURL(jobId)}>{jobId.toString}</a>
+      <a href={jobURL(jobId)}>
+        [{jobId.toString}]
+      </a>
     }
spark git commit: [SPARK-22092] Reallocation in OffHeapColumnVector.reserveInternal corrupts struct and array data
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 c0a34a9ff -> 1a829df94


[SPARK-22092] Reallocation in OffHeapColumnVector.reserveInternal corrupts struct and array data

`OffHeapColumnVector.reserveInternal()` will only copy already-inserted values during reallocation if `data != null`. For vectors containing arrays or structs this is incorrect, since the field `data` is not used at all there; we need to check `nulls` instead.

Adds new tests to `ColumnVectorSuite` that reproduce the errors.

Author: Ala Luszczak

Closes #19323 from ala/port-vector-realloc.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1a829df9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1a829df9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1a829df9

Branch: refs/heads/branch-2.2
Commit: 1a829df94a9cfee4395353b0f93fb5bcd628dce4
Parents: c0a34a9
Author: Ala Luszczak
Authored: Sat Sep 23 16:09:47 2017 +0200
Committer: Herman van Hovell
Committed: Sat Sep 23 16:09:47 2017 +0200

----------------------------------------------------------------------
 .../vectorized/OffHeapColumnVector.java |   2 +-
 .../vectorized/ColumnVectorSuite.scala  | 227 +++
 2 files changed, 228 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/1a829df9/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java
----------------------------------------------------------------------
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java
index a7d3744..cda7f2f 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java
@@ -436,7 +436,7 @@ public final class OffHeapColumnVector extends ColumnVector {
   // Split out the slow path.
   @Override
   protected void reserveInternal(int newCapacity) {
-    int oldCapacity = (this.data == 0L) ? 0 : capacity;
+    int oldCapacity = (nulls == 0L) ? 0 : capacity;
     if (this.resultArray != null) {
       this.lengthData =
           Platform.reallocateMemory(lengthData, oldCapacity * 4, newCapacity * 4);

http://git-wip-us.apache.org/repos/asf/spark/blob/1a829df9/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala
new file mode 100644
index 000..19b93c9
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala
@@ -0,0 +1,227 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.vectorized
+
+import org.scalatest.BeforeAndAfterEach
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.catalyst.util.ArrayData
+import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.UTF8String
+
+class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach {
+
+  var testVector: ColumnVector = _
+
+  private def allocate(capacity: Int, dt: DataType): ColumnVector = {
+    new OnHeapColumnVector(capacity, dt)
+  }
+
+  override def afterEach(): Unit = {
+    testVector.close()
+  }
+
+  test("boolean") {
+    testVector = allocate(10, BooleanType)
+    (0 until 10).foreach { i =>
+      testVector.appendBoolean(i % 2 == 0)
+    }
+
+    val array = new ColumnVector.Array(testVector)
+
+    (0 until 10).foreach { i =>
+      assert(array.getBoolean(i) === (i % 2 == 0))
+    }
+  }
+
+  test("byte") {
+    testVector = allocate(10, ByteType)
+    (0 until 10).foreach { i =>
+      testVector.appendByte(i.toByte)
+    }
+
+    val array =
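To make the bug concrete, here is a simplified on-heap sketch (an editorial, hypothetical class, not Spark code; the real vector manages raw off-heap addresses where "unallocated" is `0L`) of the guard `reserveInternal` needs:

```scala
// The "was anything allocated before?" test must key off a buffer that
// every vector type uses (nulls). The buggy version keyed off data,
// which array/struct vectors never allocate, so oldCapacity came back 0
// and their existing null flags were dropped on reallocation.
class SketchVector {
  private var nulls: Array[Byte] = _ // allocated for every vector type
  private var data: Array[Byte] = _  // left unallocated for arrays/structs
  private var capacity: Int = 0

  def reserve(newCapacity: Int): Unit = {
    val oldCapacity = if (nulls == null) 0 else capacity // not: data == null
    val newNulls = new Array[Byte](newCapacity)
    if (oldCapacity > 0) System.arraycopy(nulls, 0, newNulls, 0, oldCapacity)
    nulls = newNulls
    capacity = newCapacity
  }
}
```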
spark git commit: [SPARK-18136] Fix SPARK_JARS_DIR for Python pip install on Windows
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 de6274a58 -> c0a34a9ff


[SPARK-18136] Fix SPARK_JARS_DIR for Python pip install on Windows

## What changes were proposed in this pull request?

Fix the setup of `SPARK_JARS_DIR` on Windows: it looks for the `%SPARK_HOME%\RELEASE` file when it should look for the `%SPARK_HOME%\jars` directory, and the RELEASE file is not included in the `pip` build of PySpark.

## How was this patch tested?

Local install of PySpark on Anaconda 4.4.0 (Python 3.6.1).

Author: Jakub Nowacki

Closes #19310 from jsnowacki/master.

(cherry picked from commit c11f24a94007bbaad0835645843e776507094071)
Signed-off-by: hyukjinkwon


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c0a34a9f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c0a34a9f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c0a34a9f

Branch: refs/heads/branch-2.2
Commit: c0a34a9fff0912b3f1ae508e43f1fae53a45afae
Parents: de6274a
Author: Jakub Nowacki
Authored: Sat Sep 23 21:04:10 2017 +0900
Committer: hyukjinkwon
Committed: Sat Sep 23 21:04:26 2017 +0900

----------------------------------------------------------------------
 bin/spark-class2.cmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/c0a34a9f/bin/spark-class2.cmd
----------------------------------------------------------------------
diff --git a/bin/spark-class2.cmd b/bin/spark-class2.cmd
index f6157f4..a93fd2f 100644
--- a/bin/spark-class2.cmd
+++ b/bin/spark-class2.cmd
@@ -29,7 +29,7 @@ if "x%1"=="x" (
 )
 
 rem Find Spark jars.
-if exist "%SPARK_HOME%\RELEASE" (
+if exist "%SPARK_HOME%\jars" (
   set SPARK_JARS_DIR="%SPARK_HOME%\jars"
 ) else (
   set SPARK_JARS_DIR="%SPARK_HOME%\assembly\target\scala-%SPARK_SCALA_VERSION%\jars"
spark git commit: [SPARK-18136] Fix SPARK_JARS_DIR for Python pip install on Windows
Repository: spark
Updated Branches:
  refs/heads/branch-2.1 03db72149 -> 0b3e7cc6a


[SPARK-18136] Fix SPARK_JARS_DIR for Python pip install on Windows

## What changes were proposed in this pull request?

Fix the setup of `SPARK_JARS_DIR` on Windows: it looks for the `%SPARK_HOME%\RELEASE` file when it should look for the `%SPARK_HOME%\jars` directory, and the RELEASE file is not included in the `pip` build of PySpark.

## How was this patch tested?

Local install of PySpark on Anaconda 4.4.0 (Python 3.6.1).

Author: Jakub Nowacki

Closes #19310 from jsnowacki/master.

(cherry picked from commit c11f24a94007bbaad0835645843e776507094071)
Signed-off-by: hyukjinkwon


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0b3e7cc6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0b3e7cc6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0b3e7cc6

Branch: refs/heads/branch-2.1
Commit: 0b3e7cc6ac29c38b04dbfd6d6bf81fe9e2ebd7db
Parents: 03db721
Author: Jakub Nowacki
Authored: Sat Sep 23 21:04:10 2017 +0900
Committer: hyukjinkwon
Committed: Sat Sep 23 21:05:04 2017 +0900

----------------------------------------------------------------------
 bin/spark-class2.cmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/0b3e7cc6/bin/spark-class2.cmd
----------------------------------------------------------------------
diff --git a/bin/spark-class2.cmd b/bin/spark-class2.cmd
index f6157f4..a93fd2f 100644
--- a/bin/spark-class2.cmd
+++ b/bin/spark-class2.cmd
@@ -29,7 +29,7 @@ if "x%1"=="x" (
 )
 
 rem Find Spark jars.
-if exist "%SPARK_HOME%\RELEASE" (
+if exist "%SPARK_HOME%\jars" (
   set SPARK_JARS_DIR="%SPARK_HOME%\jars"
 ) else (
   set SPARK_JARS_DIR="%SPARK_HOME%\assembly\target\scala-%SPARK_SCALA_VERSION%\jars"
spark git commit: [SPARK-18136] Fix SPARK_JARS_DIR for Python pip install on Windows
Repository: spark
Updated Branches:
  refs/heads/master f180b6534 -> c11f24a94


[SPARK-18136] Fix SPARK_JARS_DIR for Python pip install on Windows

## What changes were proposed in this pull request?

Fix the setup of `SPARK_JARS_DIR` on Windows: it looks for the `%SPARK_HOME%\RELEASE` file when it should look for the `%SPARK_HOME%\jars` directory, and the RELEASE file is not included in the `pip` build of PySpark.

## How was this patch tested?

Local install of PySpark on Anaconda 4.4.0 (Python 3.6.1).

Author: Jakub Nowacki

Closes #19310 from jsnowacki/master.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c11f24a9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c11f24a9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c11f24a9

Branch: refs/heads/master
Commit: c11f24a94007bbaad0835645843e776507094071
Parents: f180b65
Author: Jakub Nowacki
Authored: Sat Sep 23 21:04:10 2017 +0900
Committer: hyukjinkwon
Committed: Sat Sep 23 21:04:10 2017 +0900

----------------------------------------------------------------------
 bin/spark-class2.cmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/c11f24a9/bin/spark-class2.cmd
----------------------------------------------------------------------
diff --git a/bin/spark-class2.cmd b/bin/spark-class2.cmd
index f6157f4..a93fd2f 100644
--- a/bin/spark-class2.cmd
+++ b/bin/spark-class2.cmd
@@ -29,7 +29,7 @@ if "x%1"=="x" (
 )
 
 rem Find Spark jars.
-if exist "%SPARK_HOME%\RELEASE" (
+if exist "%SPARK_HOME%\jars" (
   set SPARK_JARS_DIR="%SPARK_HOME%\jars"
 ) else (
   set SPARK_JARS_DIR="%SPARK_HOME%\assembly\target\scala-%SPARK_SCALA_VERSION%\jars"