Git Push Summary

2015-04-10 Thread pwendell
Repository: spark
Updated Tags:  refs/tags/v1.3.1-rc3 [created] 3e8391327




[2/2] spark git commit: Preparing Spark release v1.3.1-rc3

2015-04-10 Thread pwendell
Preparing Spark release v1.3.1-rc3


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3e839132
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3e839132
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3e839132

Branch: refs/heads/branch-1.3
Commit: 3e8391327ba586eaf54447043bd526d919043a44
Parents: 30d906e
Author: Patrick Wendell patr...@databricks.com
Authored: Sat Apr 11 04:04:37 2015 +
Committer: Patrick Wendell patr...@databricks.com
Committed: Sat Apr 11 04:04:37 2015 +

--
 assembly/pom.xml  | 2 +-
 bagel/pom.xml | 2 +-
 core/pom.xml  | 2 +-
 examples/pom.xml  | 2 +-
 external/flume-sink/pom.xml   | 2 +-
 external/flume/pom.xml| 2 +-
 external/kafka-assembly/pom.xml   | 2 +-
 external/kafka/pom.xml| 2 +-
 external/mqtt/pom.xml | 2 +-
 external/twitter/pom.xml  | 2 +-
 external/zeromq/pom.xml   | 2 +-
 extras/java8-tests/pom.xml| 2 +-
 extras/kinesis-asl/pom.xml| 2 +-
 extras/spark-ganglia-lgpl/pom.xml | 2 +-
 graphx/pom.xml| 2 +-
 mllib/pom.xml | 2 +-
 network/common/pom.xml| 2 +-
 network/shuffle/pom.xml   | 2 +-
 network/yarn/pom.xml  | 2 +-
 pom.xml   | 2 +-
 repl/pom.xml  | 2 +-
 sql/catalyst/pom.xml  | 2 +-
 sql/core/pom.xml  | 2 +-
 sql/hive-thriftserver/pom.xml | 2 +-
 sql/hive/pom.xml  | 2 +-
 streaming/pom.xml | 2 +-
 tools/pom.xml | 2 +-
 yarn/pom.xml  | 2 +-
 28 files changed, 28 insertions(+), 28 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/3e839132/assembly/pom.xml
--
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 114dde7..67bebfc 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.3.1-SNAPSHOT</version>
+    <version>1.3.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/spark/blob/3e839132/bagel/pom.xml
--
diff --git a/bagel/pom.xml b/bagel/pom.xml
index dea41f8..c7750a2 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.3.1-SNAPSHOT</version>
+    <version>1.3.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/spark/blob/3e839132/core/pom.xml
--
diff --git a/core/pom.xml b/core/pom.xml
index 9a79d70..a3dc28f 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.3.1-SNAPSHOT</version>
+    <version>1.3.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/spark/blob/3e839132/examples/pom.xml
--
diff --git a/examples/pom.xml b/examples/pom.xml
index 73ab234..9f03cbd 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.3.1-SNAPSHOT</version>
+    <version>1.3.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/spark/blob/3e839132/external/flume-sink/pom.xml
--
diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml
index 1a5aaf5..46adbe2 100644
--- a/external/flume-sink/pom.xml
+++ b/external/flume-sink/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.3.1-SNAPSHOT</version>
+    <version>1.3.1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/spark/blob/3e839132/external/flume/pom.xml
--
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index d5539d9..5fc589f 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.3.1-SNAPSHOT</version>
+    <version>1.3.1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 


[1/2] spark git commit: Preparing development version 1.3.2-SNAPSHOT

2015-04-10 Thread pwendell
Repository: spark
Updated Branches:
  refs/heads/branch-1.3 30d906e86 -> ffc821e20


Preparing development version 1.3.2-SNAPSHOT


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ffc821e2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ffc821e2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ffc821e2

Branch: refs/heads/branch-1.3
Commit: ffc821e20712628185c3328e30b119d95499f8d6
Parents: 3e83913
Author: Patrick Wendell patr...@databricks.com
Authored: Sat Apr 11 04:04:37 2015 +
Committer: Patrick Wendell patr...@databricks.com
Committed: Sat Apr 11 04:04:37 2015 +

--
 assembly/pom.xml  | 2 +-
 bagel/pom.xml | 2 +-
 core/pom.xml  | 2 +-
 examples/pom.xml  | 2 +-
 external/flume-sink/pom.xml   | 2 +-
 external/flume/pom.xml| 2 +-
 external/kafka-assembly/pom.xml   | 2 +-
 external/kafka/pom.xml| 2 +-
 external/mqtt/pom.xml | 2 +-
 external/twitter/pom.xml  | 2 +-
 external/zeromq/pom.xml   | 2 +-
 extras/java8-tests/pom.xml| 2 +-
 extras/kinesis-asl/pom.xml| 2 +-
 extras/spark-ganglia-lgpl/pom.xml | 2 +-
 graphx/pom.xml| 2 +-
 mllib/pom.xml | 2 +-
 network/common/pom.xml| 2 +-
 network/shuffle/pom.xml   | 2 +-
 network/yarn/pom.xml  | 2 +-
 pom.xml   | 2 +-
 repl/pom.xml  | 2 +-
 sql/catalyst/pom.xml  | 2 +-
 sql/core/pom.xml  | 2 +-
 sql/hive-thriftserver/pom.xml | 2 +-
 sql/hive/pom.xml  | 2 +-
 streaming/pom.xml | 2 +-
 tools/pom.xml | 2 +-
 yarn/pom.xml  | 2 +-
 28 files changed, 28 insertions(+), 28 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ffc821e2/assembly/pom.xml
--
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 67bebfc..0952cd2 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.3.1</version>
+    <version>1.3.2-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/spark/blob/ffc821e2/bagel/pom.xml
--
diff --git a/bagel/pom.xml b/bagel/pom.xml
index c7750a2..602cc7b 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.3.1</version>
+    <version>1.3.2-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/spark/blob/ffc821e2/core/pom.xml
--
diff --git a/core/pom.xml b/core/pom.xml
index a3dc28f..5971d05 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.3.1</version>
+    <version>1.3.2-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/spark/blob/ffc821e2/examples/pom.xml
--
diff --git a/examples/pom.xml b/examples/pom.xml
index 9f03cbd..e1a3ecc 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.3.1</version>
+    <version>1.3.2-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/spark/blob/ffc821e2/external/flume-sink/pom.xml
--
diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml
index 46adbe2..f46a2a0 100644
--- a/external/flume-sink/pom.xml
+++ b/external/flume-sink/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.3.1</version>
+    <version>1.3.2-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/spark/blob/ffc821e2/external/flume/pom.xml
--
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index 5fc589f..02331e8 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.3.1</version>
+

spark git commit: [SPARK-5969][PySpark] Fix descending pyspark.rdd.sortByKey.

2015-04-10 Thread joshrosen
Repository: spark
Updated Branches:
  refs/heads/master b9baa4cd9 -> 0375134f4


[SPARK-5969][PySpark] Fix descending pyspark.rdd.sortByKey.

The samples should always be sorted in ascending order, because
bisect.bisect_left is used on them. The reverse order of the result is already
achieved in rangePartitioner by reversing the found index.

The current implementation also works, but it always uses only two
partitions -- the first one and the last one -- because bisect_left returns
either the beginning or the end of the list for a descending sequence.
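
A minimal standalone Python sketch of the failure mode, using toy boundaries
rather than real sampled keys: bisect.bisect_left expects an ascending list,
so descending boundaries send every key to the first or last bucket, while
ascending boundaries spread keys across all partitions.

import bisect

keys = list(range(10))          # keys to be range-partitioned
ascending_bounds = [3, 6]       # boundaries as the fixed code produces them
descending_bounds = [6, 3]      # boundaries from a reverse-sorted sample

# Ascending boundaries: keys are spread over partitions 0, 1 and 2.
print([bisect.bisect_left(ascending_bounds, k) for k in keys])
# [0, 0, 0, 0, 1, 1, 1, 2, 2, 2]

# Descending boundaries violate bisect_left's precondition: only the first
# and last partitions are ever chosen, which is the SPARK-5969 symptom.
print([bisect.bisect_left(descending_bounds, k) for k in keys])
# [0, 0, 0, 0, 2, 2, 2, 2, 2, 2]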

Author: Milan Straka f...@ucw.cz

This patch had conflicts when merged, resolved by
Committer: Josh Rosen joshro...@databricks.com

Closes #4761 from foxik/fix-descending-sort and squashes the following commits:

95896b5 [Milan Straka] Add regression test for SPARK-5969.
5757490 [Milan Straka] Fix descending pyspark.rdd.sortByKey.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0375134f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0375134f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0375134f

Branch: refs/heads/master
Commit: 0375134f42197f2e29aa865a513cda381f0a1445
Parents: b9baa4c
Author: Milan Straka f...@ucw.cz
Authored: Fri Apr 10 13:50:32 2015 -0700
Committer: Josh Rosen joshro...@databricks.com
Committed: Fri Apr 10 13:50:32 2015 -0700

--
 python/pyspark/rdd.py   |  2 +-
 python/pyspark/tests.py | 11 +++
 2 files changed, 12 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/0375134f/python/pyspark/rdd.py
--
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 1b18789..c8e54ed 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -595,7 +595,7 @@ class RDD(object):
         maxSampleSize = numPartitions * 20.0  # constant from Spark's RangePartitioner
         fraction = min(maxSampleSize / max(rddSize, 1), 1.0)
         samples = self.sample(False, fraction, 1).map(lambda (k, v): k).collect()
-        samples = sorted(samples, reverse=(not ascending), key=keyfunc)
+        samples = sorted(samples, key=keyfunc)
 
         # we have numPartitions many parts but one of the them has
         # an implicit boundary

http://git-wip-us.apache.org/repos/asf/spark/blob/0375134f/python/pyspark/tests.py
--
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index 0bd5d20..0e3721b 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -829,6 +829,17 @@ class RDDTests(ReusedPySparkTestCase):
         rdd = self.sc.parallelize(range(1 << 20)).map(lambda x: str(x))
         rdd._jrdd.first()
 
+    def test_sortByKey_uses_all_partitions_not_only_first_and_last(self):
+        # Regression test for SPARK-5969
+        seq = [(i * 59 % 101, i) for i in range(101)]  # unsorted sequence
+        rdd = self.sc.parallelize(seq)
+        for ascending in [True, False]:
+            sort = rdd.sortByKey(ascending=ascending, numPartitions=5)
+            self.assertEqual(sort.collect(), sorted(seq, reverse=not ascending))
+            sizes = sort.glom().map(len).collect()
+            for size in sizes:
+                self.assertGreater(size, 0)
+
 
 class ProfilerTests(PySparkTestCase):
 





spark git commit: [SPARK-6216] [PySpark] check the python version in worker

2015-04-10 Thread joshrosen
Repository: spark
Updated Branches:
  refs/heads/master 0375134f4 -> 4740d6a15


[SPARK-6216] [PySpark] check the python version in worker

Author: Davies Liu dav...@databricks.com

Closes #5404 from davies/check_version and squashes the following commits:

e559248 [Davies Liu] add tests
ec33b5f [Davies Liu] check the python version in worker
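
A rough standalone sketch of the mechanism, using hypothetical driver_side and
worker_side helpers rather than the real PySpark entry points: the driver ships
its (major, minor) Python version next to the serialized command, and the
worker raises if its own interpreter differs.

import sys

def driver_side(command):
    # Sketch of the driver half: bundle the driver's (major, minor) version
    # with the serialized command, as _prepare_for_python_RDD now does.
    return (command, sys.version_info[:2])

def worker_side(payload):
    # Sketch of the worker half: refuse to run under a different minor version.
    command, driver_version = payload
    if driver_version != sys.version_info[:2]:
        raise Exception("Python in worker has different version %s than that in "
                        "driver %s, PySpark cannot run with different minor versions"
                        % (sys.version_info[:2], driver_version))
    return command

# Same interpreter on both sides, so the check passes.
print(worker_side(driver_side("pickled command bytes")))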


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4740d6a1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4740d6a1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4740d6a1

Branch: refs/heads/master
Commit: 4740d6a158cb4d35408a95265c5b950b9e9628a3
Parents: 0375134
Author: Davies Liu dav...@databricks.com
Authored: Fri Apr 10 14:04:53 2015 -0700
Committer: Josh Rosen joshro...@databricks.com
Committed: Fri Apr 10 14:04:53 2015 -0700

--
 python/pyspark/rdd.py|  2 +-
 python/pyspark/tests.py  | 16 
 python/pyspark/worker.py |  6 +-
 3 files changed, 22 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/4740d6a1/python/pyspark/rdd.py
--
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index c8e54ed..c9ac95d 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -2233,7 +2233,7 @@ class RDD(object):
 def _prepare_for_python_RDD(sc, command, obj=None):
     # the serialized command will be compressed by broadcast
     ser = CloudPickleSerializer()
-    pickled_command = ser.dumps(command)
+    pickled_command = ser.dumps((command, sys.version_info[:2]))
     if len(pickled_command) > (1 << 20):  # 1M
         broadcast = sc.broadcast(pickled_command)
         pickled_command = ser.dumps(broadcast)

http://git-wip-us.apache.org/repos/asf/spark/blob/4740d6a1/python/pyspark/tests.py
--
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index 0e3721b..b938b9c 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -35,6 +35,8 @@ import itertools
 import threading
 import hashlib
 
+from py4j.protocol import Py4JJavaError
+
 if sys.version_info[:2] <= (2, 6):
     try:
         import unittest2 as unittest
@@ -1494,6 +1496,20 @@ class WorkerTests(PySparkTestCase):
         self.assertTrue(not t.isAlive())
         self.assertEqual(10, rdd.count())
 
+    def test_with_different_versions_of_python(self):
+        rdd = self.sc.parallelize(range(10))
+        rdd.count()
+        version = sys.version_info
+        sys.version_info = (2, 0, 0)
+        log4j = self.sc._jvm.org.apache.log4j
+        old_level = log4j.LogManager.getRootLogger().getLevel()
+        log4j.LogManager.getRootLogger().setLevel(log4j.Level.FATAL)
+        try:
+            self.assertRaises(Py4JJavaError, lambda: rdd.count())
+        finally:
+            sys.version_info = version
+            log4j.LogManager.getRootLogger().setLevel(old_level)
+
 
 class SparkSubmitTests(unittest.TestCase):
 

http://git-wip-us.apache.org/repos/asf/spark/blob/4740d6a1/python/pyspark/worker.py
--
diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py
index 8a93c32..452d6fa 100644
--- a/python/pyspark/worker.py
+++ b/python/pyspark/worker.py
@@ -88,7 +88,11 @@ def main(infile, outfile):
         command = pickleSer._read_with_length(infile)
         if isinstance(command, Broadcast):
             command = pickleSer.loads(command.value)
-        (func, profiler, deserializer, serializer) = command
+        (func, profiler, deserializer, serializer), version = command
+        if version != sys.version_info[:2]:
+            raise Exception(("Python in worker has different version %s than that in " +
+                            "driver %s, PySpark cannot run with different minor versions") %
+                            (sys.version_info[:2], version))
         init_time = time.time()
 
         def process():





spark git commit: [SPARK-5969][PySpark] Fix descending pyspark.rdd.sortByKey.

2015-04-10 Thread joshrosen
Repository: spark
Updated Branches:
  refs/heads/branch-1.3 ec3e76f1e -> 48321b83d


[SPARK-5969][PySpark] Fix descending pyspark.rdd.sortByKey.

The samples should always be sorted in ascending order, because
bisect.bisect_left is used on them. The reverse order of the result is already
achieved in rangePartitioner by reversing the found index.

The current implementation also works, but it always uses only two
partitions -- the first one and the last one -- because bisect_left returns
either the beginning or the end of the list for a descending sequence.

Author: Milan Straka f...@ucw.cz

This patch had conflicts when merged, resolved by
Committer: Josh Rosen joshro...@databricks.com

Closes #4761 from foxik/fix-descending-sort and squashes the following commits:

95896b5 [Milan Straka] Add regression test for SPARK-5969.
5757490 [Milan Straka] Fix descending pyspark.rdd.sortByKey.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/48321b83
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/48321b83
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/48321b83

Branch: refs/heads/branch-1.3
Commit: 48321b83dbe3fb1f0c3b7d7c4b47c3b1ffa06d68
Parents: ec3e76f
Author: Milan Straka f...@ucw.cz
Authored: Fri Apr 10 13:50:32 2015 -0700
Committer: Josh Rosen joshro...@databricks.com
Committed: Fri Apr 10 15:20:55 2015 -0700

--
 python/pyspark/rdd.py   |  2 +-
 python/pyspark/tests.py | 11 +++
 2 files changed, 12 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/48321b83/python/pyspark/rdd.py
--
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index a1974de..eb8c6b4 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -589,7 +589,7 @@ class RDD(object):
         maxSampleSize = numPartitions * 20.0  # constant from Spark's RangePartitioner
         fraction = min(maxSampleSize / max(rddSize, 1), 1.0)
         samples = self.sample(False, fraction, 1).map(lambda (k, v): k).collect()
-        samples = sorted(samples, reverse=(not ascending), key=keyfunc)
+        samples = sorted(samples, key=keyfunc)
 
         # we have numPartitions many parts but one of the them has
         # an implicit boundary

http://git-wip-us.apache.org/repos/asf/spark/blob/48321b83/python/pyspark/tests.py
--
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index dd8d3b1..c10f857 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -787,6 +787,17 @@ class RDDTests(ReusedPySparkTestCase):
         rdd = self.sc.parallelize(range(1 << 20)).map(lambda x: str(x))
         rdd._jrdd.first()
 
+    def test_sortByKey_uses_all_partitions_not_only_first_and_last(self):
+        # Regression test for SPARK-5969
+        seq = [(i * 59 % 101, i) for i in range(101)]  # unsorted sequence
+        rdd = self.sc.parallelize(seq)
+        for ascending in [True, False]:
+            sort = rdd.sortByKey(ascending=ascending, numPartitions=5)
+            self.assertEqual(sort.collect(), sorted(seq, reverse=not ascending))
+            sizes = sort.glom().map(len).collect()
+            for size in sizes:
+                self.assertGreater(size, 0)
+
 
 class ProfilerTests(PySparkTestCase):
 





spark git commit: [SPARK-5969][PySpark] Fix descending pyspark.rdd.sortByKey.

2015-04-10 Thread joshrosen
Repository: spark
Updated Branches:
  refs/heads/branch-1.2 7a1583917 -> daec1c635


[SPARK-5969][PySpark] Fix descending pyspark.rdd.sortByKey.

The samples should always be sorted in ascending order, because
bisect.bisect_left is used on them. The reverse order of the result is already
achieved in rangePartitioner by reversing the found index.

The current implementation also works, but it always uses only two
partitions -- the first one and the last one -- because bisect_left returns
either the beginning or the end of the list for a descending sequence.

Author: Milan Straka f...@ucw.cz

This patch had conflicts when merged, resolved by
Committer: Josh Rosen joshro...@databricks.com

Closes #4761 from foxik/fix-descending-sort and squashes the following commits:

95896b5 [Milan Straka] Add regression test for SPARK-5969.
5757490 [Milan Straka] Fix descending pyspark.rdd.sortByKey.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/daec1c63
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/daec1c63
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/daec1c63

Branch: refs/heads/branch-1.2
Commit: daec1c6353e5e4daac2f082f714e45a95939a538
Parents: 7a15839
Author: Milan Straka f...@ucw.cz
Authored: Fri Apr 10 13:50:32 2015 -0700
Committer: Josh Rosen joshro...@databricks.com
Committed: Fri Apr 10 15:21:50 2015 -0700

--
 python/pyspark/rdd.py   |  2 +-
 python/pyspark/tests.py | 11 +++
 2 files changed, 12 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/daec1c63/python/pyspark/rdd.py
--
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 5f7806b..9463519 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -587,7 +587,7 @@ class RDD(object):
         maxSampleSize = numPartitions * 20.0  # constant from Spark's RangePartitioner
         fraction = min(maxSampleSize / max(rddSize, 1), 1.0)
         samples = self.sample(False, fraction, 1).map(lambda (k, v): k).collect()
-        samples = sorted(samples, reverse=(not ascending), key=keyfunc)
+        samples = sorted(samples, key=keyfunc)
 
         # we have numPartitions many parts but one of the them has
         # an implicit boundary

http://git-wip-us.apache.org/repos/asf/spark/blob/daec1c63/python/pyspark/tests.py
--
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index 2e490a0..7cb4645 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -744,6 +744,17 @@ class RDDTests(ReusedPySparkTestCase):
         rdd = self.sc.parallelize(range(1 << 20)).map(lambda x: str(x))
         rdd._jrdd.first()
 
+    def test_sortByKey_uses_all_partitions_not_only_first_and_last(self):
+        # Regression test for SPARK-5969
+        seq = [(i * 59 % 101, i) for i in range(101)]  # unsorted sequence
+        rdd = self.sc.parallelize(seq)
+        for ascending in [True, False]:
+            sort = rdd.sortByKey(ascending=ascending, numPartitions=5)
+            self.assertEqual(sort.collect(), sorted(seq, reverse=not ascending))
+            sizes = sort.glom().map(len).collect()
+            for size in sizes:
+                self.assertGreater(size, 0)
+
 
 class ProfilerTests(PySparkTestCase):
 





spark git commit: [SPARK-6850] [SparkR] use one partition when we need to compare the whole result

2015-04-10 Thread shivaram
Repository: spark
Updated Branches:
  refs/heads/master 4740d6a15 -> 68ecdb7f9


[SPARK-6850] [SparkR] use one partition when we need to compare the whole result

Author: Davies Liu dav...@databricks.com

Closes #5460 from davies/r_test and squashes the following commits:

0a593ce [Davies Liu] use one partition when we need to compare the whole result


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/68ecdb7f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/68ecdb7f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/68ecdb7f

Branch: refs/heads/master
Commit: 68ecdb7f99ae30f7c04c33a47ab7f59a3836f2a4
Parents: 4740d6a
Author: Davies Liu dav...@databricks.com
Authored: Fri Apr 10 15:35:45 2015 -0700
Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu
Committed: Fri Apr 10 15:35:45 2015 -0700

--
 R/pkg/inst/tests/test_binaryFile.R | 4 ++--
 R/pkg/inst/tests/test_textFile.R   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/68ecdb7f/R/pkg/inst/tests/test_binaryFile.R
--
diff --git a/R/pkg/inst/tests/test_binaryFile.R b/R/pkg/inst/tests/test_binaryFile.R
index 4bb5f58..ca4218f 100644
--- a/R/pkg/inst/tests/test_binaryFile.R
+++ b/R/pkg/inst/tests/test_binaryFile.R
@@ -27,7 +27,7 @@ test_that("saveAsObjectFile()/objectFile() following textFile() works", {
   fileName2 <- tempfile(pattern="spark-test", fileext=".tmp")
   writeLines(mockFile, fileName1)
 
-  rdd <- textFile(sc, fileName1)
+  rdd <- textFile(sc, fileName1, 1)
   saveAsObjectFile(rdd, fileName2)
   rdd <- objectFile(sc, fileName2)
   expect_equal(collect(rdd), as.list(mockFile))
@@ -40,7 +40,7 @@ test_that("saveAsObjectFile()/objectFile() works on a parallelized list", {
   fileName <- tempfile(pattern="spark-test", fileext=".tmp")
 
   l <- list(1, 2, 3)
-  rdd <- parallelize(sc, l)
+  rdd <- parallelize(sc, l, 1)
   saveAsObjectFile(rdd, fileName)
   rdd <- objectFile(sc, fileName)
   expect_equal(collect(rdd), l)

http://git-wip-us.apache.org/repos/asf/spark/blob/68ecdb7f/R/pkg/inst/tests/test_textFile.R
--
diff --git a/R/pkg/inst/tests/test_textFile.R b/R/pkg/inst/tests/test_textFile.R
index 7bb3e80..6b87b4b 100644
--- a/R/pkg/inst/tests/test_textFile.R
+++ b/R/pkg/inst/tests/test_textFile.R
@@ -81,7 +81,7 @@ test_that("textFile() followed by a saveAsTextFile() returns the same content",
   fileName2 <- tempfile(pattern="spark-test", fileext=".tmp")
   writeLines(mockFile, fileName1)
 
-  rdd <- textFile(sc, fileName1)
+  rdd <- textFile(sc, fileName1, 1L)
   saveAsTextFile(rdd, fileName2)
   rdd <- textFile(sc, fileName2)
   expect_equal(collect(rdd), as.list(mockFile))
@@ -93,7 +93,7 @@ test_that("textFile() followed by a saveAsTextFile() returns the same content",
 
 test_that("saveAsTextFile() on a parallelized list works as expected", {
   fileName <- tempfile(pattern="spark-test", fileext=".tmp")
   l <- list(1, 2, 3)
-  rdd <- parallelize(sc, l)
+  rdd <- parallelize(sc, l, 1L)
   saveAsTextFile(rdd, fileName)
   rdd <- textFile(sc, fileName)
   expect_equal(collect(rdd), lapply(l, function(x) {toString(x)}))





spark git commit: [SPARK-6851][SQL] Create new instance for each converted parquet relation

2015-04-10 Thread marmbrus
Repository: spark
Updated Branches:
  refs/heads/master 68ecdb7f9 -> 23d5f8864


[SPARK-6851][SQL] Create new instance for each converted parquet relation

Otherwise we end up rewriting predicates to be trivially equal (i.e. `a#1 =
a#2` -> `a#3 = a#3`), at which point the query is no longer valid.
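
A hypothetical PySpark-flavoured illustration of the query shape that was
affected, assuming an invented parquet-backed metastore table named `orders`
with `id` and `month` columns:

from pyspark import SparkContext
from pyspark.sql import HiveContext

# Illustration only: `orders` is assumed to be a parquet-backed Hive metastore
# table, so Spark converts it to its native parquet relation during analysis.
sc = SparkContext(appName="spark-6851-sketch")
sqlCtx = HiveContext(sc)

# Before this fix, both references to `orders` resolved to the same converted
# relation instance, so a condition like a.id = b.id could be rewritten into a
# trivially true comparison of a column with itself.
result = sqlCtx.sql("""
    SELECT a.id, b.id
    FROM orders a JOIN orders b
      ON a.id = b.id AND a.month = 20151 AND b.month = 20152
""")
result.show()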

Author: Michael Armbrust mich...@databricks.com

Closes #5458 from marmbrus/selfJoinParquet and squashes the following commits:

22df77c [Michael Armbrust] [SPARK-6851][SQL] Create new instance for each 
converted parquet relation


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/23d5f886
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/23d5f886
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/23d5f886

Branch: refs/heads/master
Commit: 23d5f8864f7d665a74b1d38118700139854dbb1c
Parents: 68ecdb7
Author: Michael Armbrust mich...@databricks.com
Authored: Fri Apr 10 16:05:14 2015 -0700
Committer: Michael Armbrust mich...@databricks.com
Committed: Fri Apr 10 16:05:14 2015 -0700

--
 .../spark/sql/hive/HiveMetastoreCatalog.scala   |  4 +-
 .../sql/hive/execution/SQLQuerySuite.scala  | 78 +++-
 2 files changed, 80 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/23d5f886/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
--
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 315fab6..3ed5c5b 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -279,7 +279,7 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with
       }
     }
 
-    if (metastoreRelation.hiveQlTable.isPartitioned) {
+    val result = if (metastoreRelation.hiveQlTable.isPartitioned) {
       val partitionSchema = StructType.fromAttributes(metastoreRelation.partitionKeys)
       val partitionColumnDataTypes = partitionSchema.map(_.dataType)
       val partitions = metastoreRelation.hiveQlPartitions.map { p =>
@@ -314,6 +314,8 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with
 
       parquetRelation
     }
+
+    result.newInstance()
   }
 
   override def getTables(databaseName: Option[String]): Seq[(String, Boolean)] = synchronized {

http://git-wip-us.apache.org/repos/asf/spark/blob/23d5f886/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
--
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 7811bd2..4c369c0 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -34,6 +34,17 @@ case class Nested3(f3: Int)
 case class NestedArray2(b: Seq[Int])
 case class NestedArray1(a: NestedArray2)
 
+case class Order(
+    id: Int,
+    make: String,
+    `type`: String,
+    price: Int,
+    pdate: String,
+    customer: String,
+    city: String,
+    state: String,
+    month: Int)
+
 /**
  * A collection of hive query tests where we generate the answers ourselves instead of depending on
  * Hive to generate them (in contrast to HiveQuerySuite).  Often this is because the query is
@@ -41,6 +52,72 @@ case class NestedArray1(a: NestedArray2)
  */
 class SQLQuerySuite extends QueryTest {
 
+  test("SPARK-6851: Self-joined converted parquet tables") {
+    val orders = Seq(
+      Order(1, "Atlas", "MTB", 234, "2015-01-07", "John D", "Pacifica", "CA", 20151),
+      Order(3, "Swift", "MTB", 285, "2015-01-17", "John S", "Redwood City", "CA", 20151),
+      Order(4, "Atlas", "Hybrid", 303, "2015-01-23", "Jones S", "San Mateo", "CA", 20151),
+      Order(7, "Next", "MTB", 356, "2015-01-04", "Jane D", "Daly City", "CA", 20151),
+      Order(10, "Next", "YFlikr", 187, "2015-01-09", "John D", "Fremont", "CA", 20151),
+      Order(11, "Swift", "YFlikr", 187, "2015-01-23", "John D", "Hayward", "CA", 20151),
+      Order(2, "Next", "Hybrid", 324, "2015-02-03", "Jane D", "Daly City", "CA", 20152),
+      Order(5, "Next", "Street", 187, "2015-02-08", "John D", "Fremont", "CA", 20152),
+      Order(6, "Atlas", "Street", 154, "2015-02-09", "John D", "Pacifica", "CA", 20152),
+      Order(8, "Swift", "Hybrid", 485, "2015-02-19", "John S", "Redwood City", "CA", 20152),
+      Order(9, "Atlas", "Split", 303, "2015-02-28", "Jones S", "San Mateo", "CA", 20152))
+
+    val orderUpdates = Seq(
+      Order(1, "Atlas", "MTB", 434, "2015-01-07", "John D", "Pacifica", "CA", 20151),
+

[2/2] spark git commit: [SQL] [SPARK-6620] Speed up toDF() and rdd() functions by constructing converters in ScalaReflection

2015-04-10 Thread marmbrus
[SQL] [SPARK-6620] Speed up toDF() and rdd() functions by constructing 
converters in ScalaReflection

cc marmbrus

Author: Volodymyr Lyubinets vlyu...@gmail.com

Closes #5279 from vlyubin/speedup and squashes the following commits:

e75a387 [Volodymyr Lyubinets] Changes to ScalaUDF
11a20ec [Volodymyr Lyubinets] Avoid creating a tuple
c327bc9 [Volodymyr Lyubinets] Moved the only remaining function from 
DataTypeConversions to DateUtils
dec6802 [Volodymyr Lyubinets] Addresed review feedback
74301fa [Volodymyr Lyubinets] Addressed review comments
afa3aa5 [Volodymyr Lyubinets] Minor refactoring, added license, removed debug 
output
881dc60 [Volodymyr Lyubinets] Moved to a separate module; addressed review 
comments; one extra place of usage; changed behaviour for Java
8cad6e2 [Volodymyr Lyubinets] Addressed review commments
41b2aa9 [Volodymyr Lyubinets] Creating converters for ScalaReflection stuff, 
and more
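
A generic, hypothetical Python sketch of the converter pattern this change
adopts, with made-up type names and a toy schema rather than Catalyst types:
resolve the per-field conversion once per schema, then reuse the resulting
converter functions for every row instead of re-dispatching on the data type
for every value.

def make_converter(data_type):
    # Dispatch on the type once, up front.
    if data_type == "int":
        return int
    if data_type == "string":
        return str
    return lambda value: value  # pass-through for anything we don't special-case

def convert_rows(rows, schema):
    # schema: list of (name, data_type) pairs; converters are built only once.
    converters = [make_converter(dtype) for _, dtype in schema]
    return [tuple(conv(v) for conv, v in zip(converters, row)) for row in rows]

schema = [("id", "int"), ("name", "string")]
print(convert_rows([("1", 42), ("2", 7)], schema))
# [(1, '42'), (2, '7')]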


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/67d06880
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/67d06880
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/67d06880

Branch: refs/heads/master
Commit: 67d06880e47e0324409cf7e5b21db1dcb0107b82
Parents: 23d5f88
Author: Volodymyr Lyubinets vlyu...@gmail.com
Authored: Fri Apr 10 16:27:56 2015 -0700
Committer: Michael Armbrust mich...@databricks.com
Committed: Fri Apr 10 16:27:56 2015 -0700

--
 .../spark/ml/feature/TokenizerSuite.scala   |  17 +-
 .../sql/catalyst/CatalystTypeConverters.scala   | 295 +++
 .../spark/sql/catalyst/ScalaReflection.scala|  55 --
 .../sql/catalyst/expressions/ScalaUdf.scala | 819 ---
 .../catalyst/plans/logical/LocalRelation.scala  |   7 +-
 .../spark/sql/types/DataTypeConversions.scala   |  77 --
 .../org/apache/spark/sql/types/DateUtils.scala  |  29 +
 .../sql/catalyst/ScalaReflectionSuite.scala |   4 +-
 .../scala/org/apache/spark/sql/DataFrame.scala  |  11 +-
 .../scala/org/apache/spark/sql/SQLContext.scala |   9 +-
 .../spark/sql/execution/ExistingRDD.scala   |  14 +-
 .../spark/sql/execution/LocalTableScan.scala|  16 +-
 .../apache/spark/sql/execution/SparkPlan.scala  |  11 +-
 .../spark/sql/execution/basicOperators.scala|   9 +-
 .../org/apache/spark/sql/json/JsonRDD.scala |   4 +-
 .../apache/spark/sql/JavaDataFrameSuite.java|  10 +-
 .../org/apache/spark/sql/json/JsonSuite.scala   |   3 +-
 17 files changed, 929 insertions(+), 461 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/67d06880/mllib/src/test/scala/org/apache/spark/ml/feature/TokenizerSuite.scala
--
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/TokenizerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/TokenizerSuite.scala
index bf862b9..d186ead 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/TokenizerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/TokenizerSuite.scala
@@ -25,10 +25,7 @@ import org.apache.spark.mllib.util.MLlibTestSparkContext
 import org.apache.spark.sql.{DataFrame, Row, SQLContext}
 
 @BeanInfo
-case class TokenizerTestData(rawText: String, wantedTokens: Seq[String]) {
-  /** Constructor used in [[org.apache.spark.ml.feature.JavaTokenizerSuite]] */
-  def this(rawText: String, wantedTokens: Array[String]) = this(rawText, wantedTokens.toSeq)
-}
+case class TokenizerTestData(rawText: String, wantedTokens: Array[String])
 
 class RegexTokenizerSuite extends FunSuite with MLlibTestSparkContext {
   import org.apache.spark.ml.feature.RegexTokenizerSuite._
@@ -46,14 +43,14 @@ class RegexTokenizerSuite extends FunSuite with MLlibTestSparkContext {
       .setOutputCol("tokens")
 
     val dataset0 = sqlContext.createDataFrame(Seq(
-      TokenizerTestData("Test for tokenization.", Seq("Test", "for", "tokenization", ".")),
-      TokenizerTestData("Te,st. punct", Seq("Te", ",", "st", ".", "punct"))
+      TokenizerTestData("Test for tokenization.", Array("Test", "for", "tokenization", ".")),
+      TokenizerTestData("Te,st. punct", Array("Te", ",", "st", ".", "punct"))
     ))
     testRegexTokenizer(tokenizer, dataset0)
 
     val dataset1 = sqlContext.createDataFrame(Seq(
-      TokenizerTestData("Test for tokenization.", Seq("Test", "for", "tokenization")),
-      TokenizerTestData("Te,st. punct", Seq("punct"))
+      TokenizerTestData("Test for tokenization.", Array("Test", "for", "tokenization")),
+      TokenizerTestData("Te,st. punct", Array("punct"))
     ))
 
     tokenizer.setMinTokenLength(3)
@@ -64,8 +61,8 @@ class RegexTokenizerSuite extends FunSuite with MLlibTestSparkContext {
       .setGaps(true)
      .setMinTokenLength(0)
     val dataset2 = sqlContext.createDataFrame(Seq(
-      TokenizerTestData("Test for tokenization.", Seq("Test", "for", "tokenization.")),
-

[1/2] spark git commit: [SQL] [SPARK-6620] Speed up toDF() and rdd() functions by constructing converters in ScalaReflection

2015-04-10 Thread marmbrus
Repository: spark
Updated Branches:
  refs/heads/master 23d5f8864 -> 67d06880e


http://git-wip-us.apache.org/repos/asf/spark/blob/67d06880/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataTypeConversions.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataTypeConversions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataTypeConversions.scala
deleted file mode 100644
index a9d63e7..000
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataTypeConversions.scala
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.types
-
-import java.text.SimpleDateFormat
-
-import org.apache.spark.sql.Row
-import org.apache.spark.sql.catalyst.ScalaReflection
-import org.apache.spark.sql.catalyst.expressions.GenericMutableRow
-
-
-private[sql] object DataTypeConversions {
-
-  def productToRow(product: Product, schema: StructType): Row = {
-    val mutableRow = new GenericMutableRow(product.productArity)
-    val schemaFields = schema.fields.toArray
-
-    var i = 0
-    while (i < mutableRow.length) {
-      mutableRow(i) =
-        ScalaReflection.convertToCatalyst(product.productElement(i), schemaFields(i).dataType)
-      i += 1
-    }
-
-    mutableRow
-  }
-
-  def stringToTime(s: String): java.util.Date = {
-    if (!s.contains('T')) {
-      // JDBC escape string
-      if (s.contains(' ')) {
-        java.sql.Timestamp.valueOf(s)
-      } else {
-        java.sql.Date.valueOf(s)
-      }
-    } else if (s.endsWith("Z")) {
-      // this is zero timezone of ISO8601
-      stringToTime(s.substring(0, s.length - 1) + "GMT-00:00")
-    } else if (s.indexOf("GMT") == -1) {
-      // timezone with ISO8601
-      val inset = "+00.00".length
-      val s0 = s.substring(0, s.length - inset)
-      val s1 = s.substring(s.length - inset, s.length)
-      if (s0.substring(s0.lastIndexOf(':')).contains('.')) {
-        stringToTime(s0 + "GMT" + s1)
-      } else {
-        stringToTime(s0 + ".0GMT" + s1)
-      }
-    } else {
-      // ISO8601 with GMT insert
-      val ISO8601GMT: SimpleDateFormat = new SimpleDateFormat( "yyyy-MM-dd'T'HH:mm:ss.SSSz" )
-      ISO8601GMT.parse(s)
-    }
-  }
-
-  /** Converts Java objects to catalyst rows / types */
-  def convertJavaToCatalyst(a: Any, dataType: DataType): Any = (a, dataType) match {
-    case (obj, udt: UserDefinedType[_]) => ScalaReflection.convertToCatalyst(obj, udt)  // Scala type
-    case (d: java.math.BigDecimal, _) => Decimal(d)
-    case (other, _) => other
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/67d06880/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateUtils.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateUtils.scala
index 8a1a3b8..504fb05 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateUtils.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.types
 
 import java.sql.Date
+import java.text.SimpleDateFormat
 import java.util.{Calendar, TimeZone}
 
 import org.apache.spark.sql.catalyst.expressions.Cast
@@ -57,4 +58,32 @@ object DateUtils {
   }
 
   def toString(days: Int): String = Cast.threadLocalDateFormat.get.format(toJavaDate(days))
+
+  def stringToTime(s: String): java.util.Date = {
+    if (!s.contains('T')) {
+      // JDBC escape string
+      if (s.contains(' ')) {
+        java.sql.Timestamp.valueOf(s)
+      } else {
+        java.sql.Date.valueOf(s)
+      }
+    } else if (s.endsWith("Z")) {
+      // this is zero timezone of ISO8601
+      stringToTime(s.substring(0, s.length - 1) + "GMT-00:00")
+    } else if (s.indexOf("GMT") == -1) {
+      // timezone with ISO8601
+      val inset = "+00.00".length
+      val s0 = s.substring(0, s.length - inset)
+      val s1 = s.substring(s.length - inset, s.length)
+      if (s0.substring(s0.lastIndexOf(':')).contains('.')) {
+        stringToTime(s0 + "GMT" + s1)
+      } else {
+

[1/2] spark git commit: Revert "Preparing development version 1.3.2-SNAPSHOT"

2015-04-10 Thread pwendell
Repository: spark
Updated Branches:
  refs/heads/branch-1.3 767f582cf -> 30d906e86


Revert "Preparing development version 1.3.2-SNAPSHOT"

This reverts commit cdef7d080aa3f473f5ea06ba816c01b41a0239eb.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/100ba3a2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/100ba3a2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/100ba3a2

Branch: refs/heads/branch-1.3
Commit: 100ba3a2c87054764eb041361ddbef30a29a8b97
Parents: 767f582
Author: Patrick Wendell patr...@databricks.com
Authored: Fri Apr 10 17:09:10 2015 -0700
Committer: Patrick Wendell patr...@databricks.com
Committed: Fri Apr 10 17:09:10 2015 -0700

--
 assembly/pom.xml  | 2 +-
 bagel/pom.xml | 2 +-
 core/pom.xml  | 2 +-
 examples/pom.xml  | 2 +-
 external/flume-sink/pom.xml   | 2 +-
 external/flume/pom.xml| 2 +-
 external/kafka-assembly/pom.xml   | 2 +-
 external/kafka/pom.xml| 2 +-
 external/mqtt/pom.xml | 2 +-
 external/twitter/pom.xml  | 2 +-
 external/zeromq/pom.xml   | 2 +-
 extras/java8-tests/pom.xml| 2 +-
 extras/kinesis-asl/pom.xml| 2 +-
 extras/spark-ganglia-lgpl/pom.xml | 2 +-
 graphx/pom.xml| 2 +-
 mllib/pom.xml | 2 +-
 network/common/pom.xml| 2 +-
 network/shuffle/pom.xml   | 2 +-
 network/yarn/pom.xml  | 2 +-
 pom.xml   | 2 +-
 repl/pom.xml  | 2 +-
 sql/catalyst/pom.xml  | 2 +-
 sql/core/pom.xml  | 2 +-
 sql/hive-thriftserver/pom.xml | 2 +-
 sql/hive/pom.xml  | 2 +-
 streaming/pom.xml | 2 +-
 tools/pom.xml | 2 +-
 yarn/pom.xml  | 2 +-
 28 files changed, 28 insertions(+), 28 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/100ba3a2/assembly/pom.xml
--
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 0952cd2..67bebfc 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.3.2-SNAPSHOT</version>
+    <version>1.3.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/spark/blob/100ba3a2/bagel/pom.xml
--
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 602cc7b..c7750a2 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.3.2-SNAPSHOT</version>
+    <version>1.3.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/spark/blob/100ba3a2/core/pom.xml
--
diff --git a/core/pom.xml b/core/pom.xml
index 5971d05..a3dc28f 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.3.2-SNAPSHOT</version>
+    <version>1.3.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/spark/blob/100ba3a2/examples/pom.xml
--
diff --git a/examples/pom.xml b/examples/pom.xml
index e1a3ecc..9f03cbd 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.3.2-SNAPSHOT</version>
+    <version>1.3.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/spark/blob/100ba3a2/external/flume-sink/pom.xml
--
diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml
index f46a2a0..46adbe2 100644
--- a/external/flume-sink/pom.xml
+++ b/external/flume-sink/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.3.2-SNAPSHOT</version>
+    <version>1.3.1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/spark/blob/100ba3a2/external/flume/pom.xml
--
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index 02331e8..5fc589f 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
 

[2/2] spark git commit: Revert "Preparing Spark release v1.3.1-rc2"

2015-04-10 Thread pwendell
Revert "Preparing Spark release v1.3.1-rc2"

This reverts commit 7c4473aa5a7f5de0323394aaedeefbf9738e8eb5.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/30d906e8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/30d906e8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/30d906e8

Branch: refs/heads/branch-1.3
Commit: 30d906e86b28c4a864950c17fc4a089046f521b8
Parents: 100ba3a
Author: Patrick Wendell patr...@databricks.com
Authored: Fri Apr 10 17:09:12 2015 -0700
Committer: Patrick Wendell patr...@databricks.com
Committed: Fri Apr 10 17:09:12 2015 -0700

--
 assembly/pom.xml  | 2 +-
 bagel/pom.xml | 2 +-
 core/pom.xml  | 2 +-
 examples/pom.xml  | 2 +-
 external/flume-sink/pom.xml   | 2 +-
 external/flume/pom.xml| 2 +-
 external/kafka-assembly/pom.xml   | 2 +-
 external/kafka/pom.xml| 2 +-
 external/mqtt/pom.xml | 2 +-
 external/twitter/pom.xml  | 2 +-
 external/zeromq/pom.xml   | 2 +-
 extras/java8-tests/pom.xml| 2 +-
 extras/kinesis-asl/pom.xml| 2 +-
 extras/spark-ganglia-lgpl/pom.xml | 2 +-
 graphx/pom.xml| 2 +-
 mllib/pom.xml | 2 +-
 network/common/pom.xml| 2 +-
 network/shuffle/pom.xml   | 2 +-
 network/yarn/pom.xml  | 2 +-
 pom.xml   | 2 +-
 repl/pom.xml  | 2 +-
 sql/catalyst/pom.xml  | 2 +-
 sql/core/pom.xml  | 2 +-
 sql/hive-thriftserver/pom.xml | 2 +-
 sql/hive/pom.xml  | 2 +-
 streaming/pom.xml | 2 +-
 tools/pom.xml | 2 +-
 yarn/pom.xml  | 2 +-
 28 files changed, 28 insertions(+), 28 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/30d906e8/assembly/pom.xml
--
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 67bebfc..114dde7 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.3.1</version>
+    <version>1.3.1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/spark/blob/30d906e8/bagel/pom.xml
--
diff --git a/bagel/pom.xml b/bagel/pom.xml
index c7750a2..dea41f8 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.3.1</version>
+    <version>1.3.1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/spark/blob/30d906e8/core/pom.xml
--
diff --git a/core/pom.xml b/core/pom.xml
index a3dc28f..9a79d70 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.3.1</version>
+    <version>1.3.1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/spark/blob/30d906e8/examples/pom.xml
--
diff --git a/examples/pom.xml b/examples/pom.xml
index 9f03cbd..73ab234 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.3.1</version>
+    <version>1.3.1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/spark/blob/30d906e8/external/flume-sink/pom.xml
--
diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml
index 46adbe2..1a5aaf5 100644
--- a/external/flume-sink/pom.xml
+++ b/external/flume-sink/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.3.1</version>
+    <version>1.3.1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/spark/blob/30d906e8/external/flume/pom.xml
--
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index 5fc589f..d5539d9 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.3.1</version>
+    <version>1.3.1-SNAPSHOT</version>