Repository: spark
Updated Branches:
  refs/heads/master 053d94fcf -> 4e0395ddb


[SPARK-10168] [STREAMING] Fix the issue that maven publishes wrong artifact jars

This PR removed the `outputFile` configuration from pom.xml and updated 
`tests.py` to search jars for both sbt build and maven build.

I ran ` mvn -Pkinesis-asl -DskipTests clean install` locally, and verified the 
jars in my local repository were correct. I also checked Python tests for maven 
build, and it passed all tests.

Author: zsxwing <zsxw...@gmail.com>

Closes #8373 from zsxwing/SPARK-10168 and squashes the following commits:

e0b5818 [zsxwing] Fix the sbt build
c697627 [zsxwing] Add the jar paths to the exception message
be1d8a5 [zsxwing] Fix the issue that maven publishes wrong artifact jars


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4e0395dd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4e0395dd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4e0395dd

Branch: refs/heads/master
Commit: 4e0395ddb764d092b5b38447af49e196e590e0f0
Parents: 053d94f
Author: zsxwing <zsxw...@gmail.com>
Authored: Mon Aug 24 12:38:01 2015 -0700
Committer: Tathagata Das <tathagata.das1...@gmail.com>
Committed: Mon Aug 24 12:38:01 2015 -0700

----------------------------------------------------------------------
 external/flume-assembly/pom.xml     |  1 -
 external/kafka-assembly/pom.xml     |  1 -
 external/mqtt-assembly/pom.xml      |  1 -
 extras/kinesis-asl-assembly/pom.xml |  1 -
 python/pyspark/streaming/tests.py   | 47 ++++++++++++++++++--------------
 5 files changed, 26 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/4e0395dd/external/flume-assembly/pom.xml
----------------------------------------------------------------------
diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml
index e05e431..561ed4b 100644
--- a/external/flume-assembly/pom.xml
+++ b/external/flume-assembly/pom.xml
@@ -115,7 +115,6 @@
         <artifactId>maven-shade-plugin</artifactId>
         <configuration>
           <shadedArtifactAttached>false</shadedArtifactAttached>
-          
<outputFile>${project.build.directory}/scala-${scala.binary.version}/spark-streaming-flume-assembly-${project.version}.jar</outputFile>
           <artifactSet>
             <includes>
               <include>*:*</include>

http://git-wip-us.apache.org/repos/asf/spark/blob/4e0395dd/external/kafka-assembly/pom.xml
----------------------------------------------------------------------
diff --git a/external/kafka-assembly/pom.xml b/external/kafka-assembly/pom.xml
index 36342f3..6f4e2a8 100644
--- a/external/kafka-assembly/pom.xml
+++ b/external/kafka-assembly/pom.xml
@@ -142,7 +142,6 @@
       <artifactId>maven-shade-plugin</artifactId>
       <configuration>
         <shadedArtifactAttached>false</shadedArtifactAttached>
-        
<outputFile>${project.build.directory}/scala-${scala.binary.version}/spark-streaming-kafka-assembly-${project.version}.jar</outputFile>
         <artifactSet>
           <includes>
             <include>*:*</include>

http://git-wip-us.apache.org/repos/asf/spark/blob/4e0395dd/external/mqtt-assembly/pom.xml
----------------------------------------------------------------------
diff --git a/external/mqtt-assembly/pom.xml b/external/mqtt-assembly/pom.xml
index f3e3f93..8412600 100644
--- a/external/mqtt-assembly/pom.xml
+++ b/external/mqtt-assembly/pom.xml
@@ -132,7 +132,6 @@
         <artifactId>maven-shade-plugin</artifactId>
         <configuration>
           <shadedArtifactAttached>false</shadedArtifactAttached>
-          
<outputFile>${project.build.directory}/scala-${scala.binary.version}/spark-streaming-mqtt-assembly-${project.version}.jar</outputFile>
           <artifactSet>
             <includes>
               <include>*:*</include>

http://git-wip-us.apache.org/repos/asf/spark/blob/4e0395dd/extras/kinesis-asl-assembly/pom.xml
----------------------------------------------------------------------
diff --git a/extras/kinesis-asl-assembly/pom.xml 
b/extras/kinesis-asl-assembly/pom.xml
index 3ca5386..51af3e6 100644
--- a/extras/kinesis-asl-assembly/pom.xml
+++ b/extras/kinesis-asl-assembly/pom.xml
@@ -137,7 +137,6 @@
       <artifactId>maven-shade-plugin</artifactId>
       <configuration>
         <shadedArtifactAttached>false</shadedArtifactAttached>
-        
<outputFile>${project.build.directory}/scala-${scala.binary.version}/spark-streaming-kinesis-asl-assembly-${project.version}.jar</outputFile>
         <artifactSet>
           <includes>
             <include>*:*</include>

http://git-wip-us.apache.org/repos/asf/spark/blob/4e0395dd/python/pyspark/streaming/tests.py
----------------------------------------------------------------------
diff --git a/python/pyspark/streaming/tests.py 
b/python/pyspark/streaming/tests.py
index 510a4f2..cfea95b 100644
--- a/python/pyspark/streaming/tests.py
+++ b/python/pyspark/streaming/tests.py
@@ -1162,11 +1162,20 @@ class KinesisStreamTests(PySparkStreamingTestCase):
             kinesisTestUtils.deleteDynamoDBTable(kinesisAppName)
 
 
+# Search jar in the project dir using the jar name_prefix for both sbt build 
and maven build because
+# the artifact jars are in different directories.
+def search_jar(dir, name_prefix):
+    # We should ignore the following jars
+    ignored_jar_suffixes = ("javadoc.jar", "sources.jar", "test-sources.jar", 
"tests.jar")
+    jars = (glob.glob(os.path.join(dir, "target/scala-*/" + name_prefix + 
"-*.jar")) +  # sbt build
+            glob.glob(os.path.join(dir, "target/" + name_prefix + "_*.jar")))  
# maven build
+    return [jar for jar in jars if not jar.endswith(ignored_jar_suffixes)]
+
+
 def search_kafka_assembly_jar():
     SPARK_HOME = os.environ["SPARK_HOME"]
     kafka_assembly_dir = os.path.join(SPARK_HOME, "external/kafka-assembly")
-    jars = glob.glob(
-        os.path.join(kafka_assembly_dir, 
"target/scala-*/spark-streaming-kafka-assembly-*.jar"))
+    jars = search_jar(kafka_assembly_dir, "spark-streaming-kafka-assembly")
     if not jars:
         raise Exception(
             ("Failed to find Spark Streaming kafka assembly jar in %s. " % 
kafka_assembly_dir) +
@@ -1174,8 +1183,8 @@ def search_kafka_assembly_jar():
             "'build/sbt assembly/assembly streaming-kafka-assembly/assembly' 
or "
             "'build/mvn package' before running this test.")
     elif len(jars) > 1:
-        raise Exception(("Found multiple Spark Streaming Kafka assembly JARs 
in %s; please "
-                         "remove all but one") % kafka_assembly_dir)
+        raise Exception(("Found multiple Spark Streaming Kafka assembly JARs: 
%s; please "
+                         "remove all but one") % (", ".join(jars)))
     else:
         return jars[0]
 
@@ -1183,8 +1192,7 @@ def search_kafka_assembly_jar():
 def search_flume_assembly_jar():
     SPARK_HOME = os.environ["SPARK_HOME"]
     flume_assembly_dir = os.path.join(SPARK_HOME, "external/flume-assembly")
-    jars = glob.glob(
-        os.path.join(flume_assembly_dir, 
"target/scala-*/spark-streaming-flume-assembly-*.jar"))
+    jars = search_jar(flume_assembly_dir, "spark-streaming-flume-assembly")
     if not jars:
         raise Exception(
             ("Failed to find Spark Streaming Flume assembly jar in %s. " % 
flume_assembly_dir) +
@@ -1192,8 +1200,8 @@ def search_flume_assembly_jar():
             "'build/sbt assembly/assembly streaming-flume-assembly/assembly' 
or "
             "'build/mvn package' before running this test.")
     elif len(jars) > 1:
-        raise Exception(("Found multiple Spark Streaming Flume assembly JARs 
in %s; please "
-                        "remove all but one") % flume_assembly_dir)
+        raise Exception(("Found multiple Spark Streaming Flume assembly JARs: 
%s; please "
+                        "remove all but one") % (", ".join(jars)))
     else:
         return jars[0]
 
@@ -1201,8 +1209,7 @@ def search_flume_assembly_jar():
 def search_mqtt_assembly_jar():
     SPARK_HOME = os.environ["SPARK_HOME"]
     mqtt_assembly_dir = os.path.join(SPARK_HOME, "external/mqtt-assembly")
-    jars = glob.glob(
-        os.path.join(mqtt_assembly_dir, 
"target/scala-*/spark-streaming-mqtt-assembly-*.jar"))
+    jars = search_jar(mqtt_assembly_dir, "spark-streaming-mqtt-assembly")
     if not jars:
         raise Exception(
             ("Failed to find Spark Streaming MQTT assembly jar in %s. " % 
mqtt_assembly_dir) +
@@ -1210,8 +1217,8 @@ def search_mqtt_assembly_jar():
             "'build/sbt assembly/assembly streaming-mqtt-assembly/assembly' or 
"
             "'build/mvn package' before running this test")
     elif len(jars) > 1:
-        raise Exception(("Found multiple Spark Streaming MQTT assembly JARs in 
%s; please "
-                         "remove all but one") % mqtt_assembly_dir)
+        raise Exception(("Found multiple Spark Streaming MQTT assembly JARs: 
%s; please "
+                         "remove all but one") % (", ".join(jars)))
     else:
         return jars[0]
 
@@ -1227,8 +1234,8 @@ def search_mqtt_test_jar():
             "You need to build Spark with "
             "'build/sbt assembly/assembly streaming-mqtt/test:assembly'")
     elif len(jars) > 1:
-        raise Exception(("Found multiple Spark Streaming MQTT test JARs in %s; 
please "
-                         "remove all but one") % mqtt_test_dir)
+        raise Exception(("Found multiple Spark Streaming MQTT test JARs: %s; 
please "
+                         "remove all but one") % (", ".join(jars)))
     else:
         return jars[0]
 
@@ -1236,14 +1243,12 @@ def search_mqtt_test_jar():
 def search_kinesis_asl_assembly_jar():
     SPARK_HOME = os.environ["SPARK_HOME"]
     kinesis_asl_assembly_dir = os.path.join(SPARK_HOME, 
"extras/kinesis-asl-assembly")
-    jars = glob.glob(
-        os.path.join(kinesis_asl_assembly_dir,
-                     
"target/scala-*/spark-streaming-kinesis-asl-assembly-*.jar"))
+    jars = search_jar(kinesis_asl_assembly_dir, 
"spark-streaming-kinesis-asl-assembly")
     if not jars:
         return None
     elif len(jars) > 1:
-        raise Exception(("Found multiple Spark Streaming Kinesis ASL assembly 
JARs in %s; please "
-                         "remove all but one") % kinesis_asl_assembly_dir)
+        raise Exception(("Found multiple Spark Streaming Kinesis ASL assembly 
JARs: %s; please "
+                         "remove all but one") % (", ".join(jars)))
     else:
         return jars[0]
 
@@ -1269,8 +1274,8 @@ if __name__ == "__main__":
                                    mqtt_test_jar, kinesis_asl_assembly_jar)
 
     os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars %s pyspark-shell" % jars
-    testcases = [BasicOperationTests, WindowFunctionTests, 
StreamingContextTests,
-                 CheckpointTests, KafkaStreamTests, FlumeStreamTests, 
FlumePollingStreamTests]
+    testcases = [BasicOperationTests, WindowFunctionTests, 
StreamingContextTests, CheckpointTests,
+                 KafkaStreamTests, FlumeStreamTests, FlumePollingStreamTests, 
MQTTStreamTests]
 
     if kinesis_jar_present is True:
         testcases.append(KinesisStreamTests)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to