[1/2] spark git commit: [SPARK-19533][EXAMPLES] Convert Java tests to use lambdas, Java 8 features

2017-02-19 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master ba8912e5f -> de14d35f7


http://git-wip-us.apache.org/repos/asf/spark/blob/de14d35f/examples/src/main/java/org/apache/spark/examples/mllib/JavaRecommendationExample.java
--
diff --git 
a/examples/src/main/java/org/apache/spark/examples/mllib/JavaRecommendationExample.java
 
b/examples/src/main/java/org/apache/spark/examples/mllib/JavaRecommendationExample.java
index f69aa4b..1ee68da 100644
--- 
a/examples/src/main/java/org/apache/spark/examples/mllib/JavaRecommendationExample.java
+++ 
b/examples/src/main/java/org/apache/spark/examples/mllib/JavaRecommendationExample.java
@@ -21,7 +21,6 @@ package org.apache.spark.examples.mllib;
 import scala.Tuple2;
 
 import org.apache.spark.api.java.*;
-import org.apache.spark.api.java.function.Function;
 import org.apache.spark.mllib.recommendation.ALS;
 import org.apache.spark.mllib.recommendation.MatrixFactorizationModel;
 import org.apache.spark.mllib.recommendation.Rating;
@@ -37,15 +36,12 @@ public class JavaRecommendationExample {
     // Load and parse the data
     String path = "data/mllib/als/test.data";
     JavaRDD<String> data = jsc.textFile(path);
-    JavaRDD<Rating> ratings = data.map(
-      new Function<String, Rating>() {
-        public Rating call(String s) {
-          String[] sarray = s.split(",");
-          return new Rating(Integer.parseInt(sarray[0]), Integer.parseInt(sarray[1]),
-            Double.parseDouble(sarray[2]));
-        }
-      }
-    );
+    JavaRDD<Rating> ratings = data.map(s -> {
+      String[] sarray = s.split(",");
+      return new Rating(Integer.parseInt(sarray[0]),
+        Integer.parseInt(sarray[1]),
+        Double.parseDouble(sarray[2]));
+    });
 
 // Build the recommendation model using ALS
 int rank = 10;
@@ -53,37 +49,19 @@ public class JavaRecommendationExample {
     MatrixFactorizationModel model = ALS.train(JavaRDD.toRDD(ratings), rank, numIterations, 0.01);
 
     // Evaluate the model on rating data
-    JavaRDD<Tuple2<Object, Object>> userProducts = ratings.map(
-      new Function<Rating, Tuple2<Object, Object>>() {
-        public Tuple2<Object, Object> call(Rating r) {
-          return new Tuple2<Object, Object>(r.user(), r.product());
-        }
-      }
-    );
+    JavaRDD<Tuple2<Object, Object>> userProducts =
+      ratings.map(r -> new Tuple2<>(r.user(), r.product()));
     JavaPairRDD<Tuple2<Integer, Integer>, Double> predictions = JavaPairRDD.fromJavaRDD(
-      model.predict(JavaRDD.toRDD(userProducts)).toJavaRDD().map(
-        new Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() {
-          public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating r){
-            return new Tuple2<>(new Tuple2<>(r.user(), r.product()), r.rating());
-          }
-        }
-      ));
-    JavaRDD<Tuple2<Double, Double>> ratesAndPreds =
-      JavaPairRDD.fromJavaRDD(ratings.map(
-        new Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() {
-          public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating r){
-            return new Tuple2<>(new Tuple2<>(r.user(), r.product()), r.rating());
-          }
-        }
-      )).join(predictions).values();
-    double MSE = JavaDoubleRDD.fromRDD(ratesAndPreds.map(
-      new Function<Tuple2<Double, Double>, Object>() {
-        public Object call(Tuple2<Double, Double> pair) {
-          Double err = pair._1() - pair._2();
-          return err * err;
-        }
-      }
-    ).rdd()).mean();
+      model.predict(JavaRDD.toRDD(userProducts)).toJavaRDD()
+          .map(r -> new Tuple2<>(new Tuple2<>(r.user(), r.product()), r.rating()))
+    );
+    JavaRDD<Tuple2<Double, Double>> ratesAndPreds = JavaPairRDD.fromJavaRDD(
+        ratings.map(r -> new Tuple2<>(new Tuple2<>(r.user(), r.product()), r.rating())))
+      .join(predictions).values();
+    double MSE = ratesAndPreds.mapToDouble(pair -> {
+      double err = pair._1() - pair._2();
+      return err * err;
+    }).mean();
 System.out.println("Mean Squared Error = " + MSE);
 
 // Save and load model

http://git-wip-us.apache.org/repos/asf/spark/blob/de14d35f/examples/src/main/java/org/apache/spark/examples/mllib/JavaRegressionMetricsExample.java
--
diff --git 
a/examples/src/main/java/org/apache/spark/examples/mllib/JavaRegressionMetricsExample.java
 
b/examples/src/main/java/org/apache/spark/examples/mllib/JavaRegressionMetricsExample.java
index b3e5c04..7bb9993 100644
--- 
a/examples/src/main/java/org/apache/spark/examples/mllib/JavaRegressionMetricsExample.java
+++ 
b/examples/src/main/java/org/apache/spark/examples/mllib/JavaRegressionMetricsExample.java
@@ -21,7 +21,6 @@ package org.apache.spark.examples.mllib;
 import scala.Tuple2;
 
 import org.apache.spark.api.java.*;
-import org.apache.spark.api.java.function.Function;
 import org.apache.spark.mllib.linalg.Vectors;
 import org.apache.spark.mllib.regression.LabeledPoint;
 import org.apache.spark.mllib.regression.LinearRegressionModel;
@@ -38,34 +37,24 @@ public class JavaRegressionMetricsExample {
     // Load and parse the data
     String path = "data/mllib/sample_linear_regression_data.txt";
    JavaRDD<String> data = sc.textFile(path);
-    JavaRDD<LabeledPoint> parsedData = data.map(
-  new Fun

[2/2] spark git commit: [SPARK-19533][EXAMPLES] Convert Java tests to use lambdas, Java 8 features

2017-02-19 Thread srowen
[SPARK-19533][EXAMPLES] Convert Java tests to use lambdas, Java 8 features

## What changes were proposed in this pull request?

Convert Java tests to use lambdas, Java 8 features.
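
The conversion follows one pattern throughout: an anonymous inner class implementing a single-method interface such as `Function` is replaced by a lambda or a method reference. A minimal, self-contained before/after sketch (class and variable names here are illustrative, not taken from the patch):

```java
import java.util.Arrays;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;

public class LambdaBeforeAfter {
  public static void main(String[] args) {
    JavaSparkContext jsc = new JavaSparkContext("local", "LambdaBeforeAfter");
    JavaRDD<String> lines = jsc.parallelize(Arrays.asList("spark", "lambdas"));

    // Before: Java 7 style anonymous inner class implementing Function<String, Integer>
    JavaRDD<Integer> before = lines.map(new Function<String, Integer>() {
      @Override
      public Integer call(String s) {
        return s.length();
      }
    });

    // After: the same transformation as a Java 8 lambda (or String::length)
    JavaRDD<Integer> after = lines.map(s -> s.length());

    System.out.println(before.collect() + " / " + after.collect());
    jsc.stop();
  }
}
```

The diffs below apply this rewrite across the MLlib, SQL and streaming examples.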

## How was this patch tested?

Jenkins tests.

Author: Sean Owen 

Closes #16961 from srowen/SPARK-19533.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/de14d35f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/de14d35f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/de14d35f

Branch: refs/heads/master
Commit: de14d35f77071932963a994fac5aec0e5df838a1
Parents: ba8912e
Author: Sean Owen 
Authored: Sun Feb 19 09:37:56 2017 -0800
Committer: Sean Owen 
Committed: Sun Feb 19 09:37:56 2017 -0800

--
 .../org/apache/spark/examples/JavaLogQuery.java |  21 +--
 .../org/apache/spark/examples/JavaPageRank.java |  49 ++-
 .../org/apache/spark/examples/JavaSparkPi.java  |  20 +--
 .../spark/examples/JavaStatusTrackerDemo.java   |   5 +-
 .../java/org/apache/spark/examples/JavaTC.java  |   8 +-
 .../apache/spark/examples/JavaWordCount.java|  27 +---
 .../spark/examples/ml/JavaALSExample.java   |   7 +-
 ...SelectionViaTrainValidationSplitExample.java |   3 -
 .../spark/examples/ml/JavaTokenizerExample.java |  13 +-
 .../examples/ml/JavaVectorSlicerExample.java|   7 +-
 .../mllib/JavaAssociationRulesExample.java  |   6 +-
 .../JavaBinaryClassificationMetricsExample.java |  33 ++---
 .../mllib/JavaBisectingKMeansExample.java   |   7 +-
 .../mllib/JavaChiSqSelectorExample.java |  38 ++
 .../JavaDecisionTreeClassificationExample.java  |  26 +---
 .../JavaDecisionTreeRegressionExample.java  |  33 ++---
 .../mllib/JavaElementwiseProductExample.java|  27 +---
 .../mllib/JavaGaussianMixtureExample.java   |  19 +--
 ...vaGradientBoostingClassificationExample.java |  21 +--
 .../JavaGradientBoostingRegressionExample.java  |  30 +
 .../mllib/JavaIsotonicRegressionExample.java|  39 ++
 .../spark/examples/mllib/JavaKMeansExample.java |  19 +--
 .../spark/examples/mllib/JavaLBFGSExample.java  |  23 +---
 .../JavaLatentDirichletAllocationExample.java   |  28 ++--
 .../JavaLinearRegressionWithSGDExample.java |  47 +++
 .../JavaLogisticRegressionWithLBFGSExample.java |  14 +-
 ...aMulticlassClassificationMetricsExample.java |  13 +-
 .../examples/mllib/JavaNaiveBayesExample.java   |  19 +--
 .../JavaPowerIterationClusteringExample.java|   6 +-
 .../JavaRandomForestClassificationExample.java  |  23 +---
 .../JavaRandomForestRegressionExample.java  |  37 ++---
 .../mllib/JavaRankingMetricsExample.java| 135 ++-
 .../mllib/JavaRecommendationExample.java|  58 +++-
 .../mllib/JavaRegressionMetricsExample.java |  31 ++---
 .../examples/mllib/JavaSVMWithSGDExample.java   |  13 +-
 .../examples/mllib/JavaSimpleFPGrowth.java  |  12 +-
 .../mllib/JavaStreamingTestExample.java |  40 ++
 .../examples/sql/JavaSQLDataSourceExample.java  |   8 +-
 .../spark/examples/sql/JavaSparkSQLExample.java |  60 +++--
 .../examples/sql/hive/JavaSparkHiveExample.java |   9 +-
 .../streaming/JavaStructuredKafkaWordCount.java |  10 +-
 .../JavaStructuredNetworkWordCount.java |  11 +-
 .../JavaStructuredNetworkWordCountWindowed.java |  16 +--
 .../examples/streaming/JavaCustomReceiver.java  |  34 +
 .../streaming/JavaDirectKafkaWordCount.java |  31 +
 .../examples/streaming/JavaFlumeEventCount.java |   8 +-
 .../examples/streaming/JavaKafkaWordCount.java  |  33 +
 .../streaming/JavaNetworkWordCount.java |  25 +---
 .../examples/streaming/JavaQueueStream.java |  24 +---
 .../JavaRecoverableNetworkWordCount.java|  91 +
 .../streaming/JavaSqlNetworkWordCount.java  |  51 +++
 .../streaming/JavaStatefulNetworkWordCount.java |  30 +
 52 files changed, 380 insertions(+), 1018 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/de14d35f/examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java
--
diff --git a/examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java 
b/examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java
index 7775443..cf12de3 100644
--- a/examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java
+++ b/examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java
@@ -17,18 +17,16 @@
 
 package org.apache.spark.examples;
 
-import com.google.common.collect.Lists;
 import scala.Tuple2;
 import scala.Tuple3;
 
 import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.api.java.function.Function2;
-import

[3/3] spark git commit: [SPARK-19534][TESTS] Convert Java tests to use lambdas, Java 8 features

2017-02-19 Thread srowen
[SPARK-19534][TESTS] Convert Java tests to use lambdas, Java 8 features

## What changes were proposed in this pull request?

Convert tests to use Java 8 lambdas, and modest related fixes to surrounding 
code.
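
Two recurring patterns in the converted tests, both visible in the hunks below, are method references (e.g. `Tuple2::_1` in JavaAPISuite) and lambdas cast to the intended functional interface where an API is overloaded (e.g. `Dataset.filter` in JavaDatasetSuite). A small sketch of the second pattern (class name and data are illustrative):

```java
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;

public class TypedFilterSketch {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .master("local").appName("TypedFilterSketch").getOrCreate();

    Dataset<Long> ds = spark.range(10);
    // Dataset.filter is overloaded (Column, String expression, FilterFunction),
    // so the lambda still needs a cast to select the typed overload:
    Dataset<Long> filtered = ds.filter((FilterFunction<Long>) value -> value > 3);
    System.out.println(filtered.count());  // 6

    spark.stop();
  }
}
```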

## How was this patch tested?

Jenkins tests

Author: Sean Owen 

Closes #16964 from srowen/SPARK-19534.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1487c9af
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1487c9af
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1487c9af

Branch: refs/heads/master
Commit: 1487c9af20a333ead55955acf4c0aa323bea0d07
Parents: de14d35
Author: Sean Owen 
Authored: Sun Feb 19 09:42:50 2017 -0800
Committer: Sean Owen 
Committed: Sun Feb 19 09:42:50 2017 -0800

--
 .../apache/spark/network/TransportContext.java  |   6 +-
 .../spark/network/util/MapConfigProvider.java   |   8 +-
 .../network/ChunkFetchIntegrationSuite.java |  37 +-
 .../network/RequestTimeoutIntegrationSuite.java |   3 +-
 .../network/TransportClientFactorySuite.java|  51 +-
 .../network/TransportResponseHandlerSuite.java  |  14 +-
 .../network/crypto/AuthIntegrationSuite.java|  19 +-
 .../spark/network/sasl/SparkSaslSuite.java  |  65 +--
 .../util/TransportFrameDecoderSuite.java|  44 +-
 .../network/sasl/SaslIntegrationSuite.java  |  34 +-
 .../ExternalShuffleBlockHandlerSuite.java   |   2 +-
 .../shuffle/ExternalShuffleCleanupSuite.java|   6 +-
 .../ExternalShuffleIntegrationSuite.java|  13 +-
 .../shuffle/OneForOneBlockFetcherSuite.java |  78 ++-
 .../shuffle/RetryingBlockFetcherSuite.java  |  64 ++-
 .../unsafe/sort/UnsafeExternalSorter.java   |   1 -
 .../java/org/apache/spark/JavaJdbcRDDSuite.java |  26 +-
 .../shuffle/sort/UnsafeShuffleWriterSuite.java  |  65 +--
 .../map/AbstractBytesToBytesMapSuite.java   |  25 +-
 .../unsafe/sort/UnsafeExternalSorterSuite.java  |  25 +-
 .../test/org/apache/spark/Java8RDDAPISuite.java |   7 +-
 .../test/org/apache/spark/JavaAPISuite.java | 492 -
 .../kafka010/JavaConsumerStrategySuite.java |  24 +-
 .../SparkSubmitCommandBuilderSuite.java |   2 +-
 .../launcher/SparkSubmitOptionParserSuite.java  |   8 +-
 .../apache/spark/ml/feature/JavaPCASuite.java   |  35 +-
 .../classification/JavaNaiveBayesSuite.java |  10 +-
 .../clustering/JavaBisectingKMeansSuite.java|   4 +-
 .../spark/mllib/clustering/JavaLDASuite.java|  40 +-
 .../mllib/fpm/JavaAssociationRulesSuite.java|   6 +-
 .../regression/JavaLinearRegressionSuite.java   |  11 +-
 .../spark/mllib/tree/JavaDecisionTreeSuite.java |  15 +-
 .../SpecificParquetRecordReaderBase.java|   2 +-
 .../spark/sql/Java8DatasetAggregatorSuite.java  |  16 +-
 .../apache/spark/sql/JavaApplySchemaSuite.java  |  22 +-
 .../apache/spark/sql/JavaDataFrameSuite.java|  47 +-
 .../spark/sql/JavaDatasetAggregatorSuite.java   |  49 +-
 .../sql/JavaDatasetAggregatorSuiteBase.java |  14 +-
 .../org/apache/spark/sql/JavaDatasetSuite.java  | 147 ++
 .../test/org/apache/spark/sql/JavaUDFSuite.java |  37 +-
 .../spark/streaming/JavaMapWithStateSuite.java  |  81 +--
 .../spark/streaming/JavaReceiverAPISuite.java   |  24 +-
 .../spark/streaming/JavaWriteAheadLogSuite.java |  10 +-
 .../apache/spark/streaming/Java8APISuite.java   |  21 +-
 .../apache/spark/streaming/JavaAPISuite.java| 526 +--
 45 files changed, 662 insertions(+), 1574 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/1487c9af/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java
--
diff --git 
a/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java
 
b/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java
index 37ba543..965c4ae 100644
--- 
a/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java
+++ 
b/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java
@@ -17,9 +17,9 @@
 
 package org.apache.spark.network;
 
+import java.util.ArrayList;
 import java.util.List;
 
-import com.google.common.collect.Lists;
 import io.netty.channel.Channel;
 import io.netty.channel.socket.SocketChannel;
 import io.netty.handler.timeout.IdleStateHandler;
@@ -100,7 +100,7 @@ public class TransportContext {
   }
 
   public TransportClientFactory createClientFactory() {
-return createClientFactory(Lists.newArrayList());
+return createClientFactory(new ArrayList<>());
   }
 
   /** Create a server which will attempt to bind to a specific port. */
@@ -120,7 +120,7 @@ public class TransportContext {
   }
 
   public TransportServer createServer() {
-return createServer(0, Lists.newArr

[1/3] spark git commit: [SPARK-19534][TESTS] Convert Java tests to use lambdas, Java 8 features

2017-02-19 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master de14d35f7 -> 1487c9af2


http://git-wip-us.apache.org/repos/asf/spark/blob/1487c9af/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
--
diff --git 
a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java 
b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
index a94a37c..577672c 100644
--- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
+++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
@@ -96,12 +96,7 @@ public class JavaDatasetSuite implements Serializable {
   @Test
   public void testTypedFilterPreservingSchema() {
     Dataset<Long> ds = spark.range(10);
-    Dataset<Long> ds2 = ds.filter(new FilterFunction<Long>() {
-      @Override
-      public boolean call(Long value) throws Exception {
-        return value > 3;
-      }
-    });
+    Dataset<Long> ds2 = ds.filter((FilterFunction<Long>) value -> value > 3);
     Assert.assertEquals(ds.schema(), ds2.schema());
   }
 
@@ -111,44 +106,28 @@ public class JavaDatasetSuite implements Serializable {
     Dataset<String> ds = spark.createDataset(data, Encoders.STRING());
     Assert.assertEquals("hello", ds.first());
 
-    Dataset<String> filtered = ds.filter(new FilterFunction<String>() {
-      @Override
-      public boolean call(String v) throws Exception {
-        return v.startsWith("h");
-      }
-    });
+    Dataset<String> filtered = ds.filter((FilterFunction<String>) v -> v.startsWith("h"));
     Assert.assertEquals(Arrays.asList("hello"), filtered.collectAsList());
 
 
-    Dataset<Integer> mapped = ds.map(new MapFunction<String, Integer>() {
-      @Override
-      public Integer call(String v) throws Exception {
-        return v.length();
-      }
-    }, Encoders.INT());
+    Dataset<Integer> mapped = ds.map((MapFunction<String, Integer>) v -> v.length(), Encoders.INT());
     Assert.assertEquals(Arrays.asList(5, 5), mapped.collectAsList());
 
-    Dataset<String> parMapped = ds.mapPartitions(new MapPartitionsFunction<String, String>() {
-      @Override
-      public Iterator<String> call(Iterator<String> it) {
-        List<String> ls = new LinkedList<>();
-        while (it.hasNext()) {
-          ls.add(it.next().toUpperCase(Locale.ENGLISH));
-        }
-        return ls.iterator();
+    Dataset<String> parMapped = ds.mapPartitions((MapPartitionsFunction<String, String>) it -> {
+      List<String> ls = new LinkedList<>();
+      while (it.hasNext()) {
+        ls.add(it.next().toUpperCase(Locale.ENGLISH));
       }
+      return ls.iterator();
     }, Encoders.STRING());
     Assert.assertEquals(Arrays.asList("HELLO", "WORLD"), parMapped.collectAsList());
 
-    Dataset<String> flatMapped = ds.flatMap(new FlatMapFunction<String, String>() {
-      @Override
-      public Iterator<String> call(String s) {
-        List<String> ls = new LinkedList<>();
-        for (char c : s.toCharArray()) {
-          ls.add(String.valueOf(c));
-        }
-        return ls.iterator();
+    Dataset<String> flatMapped = ds.flatMap((FlatMapFunction<String, String>) s -> {
+      List<String> ls = new LinkedList<>();
+      for (char c : s.toCharArray()) {
+        ls.add(String.valueOf(c));
       }
+      return ls.iterator();
     }, Encoders.STRING());
     Assert.assertEquals(
       Arrays.asList("h", "e", "l", "l", "o", "w", "o", "r", "l", "d"),
@@ -157,16 +136,11 @@ public class JavaDatasetSuite implements Serializable {
 
   @Test
   public void testForeach() {
-    final LongAccumulator accum = jsc.sc().longAccumulator();
+    LongAccumulator accum = jsc.sc().longAccumulator();
     List<String> data = Arrays.asList("a", "b", "c");
     Dataset<String> ds = spark.createDataset(data, Encoders.STRING());
 
-    ds.foreach(new ForeachFunction<String>() {
-      @Override
-      public void call(String s) throws Exception {
-        accum.add(1);
-      }
-    });
+    ds.foreach((ForeachFunction<String>) s -> accum.add(1));
     Assert.assertEquals(3, accum.value().intValue());
   }
 
@@ -175,12 +149,7 @@ public class JavaDatasetSuite implements Serializable {
     List<Integer> data = Arrays.asList(1, 2, 3);
     Dataset<Integer> ds = spark.createDataset(data, Encoders.INT());
 
-    int reduced = ds.reduce(new ReduceFunction<Integer>() {
-      @Override
-      public Integer call(Integer v1, Integer v2) throws Exception {
-        return v1 + v2;
-      }
-    });
+    int reduced = ds.reduce((ReduceFunction<Integer>) (v1, v2) -> v1 + v2);
     Assert.assertEquals(6, reduced);
   }
 
@@ -189,52 +158,38 @@ public class JavaDatasetSuite implements Serializable {
     List<String> data = Arrays.asList("a", "foo", "bar");
     Dataset<String> ds = spark.createDataset(data, Encoders.STRING());
     KeyValueGroupedDataset<Integer, String> grouped = ds.groupByKey(
-      new MapFunction<String, Integer>() {
-        @Override
-        public Integer call(String v) throws Exception {
-          return v.length();
-        }
-      },
+      (MapFunction<String, Integer>) v -> v.length(),
       Encoders.INT());
 
-    Dataset<String> mapped = grouped.mapGroups(new MapGroupsFunction<Integer, String, String>() {
-      @Override
-      public String call(Integer key, Iterator<String> values) throws Exception {
-        StringBuilder sb = new StringBui

[2/3] spark git commit: [SPARK-19534][TESTS] Convert Java tests to use lambdas, Java 8 features

2017-02-19 Thread srowen
http://git-wip-us.apache.org/repos/asf/spark/blob/1487c9af/core/src/test/java/test/org/apache/spark/JavaAPISuite.java
--
diff --git a/core/src/test/java/test/org/apache/spark/JavaAPISuite.java 
b/core/src/test/java/test/org/apache/spark/JavaAPISuite.java
index 80aab10..5121491 100644
--- a/core/src/test/java/test/org/apache/spark/JavaAPISuite.java
+++ b/core/src/test/java/test/org/apache/spark/JavaAPISuite.java
@@ -31,7 +31,6 @@ import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import java.util.concurrent.*;
 
 import org.apache.spark.Accumulator;
@@ -208,7 +207,7 @@ public class JavaAPISuite implements Serializable {
 assertEquals(new Tuple2<>(3, 2), sortedPairs.get(2));
 
 // Custom comparator
-    sortedRDD = rdd.sortByKey(Collections.<Integer>reverseOrder(), false);
+    sortedRDD = rdd.sortByKey(Collections.reverseOrder(), false);
 assertEquals(new Tuple2<>(-1, 1), sortedRDD.first());
 sortedPairs = sortedRDD.collect();
 assertEquals(new Tuple2<>(0, 4), sortedPairs.get(1));
@@ -266,13 +265,7 @@ public class JavaAPISuite implements Serializable {
     JavaRDD<Tuple2<Integer, Integer>> rdd = sc.parallelize(pairs);
 
     // compare on first value
-    JavaRDD<Tuple2<Integer, Integer>> sortedRDD =
-        rdd.sortBy(new Function<Tuple2<Integer, Integer>, Integer>() {
-          @Override
-          public Integer call(Tuple2<Integer, Integer> t) {
-            return t._1();
-          }
-        }, true, 2);
+    JavaRDD<Tuple2<Integer, Integer>> sortedRDD = rdd.sortBy(Tuple2::_1, true, 2);
 
     assertEquals(new Tuple2<>(-1, 1), sortedRDD.first());
     List<Tuple2<Integer, Integer>> sortedPairs = sortedRDD.collect();
@@ -280,12 +273,7 @@ public class JavaAPISuite implements Serializable {
     assertEquals(new Tuple2<>(3, 2), sortedPairs.get(2));
 
     // compare on second value
-    sortedRDD = rdd.sortBy(new Function<Tuple2<Integer, Integer>, Integer>() {
-      @Override
-      public Integer call(Tuple2<Integer, Integer> t) {
-        return t._2();
-      }
-    }, true, 2);
+    sortedRDD = rdd.sortBy(Tuple2::_2, true, 2);
     assertEquals(new Tuple2<>(-1, 1), sortedRDD.first());
     sortedPairs = sortedRDD.collect();
     assertEquals(new Tuple2<>(3, 2), sortedPairs.get(1));
@@ -294,28 +282,20 @@ public class JavaAPISuite implements Serializable {
 
   @Test
   public void foreach() {
-    final LongAccumulator accum = sc.sc().longAccumulator();
+    LongAccumulator accum = sc.sc().longAccumulator();
     JavaRDD<String> rdd = sc.parallelize(Arrays.asList("Hello", "World"));
-    rdd.foreach(new VoidFunction<String>() {
-      @Override
-      public void call(String s) {
-        accum.add(1);
-      }
-    });
+    rdd.foreach(s -> accum.add(1));
     assertEquals(2, accum.value().intValue());
   }
 
   @Test
   public void foreachPartition() {
-    final LongAccumulator accum = sc.sc().longAccumulator();
+    LongAccumulator accum = sc.sc().longAccumulator();
     JavaRDD<String> rdd = sc.parallelize(Arrays.asList("Hello", "World"));
-    rdd.foreachPartition(new VoidFunction<Iterator<String>>() {
-      @Override
-      public void call(Iterator<String> iter) {
-        while (iter.hasNext()) {
-          iter.next();
-          accum.add(1);
-        }
+    rdd.foreachPartition(iter -> {
+      while (iter.hasNext()) {
+        iter.next();
+        accum.add(1);
       }
     });
     assertEquals(2, accum.value().intValue());
@@ -361,12 +341,7 @@ public class JavaAPISuite implements Serializable {
   @Test
   public void groupBy() {
     JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13));
-    Function<Integer, Boolean> isOdd = new Function<Integer, Boolean>() {
-      @Override
-      public Boolean call(Integer x) {
-        return x % 2 == 0;
-      }
-    };
+    Function<Integer, Boolean> isOdd = x -> x % 2 == 0;
     JavaPairRDD<Boolean, Iterable<Integer>> oddsAndEvens = rdd.groupBy(isOdd);
     assertEquals(2, oddsAndEvens.count());
     assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0)));  // Evens
@@ -383,12 +358,7 @@ public class JavaAPISuite implements Serializable {
     // Regression test for SPARK-4459
     JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13));
     Function<Tuple2<Integer, Integer>, Boolean> areOdd =
-      new Function<Tuple2<Integer, Integer>, Boolean>() {
-        @Override
-        public Boolean call(Tuple2<Integer, Integer> x) {
-          return (x._1() % 2 == 0) && (x._2() % 2 == 0);
-        }
-      };
+      x -> (x._1() % 2 == 0) && (x._2() % 2 == 0);
     JavaPairRDD<Integer, Integer> pairRDD = rdd.zip(rdd);
     JavaPairRDD<Boolean, Iterable<Tuple2<Integer, Integer>>> oddsAndEvens = pairRDD.groupBy(areOdd);
     assertEquals(2, oddsAndEvens.count());
@@ -406,13 +376,7 @@ public class JavaAPISuite implements Serializable {
   public void keyByOnPairRDD() {
     // Regression test for SPARK-4459
     JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13));
-    Function<Tuple2<Integer, Integer>, String> sumToString =
-      new Function<Tuple2<Integer, Integer>, String>() {
-        @Override
-        public String call(Tuple2<Integer, Integer> x) {
-          return String.valueOf(x._1() + x._2());
-        }
-      };
+    Function<Tuple2<Integer, Integer>, String> sumToString = x -> String.valueOf(x._1() + x._2());
     JavaPairRDD<Integer, Integer> pairRDD = rdd.zip(rdd);
     JavaPairRDD<String, Tuple2<Integer, Integer>> 

spark-website git commit: Update Java example to use Java 8; make Scala/Python pi example consistent with better Java version; minor syntax fixes to these

2017-02-20 Thread srowen
Repository: spark-website
Updated Branches:
  refs/heads/asf-site ae58782ba -> 879303593


Update Java example to use Java 8; make Scala/Python pi example consistent with 
better Java version; minor syntax fixes to these


Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/87930359
Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/87930359
Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/87930359

Branch: refs/heads/asf-site
Commit: 879303593efa229d416eb4178913c1c1a6f7033c
Parents: ae58782
Author: Sean Owen 
Authored: Sun Feb 19 08:28:48 2017 -0800
Committer: Sean Owen 
Committed: Sun Feb 19 08:28:48 2017 -0800

--
 examples.md| 57 +++--
 site/examples.html | 57 +++--
 2 files changed, 44 insertions(+), 70 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark-website/blob/87930359/examples.md
--
diff --git a/examples.md b/examples.md
index 7f13e41..4a87331 100644
--- a/examples.md
+++ b/examples.md
@@ -61,15 +61,9 @@ counts.saveAsTextFile("hdfs://...")
 
 {% highlight java %}
 JavaRDD<String> textFile = sc.textFile("hdfs://...");
-JavaRDD<String> words = textFile.flatMap(new FlatMapFunction<String, String>() {
-  public Iterator<String> call(String s) { return Arrays.asList(s.split(" ")).iterator(); }
-});
-JavaPairRDD<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() {
-  public Tuple2<String, Integer> call(String s) { return new Tuple2<>(s, 1); }
-});
-JavaPairRDD<String, Integer> counts = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {
-  public Integer call(Integer a, Integer b) { return a + b; }
-});
+JavaRDD<String> words = textFile.flatMap(s -> Arrays.asList(s.split(" ")).iterator())
+    .mapToPair(word -> new Tuple2<>(word, 1))
+    .reduceByKey((a, b) -> a + b);
 counts.saveAsTextFile("hdfs://...");
 {% endhighlight %}
 
@@ -89,12 +83,12 @@ counts.saveAsTextFile("hdfs://...");
 
 
 {% highlight python %}
-def sample(p):
-    x, y = random(), random()
-    return 1 if x*x + y*y < 1 else 0
+def inside(p):
+    x, y = random.random(), random.random()
+    return x*x + y*y < 1
 
-count = sc.parallelize(xrange(0, NUM_SAMPLES)).map(sample) \
-             .reduce(lambda a, b: a + b)
+count = sc.parallelize(xrange(0, NUM_SAMPLES)) \
+             .filter(inside).count()
 print "Pi is roughly %f" % (4.0 * count / NUM_SAMPLES)
 {% endhighlight %}
 
@@ -103,12 +97,12 @@ print "Pi is roughly %f" % (4.0 * count / NUM_SAMPLES)
 
 
 {% highlight scala %}
-val count = sc.parallelize(1 to NUM_SAMPLES).map{i =>
-  val x = Math.random()
-  val y = Math.random()
-  if (x*x + y*y < 1) 1 else 0
-}.reduce(_ + _)
-println("Pi is roughly " + 4.0 * count / NUM_SAMPLES)
+val count = sc.parallelize(1 to NUM_SAMPLES).filter { _ =>
+  val x = math.random
+  val y = math.random
+  x*x + y*y < 1
+}.count()
+println(s"Pi is roughly ${4.0 * count / NUM_SAMPLES}")
 {% endhighlight %}
 
 
@@ -116,17 +110,15 @@ println("Pi is roughly " + 4.0 * count / NUM_SAMPLES)
 
 
 {% highlight java %}
-List<Integer> l = new ArrayList<Integer>(NUM_SAMPLES);
+List<Integer> l = new ArrayList<>(NUM_SAMPLES);
 for (int i = 0; i < NUM_SAMPLES; i++) {
   l.add(i);
 }
 
-long count = sc.parallelize(l).filter(new Function<Integer, Boolean>() {
-  public Boolean call(Integer i) {
-    double x = Math.random();
-    double y = Math.random();
-    return x*x + y*y < 1;
-  }
+long count = sc.parallelize(l).filter(i -> {
+  double x = Math.random();
+  double y = Math.random();
+  return x*x + y*y < 1;
 }).count();
 System.out.println("Pi is roughly " + 4.0 * count / NUM_SAMPLES);
 {% endhighlight %}
@@ -194,14 +186,9 @@ errors.filter(col("line").like("%MySQL%")).collect()
 {% highlight java %}
 // Creates a DataFrame having a single column named "line"
 JavaRDD<String> textFile = sc.textFile("hdfs://...");
-JavaRDD<Row> rowRDD = textFile.map(
-  new Function<String, Row>() {
-    public Row call(String line) throws Exception {
-      return RowFactory.create(line);
-    }
-  });
-List<StructField> fields = new ArrayList<StructField>();
-fields.add(DataTypes.createStructField("line", DataTypes.StringType, true));
+JavaRDD<Row> rowRDD = textFile.map(RowFactory::create);
+List<StructField> fields = Arrays.asList(
+  DataTypes.createStructField("line", DataTypes.StringType, true));
 StructType schema = DataTypes.createStructType(fields);
 DataFrame df = sqlContext.createDataFrame(rowRDD, schema);
 

http://git-wip-us.apache.org/repos/asf/spark-website/blob/87930359/site/examples.html
--
diff --git a/site/examples.html b/site/examples.html
index bfff52d..05ec479 100644
--- a/site/examples.html
+++ b/site/examples.html
@@ -247,15 +247,9 @@ In this page, we will show examples using RDD API as well 
as examples using high
 
 
 JavaRDD 
tex

spark git commit: [SPARK-19646][CORE][STREAMING] binaryRecords replicates records in scala API

2017-02-20 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 776b8f17c -> d0ecca607


[SPARK-19646][CORE][STREAMING] binaryRecords replicates records in scala API

## What changes were proposed in this pull request?

Use `BytesWritable.copyBytes`, not `getBytes`, because `getBytes` returns the 
underlying array, which may be reused when repeated reads don't need a 
different size, as is the case with binaryRecords APIs
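
A minimal standalone sketch of the underlying Hadoop behaviour (not the patch itself): `getBytes` hands back the writable's reusable backing array, while `copyBytes` materialises just the record.

```java
import org.apache.hadoop.io.BytesWritable;

public class CopyBytesSketch {
  public static void main(String[] args) {
    BytesWritable writable = new BytesWritable();
    writable.set(new byte[]{1, 2, 3}, 0, 3);

    // getBytes() exposes the shared backing array; it can be longer than the
    // record and is overwritten when the writable is refilled for the next read.
    byte[] shared = writable.getBytes();

    // copyBytes() returns a fresh array of exactly getLength() bytes.
    byte[] copy = writable.copyBytes();

    writable.set(new byte[]{9, 9, 9}, 0, 3);  // simulate reading the next record
    System.out.println(shared[0] + " vs " + copy[0]);  // prints "9 vs 1"
  }
}
```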

## How was this patch tested?

Existing tests

Author: Sean Owen 

Closes #16974 from srowen/SPARK-19646.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d0ecca60
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d0ecca60
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d0ecca60

Branch: refs/heads/master
Commit: d0ecca6075d86bedebf8bc2278085a2cd6cb0a43
Parents: 776b8f1
Author: Sean Owen 
Authored: Mon Feb 20 09:02:09 2017 -0800
Committer: Sean Owen 
Committed: Mon Feb 20 09:02:09 2017 -0800

--
 .../scala/org/apache/spark/SparkContext.scala   |   5 +-
 .../test/scala/org/apache/spark/FileSuite.scala | 178 ---
 .../spark/streaming/StreamingContext.scala  |   5 +-
 .../spark/streaming/InputStreamsSuite.scala |  21 +--
 4 files changed, 53 insertions(+), 156 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/d0ecca60/core/src/main/scala/org/apache/spark/SparkContext.scala
--
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala 
b/core/src/main/scala/org/apache/spark/SparkContext.scala
index e4d8389..17194b9 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -961,12 +961,11 @@ class SparkContext(config: SparkConf) extends Logging {
   classOf[LongWritable],
   classOf[BytesWritable],
   conf = conf)
-val data = br.map { case (k, v) =>
-  val bytes = v.getBytes
+br.map { case (k, v) =>
+  val bytes = v.copyBytes()
   assert(bytes.length == recordLength, "Byte array does not have correct 
length")
   bytes
 }
-data
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/d0ecca60/core/src/test/scala/org/apache/spark/FileSuite.scala
--
diff --git a/core/src/test/scala/org/apache/spark/FileSuite.scala 
b/core/src/test/scala/org/apache/spark/FileSuite.scala
index 6538507..a2d3177 100644
--- a/core/src/test/scala/org/apache/spark/FileSuite.scala
+++ b/core/src/test/scala/org/apache/spark/FileSuite.scala
@@ -18,6 +18,7 @@
 package org.apache.spark
 
 import java.io._
+import java.nio.ByteBuffer
 import java.util.zip.GZIPOutputStream
 
 import scala.io.Source
@@ -30,7 +31,6 @@ import org.apache.hadoop.mapreduce.Job
 import org.apache.hadoop.mapreduce.lib.input.{FileSplit => NewFileSplit, 
TextInputFormat => NewTextInputFormat}
 import org.apache.hadoop.mapreduce.lib.output.{TextOutputFormat => 
NewTextOutputFormat}
 
-import org.apache.spark.input.PortableDataStream
 import org.apache.spark.internal.config.IGNORE_CORRUPT_FILES
 import org.apache.spark.rdd.{HadoopRDD, NewHadoopRDD}
 import org.apache.spark.storage.StorageLevel
@@ -237,24 +237,26 @@ class FileSuite extends SparkFunSuite with 
LocalSparkContext {
 assert(output.map(_.toString).collect().toList === List("(1,a)", "(2,aa)", 
"(3,aaa)"))
   }
 
-  test("binary file input as byte array") {
-sc = new SparkContext("local", "test")
+  private def writeBinaryData(testOutput: Array[Byte], testOutputCopies: Int): 
File = {
 val outFile = new File(tempDir, "record-bytestream-0.bin")
-val outFileName = outFile.getAbsolutePath()
-
-// create file
-val testOutput = Array[Byte](1, 2, 3, 4, 5, 6)
-val bbuf = java.nio.ByteBuffer.wrap(testOutput)
-// write data to file
-val file = new java.io.FileOutputStream(outFile)
+val file = new FileOutputStream(outFile)
 val channel = file.getChannel
-channel.write(bbuf)
+for (i <- 0 until testOutputCopies) {
+  // Shift values by i so that they're different in the output
+  val alteredOutput = testOutput.map(b => (b + i).toByte)
+  channel.write(ByteBuffer.wrap(alteredOutput))
+}
 channel.close()
 file.close()
+outFile
+  }
 
-val inRdd = sc.binaryFiles(outFileName)
-val (infile: String, indata: PortableDataStream) = inRdd.collect.head
-
+  test("binary file input as byte array") {
+sc = new SparkContext("local", "test")
+val testOutput = Array[Byte](1, 2, 3, 4, 5, 6)
+val outFile = writeBinaryData(testOutput, 1)
+val inRdd =

spark git commit: [SPARK-19646][CORE][STREAMING] binaryRecords replicates records in scala API

2017-02-20 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-2.1 b083ec511 -> 7c371dec1


[SPARK-19646][CORE][STREAMING] binaryRecords replicates records in scala API

## What changes were proposed in this pull request?

Use `BytesWritable.copyBytes`, not `getBytes`, because `getBytes` returns the 
underlying array, which may be reused when repeated reads don't need a 
different size, as is the case with binaryRecords APIs

## How was this patch tested?

Existing tests

Author: Sean Owen 

Closes #16974 from srowen/SPARK-19646.

(cherry picked from commit d0ecca6075d86bedebf8bc2278085a2cd6cb0a43)
Signed-off-by: Sean Owen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7c371dec
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7c371dec
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7c371dec

Branch: refs/heads/branch-2.1
Commit: 7c371dec1c406831cdea86c7309960e08ddf2c36
Parents: b083ec5
Author: Sean Owen 
Authored: Mon Feb 20 09:02:09 2017 -0800
Committer: Sean Owen 
Committed: Mon Feb 20 09:02:18 2017 -0800

--
 .../scala/org/apache/spark/SparkContext.scala   |   5 +-
 .../test/scala/org/apache/spark/FileSuite.scala | 178 ---
 .../spark/streaming/StreamingContext.scala  |   5 +-
 .../spark/streaming/InputStreamsSuite.scala |  21 +--
 4 files changed, 53 insertions(+), 156 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/7c371dec/core/src/main/scala/org/apache/spark/SparkContext.scala
--
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala 
b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 11ad442..2db48f6 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -944,12 +944,11 @@ class SparkContext(config: SparkConf) extends Logging {
   classOf[LongWritable],
   classOf[BytesWritable],
   conf = conf)
-val data = br.map { case (k, v) =>
-  val bytes = v.getBytes
+br.map { case (k, v) =>
+  val bytes = v.copyBytes()
   assert(bytes.length == recordLength, "Byte array does not have correct 
length")
   bytes
 }
-data
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/7c371dec/core/src/test/scala/org/apache/spark/FileSuite.scala
--
diff --git a/core/src/test/scala/org/apache/spark/FileSuite.scala 
b/core/src/test/scala/org/apache/spark/FileSuite.scala
index cc52bb1..0276575 100644
--- a/core/src/test/scala/org/apache/spark/FileSuite.scala
+++ b/core/src/test/scala/org/apache/spark/FileSuite.scala
@@ -18,6 +18,7 @@
 package org.apache.spark
 
 import java.io._
+import java.nio.ByteBuffer
 import java.util.zip.GZIPOutputStream
 
 import scala.io.Source
@@ -29,7 +30,6 @@ import org.apache.hadoop.mapreduce.Job
 import org.apache.hadoop.mapreduce.lib.input.{FileSplit => NewFileSplit, 
TextInputFormat => NewTextInputFormat}
 import org.apache.hadoop.mapreduce.lib.output.{TextOutputFormat => 
NewTextOutputFormat}
 
-import org.apache.spark.input.PortableDataStream
 import org.apache.spark.internal.config.IGNORE_CORRUPT_FILES
 import org.apache.spark.rdd.{HadoopRDD, NewHadoopRDD}
 import org.apache.spark.storage.StorageLevel
@@ -231,24 +231,26 @@ class FileSuite extends SparkFunSuite with 
LocalSparkContext {
 assert(output.map(_.toString).collect().toList === List("(1,a)", "(2,aa)", 
"(3,aaa)"))
   }
 
-  test("binary file input as byte array") {
-sc = new SparkContext("local", "test")
+  private def writeBinaryData(testOutput: Array[Byte], testOutputCopies: Int): 
File = {
 val outFile = new File(tempDir, "record-bytestream-0.bin")
-val outFileName = outFile.getAbsolutePath()
-
-// create file
-val testOutput = Array[Byte](1, 2, 3, 4, 5, 6)
-val bbuf = java.nio.ByteBuffer.wrap(testOutput)
-// write data to file
-val file = new java.io.FileOutputStream(outFile)
+val file = new FileOutputStream(outFile)
 val channel = file.getChannel
-channel.write(bbuf)
+for (i <- 0 until testOutputCopies) {
+  // Shift values by i so that they're different in the output
+  val alteredOutput = testOutput.map(b => (b + i).toByte)
+  channel.write(ByteBuffer.wrap(alteredOutput))
+}
 channel.close()
 file.close()
+outFile
+  }
 
-val inRdd = sc.binaryFiles(outFileName)
-val (infile: String, indata: PortableDataStream) = inRdd.collect.head
-
+  test("binary file input as byte array") {
+sc = new SparkContext("local", "test")
+val test

spark git commit: [SPARK-19646][CORE][STREAMING] binaryRecords replicates records in scala API

2017-02-20 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 5c3e56fd2 -> ddd432de2


[SPARK-19646][CORE][STREAMING] binaryRecords replicates records in scala API

Use `BytesWritable.copyBytes`, not `getBytes`, because `getBytes` returns the 
underlying array, which may be reused when repeated reads don't need a 
different size, as is the case with binaryRecords APIs

Existing tests

Author: Sean Owen 

Closes #16974 from srowen/SPARK-19646.

(cherry picked from commit d0ecca6075d86bedebf8bc2278085a2cd6cb0a43)
Signed-off-by: Sean Owen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ddd432de
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ddd432de
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ddd432de

Branch: refs/heads/branch-2.0
Commit: ddd432de2b5041138a458aae9f5117a3f41d835e
Parents: 5c3e56f
Author: Sean Owen 
Authored: Mon Feb 20 09:02:09 2017 -0800
Committer: Sean Owen 
Committed: Mon Feb 20 09:19:14 2017 -0800

--
 .../scala/org/apache/spark/SparkContext.scala   |   5 +-
 .../test/scala/org/apache/spark/FileSuite.scala | 182 ---
 .../spark/streaming/StreamingContext.scala  |   5 +-
 .../spark/streaming/InputStreamsSuite.scala |  21 ++-
 4 files changed, 55 insertions(+), 158 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ddd432de/core/src/main/scala/org/apache/spark/SparkContext.scala
--
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala 
b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 82e754b..2abe444 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -922,12 +922,11 @@ class SparkContext(config: SparkConf) extends Logging 
with ExecutorAllocationCli
   classOf[LongWritable],
   classOf[BytesWritable],
   conf = conf)
-val data = br.map { case (k, v) =>
-  val bytes = v.getBytes
+br.map { case (k, v) =>
+  val bytes = v.copyBytes()
   assert(bytes.length == recordLength, "Byte array does not have correct 
length")
   bytes
 }
-data
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/ddd432de/core/src/test/scala/org/apache/spark/FileSuite.scala
--
diff --git a/core/src/test/scala/org/apache/spark/FileSuite.scala 
b/core/src/test/scala/org/apache/spark/FileSuite.scala
index 993834f..98e9f8c 100644
--- a/core/src/test/scala/org/apache/spark/FileSuite.scala
+++ b/core/src/test/scala/org/apache/spark/FileSuite.scala
@@ -17,7 +17,8 @@
 
 package org.apache.spark
 
-import java.io.{File, FileWriter}
+import java.io._
+import java.nio.ByteBuffer
 
 import scala.io.Source
 
@@ -28,7 +29,6 @@ import org.apache.hadoop.mapreduce.Job
 import org.apache.hadoop.mapreduce.lib.input.{FileSplit => NewFileSplit, 
TextInputFormat => NewTextInputFormat}
 import org.apache.hadoop.mapreduce.lib.output.{TextOutputFormat => 
NewTextOutputFormat}
 
-import org.apache.spark.input.PortableDataStream
 import org.apache.spark.rdd.{HadoopRDD, NewHadoopRDD}
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.util.Utils
@@ -229,184 +229,82 @@ class FileSuite extends SparkFunSuite with 
LocalSparkContext {
 assert(output.map(_.toString).collect().toList === List("(1,a)", "(2,aa)", 
"(3,aaa)"))
   }
 
-  test("binary file input as byte array") {
-sc = new SparkContext("local", "test")
+  private def writeBinaryData(testOutput: Array[Byte], testOutputCopies: Int): 
File = {
 val outFile = new File(tempDir, "record-bytestream-0.bin")
-val outFileName = outFile.getAbsolutePath()
-
-// create file
-val testOutput = Array[Byte](1, 2, 3, 4, 5, 6)
-val bbuf = java.nio.ByteBuffer.wrap(testOutput)
-// write data to file
-val file = new java.io.FileOutputStream(outFile)
+val file = new FileOutputStream(outFile)
 val channel = file.getChannel
-channel.write(bbuf)
+for (i <- 0 until testOutputCopies) {
+  // Shift values by i so that they're different in the output
+  val alteredOutput = testOutput.map(b => (b + i).toByte)
+  channel.write(ByteBuffer.wrap(alteredOutput))
+}
 channel.close()
 file.close()
+outFile
+  }
 
-val inRdd = sc.binaryFiles(outFileName)
-val (infile: String, indata: PortableDataStream) = inRdd.collect.head
-
+  test("binary file input as byte array") {
+sc = new SparkContext("local", "test")
+val testOutput = Array[Byte](1, 2, 3, 4, 5, 6)
+val outFile = writeBinaryData(testOutput, 1

spark git commit: [SPARK-19646][BUILD][HOTFIX] Fix compile error from cherry-pick of SPARK-19646 into branch 2.1

2017-02-20 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-2.1 7c371dec1 -> c3316743e


[SPARK-19646][BUILD][HOTFIX] Fix compile error from cherry-pick of SPARK-19646 
into branch 2.1

## What changes were proposed in this pull request?

Fix compile error from cherry-pick of SPARK-19646 into branch 2.1

## How was this patch tested?

Jenkins tests

Author: Sean Owen 

Closes #17003 from srowen/SPARK-19646.2.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c3316743
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c3316743
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c3316743

Branch: refs/heads/branch-2.1
Commit: c3316743e676369ed8ce68fec5b28050a5a28d15
Parents: 7c371de
Author: Sean Owen 
Authored: Mon Feb 20 12:19:54 2017 -0800
Committer: Sean Owen 
Committed: Mon Feb 20 12:19:54 2017 -0800

--
 core/src/test/scala/org/apache/spark/FileSuite.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/c3316743/core/src/test/scala/org/apache/spark/FileSuite.scala
--
diff --git a/core/src/test/scala/org/apache/spark/FileSuite.scala 
b/core/src/test/scala/org/apache/spark/FileSuite.scala
index 0276575..467a16d 100644
--- a/core/src/test/scala/org/apache/spark/FileSuite.scala
+++ b/core/src/test/scala/org/apache/spark/FileSuite.scala
@@ -252,7 +252,7 @@ class FileSuite extends SparkFunSuite with 
LocalSparkContext {
 val inRdd = sc.binaryFiles(outFile.getAbsolutePath)
 val (infile, indata) = inRdd.collect().head
 // Make sure the name and array match
-assert(infile.contains(outFileName)) // a prefix may get added
+assert(infile.contains(outFile.getAbsolutePath)) // a prefix may get added
 assert(indata.toArray === testOutput)
   }
 


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-19508][CORE] Improve error message when binding service fails

2017-02-20 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 73f065569 -> 339419145


[SPARK-19508][CORE] Improve error message when binding service fails

## What changes were proposed in this pull request?

Utils provides a helper function to bind a service to a port. This function can 
bind the service to a random free port. However, if binding to a random free 
port fails, the retry attempts and the final exception message look confusing.

17/02/06 16:25:43 WARN Utils: Service 'sparkDriver' could not bind on port 
0. Attempting port 1.
17/02/06 16:25:43 WARN Utils: Service 'sparkDriver' could not bind on port 
0. Attempting port 1.
17/02/06 16:25:43 WARN Utils: Service 'sparkDriver' could not bind on port 
0. Attempting port 1.
17/02/06 16:25:43 WARN Utils: Service 'sparkDriver' could not bind on port 
0. Attempting port 1.
17/02/06 16:25:43 WARN Utils: Service 'sparkDriver' could not bind on port 
0. Attempting port 1.
...
17/02/06 16:25:43 ERROR SparkContext: Error initializing SparkContext.
java.net.BindException: Can't assign requested address: Service 
'sparkDriver' failed after 16 retries (starting from 0)! Consider explicitly 
setting the appropriate port for the service 'sparkDriver' (for example 
spark.ui.port for SparkUI) to an available port or increasing 
spark.port.maxRetries.
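
When the message does point at a real misconfiguration, the usual remedies are the settings it names. A small sketch (the address and retry count below are placeholder values, not recommendations from the patch):

```java
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class BindConfigSketch {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf()
        .setMaster("local")
        .setAppName("BindConfigSketch")
        // Bind the driver to an explicit address instead of whatever the
        // environment resolves to (relevant when binding a random free port fails).
        .set("spark.driver.bindAddress", "127.0.0.1")
        // Allow more attempts when a fixed port such as spark.ui.port is already taken.
        .set("spark.port.maxRetries", "32");

    JavaSparkContext jsc = new JavaSparkContext(conf);
    System.out.println("Started " + jsc.sc().applicationId());
    jsc.stop();
  }
}
```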

## How was this patch tested?

Jenkins tests.

Please review http://spark.apache.org/contributing.html before opening a pull 
request.

Author: Liang-Chi Hsieh 

Closes #16851 from viirya/better-log-message.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/33941914
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/33941914
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/33941914

Branch: refs/heads/master
Commit: 33941914548cc5a65e8467821745d65728176368
Parents: 73f0655
Author: Liang-Chi Hsieh 
Authored: Mon Feb 20 21:25:21 2017 -0800
Committer: Sean Owen 
Committed: Mon Feb 20 21:25:21 2017 -0800

--
 .../scala/org/apache/spark/util/Utils.scala | 27 +++-
 1 file changed, 21 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/33941914/core/src/main/scala/org/apache/spark/util/Utils.scala
--
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala 
b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 1e6e9a2..5538289 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -2210,17 +2210,32 @@ private[spark] object Utils extends Logging {
   } catch {
 case e: Exception if isBindCollision(e) =>
   if (offset >= maxRetries) {
-val exceptionMessage = s"${e.getMessage}: Service$serviceString 
failed after " +
-  s"$maxRetries retries (starting from $startPort)! Consider 
explicitly setting " +
-  s"the appropriate port for the service$serviceString (for 
example spark.ui.port " +
-  s"for SparkUI) to an available port or increasing 
spark.port.maxRetries."
+val exceptionMessage = if (startPort == 0) {
+  s"${e.getMessage}: Service$serviceString failed after " +
+s"$maxRetries retries (on a random free port)! " +
+s"Consider explicitly setting the appropriate binding address 
for " +
+s"the service$serviceString (for example 
spark.driver.bindAddress " +
+s"for SparkDriver) to the correct binding address."
+} else {
+  s"${e.getMessage}: Service$serviceString failed after " +
+s"$maxRetries retries (starting from $startPort)! Consider 
explicitly setting " +
+s"the appropriate port for the service$serviceString (for 
example spark.ui.port " +
+s"for SparkUI) to an available port or increasing 
spark.port.maxRetries."
+}
 val exception = new BindException(exceptionMessage)
 // restore original stack trace
 exception.setStackTrace(e.getStackTrace)
 throw exception
   }
-  logWarning(s"Service$serviceString could not bind on port $tryPort. 
" +
-s"Attempting port ${tryPort + 1}.")
+  if (startPort == 0) {
+// As startPort 0 is for a random free port, it is most possibly 
binding address is
+// not correct.
+logWarning(s"Service$serviceString could not bind on a random free 
port. " +
+  "You may check whether configuring an appropriate binding 
address.")
+  } else {
+logWarning(s"Service$serviceString could not bind on port 
$tryPort. " +
+  s"Attempting port ${tryPort + 1}.")
+

spark git commit: [SPARK-18922][TESTS] Fix new test failures on Windows due to path and resource not closed

2017-02-20 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 339419145 -> 17b93b5fe


[SPARK-18922][TESTS] Fix new test failures on Windows due to path and resource 
not closed

## What changes were proposed in this pull request?

This PR proposes to fix new test failures on Windows as below:

**Before**

```
KafkaRelationSuite:
 - test late binding start offsets *** FAILED *** (7 seconds, 679 milliseconds)
   Cause: java.nio.file.FileSystemException: 
C:\projects\spark\target\tmp\spark-4c4b0cd1-4cb7-4908-949d-1b0cc8addb50\topic-4-0\.log
 -> 
C:\projects\spark\target\tmp\spark-4c4b0cd1-4cb7-4908-949d-1b0cc8addb50\topic-4-0\.log.deleted:
 The process cannot access the file because it is being used by another process.

KafkaSourceSuite:
 - deserialization of initial offset with Spark 2.1.0 *** FAILED *** (3 
seconds, 542 milliseconds)
   java.io.IOException: Failed to delete: 
C:\projects\spark\target\tmp\spark-97ef64fc-ae61-4ce3-ac59-287fd38bd824

 - deserialization of initial offset written by Spark 2.1.0 *** FAILED *** (60 
milliseconds)
   java.nio.file.InvalidPathException: Illegal char <:> at index 2: 
/C:/projects/spark/external/kafka-0-10-sql/target/scala-2.11/test-classes/kafka-source-initial-offset-version-2.1.0.b

HiveDDLSuite:
 - partitioned table should always put partition columns at the end of table 
schema *** FAILED *** (657 milliseconds)
   org.apache.spark.sql.AnalysisException: Path does not exist: 
file:/C:projectsspark   arget   mpspark-f1b83d09-850a-4bba-8e43-a2a28dfaa757;

DDLSuite:
 - create a data source table without schema *** FAILED *** (94 milliseconds)
   org.apache.spark.sql.AnalysisException: Path does not exist: 
file:/C:projectsspark   arget   mpspark-a3f3c161-afae-4d6f-9182-e8642f77062b;

 - SET LOCATION for managed table *** FAILED *** (219 milliseconds)
   org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, 
tree:
 Exchange SinglePartit
 +- *HashAggregate(keys=[], functions=[partial_count(1)], output=[count#99367L])
+- *FileScan parquet default.tbl[] Batched: true, Format: Parquet, 
Location: InMemoryFileIndex[file:/C:projectssparkarget   
mpspark-15be2f2f-4ea9-4c47-bfee-1b7b49363033], PartitionFilters: [], 
PushedFilters: [], ReadSchema: struct<>

 - insert data to a data source table which has a not existed location should 
succeed *** FAILED *** (16 milliseconds)
   org.apache.spark.sql.AnalysisException: Path does not exist: 
file:/C:projectsspark   arget   mpspark-34987671-e8d1-4624-ba5b-db1012e1246b;

 - insert into a data source table with no existed partition location should 
succeed *** FAILED *** (16 milliseconds)
   org.apache.spark.sql.AnalysisException: Path does not exist: 
file:/C:projectsspark   arget   mpspark-4c6ccfbf-4091-4032-9fbc-3d40c58267d5;

 - read data from a data source table which has a not existed location should 
succeed *** FAILED *** (0 milliseconds)

 - read data from a data source table with no existed partition location should 
succeed *** FAILED *** (0 milliseconds)
   org.apache.spark.sql.AnalysisException: Path does not exist: 
file:/C:projectsspark   arget   mpspark-6af39e37-abd1-44e8-ac68-e2dfcf67a2f3;

InputOutputMetricsSuite:
 - output metrics on records written *** FAILED *** (0 milliseconds)
   java.lang.IllegalArgumentException: Wrong FS: 
file://C:\projects\spark\target\tmp\spark-cd69ee77-88f2-4202-bed6-19c0ee05ef55\InputOutputMetricsSuite,
 expected: file:///

 - output metrics on records written - new Hadoop API *** FAILED *** (16 
milliseconds)
   java.lang.IllegalArgumentException: Wrong FS: 
file://C:\projects\spark\target\tmp\spark-b69e8fcb-047b-4de8-9cdf-5f026efb6762\InputOutputMetricsSuite,
 expected: file:///
```

**After**

```
KafkaRelationSuite:
 - test late binding start offsets !!! CANCELED !!! (62 milliseconds)

KafkaSourceSuite:
 - deserialization of initial offset with Spark 2.1.0 (5 seconds, 341 
milliseconds)
 - deserialization of initial offset written by Spark 2.1.0 (910 milliseconds)

HiveDDLSuite:
 - partitioned table should always put partition columns at the end of table 
schema (2 seconds)

DDLSuite:
 - create a data source table without schema (828 milliseconds)
 - SET LOCATION for managed table (406 milliseconds)
 - insert data to a data source table which has a not existed location should 
succeed (406 milliseconds)
 - insert into a data source table with no existed partition location should 
succeed (453 milliseconds)
 - read data from a data source table which has a not existed location should 
succeed (94 milliseconds)
 - read data from a data source table with no existed partition location should 
succeed (265 milliseconds)

InputOutputMetricsSuite:
 - output metrics on records written (172 milliseconds)
 - output metrics on records written - new Hadoop API (297 milliseconds)
```
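
Many of the failures above are of the "file still open" kind that Windows turns into hard errors on delete or rename. A minimal, hedged illustration of that failure class and the usual fix pattern (paths and names are invented for the example, not taken from the patch):

```java
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;

public class CloseBeforeDeleteSketch {
  public static void main(String[] args) throws IOException {
    Path tmp = Files.createTempFile("spark-test", ".log");

    // try-with-resources closes the stream before the delete below; on Windows
    // an open handle makes the delete/rename fail with "used by another process".
    try (OutputStream out = Files.newOutputStream(tmp)) {
      out.write("some test output".getBytes(StandardCharsets.UTF_8));
    }

    Files.delete(tmp);  // succeeds because no handle is still open
  }
}
```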

## How was this patch tested?

Fixed tests in `InputOutputMetricsSuite`, `KafkaRelationSuite`,  
`KafkaSourceSuite`, `DDLSuite.scala` and `HiveDDLSu

spark-website git commit: Add instructions for running individual tests.

2017-02-22 Thread srowen
Repository: spark-website
Updated Branches:
  refs/heads/asf-site 879303593 -> ca64fac2e


Add instructions for running individual tests.

This is useful and I often forget how to do it.  I learned some
new tricks when @squito gave @jinxing64 some tips on how to do
this, so I thought it was worth adding this to the website.


Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/ca64fac2
Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/ca64fac2
Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/ca64fac2

Branch: refs/heads/asf-site
Commit: ca64fac2e24256dc3a07711e004c540b892965fe
Parents: 8793035
Author: Kay Ousterhout 
Authored: Sat Feb 11 18:38:46 2017 -0800
Committer: Sean Owen 
Committed: Wed Feb 22 05:48:18 2017 -0800

--
 developer-tools.md| 81 +-
 site/developer-tools.html | 72 -
 2 files changed, 151 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark-website/blob/ca64fac2/developer-tools.md
--
diff --git a/developer-tools.md b/developer-tools.md
index e8853b8..88f3f36 100644
--- a/developer-tools.md
+++ b/developer-tools.md
@@ -9,7 +9,9 @@ navigation:
 
 Useful Developer Tools
 
-Reducing Build Times
+Reducing Build Times
+
+SBT: Avoiding Re-Creating the Assembly JAR
 
 Spark's default build strategy is to assemble a jar including all of its 
dependencies. This can 
 be cumbersome when doing iterative development. When developing locally, it is 
possible to create 
@@ -32,6 +34,83 @@ $ ./bin/spark-shell
 $ build/sbt ~compile
 ```
 
+Maven: Speeding up Compilation with Zinc
+
+[Zinc](https://github.com/typesafehub/zinc) is a long-running server version 
of SBT's incremental
+compiler. When run locally as a background process, it speeds up builds of 
Scala-based projects
+like Spark. Developers who regularly recompile Spark with Maven will be the 
most interested in
+Zinc. The project site gives instructions for building and running `zinc`; OS 
X users can
+install it using `brew install zinc`.
+
+If using the `build/mvn` package `zinc` will automatically be downloaded and 
leveraged for all
+builds. This process will auto-start after the first time `build/mvn` is 
called and bind to port
+3030 unless the `ZINC_PORT` environment variable is set. The `zinc` process 
can subsequently be
+shut down at any time by running `build/zinc-/bin/zinc -shutdown` and 
will automatically
+restart whenever `build/mvn` is called.
+
+Running Individual Tests
+
+When developing locally, it's often convenient to run a single test or a few 
tests, rather than running the entire test suite.
+
+Testing with SBT
+
+The fastest way to run individual tests is to use the `sbt` console. It's 
fastest to keep a `sbt` console open, and use it to re-run tests as necessary.  
For example, to run all of the tests in a particular project, e.g., `core`:
+
+```
+$ build/sbt
+> project core
+> test
+```
+
+You can run a single test suite using the `testOnly` command.  For example, to 
run the DAGSchedulerSuite:
+
+```
+> testOnly org.apache.spark.scheduler.DAGSchedulerSuite
+```
+
+The `testOnly` command accepts wildcards; e.g., you can also run the 
`DAGSchedulerSuite` with:
+
+```
+> testOnly *DAGSchedulerSuite
+```
+
+Or you could run all of the tests in the scheduler package:
+
+```
+> testOnly org.apache.spark.scheduler.*
+```
+
+If you'd like to run just a single test in the `DAGSchedulerSuite`, e.g., a 
test that includes "SPARK-12345" in the name, you run the following command in 
the sbt console:
+
+```
+> testOnly *DAGSchedulerSuite -- -z "SPARK-12345"
+```
+
+If you'd prefer, you can run all of these commands on the command line (but 
this will be slower than running tests using an open console).  To do this, you 
need to surround `testOnly` and the following arguments in quotes:
+
+```
+$ build/sbt "core/testOnly *DAGSchedulerSuite -- -z SPARK-12345"
+```
+
+For more about how to run individual tests with sbt, see the [sbt 
documentation](http://www.scala-sbt.org/0.13/docs/Testing.html).
+
+
+Testing with Maven
+
+With Maven, you can use the `-DwildcardSuites` flag to run individual Scala 
tests:
+
+```
+build/mvn -Dtest=none -DwildcardSuites=org.apache.spark.scheduler.DAGSchedulerSuite test
+```
+
+You need `-Dtest=none` to avoid running the Java tests.  For more information 
about the ScalaTest Maven Plugin, refer to the [ScalaTest 
documentation](http://www.scalatest.org/user_guide/using_the_scalatest_maven_plugin).
+
+To run individual Java tests, you can use the `-Dtest` flag:
+
+```
+build/mvn test -DwildcardSuites=none -Dtest=org.apache.spark.streaming.JavaAPISuite test
+```
+
 Checking Out Pull Re

spark-website git commit: Fix last update to Java pi example

2017-02-22 Thread srowen
Repository: spark-website
Updated Branches:
  refs/heads/asf-site ca64fac2e -> 470b7ed51


Fix last update to Java pi example


Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/470b7ed5
Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/470b7ed5
Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/470b7ed5

Branch: refs/heads/asf-site
Commit: 470b7ed51a112cdefd2ec6ee3a342b3956e05189
Parents: ca64fac
Author: Sean Owen 
Authored: Wed Feb 22 05:10:01 2017 -0800
Committer: Sean Owen 
Committed: Wed Feb 22 05:52:43 2017 -0800

--
 examples.md| 7 ---
 site/examples.html | 7 ---
 2 files changed, 8 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark-website/blob/470b7ed5/examples.md
--
diff --git a/examples.md b/examples.md
index 4a87331..2d1dbaa 100644
--- a/examples.md
+++ b/examples.md
@@ -61,9 +61,10 @@ counts.saveAsTextFile("hdfs://...")
 
 {% highlight java %}
 JavaRDD<String> textFile = sc.textFile("hdfs://...");
-JavaRDD<String> words = textFile.flatMap(s -> Arrays.asList(s.split(" ")).iterator())
-    .mapToPair(word -> new Tuple2<>(word, 1))
-    .reduceByKey((a, b) -> a + b);
+JavaPairRDD<String, Integer> counts = textFile
+    .flatMap(s -> Arrays.asList(s.split(" ")).iterator())
+    .mapToPair(word -> new Tuple2<>(word, 1))
+    .reduceByKey((a, b) -> a + b);
 counts.saveAsTextFile("hdfs://...");
 {% endhighlight %}
 

http://git-wip-us.apache.org/repos/asf/spark-website/blob/470b7ed5/site/examples.html
--
diff --git a/site/examples.html b/site/examples.html
index 05ec479..a19e263 100644
--- a/site/examples.html
+++ b/site/examples.html
@@ -247,9 +247,10 @@ In this page, we will show examples using RDD API as well as examples using high


 JavaRDD<String> textFile = sc.textFile("hdfs://...");
-JavaRDD<String> words = textFile.flatMap(s -> Arrays.asList(s.split(" ")).iterator())
-    .mapToPair(word -> new Tuple2<>(word, 1))
-    .reduceByKey((a, b) -> a + b);
+JavaPairRDD<String, Integer> counts = textFile
+    .flatMap(s -> Arrays.asList(s.split(" ")).iterator())
+    .mapToPair(word -> new Tuple2<>(word, 1))
+    .reduceByKey((a, b) -> a + b);
 counts.saveAsTextFile("hdfs://...");
 
 


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [MINOR][DOCS] Fix few typos in structured streaming doc

2017-02-24 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master fa7c582e9 -> 1b9ba258e


[MINOR][DOCS] Fix few typos in structured streaming doc

## What changes were proposed in this pull request?

Minor typo: `even-time` is changed to `event-time`, plus a couple of 
grammatical fixes.

## How was this patch tested?

N/A - since this is a doc fix. I did a jekyll build locally though.

Author: Ramkumar Venkataraman 

Closes #17037 from ramkumarvenkat/doc-fix.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1b9ba258
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1b9ba258
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1b9ba258

Branch: refs/heads/master
Commit: 1b9ba258e086e2ba89a4f35a54106e2f8a38b525
Parents: fa7c582
Author: Ramkumar Venkataraman 
Authored: Sat Feb 25 02:18:22 2017 +
Committer: Sean Owen 
Committed: Sat Feb 25 02:18:22 2017 +

--
 docs/structured-streaming-programming-guide.md | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/1b9ba258/docs/structured-streaming-programming-guide.md
--
diff --git a/docs/structured-streaming-programming-guide.md 
b/docs/structured-streaming-programming-guide.md
index ad3b2fb..6af47b6 100644
--- a/docs/structured-streaming-programming-guide.md
+++ b/docs/structured-streaming-programming-guide.md
@@ -392,7 +392,7 @@ data, thus relieving the users from reasoning about it. As 
an example, let’s
 see how this model handles event-time based processing and late arriving data.
 
 ## Handling Event-time and Late Data
-Event-time is the time embedded in the data itself. For many applications, you 
may want to operate on this event-time. For example, if you want to get the 
number of events generated by IoT devices every minute, then you probably want 
to use the time when the data was generated (that is, event-time in the data), 
rather than the time Spark receives them. This event-time is very naturally 
expressed in this model -- each event from the devices is a row in the table, 
and event-time is a column value in the row. This allows window-based 
aggregations (e.g. number of events every minute) to be just a special type of 
grouping and aggregation on the even-time column -- each time window is a group 
and each row can belong to multiple windows/groups. Therefore, such 
event-time-window-based aggregation queries can be defined consistently on both 
a static dataset (e.g. from collected device events logs) as well as on a data 
stream, making the life of the user much easier.
+Event-time is the time embedded in the data itself. For many applications, you 
may want to operate on this event-time. For example, if you want to get the 
number of events generated by IoT devices every minute, then you probably want 
to use the time when the data was generated (that is, event-time in the data), 
rather than the time Spark receives them. This event-time is very naturally 
expressed in this model -- each event from the devices is a row in the table, 
and event-time is a column value in the row. This allows window-based 
aggregations (e.g. number of events every minute) to be just a special type of 
grouping and aggregation on the event-time column -- each time window is a 
group and each row can belong to multiple windows/groups. Therefore, such 
event-time-window-based aggregation queries can be defined consistently on both 
a static dataset (e.g. from collected device events logs) as well as on a data 
stream, making the life of the user much easier.
 
 Furthermore, this model naturally handles data that has arrived later than 
 expected based on its event-time. Since Spark is updating the Result Table, 
@@ -401,7 +401,7 @@ as well as cleaning up old aggregates to limit the size of 
intermediate
 state data. Since Spark 2.1, we have support for watermarking which 
 allows the user to specify the threshold of late data, and allows the engine
 to accordingly clean up old state. These are explained later in more 
-details in the [Window Operations](#window-operations-on-event-time) section.
+detail in the [Window Operations](#window-operations-on-event-time) section.
 
 ## Fault Tolerance Semantics
 Delivering end-to-end exactly-once semantics was one of key goals behind the 
design of Structured Streaming. To achieve that, we have designed the 
Structured Streaming sources, the sinks and the execution engine to reliably 
track the exact progress of the processing so that it can handle any kind of 
failure by restarting and/or reprocessing. Every streaming source is assumed to 
have offsets (similar to Kafka offsets, or Kinesis sequence numbers)
@@ -647,7 +647,7 @@ df.groupBy("deviceType").count()
 
 
 #
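
The corrected paragraph above describes event-time windows as just another grouping key. As a minimal illustrative sketch, not part of this commit, and assuming only the built-in `rate` test source and the public `window`/`withWatermark` APIs:

```
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.window

val spark = SparkSession.builder.appName("EventTimeWindowSketch").getOrCreate()
import spark.implicits._

// The "rate" source emits (timestamp, value) rows; any source with an
// event-time column would do.
val events = spark.readStream
  .format("rate")
  .option("rowsPerSecond", "10")
  .load()

// Each 1-minute event-time window is just a group; a row that arrives late
// still falls into the window its timestamp belongs to.
val countsPerWindow = events
  .withWatermark("timestamp", "10 minutes")   // tolerate up to 10 minutes of lateness
  .groupBy(window($"timestamp", "1 minute"))
  .count()

val query = countsPerWindow.writeStream
  .outputMode("append")
  .format("console")
  .start()
```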

spark git commit: [SPARK-19673][SQL] "ThriftServer default app name is changed wrong"

2017-02-25 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 061bcfb86 -> fe07de956


[SPARK-19673][SQL] "ThriftServer default app name is changed wrong"

## What changes were proposed in this pull request?
In Spark 1.x, the name of the ThriftServer was SparkSQL::localHostName. The 
ThriftServer default name has since been changed to the class name of 
HiveThriftServer2, which is not appropriate.

## How was this patch tested?
manual tests

Please review http://spark.apache.org/contributing.html before opening a pull 
request.

Author: lvdongr 

Closes #17010 from lvdongr/ThriftserverName.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fe07de95
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fe07de95
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fe07de95

Branch: refs/heads/master
Commit: fe07de9566b345c7ad6a985bf1fdf1062351f6cf
Parents: 061bcfb
Author: lvdongr 
Authored: Sat Feb 25 21:47:02 2017 +
Committer: Sean Owen 
Committed: Sat Feb 25 21:47:02 2017 +

--
 .../scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala  | 1 +
 1 file changed, 1 insertion(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/fe07de95/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala
--
diff --git 
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala
 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala
index 78a3094..c0b2994 100644
--- 
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala
+++ 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala
@@ -40,6 +40,7 @@ private[hive] object SparkSQLEnv extends Logging {
   val maybeAppName = sparkConf
 .getOption("spark.app.name")
 .filterNot(_ == classOf[SparkSQLCLIDriver].getName)
+.filterNot(_ == classOf[HiveThriftServer2].getName)
 
   sparkConf
 
.setAppName(maybeAppName.getOrElse(s"SparkSQL::${Utils.localHostName()}"))
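
For context, the fix above extends the app-name fallback so that auto-derived entry-point class names are ignored in favour of a host-based default. A self-contained sketch of the same pattern (the helper and its call below are hypothetical scaffolding, not the actual SparkSQLEnv code):

```
// Ignore app names that are just entry-point class names and fall back to a
// host-based default, mirroring the Spark 1.x behaviour.
def chooseAppName(configured: Option[String], ignoredNames: Set[String], hostName: String): String =
  configured
    .filterNot(ignoredNames.contains)
    .getOrElse(s"SparkSQL::$hostName")

val name = chooseAppName(
  configured = Some("org.apache.spark.sql.hive.thriftserver.HiveThriftServer2"),
  ignoredNames = Set(
    "org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver",
    "org.apache.spark.sql.hive.thriftserver.HiveThriftServer2"),
  hostName = "localhost")
// name == "SparkSQL::localhost"
```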


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-15288][MESOS] Mesos dispatcher should handle gracefully when any thread gets UncaughtException

2017-02-25 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master fe07de956 -> 410392ed7


[SPARK-15288][MESOS] Mesos dispatcher should handle gracefully when any thread 
gets UncaughtException

## What changes were proposed in this pull request?

Adding the default UncaughtExceptionHandler to the MesosClusterDispatcher.
## How was this patch tested?

I verified it manually: when any of the dispatcher threads throws an uncaught 
exception, the default UncaughtExceptionHandler handles it.

Author: Devaraj K 

Closes #13072 from devaraj-kavali/SPARK-15288.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/410392ed
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/410392ed
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/410392ed

Branch: refs/heads/master
Commit: 410392ed75da64c6980fad5b450b352ee8377cb8
Parents: fe07de9
Author: Devaraj K 
Authored: Sat Feb 25 21:48:41 2017 +
Committer: Sean Owen 
Committed: Sat Feb 25 21:48:41 2017 +

--
 .../org/apache/spark/deploy/mesos/MesosClusterDispatcher.scala| 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/410392ed/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/MesosClusterDispatcher.scala
--
diff --git 
a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/MesosClusterDispatcher.scala
 
b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/MesosClusterDispatcher.scala
index 792ade8..38b082a 100644
--- 
a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/MesosClusterDispatcher.scala
+++ 
b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/MesosClusterDispatcher.scala
@@ -25,7 +25,7 @@ import org.apache.spark.deploy.mesos.ui.MesosClusterUI
 import org.apache.spark.deploy.rest.mesos.MesosRestServer
 import org.apache.spark.internal.Logging
 import org.apache.spark.scheduler.cluster.mesos._
-import org.apache.spark.util.{CommandLineUtils, ShutdownHookManager, Utils}
+import org.apache.spark.util.{CommandLineUtils, ShutdownHookManager, 
SparkUncaughtExceptionHandler, Utils}
 
 /*
  * A dispatcher that is responsible for managing and launching drivers, and is 
intended to be
@@ -97,6 +97,7 @@ private[mesos] object MesosClusterDispatcher
   with CommandLineUtils {
 
   override def main(args: Array[String]) {
+Thread.setDefaultUncaughtExceptionHandler(SparkUncaughtExceptionHandler)
 Utils.initDaemon(log)
 val conf = new SparkConf
 val dispatcherArgs = new MesosClusterDispatcherArguments(args, conf)
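
For context, the hook used above is the standard JDK `Thread.setDefaultUncaughtExceptionHandler`. A minimal self-contained sketch of the mechanism (plain JDK handler, not Spark's `SparkUncaughtExceptionHandler`):

```
object UncaughtHandlerSketch {
  def main(args: Array[String]): Unit = {
    // Any thread that dies with an uncaught exception ends up here instead of
    // disappearing silently, so the process can log it and shut down cleanly.
    Thread.setDefaultUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler {
      override def uncaughtException(t: Thread, e: Throwable): Unit = {
        System.err.println(s"Uncaught exception in thread ${t.getName}: ${e.getMessage}")
      }
    })

    val worker = new Thread(new Runnable {
      override def run(): Unit = throw new IllegalStateException("boom")
    }, "worker-1")
    worker.start()
    worker.join()
  }
}
```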


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [MINOR][BUILD] Fix lint-java breaks in Java

2017-02-27 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 9f8e39215 -> 4ba9c6c45


[MINOR][BUILD] Fix lint-java breaks in Java

## What changes were proposed in this pull request?

This PR proposes to fix the lint-breaks as below:

```
[ERROR] 
src/test/java/org/apache/spark/network/TransportResponseHandlerSuite.java:[29,8]
 (imports) UnusedImports: Unused import - 
org.apache.spark.network.buffer.ManagedBuffer.
[ERROR] src/main/java/org/apache/spark/unsafe/types/UTF8String.java:[156,10] 
(modifier) ModifierOrder: 'Nonnull' annotation modifier does not precede 
non-annotation modifiers.
[ERROR] src/main/java/org/apache/spark/SparkFirehoseListener.java:[122] (sizes) 
LineLength: Line is longer than 100 characters (found 105).
[ERROR] 
src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java:[164,78]
 (coding) OneStatementPerLine: Only one statement per line allowed.
[ERROR] src/test/java/test/org/apache/spark/JavaAPISuite.java:[1157] (sizes) 
LineLength: Line is longer than 100 characters (found 121).
[ERROR] 
src/test/java/org/apache/spark/streaming/JavaMapWithStateSuite.java:[149] 
(sizes) LineLength: Line is longer than 100 characters (found 113).
[ERROR] src/test/java/test/org/apache/spark/streaming/Java8APISuite.java:[146] 
(sizes) LineLength: Line is longer than 100 characters (found 122).
[ERROR] src/test/java/test/org/apache/spark/streaming/JavaAPISuite.java:[32,8] 
(imports) UnusedImports: Unused import - org.apache.spark.streaming.Time.
[ERROR] src/test/java/test/org/apache/spark/streaming/JavaAPISuite.java:[611] 
(sizes) LineLength: Line is longer than 100 characters (found 101).
[ERROR] src/test/java/test/org/apache/spark/streaming/JavaAPISuite.java:[1317] 
(sizes) LineLength: Line is longer than 100 characters (found 102).
[ERROR] 
src/test/java/test/org/apache/spark/sql/JavaDatasetAggregatorSuite.java:[91] 
(sizes) LineLength: Line is longer than 100 characters (found 102).
[ERROR] src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java:[113] 
(sizes) LineLength: Line is longer than 100 characters (found 101).
[ERROR] src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java:[164] 
(sizes) LineLength: Line is longer than 100 characters (found 110).
[ERROR] src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java:[212] 
(sizes) LineLength: Line is longer than 100 characters (found 114).
[ERROR] 
src/test/java/org/apache/spark/mllib/tree/JavaDecisionTreeSuite.java:[36] 
(sizes) LineLength: Line is longer than 100 characters (found 101).
[ERROR] 
src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java:[26,8]
 (imports) UnusedImports: Unused import - com.amazonaws.regions.RegionUtils.
[ERROR] 
src/test/java/org/apache/spark/streaming/kinesis/JavaKinesisStreamSuite.java:[20,8]
 (imports) UnusedImports: Unused import - com.amazonaws.regions.RegionUtils.
[ERROR] 
src/test/java/org/apache/spark/streaming/kinesis/JavaKinesisStreamSuite.java:[94]
 (sizes) LineLength: Line is longer than 100 characters (found 103).
[ERROR] 
src/main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java:[30,8] 
(imports) UnusedImports: Unused import - org.apache.spark.sql.api.java.UDF1.
[ERROR] 
src/main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java:[72] 
(sizes) LineLength: Line is longer than 100 characters (found 104).
[ERROR] 
src/main/java/org/apache/spark/examples/mllib/JavaRankingMetricsExample.java:[121]
 (sizes) LineLength: Line is longer than 100 characters (found 101).
[ERROR] 
src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java:[28,8]
 (imports) UnusedImports: Unused import - org.apache.spark.api.java.JavaRDD.
[ERROR] 
src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java:[29,8]
 (imports) UnusedImports: Unused import - 
org.apache.spark.api.java.JavaSparkContext.
```

## How was this patch tested?

Manually via

```bash
./dev/lint-java
```

Author: hyukjinkwon 

Closes #17072 from HyukjinKwon/java-lint.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4ba9c6c4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4ba9c6c4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4ba9c6c4

Branch: refs/heads/master
Commit: 4ba9c6c453606f5e5a1e324d5f933d2c9307a604
Parents: 9f8e392
Author: hyukjinkwon 
Authored: Mon Feb 27 08:44:26 2017 +
Committer: Sean Owen 
Committed: Mon Feb 27 08:44:26 2017 +

--
 .../network/TransportResponseHandlerSuite.java  |   1 -
 .../apache/spark/unsafe/types/UTF8String.java   |   3 +-
 .../org/apache/spark/SparkFirehoseListener.java | 225 ++-
 .../unsafe/sort/UnsafeExternalSorter.java   |   4 +-
 .../test/org/apache/spark/JavaAPISuite.java | 109 -
 .../spark/examples/ml/JavaTokenizerExample.java |   4 +-
 .../mllib/JavaRankingMetricsExample.java|   4 +-
 .../streaming/JavaKi

[1/2] spark git commit: [SPARK-19660][CORE][SQL] Replace the configuration property names that are deprecated in the version of Hadoop 2.6

2017-02-28 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master a350bc16d -> 9b8eca65d


http://git-wip-us.apache.org/repos/asf/spark/blob/9b8eca65/sql/hive/src/test/resources/golden/merge2-3-10266e3d5dd4c841c0d65030b1edba7c
--
diff --git 
a/sql/hive/src/test/resources/golden/merge2-3-10266e3d5dd4c841c0d65030b1edba7c 
b/sql/hive/src/test/resources/golden/merge2-3-10266e3d5dd4c841c0d65030b1edba7c
new file mode 100644
index 000..573541a
--- /dev/null
+++ 
b/sql/hive/src/test/resources/golden/merge2-3-10266e3d5dd4c841c0d65030b1edba7c
@@ -0,0 +1 @@
+0

http://git-wip-us.apache.org/repos/asf/spark/blob/9b8eca65/sql/hive/src/test/resources/golden/merge2-3-6e53a3ac93113f20db3a12f1dcf30e86
--
diff --git 
a/sql/hive/src/test/resources/golden/merge2-3-6e53a3ac93113f20db3a12f1dcf30e86 
b/sql/hive/src/test/resources/golden/merge2-3-6e53a3ac93113f20db3a12f1dcf30e86
deleted file mode 100644
index 573541a..000
--- 
a/sql/hive/src/test/resources/golden/merge2-3-6e53a3ac93113f20db3a12f1dcf30e86
+++ /dev/null
@@ -1 +0,0 @@
-0

http://git-wip-us.apache.org/repos/asf/spark/blob/9b8eca65/sql/hive/src/test/resources/golden/merge2-4-84967075baa3e56fff2a23f8ab9ba076
--
diff --git 
a/sql/hive/src/test/resources/golden/merge2-4-84967075baa3e56fff2a23f8ab9ba076 
b/sql/hive/src/test/resources/golden/merge2-4-84967075baa3e56fff2a23f8ab9ba076
deleted file mode 100644
index 573541a..000
--- 
a/sql/hive/src/test/resources/golden/merge2-4-84967075baa3e56fff2a23f8ab9ba076
+++ /dev/null
@@ -1 +0,0 @@
-0

http://git-wip-us.apache.org/repos/asf/spark/blob/9b8eca65/sql/hive/src/test/resources/golden/merge2-4-9cbd6d400fb6c3cd09010e3dbd76601
--
diff --git 
a/sql/hive/src/test/resources/golden/merge2-4-9cbd6d400fb6c3cd09010e3dbd76601 
b/sql/hive/src/test/resources/golden/merge2-4-9cbd6d400fb6c3cd09010e3dbd76601
new file mode 100644
index 000..573541a
--- /dev/null
+++ 
b/sql/hive/src/test/resources/golden/merge2-4-9cbd6d400fb6c3cd09010e3dbd76601
@@ -0,0 +1 @@
+0

http://git-wip-us.apache.org/repos/asf/spark/blob/9b8eca65/sql/hive/src/test/resources/golden/merge2-5-1ba2d6f3bb3348da3fee7fab4f283f34
--
diff --git 
a/sql/hive/src/test/resources/golden/merge2-5-1ba2d6f3bb3348da3fee7fab4f283f34 
b/sql/hive/src/test/resources/golden/merge2-5-1ba2d6f3bb3348da3fee7fab4f283f34
new file mode 100644
index 000..573541a
--- /dev/null
+++ 
b/sql/hive/src/test/resources/golden/merge2-5-1ba2d6f3bb3348da3fee7fab4f283f34
@@ -0,0 +1 @@
+0

http://git-wip-us.apache.org/repos/asf/spark/blob/9b8eca65/sql/hive/src/test/resources/golden/merge2-5-2ee5d706fe3a3bcc38b795f6e94970ea
--
diff --git 
a/sql/hive/src/test/resources/golden/merge2-5-2ee5d706fe3a3bcc38b795f6e94970ea 
b/sql/hive/src/test/resources/golden/merge2-5-2ee5d706fe3a3bcc38b795f6e94970ea
deleted file mode 100644
index 573541a..000
--- 
a/sql/hive/src/test/resources/golden/merge2-5-2ee5d706fe3a3bcc38b795f6e94970ea
+++ /dev/null
@@ -1 +0,0 @@
-0

http://git-wip-us.apache.org/repos/asf/spark/blob/9b8eca65/sql/hive/src/test/resources/golden/parallel-0-23a4feaede17467a8cc26e4d86ec30f9
--
diff --git 
a/sql/hive/src/test/resources/golden/parallel-0-23a4feaede17467a8cc26e4d86ec30f9
 
b/sql/hive/src/test/resources/golden/parallel-0-23a4feaede17467a8cc26e4d86ec30f9
deleted file mode 100644
index 573541a..000
--- 
a/sql/hive/src/test/resources/golden/parallel-0-23a4feaede17467a8cc26e4d86ec30f9
+++ /dev/null
@@ -1 +0,0 @@
-0

http://git-wip-us.apache.org/repos/asf/spark/blob/9b8eca65/sql/hive/src/test/resources/golden/parallel-0-6dc30e2de057022e63bd2a645fbec4c2
--
diff --git 
a/sql/hive/src/test/resources/golden/parallel-0-6dc30e2de057022e63bd2a645fbec4c2
 
b/sql/hive/src/test/resources/golden/parallel-0-6dc30e2de057022e63bd2a645fbec4c2
new file mode 100644
index 000..573541a
--- /dev/null
+++ 
b/sql/hive/src/test/resources/golden/parallel-0-6dc30e2de057022e63bd2a645fbec4c2
@@ -0,0 +1 @@
+0

http://git-wip-us.apache.org/repos/asf/spark/blob/9b8eca65/sql/hive/src/test/resources/golden/rcfile_lazydecompress-11-25715870c569b0f8c3d483e3a38b3199
--
diff --git 
a/sql/hive/src/test/resources/golden/rcfile_lazydecompress-11-25715870c569b0f8c3d483e3a38b3199
 
b/sql/hive/src/test/resources/golden/rcfile_lazydecompress-11-25715870c569b0f8c3d483e3a38b3199
new file mode 100644
index 000..573541a
--- /dev/null
+++ 
b/sql/hive/src/test/resources/golden/rcfile_lazydecompress-11-25715870c569b0f8c3d483e3a38b3199

[2/2] spark git commit: [SPARK-19660][CORE][SQL] Replace the configuration property names that are deprecated in the version of Hadoop 2.6

2017-02-28 Thread srowen
[SPARK-19660][CORE][SQL] Replace the configuration property names that are 
deprecated in the version of Hadoop 2.6

## What changes were proposed in this pull request?

Replace all deprecated Hadoop configuration property names according to 
[DeprecatedProperties](https://hadoop.apache.org/docs/r2.6.0/hadoop-project-dist/hadoop-common/DeprecatedProperties.html).

except:
https://github.com/apache/spark/blob/v2.1.0/python/pyspark/sql/tests.py#L1533
https://github.com/apache/spark/blob/v2.1.0/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala#L987
https://github.com/apache/spark/blob/v2.1.0/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala#L45
https://github.com/apache/spark/blob/v2.1.0/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala#L614

## How was this patch tested?

Existing tests

Author: Yuming Wang 

Closes #16990 from wangyum/HadoopDeprecatedProperties.
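
For illustration only (this block is not part of the diff; the property names below are taken from Hadoop's DeprecatedProperties table), the renaming pattern applied here looks like:

```
import org.apache.hadoop.conf.Configuration

val hadoopConf = new Configuration()

// Old 1.x-style names such as mapred.job.name and mapred.output.compress are
// deprecated; the Hadoop 2.x replacements are used instead.
hadoopConf.set("mapreduce.job.name", "my-spark-job")
hadoopConf.set("mapreduce.output.fileoutputformat.compress", "true")

// Hadoop's deprecation layer still resolves the old key to the new value,
// which is why this change is behaviour-preserving.
println(hadoopConf.get("mapred.job.name"))   // expected: my-spark-job
```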


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9b8eca65
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9b8eca65
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9b8eca65

Branch: refs/heads/master
Commit: 9b8eca65dcf68129470ead39362ce870ffb0bb1d
Parents: a350bc1
Author: Yuming Wang 
Authored: Tue Feb 28 10:13:42 2017 +
Committer: Sean Owen 
Committed: Tue Feb 28 10:13:42 2017 +

--
 R/WINDOWS.md|  2 +-
 R/run-tests.sh  |  2 +-
 appveyor.yml|  2 +-
 .../io/HadoopMapReduceCommitProtocol.scala  | 10 ++---
 .../io/SparkHadoopMapReduceWriter.scala |  2 +-
 .../scala/org/apache/spark/rdd/HadoopRDD.scala  | 10 ++---
 .../org/apache/spark/rdd/PairRDDFunctions.scala |  9 ++--
 .../test/scala/org/apache/spark/FileSuite.scala |  5 ++-
 docs/hardware-provisioning.md   |  4 +-
 python/pyspark/tests.py | 47 ++--
 .../spark/sql/execution/command/tables.scala|  4 +-
 .../datasources/FileFormatWriter.scala  | 10 ++---
 .../HiveWindowFunctionQuerySuite.scala  |  8 ++--
 .../org/apache/spark/sql/hive/TableReader.scala |  8 ++--
 .../hive/execution/InsertIntoHiveTable.scala| 15 ---
 .../apache/spark/sql/hive/test/TestHive.scala   |  2 +-
 ...4_hadoop20-2-2b9ccaa793eae0e73bf76335d3d6880 |  1 +
 ..._hadoop20-2-db1cd54a4cb36de2087605f32e41824f |  1 -
 .../combine1-2-6142f47d3fcdd4323162014d5eb35e07 |  1 +
 .../combine1-2-c95dc367df88c9e5cf77157f29ba2daf |  1 -
 .../combine1-3-10266e3d5dd4c841c0d65030b1edba7c |  1 +
 .../combine1-3-6e53a3ac93113f20db3a12f1dcf30e86 |  1 -
 .../combine1-4-84967075baa3e56fff2a23f8ab9ba076 |  1 -
 .../combine1-4-9cbd6d400fb6c3cd09010e3dbd76601  |  1 +
 .../combine1-5-1ba2d6f3bb3348da3fee7fab4f283f34 |  1 +
 .../combine1-5-2ee5d706fe3a3bcc38b795f6e94970ea |  1 -
 .../combine2-2-6142f47d3fcdd4323162014d5eb35e07 |  1 +
 .../combine2-2-c95dc367df88c9e5cf77157f29ba2daf |  1 -
 .../combine2-3-10266e3d5dd4c841c0d65030b1edba7c |  1 +
 .../combine2-3-6e53a3ac93113f20db3a12f1dcf30e86 |  1 -
 .../combine2-4-84967075baa3e56fff2a23f8ab9ba076 |  1 -
 .../combine2-4-9cbd6d400fb6c3cd09010e3dbd76601  |  1 +
 .../combine2-5-1ba2d6f3bb3348da3fee7fab4f283f34 |  1 +
 .../combine2-5-2ee5d706fe3a3bcc38b795f6e94970ea |  1 -
 .../groupby1-3-c8478dac3497697b4375ee35118a5c3e |  1 +
 .../groupby1-3-d57ed4bbfee1ffaffaeba0a4be84c31d |  1 -
 .../groupby1-5-c9cee6382b64bd3d71177527961b8be2 |  1 +
 .../groupby1-5-dd7bf298b8c921355edd8665c6b0c168 |  1 -
 ...by1_limit-0-83c59d378571a6e487aa20217bd87817 |  1 -
 ...by1_limit-0-be2c0b32a02a1154bfdee1a52530f387 |  1 +
 ...upby1_map-2-83c59d378571a6e487aa20217bd87817 |  1 -
 ...upby1_map-2-be2c0b32a02a1154bfdee1a52530f387 |  1 +
 ..._map_skew-2-83c59d378571a6e487aa20217bd87817 |  1 -
 ..._map_skew-2-be2c0b32a02a1154bfdee1a52530f387 |  1 +
 ...y1_noskew-2-83c59d378571a6e487aa20217bd87817 |  1 -
 ...y1_noskew-2-be2c0b32a02a1154bfdee1a52530f387 |  1 +
 ...by2_limit-0-83c59d378571a6e487aa20217bd87817 |  1 -
 ...by2_limit-0-be2c0b32a02a1154bfdee1a52530f387 |  1 +
 ...upby2_map-2-83c59d378571a6e487aa20217bd87817 |  1 -
 ...upby2_map-2-be2c0b32a02a1154bfdee1a52530f387 |  1 +
 ..._map_skew-2-83c59d378571a6e487aa20217bd87817 |  1 -
 ..._map_skew-2-be2c0b32a02a1154bfdee1a52530f387 |  1 +
 ...y2_noskew-2-83c59d378571a6e487aa20217bd87817 |  1 -
 ...y2_noskew-2-be2c0b32a02a1154bfdee1a52530f387 |  1 +
 ...upby4_map-2-83c59d378571a6e487aa20217bd87817 |  1 -
 ...upby4_map-2-be2c0b32a02a1154bfdee1a52530f387 |  1 +
 ..._map_skew-2-83c59d378571a6e487aa20217bd87817 |  1 -
 ..._map_skew-2-be2c0b32a02a1154bfdee1a52530f387 |  1 +
 ...y4_noskew-2-83c59d378571a6e487aa20217bd87817 |  1 -
 ...y4_noskew-2-be2c0b32a02a1154bfdee1a52530f387 |  1 +
 ...upby5_map-2-83c59d378571a6e487aa20217bd87817 |  1 -
 ...upby5_map-2-be2c0b32a02a1154bfde

spark git commit: [SPARK-19769][DOCS] Update quickstart instructions

2017-02-28 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-2.1 947c0cd90 -> d887f7581


[SPARK-19769][DOCS] Update quickstart instructions

## What changes were proposed in this pull request?

This change addresses the renaming of the `simple.sbt` build file to
`build.sbt`. Newer versions of the sbt tool no longer find the file under
its old name and look for `build.sbt` instead. The quickstart
instructions for self-contained applications are updated with this
change.

## How was this patch tested?

As this is a relatively minor change of a few words, the markdown was checked 
for syntax and spelling. Site was built with `SKIP_API=1 jekyll serve` for 
testing purposes.

Author: Michael McCune 

Closes #17101 from elmiko/spark-19769.

(cherry picked from commit bf5987cbe6c9f4a1a91d912ed3a9098111632d1a)
Signed-off-by: Sean Owen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d887f758
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d887f758
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d887f758

Branch: refs/heads/branch-2.1
Commit: d887f758152be4d6e089066a97b1eab817d3be83
Parents: 947c0cd
Author: Michael McCune 
Authored: Wed Mar 1 00:07:16 2017 +0100
Committer: Sean Owen 
Committed: Wed Mar 1 00:07:26 2017 +0100

--
 docs/quick-start.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/d887f758/docs/quick-start.md
--
diff --git a/docs/quick-start.md b/docs/quick-start.md
index 0836c60..478bdcf 100644
--- a/docs/quick-start.md
+++ b/docs/quick-start.md
@@ -260,7 +260,7 @@ object which contains information about our
 application. 
 
 Our application depends on the Spark API, so we'll also include an sbt 
configuration file, 
-`simple.sbt`, which explains that Spark is a dependency. This file also adds a 
repository that 
+`build.sbt`, which explains that Spark is a dependency. This file also adds a 
repository that 
 Spark depends on:
 
 {% highlight scala %}
@@ -273,7 +273,7 @@ scalaVersion := "{{site.SCALA_VERSION}}"
 libraryDependencies += "org.apache.spark" %% "spark-core" % 
"{{site.SPARK_VERSION}}"
 {% endhighlight %}
 
-For sbt to work correctly, we'll need to layout `SimpleApp.scala` and 
`simple.sbt`
+For sbt to work correctly, we'll need to layout `SimpleApp.scala` and 
`build.sbt`
 according to the typical directory structure. Once that is in place, we can 
create a JAR package
 containing the application's code, then use the `spark-submit` script to run 
our program.
 
@@ -281,7 +281,7 @@ containing the application's code, then use the 
`spark-submit` script to run our
 # Your directory layout should look like this
 $ find .
 .
-./simple.sbt
+./build.sbt
 ./src
 ./src/main
 ./src/main/scala


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-19769][DOCS] Update quickstart instructions

2017-02-28 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 dcfb05c86 -> c9c45d97b


[SPARK-19769][DOCS] Update quickstart instructions

## What changes were proposed in this pull request?

This change addresses the renaming of the `simple.sbt` build file to
`build.sbt`. Newer versions of the sbt tool no longer find the file under
its old name and look for `build.sbt` instead. The quickstart
instructions for self-contained applications are updated with this
change.

## How was this patch tested?

As this is a relatively minor change of a few words, the markdown was checked 
for syntax and spelling. Site was built with `SKIP_API=1 jekyll serve` for 
testing purposes.

Author: Michael McCune 

Closes #17101 from elmiko/spark-19769.

(cherry picked from commit bf5987cbe6c9f4a1a91d912ed3a9098111632d1a)
Signed-off-by: Sean Owen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c9c45d97
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c9c45d97
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c9c45d97

Branch: refs/heads/branch-2.0
Commit: c9c45d97bd98049b6fd32a4194e8aaff526c034d
Parents: dcfb05c
Author: Michael McCune 
Authored: Wed Mar 1 00:07:16 2017 +0100
Committer: Sean Owen 
Committed: Wed Mar 1 00:07:41 2017 +0100

--
 docs/quick-start.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/c9c45d97/docs/quick-start.md
--
diff --git a/docs/quick-start.md b/docs/quick-start.md
index c67b010..70cbccf 100644
--- a/docs/quick-start.md
+++ b/docs/quick-start.md
@@ -260,7 +260,7 @@ object which contains information about our
 application. 
 
 Our application depends on the Spark API, so we'll also include an sbt 
configuration file, 
-`simple.sbt`, which explains that Spark is a dependency. This file also adds a 
repository that 
+`build.sbt`, which explains that Spark is a dependency. This file also adds a 
repository that 
 Spark depends on:
 
 {% highlight scala %}
@@ -273,7 +273,7 @@ scalaVersion := "{{site.SCALA_VERSION}}"
 libraryDependencies += "org.apache.spark" %% "spark-core" % 
"{{site.SPARK_VERSION}}"
 {% endhighlight %}
 
-For sbt to work correctly, we'll need to layout `SimpleApp.scala` and 
`simple.sbt`
+For sbt to work correctly, we'll need to layout `SimpleApp.scala` and 
`build.sbt`
 according to the typical directory structure. Once that is in place, we can 
create a JAR package
 containing the application's code, then use the `spark-submit` script to run 
our program.
 
@@ -281,7 +281,7 @@ containing the application's code, then use the 
`spark-submit` script to run our
 # Your directory layout should look like this
 $ find .
 .
-./simple.sbt
+./build.sbt
 ./src
 ./src/main
 ./src/main/scala


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-19373][MESOS] Base spark.scheduler.minRegisteredResourceRatio on registered cores rather than accepted cores

2017-02-28 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master bf5987cbe -> ca3864d6e


[SPARK-19373][MESOS] Base spark.scheduler.minRegisteredResourceRatio on 
registered cores rather than accepted cores

## What changes were proposed in this pull request?

See JIRA

## How was this patch tested?

Unit tests, Mesos/Spark integration tests

cc skonto susanxhuynh

Author: Michael Gummelt 

Closes #17045 from mgummelt/SPARK-19373-registered-resources.
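
A simplified sketch of the check this change switches to registered cores; the class and field names here are hypothetical scaffolding, and only the comparison mirrors the diff below:

```
import java.util.concurrent.atomic.AtomicInteger

// The scheduler is "ready" once the cores that executors have actually
// registered reach the configured ratio of spark.cores.max (0 if unset,
// in which case the check passes trivially).
class ReadinessCheckSketch(maxCoresOption: Option[Int], minRegisteredRatio: Double) {
  val totalCoreCount = new AtomicInteger(0)   // updated as executors register

  def sufficientResourcesRegistered(): Boolean =
    totalCoreCount.get >= maxCoresOption.getOrElse(0) * minRegisteredRatio
}

// Example: with spark.cores.max=8 and a ratio of 0.5, 4 registered cores suffice.
val check = new ReadinessCheckSketch(maxCoresOption = Some(8), minRegisteredRatio = 0.5)
check.totalCoreCount.set(4)
assert(check.sufficientResourcesRegistered())
```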


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ca3864d6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ca3864d6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ca3864d6

Branch: refs/heads/master
Commit: ca3864d6e090ca3e68a2ef0cf527e6e00c8c4f64
Parents: bf5987c
Author: Michael Gummelt 
Authored: Wed Mar 1 00:10:55 2017 +0100
Committer: Sean Owen 
Committed: Wed Mar 1 00:10:55 2017 +0100

--
 .../MesosCoarseGrainedSchedulerBackend.scala|  27 +++--
 ...esosCoarseGrainedSchedulerBackendSuite.scala | 111 +--
 2 files changed, 70 insertions(+), 68 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ca3864d6/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala
--
diff --git 
a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala
 
b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala
index f555072..f69c223 100644
--- 
a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala
+++ 
b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala
@@ -54,14 +54,17 @@ private[spark] class MesosCoarseGrainedSchedulerBackend(
   with org.apache.mesos.Scheduler
   with MesosSchedulerUtils {
 
-  val MAX_SLAVE_FAILURES = 2 // Blacklist a slave after this many failures
+  // Blacklist a slave after this many failures
+  private val MAX_SLAVE_FAILURES = 2
 
-  // Maximum number of cores to acquire (TODO: we'll need more flexible 
controls here)
-  val maxCores = conf.get("spark.cores.max", Int.MaxValue.toString).toInt
+  private val maxCoresOption = conf.getOption("spark.cores.max").map(_.toInt)
 
-  val useFetcherCache = conf.getBoolean("spark.mesos.fetcherCache.enable", 
false)
+  // Maximum number of cores to acquire
+  private val maxCores = maxCoresOption.getOrElse(Int.MaxValue)
 
-  val maxGpus = conf.getInt("spark.mesos.gpus.max", 0)
+  private val useFetcherCache = 
conf.getBoolean("spark.mesos.fetcherCache.enable", false)
+
+  private val maxGpus = conf.getInt("spark.mesos.gpus.max", 0)
 
   private[this] val shutdownTimeoutMS =
 conf.getTimeAsMs("spark.mesos.coarse.shutdownTimeout", "10s")
@@ -75,10 +78,10 @@ private[spark] class MesosCoarseGrainedSchedulerBackend(
   private val shuffleServiceEnabled = 
conf.getBoolean("spark.shuffle.service.enabled", false)
 
   // Cores we have acquired with each Mesos task ID
-  val coresByTaskId = new mutable.HashMap[String, Int]
-  val gpusByTaskId = new mutable.HashMap[String, Int]
-  var totalCoresAcquired = 0
-  var totalGpusAcquired = 0
+  private val coresByTaskId = new mutable.HashMap[String, Int]
+  private val gpusByTaskId = new mutable.HashMap[String, Int]
+  private var totalCoresAcquired = 0
+  private var totalGpusAcquired = 0
 
   // SlaveID -> Slave
   // This map accumulates entries for the duration of the job.  Slaves are 
never deleted, because
@@ -108,7 +111,7 @@ private[spark] class MesosCoarseGrainedSchedulerBackend(
   // may lead to deadlocks since the superclass might also try to lock
   private val stateLock = new ReentrantLock
 
-  val extraCoresPerExecutor = conf.getInt("spark.mesos.extra.cores", 0)
+  private val extraCoresPerExecutor = conf.getInt("spark.mesos.extra.cores", 0)
 
   // Offer constraints
   private val slaveOfferConstraints =
@@ -139,7 +142,7 @@ private[spark] class MesosCoarseGrainedSchedulerBackend(
   securityManager.isAuthenticationEnabled())
   }
 
-  var nextMesosTaskId = 0
+  private var nextMesosTaskId = 0
 
   @volatile var appId: String = _
 
@@ -256,7 +259,7 @@ private[spark] class MesosCoarseGrainedSchedulerBackend(
   }
 
   override def sufficientResourcesRegistered(): Boolean = {
-totalCoresAcquired >= maxCores * minRegisteredRatio
+totalCoreCount.get >= maxCoresOption.getOrElse(0) * minRegisteredRatio
   }
 
   override def disconnected(d: org.apache.mesos.SchedulerDriver) {}

http://git-wip-us.apache.org/repos/asf/spark/blob/ca3864d6/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/meso

spark git commit: [SPARK-19769][DOCS] Update quickstart instructions

2017-02-28 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master d743ea4c7 -> bf5987cbe


[SPARK-19769][DOCS] Update quickstart instructions

## What changes were proposed in this pull request?

This change addresses the renaming of the `simple.sbt` build file to
`build.sbt`. Newer versions of the sbt tool no longer find the file under
its old name and look for `build.sbt` instead. The quickstart
instructions for self-contained applications are updated with this
change.

## How was this patch tested?

As this is a relatively minor change of a few words, the markdown was checked 
for syntax and spelling. Site was built with `SKIP_API=1 jekyll serve` for 
testing purposes.

Author: Michael McCune 

Closes #17101 from elmiko/spark-19769.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bf5987cb
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bf5987cb
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bf5987cb

Branch: refs/heads/master
Commit: bf5987cbe6c9f4a1a91d912ed3a9098111632d1a
Parents: d743ea4
Author: Michael McCune 
Authored: Wed Mar 1 00:07:16 2017 +0100
Committer: Sean Owen 
Committed: Wed Mar 1 00:07:16 2017 +0100

--
 docs/quick-start.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/bf5987cb/docs/quick-start.md
--
diff --git a/docs/quick-start.md b/docs/quick-start.md
index 04ac278..aa4319a 100644
--- a/docs/quick-start.md
+++ b/docs/quick-start.md
@@ -260,7 +260,7 @@ object which contains information about our
 application. 
 
 Our application depends on the Spark API, so we'll also include an sbt 
configuration file, 
-`simple.sbt`, which explains that Spark is a dependency. This file also adds a 
repository that 
+`build.sbt`, which explains that Spark is a dependency. This file also adds a 
repository that 
 Spark depends on:
 
 {% highlight scala %}
@@ -273,7 +273,7 @@ scalaVersion := "{{site.SCALA_VERSION}}"
 libraryDependencies += "org.apache.spark" %% "spark-core" % 
"{{site.SPARK_VERSION}}"
 {% endhighlight %}
 
-For sbt to work correctly, we'll need to layout `SimpleApp.scala` and 
`simple.sbt`
+For sbt to work correctly, we'll need to layout `SimpleApp.scala` and 
`build.sbt`
 according to the typical directory structure. Once that is in place, we can 
create a JAR package
 containing the application's code, then use the `spark-submit` script to run 
our program.
 
@@ -281,7 +281,7 @@ containing the application's code, then use the 
`spark-submit` script to run our
 # Your directory layout should look like this
 $ find .
 .
-./simple.sbt
+./build.sbt
 ./src
 ./src/main
 ./src/main/scala


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-19373][MESOS] Base spark.scheduler.minRegisteredResourceRatio …

2017-03-01 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-2.1 bbe0d8caa -> 27347b5f2


[SPARK-19373][MESOS] Base spark.scheduler.minRegisteredResourceRatio …

…on registered cores rather than accepted cores

See JIRA

Unit tests, Mesos/Spark integration tests

cc skonto susanxhuynh

Author: Michael Gummelt 

Closes #17045 from mgummelt/SPARK-19373-registered-resources.

## What changes were proposed in this pull request?

(Please fill in changes proposed in this fix)

## How was this patch tested?

(Please explain how this patch was tested. E.g. unit tests, integration tests, 
manual tests)
(If this patch involves UI changes, please attach a screenshot; otherwise, 
remove this)

Please review http://spark.apache.org/contributing.html before opening a pull 
request.

Author: Michael Gummelt 

Closes #17129 from mgummelt/SPARK-19373-registered-resources-2.1.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/27347b5f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/27347b5f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/27347b5f

Branch: refs/heads/branch-2.1
Commit: 27347b5f26f668783d8ded89149a5e761b67f786
Parents: bbe0d8c
Author: Michael Gummelt 
Authored: Thu Mar 2 00:32:32 2017 +0100
Committer: Sean Owen 
Committed: Thu Mar 2 00:32:32 2017 +0100

--
 .../MesosCoarseGrainedSchedulerBackend.scala|  27 +++--
 ...esosCoarseGrainedSchedulerBackendSuite.scala | 111 +--
 2 files changed, 70 insertions(+), 68 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/27347b5f/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala
--
diff --git 
a/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala
 
b/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala
index 5063c1f..22df2b1 100644
--- 
a/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala
+++ 
b/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala
@@ -54,14 +54,17 @@ private[spark] class MesosCoarseGrainedSchedulerBackend(
   with org.apache.mesos.Scheduler
   with MesosSchedulerUtils {
 
-  val MAX_SLAVE_FAILURES = 2 // Blacklist a slave after this many failures
+  // Blacklist a slave after this many failures
+  private val MAX_SLAVE_FAILURES = 2
 
-  // Maximum number of cores to acquire (TODO: we'll need more flexible 
controls here)
-  val maxCores = conf.get("spark.cores.max", Int.MaxValue.toString).toInt
+  private val maxCoresOption = conf.getOption("spark.cores.max").map(_.toInt)
 
-  val useFetcherCache = conf.getBoolean("spark.mesos.fetcherCache.enable", 
false)
+  // Maximum number of cores to acquire
+  private val maxCores = maxCoresOption.getOrElse(Int.MaxValue)
 
-  val maxGpus = conf.getInt("spark.mesos.gpus.max", 0)
+  private val useFetcherCache = 
conf.getBoolean("spark.mesos.fetcherCache.enable", false)
+
+  private val maxGpus = conf.getInt("spark.mesos.gpus.max", 0)
 
   private[this] val shutdownTimeoutMS =
 conf.getTimeAsMs("spark.mesos.coarse.shutdownTimeout", "10s")
@@ -75,10 +78,10 @@ private[spark] class MesosCoarseGrainedSchedulerBackend(
   private val shuffleServiceEnabled = 
conf.getBoolean("spark.shuffle.service.enabled", false)
 
   // Cores we have acquired with each Mesos task ID
-  val coresByTaskId = new mutable.HashMap[String, Int]
-  val gpusByTaskId = new mutable.HashMap[String, Int]
-  var totalCoresAcquired = 0
-  var totalGpusAcquired = 0
+  private val coresByTaskId = new mutable.HashMap[String, Int]
+  private val gpusByTaskId = new mutable.HashMap[String, Int]
+  private var totalCoresAcquired = 0
+  private var totalGpusAcquired = 0
 
   // SlaveID -> Slave
   // This map accumulates entries for the duration of the job.  Slaves are 
never deleted, because
@@ -108,7 +111,7 @@ private[spark] class MesosCoarseGrainedSchedulerBackend(
   // may lead to deadlocks since the superclass might also try to lock
   private val stateLock = new ReentrantLock
 
-  val extraCoresPerExecutor = conf.getInt("spark.mesos.extra.cores", 0)
+  private val extraCoresPerExecutor = conf.getInt("spark.mesos.extra.cores", 0)
 
   // Offer constraints
   private val slaveOfferConstraints =
@@ -140,7 +143,7 @@ private[spark] class MesosCoarseGrainedSchedulerBackend(
   securityManager.isSaslEncryptionEnabled())
   }
 
-  var nextMesosTaskId = 0
+  private var nextMesosTaskId = 0
 
   @volatile var appId: String = _
 
@@ -257,7 +260,7 @@ private[spark] class MesosCoarseGrainedSchedulerBackend(
   }
 
   override def sufficientResourcesRegistered(): Boolean = {
-tota

spark git commit: [SPARK-19775][SQL] Remove an obsolete `partitionBy().insertInto()` test case

2017-03-01 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 2ff1467d6 -> db0ddce52


[SPARK-19775][SQL] Remove an obsolete `partitionBy().insertInto()` test case

## What changes were proposed in this pull request?

This issue removes [a test 
case](https://github.com/apache/spark/blame/master/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala#L287-L298)
 which was introduced by 
[SPARK-14459](https://github.com/apache/spark/commit/652bbb1bf62722b08a062c7a2bf72019f85e179e)
 and was superseded by 
[SPARK-16033](https://github.com/apache/spark/blame/master/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala#L365-L371).
 Basically, we cannot use `partitionBy` and `insertInto` together.

```scala
  test("Reject partitioning that does not match table") {
withSQLConf(("hive.exec.dynamic.partition.mode", "nonstrict")) {
  sql("CREATE TABLE partitioned (id bigint, data string) PARTITIONED BY 
(part string)")
  val data = (1 to 10).map(i => (i, s"data-$i", if ((i % 2) == 0) "even" 
else "odd"))
  .toDF("id", "data", "part")

  intercept[AnalysisException] {
// cannot partition by 2 fields when there is only one in the table 
definition
data.write.partitionBy("part", "data").insertInto("partitioned")
  }
}
  }
```

## How was this patch tested?

This only removes a test case. Pass the existing Jenkins test.

Author: Dongjoon Hyun 

Closes #17106 from dongjoon-hyun/SPARK-19775.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/db0ddce5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/db0ddce5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/db0ddce5

Branch: refs/heads/master
Commit: db0ddce523bb823cba996e92ef36ceca31492d2c
Parents: 2ff1467
Author: Dongjoon Hyun 
Authored: Thu Mar 2 00:45:59 2017 +0100
Committer: Sean Owen 
Committed: Thu Mar 2 00:45:59 2017 +0100

--
 .../spark/sql/hive/InsertIntoHiveTableSuite.scala  | 13 -
 1 file changed, 13 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/db0ddce5/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala
--
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala
index 71ce5a7..d6999af 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala
@@ -284,19 +284,6 @@ class InsertIntoHiveTableSuite extends QueryTest with 
TestHiveSingleton with Bef
 sql("DROP TABLE hiveTableWithStructValue")
   }
 
-  test("Reject partitioning that does not match table") {
-withSQLConf(("hive.exec.dynamic.partition.mode", "nonstrict")) {
-  sql("CREATE TABLE partitioned (id bigint, data string) PARTITIONED BY 
(part string)")
-  val data = (1 to 10).map(i => (i, s"data-$i", if ((i % 2) == 0) "even" 
else "odd"))
-  .toDF("id", "data", "part")
-
-  intercept[AnalysisException] {
-// cannot partition by 2 fields when there is only one in the table 
definition
-data.write.partitionBy("part", "data").insertInto("partitioned")
-  }
-}
-  }
-
   test("Test partition mode = strict") {
 withSQLConf(("hive.exec.dynamic.partition.mode", "strict")) {
   sql("CREATE TABLE partitioned (id bigint, data string) PARTITIONED BY 
(part string)")


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-19739][CORE] propagate S3 session token to cluster

2017-03-03 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master d556b3170 -> fa50143cd


[SPARK-19739][CORE] propagate S3 session token to cluster

## What changes were proposed in this pull request?

propagate S3 session token to cluster

## How was this patch tested?

Existing unit tests.

Author: uncleGen 

Closes #17080 from uncleGen/SPARK-19739.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fa50143c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fa50143c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fa50143c

Branch: refs/heads/master
Commit: fa50143cd33586f4658892f434c9f6c23346e1bf
Parents: d556b31
Author: uncleGen 
Authored: Fri Mar 3 11:49:00 2017 +0100
Committer: Sean Owen 
Committed: Fri Mar 3 11:49:00 2017 +0100

--
 .../org/apache/spark/deploy/SparkHadoopUtil.scala  | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/fa50143c/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
--
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala 
b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
index 941e2d1..f475ce8 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
@@ -82,17 +82,20 @@ class SparkHadoopUtil extends Logging {
 // the behavior of the old implementation of this code, for backwards 
compatibility.
 if (conf != null) {
   // Explicitly check for S3 environment variables
-  if (System.getenv("AWS_ACCESS_KEY_ID") != null &&
-  System.getenv("AWS_SECRET_ACCESS_KEY") != null) {
-val keyId = System.getenv("AWS_ACCESS_KEY_ID")
-val accessKey = System.getenv("AWS_SECRET_ACCESS_KEY")
-
+  val keyId = System.getenv("AWS_ACCESS_KEY_ID")
+  val accessKey = System.getenv("AWS_SECRET_ACCESS_KEY")
+  if (keyId != null && accessKey != null) {
 hadoopConf.set("fs.s3.awsAccessKeyId", keyId)
 hadoopConf.set("fs.s3n.awsAccessKeyId", keyId)
 hadoopConf.set("fs.s3a.access.key", keyId)
 hadoopConf.set("fs.s3.awsSecretAccessKey", accessKey)
 hadoopConf.set("fs.s3n.awsSecretAccessKey", accessKey)
 hadoopConf.set("fs.s3a.secret.key", accessKey)
+
+val sessionToken = System.getenv("AWS_SESSION_TOKEN")
+if (sessionToken != null) {
+  hadoopConf.set("fs.s3a.session.token", sessionToken)
+}
   }
   // Copy any "spark.hadoop.foo=bar" system properties into conf as 
"foo=bar"
   conf.getAll.foreach { case (key, value) =>
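
As a usage-side sketch (assuming only the standard AWS environment variable names and the `fs.s3a.*` keys that appear in this diff), the same propagation could be done manually on a Hadoop configuration:

```
import org.apache.hadoop.conf.Configuration

// Copy the three AWS environment variables, if present, into a Hadoop
// configuration so s3a can use temporary (session) credentials.
def configureS3a(hadoopConf: Configuration): Unit = {
  val keyId = System.getenv("AWS_ACCESS_KEY_ID")
  val secret = System.getenv("AWS_SECRET_ACCESS_KEY")
  val sessionToken = System.getenv("AWS_SESSION_TOKEN")
  if (keyId != null && secret != null) {
    hadoopConf.set("fs.s3a.access.key", keyId)
    hadoopConf.set("fs.s3a.secret.key", secret)
    if (sessionToken != null) {
      hadoopConf.set("fs.s3a.session.token", sessionToken)
    }
  }
}
```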


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-19797][DOC] ML pipeline document correction

2017-03-03 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-2.1 1237aaea2 -> accbed7c2


[SPARK-19797][DOC] ML pipeline document correction

## What changes were proposed in this pull request?
The description of the pipeline in this paragraph is incorrect: 
https://spark.apache.org/docs/latest/ml-pipeline.html#how-it-works

> If the Pipeline had more **stages**, it would call the 
> LogisticRegressionModel’s transform() method on the DataFrame before 
> passing the DataFrame to the next stage.

Reason: a Transformer could also be a stage, but only another Estimator will 
invoke a transform() call and pass the data to the next stage. The description 
in the document misleads ML pipeline users.

## How was this patch tested?
This is a tiny modification of **docs/ml-pipeline.md**. I ran a jekyll build 
with the modification and checked the compiled document.

Author: Zhe Sun 

Closes #17137 from ymwdalex/SPARK-19797-ML-pipeline-document-correction.
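
For readers of this correction, here is the Tokenizer, HashingTF and LogisticRegression pipeline the page describes, as a minimal sketch with made-up data:

```
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.feature.{HashingTF, Tokenizer}
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder.appName("PipelineSketch").getOrCreate()

// Tiny made-up training set: (id, text, label).
val training = spark.createDataFrame(Seq(
  (0L, "a b c d e spark", 1.0),
  (1L, "b d", 0.0),
  (2L, "spark f g h", 1.0),
  (3L, "hadoop mapreduce", 0.0)
)).toDF("id", "text", "label")

// Two Transformer stages followed by one Estimator stage.
val tokenizer = new Tokenizer().setInputCol("text").setOutputCol("words")
val hashingTF = new HashingTF().setInputCol("words").setOutputCol("features")
val lr = new LogisticRegression().setMaxIter(10)
val pipeline = new Pipeline().setStages(Array(tokenizer, hashingTF, lr))

// fit() runs transform() on the Transformer stages and fit() on the Estimator,
// producing a PipelineModel whose last stage is a LogisticRegressionModel.
val model = pipeline.fit(training)
```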

(cherry picked from commit 0bac3e4cde75678beac02e67b8873fe779e9ad34)
Signed-off-by: Sean Owen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/accbed7c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/accbed7c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/accbed7c

Branch: refs/heads/branch-2.1
Commit: accbed7c2cfbe46fa6f55e97241b617c6ad4431f
Parents: 1237aae
Author: Zhe Sun 
Authored: Fri Mar 3 11:55:57 2017 +0100
Committer: Sean Owen 
Committed: Fri Mar 3 11:56:07 2017 +0100

--
 docs/ml-pipeline.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/accbed7c/docs/ml-pipeline.md
--
diff --git a/docs/ml-pipeline.md b/docs/ml-pipeline.md
index 7cbb146..aa92c0a 100644
--- a/docs/ml-pipeline.md
+++ b/docs/ml-pipeline.md
@@ -132,7 +132,7 @@ The `Pipeline.fit()` method is called on the original 
`DataFrame`, which has raw
 The `Tokenizer.transform()` method splits the raw text documents into words, 
adding a new column with words to the `DataFrame`.
 The `HashingTF.transform()` method converts the words column into feature 
vectors, adding a new column with those vectors to the `DataFrame`.
 Now, since `LogisticRegression` is an `Estimator`, the `Pipeline` first calls 
`LogisticRegression.fit()` to produce a `LogisticRegressionModel`.
-If the `Pipeline` had more stages, it would call the 
`LogisticRegressionModel`'s `transform()`
+If the `Pipeline` had more `Estimator`s, it would call the 
`LogisticRegressionModel`'s `transform()`
 method on the `DataFrame` before passing the `DataFrame` to the next stage.
 
 A `Pipeline` is an `Estimator`.


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-19797][DOC] ML pipeline document correction

2017-03-03 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master fa50143cd -> 0bac3e4cd


[SPARK-19797][DOC] ML pipeline document correction

## What changes were proposed in this pull request?
The description of the pipeline in this paragraph is incorrect: 
https://spark.apache.org/docs/latest/ml-pipeline.html#how-it-works

> If the Pipeline had more **stages**, it would call the 
> LogisticRegressionModel’s transform() method on the DataFrame before 
> passing the DataFrame to the next stage.

Reason: a Transformer could also be a stage, but only another Estimator will 
invoke a transform() call and pass the data to the next stage. The description 
in the document misleads ML pipeline users.

## How was this patch tested?
This is a tiny modification of **docs/ml-pipeline.md**. I ran a jekyll build 
with the modification and checked the compiled document.

Author: Zhe Sun 

Closes #17137 from ymwdalex/SPARK-19797-ML-pipeline-document-correction.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0bac3e4c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0bac3e4c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0bac3e4c

Branch: refs/heads/master
Commit: 0bac3e4cde75678beac02e67b8873fe779e9ad34
Parents: fa50143
Author: Zhe Sun 
Authored: Fri Mar 3 11:55:57 2017 +0100
Committer: Sean Owen 
Committed: Fri Mar 3 11:55:57 2017 +0100

--
 docs/ml-pipeline.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/0bac3e4c/docs/ml-pipeline.md
--
diff --git a/docs/ml-pipeline.md b/docs/ml-pipeline.md
index 7cbb146..aa92c0a 100644
--- a/docs/ml-pipeline.md
+++ b/docs/ml-pipeline.md
@@ -132,7 +132,7 @@ The `Pipeline.fit()` method is called on the original 
`DataFrame`, which has raw
 The `Tokenizer.transform()` method splits the raw text documents into words, 
adding a new column with words to the `DataFrame`.
 The `HashingTF.transform()` method converts the words column into feature 
vectors, adding a new column with those vectors to the `DataFrame`.
 Now, since `LogisticRegression` is an `Estimator`, the `Pipeline` first calls 
`LogisticRegression.fit()` to produce a `LogisticRegressionModel`.
-If the `Pipeline` had more stages, it would call the 
`LogisticRegressionModel`'s `transform()`
+If the `Pipeline` had more `Estimator`s, it would call the 
`LogisticRegressionModel`'s `transform()`
 method on the `DataFrame` before passing the `DataFrame` to the next stage.
 
 A `Pipeline` is an `Estimator`.





spark git commit: [SPARK-19801][BUILD] Remove JDK7 from Travis CI

2017-03-03 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 0bac3e4cd -> 776fac398


[SPARK-19801][BUILD] Remove JDK7 from Travis CI

## What changes were proposed in this pull request?

Since Spark 2.1.0, Travis CI has been supported (via SPARK-15207) for automated PR verification (JDK7/JDK8 Maven compilation and the Java linter), and contributors can see the additional results via their Travis CI dashboard (or PC).

This PR brings `.travis.yml` up to date by removing JDK7, which was dropped via SPARK-19550.

## How was this patch tested?

See the result via Travis CI.

- https://travis-ci.org/dongjoon-hyun/spark/builds/207111713

Author: Dongjoon Hyun 

Closes #17143 from dongjoon-hyun/SPARK-19801.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/776fac39
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/776fac39
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/776fac39

Branch: refs/heads/master
Commit: 776fac3988271a1e4128cb31f21e5f7f3b7bcf0e
Parents: 0bac3e4
Author: Dongjoon Hyun 
Authored: Fri Mar 3 12:00:54 2017 +0100
Committer: Sean Owen 
Committed: Fri Mar 3 12:00:54 2017 +0100

--
 .travis.yml | 1 -
 1 file changed, 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/776fac39/.travis.yml
--
diff --git a/.travis.yml b/.travis.yml
index d94872d..d7e9f8c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -28,7 +28,6 @@ dist: trusty
 # 2. Choose language and target JDKs for parallel builds.
 language: java
 jdk:
-  - oraclejdk7
   - oraclejdk8
 
 # 3. Setup cache directory for SBT and Maven.





spark-website git commit: Update committer list

2017-03-03 Thread srowen
Repository: spark-website
Updated Branches:
  refs/heads/asf-site 470b7ed51 -> c1b9ad3cb


Update committer list


Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/c1b9ad3c
Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/c1b9ad3c
Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/c1b9ad3c

Branch: refs/heads/asf-site
Commit: c1b9ad3cbe413b10f872c6a3363f1028c31b1a16
Parents: 470b7ed
Author: Holden Karau 
Authored: Wed Mar 1 22:15:10 2017 -0800
Committer: Sean Owen 
Committed: Fri Mar 3 12:31:03 2017 +0100

--
 committers.md|  4 
 site/committers.html | 15 ++-
 2 files changed, 18 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark-website/blob/c1b9ad3c/committers.md
--
diff --git a/committers.md b/committers.md
index 03defa6..a97bb72 100644
--- a/committers.md
+++ b/committers.md
@@ -28,6 +28,7 @@ navigation:
 |Herman van Hovell|QuestTec B.V.|
 |Yin Huai|Databricks|
 |Shane Huang|Intel|
+|Holden Karau|IBM|
 |Andy Konwinski|Databricks|
 |Ryan LeCompte|Quantifind|
 |Haoyuan Li|Alluxio, UC Berkeley|
@@ -50,11 +51,13 @@ navigation:
 |Prashant Sharma|IBM|
 |Ram Sriharsha|Databricks|
 |DB Tsai|Netflix|
+|Takuya Ueshin||
 |Marcelo Vanzin|Cloudera|
 |Shivaram Venkataraman|UC Berkeley|
 |Patrick Wendell|Databricks|
 |Andrew Xia|Alibaba|
 |Reynold Xin|Databricks|
+|Burak Yavuz|Databricks|
 |Matei Zaharia|Databricks, Stanford|
 |Shixiong Zhu|Databricks|
 
@@ -117,6 +120,7 @@ You can verify the result is one change with `git log`. 
Then resume the script i
 
 Also, please remember to set Assignee on JIRAs where applicable when they are 
resolved. The script 
 can't do this automatically.
+Once a PR is merged please leave a comment on the PR stating which branch(es) 
it has been merged with.
 
 

spark git commit: [SPARK-19550][SPARKR][DOCS] Update R document to use JDK8

2017-03-04 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master fbc405803 -> 6b0cfd9fa


[SPARK-19550][SPARKR][DOCS] Update R document to use JDK8

## What changes were proposed in this pull request?

Update R document to use JDK8.

## How was this patch tested?

manual tests

Author: Yuming Wang 

Closes #17162 from wangyum/SPARK-19550.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6b0cfd9f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6b0cfd9f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6b0cfd9f

Branch: refs/heads/master
Commit: 6b0cfd9fa51aca4536d7c3f2a4bbceae11a50339
Parents: fbc4058
Author: Yuming Wang 
Authored: Sat Mar 4 16:43:31 2017 +
Committer: Sean Owen 
Committed: Sat Mar 4 16:43:31 2017 +

--
 R/WINDOWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/6b0cfd9f/R/WINDOWS.md
--
diff --git a/R/WINDOWS.md b/R/WINDOWS.md
index cb2eebb..9ca7e58 100644
--- a/R/WINDOWS.md
+++ b/R/WINDOWS.md
@@ -6,7 +6,7 @@ To build SparkR on Windows, the following steps are required
 include Rtools and R in `PATH`.
 
 2. Install
-[JDK7](http://www.oracle.com/technetwork/java/javase/downloads/jdk7-downloads-1880260.html)
 and set
+[JDK8](http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html)
 and set
 `JAVA_HOME` in the system environment variables.
 
 3. Download and install [Maven](http://maven.apache.org/download.html). Also 
include the `bin`





spark git commit: [SPARK-19792][WEBUI] In the Master Page, the column named “Memory per Node”, I think it is not all right

2017-03-05 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 6b0cfd9fa -> 42c4cd9e2


[SPARK-19792][WEBUI] In the Master Page, the column named “Memory per Node”, I think it is not all right

Signed-off-by: liuxian 

## What changes were proposed in this pull request?

On the Spark web UI's Master Page there are two tables, Running Applications and Completed Applications. The column named “Memory per Node” is not quite right, because a node may run more than one executor. It should therefore be named “Memory per Executor”; otherwise it is easy for users to misunderstand.

## How was this patch tested?

N/A

Author: liuxian 

Closes #17132 from 10110346/wid-lx-0302.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/42c4cd9e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/42c4cd9e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/42c4cd9e

Branch: refs/heads/master
Commit: 42c4cd9e2a44eaa6a16e3b490eb82b6292d9b2ea
Parents: 6b0cfd9
Author: liuxian 
Authored: Sun Mar 5 10:23:50 2017 +
Committer: Sean Owen 
Committed: Sun Mar 5 10:23:50 2017 +

--
 .../main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/42c4cd9e/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala 
b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala
index 7dbe329..e722a24 100644
--- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala
@@ -76,7 +76,7 @@ private[ui] class MasterPage(parent: MasterWebUI) extends 
WebUIPage("") {
 val aliveWorkers = state.workers.filter(_.state == WorkerState.ALIVE)
 val workerTable = UIUtils.listingTable(workerHeaders, workerRow, workers)
 
-val appHeaders = Seq("Application ID", "Name", "Cores", "Memory per Node", 
"Submitted Time",
+val appHeaders = Seq("Application ID", "Name", "Cores", "Memory per 
Executor", "Submitted Time",
   "User", "State", "Duration")
 val activeApps = state.activeApps.sortBy(_.startTime).reverse
 val activeAppsTable = UIUtils.listingTable(appHeaders, appRow, activeApps)





spark git commit: [SPARK-19702][MESOS] Increase default refuse_seconds timeout in the Mesos Spark Dispatcher

2017-03-07 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 6f4684622 -> 2e30c0b9b


[SPARK-19702][MESOS] Increase default refuse_seconds timeout in the Mesos Spark 
Dispatcher

## What changes were proposed in this pull request?

Increase default refuse_seconds timeout, and make it configurable.  See JIRA 
for details on how this reduces the risk of starvation.
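
As background for what `refuse_seconds` controls, here is a minimal sketch at the Mesos scheduler API level (the helper name and values are illustrative, not code from this patch): when an offer is declined with a filter, the Mesos master will not re-offer those resources to the framework for the given number of seconds, which is why too small a value can contribute to starvation.

```scala
import org.apache.mesos.Protos.{Filters, OfferID, Status}
import org.apache.mesos.SchedulerDriver

// Illustrative helper: decline an offer and ask the master not to re-offer
// these resources for `refuseSeconds` seconds.
def declineWithTimeout(driver: SchedulerDriver, offerId: OfferID, refuseSeconds: Double): Status = {
  val filters = Filters.newBuilder().setRefuseSeconds(refuseSeconds).build()
  driver.declineOffer(offerId, filters)
}
```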

## How was this patch tested?

Unit tests, Manual testing, and Mesos/Spark integration test suite

cc susanxhuynh skonto jmlvanre

Author: Michael Gummelt 

Closes #17031 from mgummelt/SPARK-19702-suppress-revive.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2e30c0b9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2e30c0b9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2e30c0b9

Branch: refs/heads/master
Commit: 2e30c0b9bcaa6f7757bd85d1f1ec392d5f916f83
Parents: 6f46846
Author: Michael Gummelt 
Authored: Tue Mar 7 21:29:08 2017 +
Committer: Sean Owen 
Committed: Tue Mar 7 21:29:08 2017 +

--
 .../cluster/mesos/MesosClusterScheduler.scala   | 75 ++--
 .../MesosCoarseGrainedSchedulerBackend.scala| 69 --
 .../MesosFineGrainedSchedulerBackend.scala  | 19 +++--
 .../cluster/mesos/MesosSchedulerUtils.scala | 60 
 .../mesos/MesosClusterSchedulerSuite.scala  | 51 -
 ...esosCoarseGrainedSchedulerBackendSuite.scala |  7 +-
 .../spark/scheduler/cluster/mesos/Utils.scala   | 11 +++
 7 files changed, 187 insertions(+), 105 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/2e30c0b9/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala
--
diff --git 
a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala
 
b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala
index 2760f31..1bc6f71 100644
--- 
a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala
+++ 
b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala
@@ -152,6 +152,7 @@ private[spark] class MesosClusterScheduler(
   // is registered with Mesos master.
   @volatile protected var ready = false
   private var masterInfo: Option[MasterInfo] = None
+  private var schedulerDriver: SchedulerDriver = _
 
   def submitDriver(desc: MesosDriverDescription): CreateSubmissionResponse = {
 val c = new CreateSubmissionResponse
@@ -168,9 +169,8 @@ private[spark] class MesosClusterScheduler(
 return c
   }
   c.submissionId = desc.submissionId
-  queuedDriversState.persist(desc.submissionId, desc)
-  queuedDrivers += desc
   c.success = true
+  addDriverToQueue(desc)
 }
 c
   }
@@ -191,7 +191,7 @@ private[spark] class MesosClusterScheduler(
   // 4. Check if it has already completed.
   if (launchedDrivers.contains(submissionId)) {
 val task = launchedDrivers(submissionId)
-mesosDriver.killTask(task.taskId)
+schedulerDriver.killTask(task.taskId)
 k.success = true
 k.message = "Killing running driver"
   } else if (removeFromQueuedDrivers(submissionId)) {
@@ -324,7 +324,7 @@ private[spark] class MesosClusterScheduler(
 ready = false
 metricsSystem.report()
 metricsSystem.stop()
-mesosDriver.stop(true)
+schedulerDriver.stop(true)
   }
 
   override def registered(
@@ -340,6 +340,8 @@ private[spark] class MesosClusterScheduler(
 
 stateLock.synchronized {
   this.masterInfo = Some(masterInfo)
+  this.schedulerDriver = driver
+
   if (!pendingRecover.isEmpty) {
 // Start task reconciliation if we need to recover.
 val statuses = pendingRecover.collect {
@@ -506,11 +508,10 @@ private[spark] class MesosClusterScheduler(
   }
 
   private class ResourceOffer(
-  val offerId: OfferID,
-  val slaveId: SlaveID,
-  var resources: JList[Resource]) {
+  val offer: Offer,
+  var remainingResources: JList[Resource]) {
 override def toString(): String = {
-  s"Offer id: ${offerId}, resources: ${resources}"
+  s"Offer id: ${offer.getId}, resources: ${remainingResources}"
 }
   }
 
@@ -518,16 +519,16 @@ private[spark] class MesosClusterScheduler(
 val taskId = TaskID.newBuilder().setValue(desc.submissionId).build()
 
 val (remainingResources, cpuResourcesToUse) =
-  partitionResources(offer.resources, "cpus", desc.cores)
+  partitionResources(offer.remainingResources, "cpus", desc.cores)
 val (finalResources, memResourcesToUse) =
   partitionResources(remainingResources.asJava, "m

spark git commit: [SPARK-19693][SQL] Make the SET mapreduce.job.reduces automatically converted to spark.sql.shuffle.partitions

2017-03-08 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 81303f7ca -> 3f9f9180c


[SPARK-19693][SQL] Make the SET mapreduce.job.reduces automatically converted 
to spark.sql.shuffle.partitions

## What changes were proposed in this pull request?
Make `SET mapreduce.job.reduces` automatically convert to `spark.sql.shuffle.partitions`, similar to the existing handling of `SET mapred.reduce.tasks`.
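
A minimal usage sketch of the behavior this adds, assuming a Spark build with this patch applied (the local-mode session below is only for illustration):

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").appName("set-reduces-demo").getOrCreate()

// Hadoop's property is translated into Spark SQL's shuffle partition setting,
// mirroring the existing handling of mapred.reduce.tasks.
spark.sql("SET mapreduce.job.reduces=42")
assert(spark.conf.get("spark.sql.shuffle.partitions") == "42")

// Values below 1 are rejected with an IllegalArgumentException.
```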

## How was this patch tested?

unit tests

Author: Yuming Wang 

Closes #17020 from wangyum/SPARK-19693.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3f9f9180
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3f9f9180
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3f9f9180

Branch: refs/heads/master
Commit: 3f9f9180c2e695ad468eb813df5feec41e169531
Parents: 81303f7
Author: Yuming Wang 
Authored: Wed Mar 8 11:31:01 2017 +
Committer: Sean Owen 
Committed: Wed Mar 8 11:31:01 2017 +

--
 .../spark/sql/execution/command/SetCommand.scala   | 17 +
 .../org/apache/spark/sql/internal/SQLConf.scala|  4 
 .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 12 
 3 files changed, 33 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/3f9f9180/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala
index 7afa4e7..5f12830 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala
@@ -60,6 +60,23 @@ case class SetCommand(kv: Option[(String, Option[String])]) 
extends RunnableComm
   }
   (keyValueOutput, runFunc)
 
+case Some((SQLConf.Replaced.MAPREDUCE_JOB_REDUCES, Some(value))) =>
+  val runFunc = (sparkSession: SparkSession) => {
+logWarning(
+  s"Property ${SQLConf.Replaced.MAPREDUCE_JOB_REDUCES} is Hadoop's 
property, " +
+s"automatically converted to ${SQLConf.SHUFFLE_PARTITIONS.key} 
instead.")
+if (value.toInt < 1) {
+  val msg =
+s"Setting negative ${SQLConf.Replaced.MAPREDUCE_JOB_REDUCES} for 
automatically " +
+  "determining the number of reducers is not supported."
+  throw new IllegalArgumentException(msg)
+} else {
+  sparkSession.conf.set(SQLConf.SHUFFLE_PARTITIONS.key, value)
+  Seq(Row(SQLConf.SHUFFLE_PARTITIONS.key, value))
+}
+  }
+  (keyValueOutput, runFunc)
+
 case Some((key @ SetCommand.VariableName(name), Some(value))) =>
   val runFunc = (sparkSession: SparkSession) => {
 sparkSession.conf.set(name, value)

http://git-wip-us.apache.org/repos/asf/spark/blob/3f9f9180/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 461dfe3..fd3acd4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -677,6 +677,10 @@ object SQLConf {
   object Deprecated {
 val MAPRED_REDUCE_TASKS = "mapred.reduce.tasks"
   }
+
+  object Replaced {
+val MAPREDUCE_JOB_REDUCES = "mapreduce.job.reduces"
+  }
 }
 
 /**

http://git-wip-us.apache.org/repos/asf/spark/blob/3f9f9180/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 468ea05..d9e0196 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -1019,6 +1019,18 @@ class SQLQuerySuite extends QueryTest with 
SharedSQLContext {
 spark.sessionState.conf.clear()
   }
 
+  test("SET mapreduce.job.reduces automatically converted to 
spark.sql.shuffle.partitions") {
+spark.sessionState.conf.clear()
+val before = spark.conf.get(SQLConf.SHUFFLE_PARTITIONS.key).toInt
+val newConf = before + 1
+sql(s"SET mapreduce.job.reduces=${newConf.toString}")
+val after = spark.conf.get(SQLConf.SHUFFLE_PARTITIONS.key).toInt
+assert(before != after)
+assert(newConf === after)
+intercept[IllegalArgumentException](sql(s"SET mapreduce.job.reduces=-1"))
+spark.sessionState.conf.clear()
+  }
+
 

spark git commit: [SPARK-16440][MLLIB] Ensure broadcasted variables are destroyed even in case of exception

2017-03-08 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 3f9f9180c -> 9ea201cf6


[SPARK-16440][MLLIB] Ensure broadcasted variables are destroyed even in case of 
exception

## What changes were proposed in this pull request?

Ensure broadcast variables are destroyed even in case of an exception.
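
At user level, the pattern looks roughly like the following sketch (not code from this patch; the lookup table and word list are made up for illustration):

```scala
import org.apache.spark.SparkContext

def countKnownWords(sc: SparkContext, lookup: Map[String, Int], words: Seq[String]): Long = {
  val bcLookup = sc.broadcast(lookup)
  val rdd = sc.parallelize(words)
  try {
    // Before this change, a failure in the job below would leak the broadcast.
    rdd.filter(w => bcLookup.value.contains(w)).count()
  } finally {
    bcLookup.destroy()  // now released even if the job above throws
  }
}
```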
## How was this patch tested?

Word2VecSuite was run locally

Author: Anthony Truchet 

Closes #14299 from AnthonyTruchet/SPARK-16440.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9ea201cf
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9ea201cf
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9ea201cf

Branch: refs/heads/master
Commit: 9ea201cf6482c9c62c9428759d238063db62d66e
Parents: 3f9f918
Author: Anthony Truchet 
Authored: Wed Mar 8 11:44:25 2017 +
Committer: Sean Owen 
Committed: Wed Mar 8 11:44:25 2017 +

--
 .../org/apache/spark/mllib/feature/Word2Vec.scala | 18 +++---
 1 file changed, 15 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/9ea201cf/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
index 2364d43..531c8b0 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
@@ -30,6 +30,7 @@ import org.json4s.jackson.JsonMethods._
 import org.apache.spark.SparkContext
 import org.apache.spark.annotation.Since
 import org.apache.spark.api.java.JavaRDD
+import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.internal.Logging
 import org.apache.spark.mllib.linalg.{Vector, Vectors}
 import org.apache.spark.mllib.util.{Loader, Saveable}
@@ -314,6 +315,20 @@ class Word2Vec extends Serializable with Logging {
 val expTable = sc.broadcast(createExpTable())
 val bcVocab = sc.broadcast(vocab)
 val bcVocabHash = sc.broadcast(vocabHash)
+try {
+  doFit(dataset, sc, expTable, bcVocab, bcVocabHash)
+} finally {
+  expTable.destroy(blocking = false)
+  bcVocab.destroy(blocking = false)
+  bcVocabHash.destroy(blocking = false)
+}
+  }
+
+  private def doFit[S <: Iterable[String]](
+dataset: RDD[S], sc: SparkContext,
+expTable: Broadcast[Array[Float]],
+bcVocab: Broadcast[Array[VocabWord]],
+bcVocabHash: Broadcast[mutable.HashMap[String, Int]]) = {
 // each partition is a collection of sentences,
 // will be translated into arrays of Index integer
 val sentences: RDD[Array[Int]] = dataset.mapPartitions { sentenceIter =>
@@ -435,9 +450,6 @@ class Word2Vec extends Serializable with Logging {
   bcSyn1Global.destroy(false)
 }
 newSentences.unpersist()
-expTable.destroy(false)
-bcVocab.destroy(false)
-bcVocabHash.destroy(false)
 
 val wordArray = vocab.map(_.word)
 new Word2VecModel(wordArray.zipWithIndex.toMap, syn0Global)





spark git commit: [DOCS][SS] fix structured streaming python example

2017-03-12 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-2.1 e481a7381 -> f9833c66a


[DOCS][SS] fix structured streaming python example

## What changes were proposed in this pull request?

- SS Python example: fix `TypeError: 'xxx' object is not callable`
- some other doc issues.

## How was this patch tested?

Jenkins.

Author: uncleGen 

Closes #17257 from uncleGen/docs-ss-python.

(cherry picked from commit e29a74d5b1fa3f9356b7af5dd7e3fce49bc8eb7d)
Signed-off-by: Sean Owen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f9833c66
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f9833c66
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f9833c66

Branch: refs/heads/branch-2.1
Commit: f9833c66a2f11414357854dae00e9e2448869254
Parents: e481a73
Author: uncleGen 
Authored: Sun Mar 12 08:29:37 2017 +
Committer: Sean Owen 
Committed: Sun Mar 12 08:29:46 2017 +

--
 docs/structured-streaming-programming-guide.md| 18 +-
 .../execution/streaming/FileStreamSource.scala|  2 +-
 .../streaming/dstream/FileInputDStream.scala  |  2 +-
 3 files changed, 11 insertions(+), 11 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/f9833c66/docs/structured-streaming-programming-guide.md
--
diff --git a/docs/structured-streaming-programming-guide.md 
b/docs/structured-streaming-programming-guide.md
index 45ee551..d316e04 100644
--- a/docs/structured-streaming-programming-guide.md
+++ b/docs/structured-streaming-programming-guide.md
@@ -545,7 +545,7 @@ spark = SparkSession. ...
 
 # Read text from socket 
 socketDF = spark \
-.readStream() \
+.readStream \
 .format("socket") \
 .option("host", "localhost") \
 .option("port", ) \
@@ -558,7 +558,7 @@ socketDF.printSchema()
 # Read all the csv files written atomically in a directory
 userSchema = StructType().add("name", "string").add("age", "integer")
 csvDF = spark \
-.readStream() \
+.readStream \
 .option("sep", ";") \
 .schema(userSchema) \
 .csv("/path/to/directory")  # Equivalent to 
format("csv").load("/path/to/directory")
@@ -995,7 +995,7 @@ Here is the compatibility matrix.
 
 Update mode uses watermark to drop old aggregation state.
 
-Complete mode does drop not old aggregation state since by definition 
this mode
+Complete mode does not drop old aggregation state since by definition 
this mode
 preserves all data in the Result Table.
 
   
@@ -1217,13 +1217,13 @@ noAggDF = deviceDataDf.select("device").where("signal > 
10")
 
 # Print new data to console
 noAggDF \
-.writeStream() \
+.writeStream \
 .format("console") \
 .start()
 
 # Write new data to Parquet files
 noAggDF \
-.writeStream() \
+.writeStream \
 .format("parquet") \
 .option("checkpointLocation", "path/to/checkpoint/dir") \
 .option("path", "path/to/destination/dir") \
@@ -1234,14 +1234,14 @@ aggDF = df.groupBy("device").count()
 
 # Print updated aggregations to console
 aggDF \
-.writeStream() \
+.writeStream \
 .outputMode("complete") \
 .format("console") \
 .start()
 
 # Have all the aggregates in an in memory table. The query name will be the 
table name
 aggDF \
-.writeStream() \
+.writeStream \
 .queryName("aggregates") \
 .outputMode("complete") \
 .format("memory") \
@@ -1329,7 +1329,7 @@ query.lastProgress();// the most recent progress 
update of this streaming qu
 
 
 {% highlight python %}
-query = df.writeStream().format("console").start()   # get the query object
+query = df.writeStream.format("console").start()   # get the query object
 
 query.id()  # get the unique identifier of the running query that 
persists across restarts from checkpoint data
 
@@ -1674,7 +1674,7 @@ aggDF
 
 {% highlight python %}
 aggDF \
-.writeStream() \
+.writeStream \
 .outputMode("complete") \
 .option("checkpointLocation", "path/to/HDFS/dir") \
 .format("memory") \

http://git-wip-us.apache.org/repos/asf/spark/blob/f9833c66/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala
index 0f0b6f1..fd94bb6 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala
@@ -86,7 +86,7 @@ class FileStreamSource(
   }
   seenFiles.purge()
 
-  logInfo(s"maxFi

spark git commit: [DOCS][SS] fix structured streaming python example

2017-03-12 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master f6fdf92d0 -> e29a74d5b


[DOCS][SS] fix structured streaming python example

## What changes were proposed in this pull request?

- SS Python example: fix `TypeError: 'xxx' object is not callable`
- some other doc issues.

## How was this patch tested?

Jenkins.

Author: uncleGen 

Closes #17257 from uncleGen/docs-ss-python.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e29a74d5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e29a74d5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e29a74d5

Branch: refs/heads/master
Commit: e29a74d5b1fa3f9356b7af5dd7e3fce49bc8eb7d
Parents: f6fdf92
Author: uncleGen 
Authored: Sun Mar 12 08:29:37 2017 +
Committer: Sean Owen 
Committed: Sun Mar 12 08:29:37 2017 +

--
 docs/structured-streaming-programming-guide.md| 18 +-
 .../execution/streaming/FileStreamSource.scala|  2 +-
 .../streaming/dstream/FileInputDStream.scala  |  2 +-
 3 files changed, 11 insertions(+), 11 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/e29a74d5/docs/structured-streaming-programming-guide.md
--
diff --git a/docs/structured-streaming-programming-guide.md 
b/docs/structured-streaming-programming-guide.md
index 995ac77..7988472 100644
--- a/docs/structured-streaming-programming-guide.md
+++ b/docs/structured-streaming-programming-guide.md
@@ -539,7 +539,7 @@ spark = SparkSession. ...
 
 # Read text from socket 
 socketDF = spark \
-.readStream() \
+.readStream \
 .format("socket") \
 .option("host", "localhost") \
 .option("port", ) \
@@ -552,7 +552,7 @@ socketDF.printSchema()
 # Read all the csv files written atomically in a directory
 userSchema = StructType().add("name", "string").add("age", "integer")
 csvDF = spark \
-.readStream() \
+.readStream \
 .option("sep", ";") \
 .schema(userSchema) \
 .csv("/path/to/directory")  # Equivalent to 
format("csv").load("/path/to/directory")
@@ -971,7 +971,7 @@ Here is the compatibility matrix.
 
 Update mode uses watermark to drop old aggregation state.
 
-Complete mode does drop not old aggregation state since by definition 
this mode
+Complete mode does not drop old aggregation state since by definition 
this mode
 preserves all data in the Result Table.
 
   
@@ -1201,13 +1201,13 @@ noAggDF = deviceDataDf.select("device").where("signal > 
10")
 
 # Print new data to console
 noAggDF \
-.writeStream() \
+.writeStream \
 .format("console") \
 .start()
 
 # Write new data to Parquet files
 noAggDF \
-.writeStream() \
+.writeStream \
 .format("parquet") \
 .option("checkpointLocation", "path/to/checkpoint/dir") \
 .option("path", "path/to/destination/dir") \
@@ -1218,14 +1218,14 @@ aggDF = df.groupBy("device").count()
 
 # Print updated aggregations to console
 aggDF \
-.writeStream() \
+.writeStream \
 .outputMode("complete") \
 .format("console") \
 .start()
 
 # Have all the aggregates in an in memory table. The query name will be the 
table name
 aggDF \
-.writeStream() \
+.writeStream \
 .queryName("aggregates") \
 .outputMode("complete") \
 .format("memory") \
@@ -1313,7 +1313,7 @@ query.lastProgress();// the most recent progress 
update of this streaming qu
 
 
 {% highlight python %}
-query = df.writeStream().format("console").start()   # get the query object
+query = df.writeStream.format("console").start()   # get the query object
 
 query.id()  # get the unique identifier of the running query that 
persists across restarts from checkpoint data
 
@@ -1658,7 +1658,7 @@ aggDF
 
 {% highlight python %}
 aggDF \
-.writeStream() \
+.writeStream \
 .outputMode("complete") \
 .option("checkpointLocation", "path/to/HDFS/dir") \
 .format("memory") \

http://git-wip-us.apache.org/repos/asf/spark/blob/e29a74d5/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala
index 411a15f..a9e64c6 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala
@@ -97,7 +97,7 @@ class FileStreamSource(
   }
   seenFiles.purge()
 
-  logInfo(s"maxFilesPerBatch = $maxFilesPerBatch, maxFileAge = $maxFileAgeMs")
+  logInfo(s"maxFilesPerBatch = $maxFilesP

[2/2] spark-website git commit: add Spark Project Improvement Proposals doc

2017-03-12 Thread srowen
add Spark Project Improvement Proposals doc


Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/39838046
Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/39838046
Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/39838046

Branch: refs/heads/asf-site
Commit: 39838046c1f2cc0c3e00bd08f5130af6f9798aed
Parents: c1b9ad3
Author: cody koeninger 
Authored: Fri Mar 10 13:31:58 2017 -0600
Committer: cody koeninger 
Committed: Fri Mar 10 13:31:58 2017 -0600

--
 _layouts/global.html|   1 +
 improvement-proposals.md|  91 ++
 site/committers.html|   1 +
 site/community.html |   1 +
 site/contributing.html  |   1 +
 site/developer-tools.html   |   1 +
 site/documentation.html |   1 +
 site/downloads.html |   1 +
 site/examples.html  |   1 +
 site/faq.html   |   1 +
 site/graphx/index.html  |   1 +
 site/improvement-proposals.html | 295 +++
 site/index.html |   1 +
 site/mailing-lists.html |   1 +
 site/mllib/index.html   |   1 +
 site/news/amp-camp-2013-registration-ope.html   |   1 +
 .../news/announcing-the-first-spark-summit.html |   1 +
 .../news/fourth-spark-screencast-published.html |   1 +
 site/news/index.html|   1 +
 site/news/nsdi-paper.html   |   1 +
 site/news/one-month-to-spark-summit-2015.html   |   1 +
 .../proposals-open-for-spark-summit-east.html   |   1 +
 ...registration-open-for-spark-summit-east.html |   1 +
 .../news/run-spark-and-shark-on-amazon-emr.html |   1 +
 site/news/spark-0-6-1-and-0-5-2-released.html   |   1 +
 site/news/spark-0-6-2-released.html |   1 +
 site/news/spark-0-7-0-released.html |   1 +
 site/news/spark-0-7-2-released.html |   1 +
 site/news/spark-0-7-3-released.html |   1 +
 site/news/spark-0-8-0-released.html |   1 +
 site/news/spark-0-8-1-released.html |   1 +
 site/news/spark-0-9-0-released.html |   1 +
 site/news/spark-0-9-1-released.html |   1 +
 site/news/spark-0-9-2-released.html |   1 +
 site/news/spark-1-0-0-released.html |   1 +
 site/news/spark-1-0-1-released.html |   1 +
 site/news/spark-1-0-2-released.html |   1 +
 site/news/spark-1-1-0-released.html |   1 +
 site/news/spark-1-1-1-released.html |   1 +
 site/news/spark-1-2-0-released.html |   1 +
 site/news/spark-1-2-1-released.html |   1 +
 site/news/spark-1-2-2-released.html |   1 +
 site/news/spark-1-3-0-released.html |   1 +
 site/news/spark-1-4-0-released.html |   1 +
 site/news/spark-1-4-1-released.html |   1 +
 site/news/spark-1-5-0-released.html |   1 +
 site/news/spark-1-5-1-released.html |   1 +
 site/news/spark-1-5-2-released.html |   1 +
 site/news/spark-1-6-0-released.html |   1 +
 site/news/spark-1-6-1-released.html |   1 +
 site/news/spark-1-6-2-released.html |   1 +
 site/news/spark-1-6-3-released.html |   1 +
 site/news/spark-2-0-0-released.html |   1 +
 site/news/spark-2-0-1-released.html |   1 +
 site/news/spark-2-0-2-released.html |   1 +
 site/news/spark-2-1-0-released.html |   1 +
 site/news/spark-2.0.0-preview.html  |   1 +
 .../spark-accepted-into-apache-incubator.html   |   1 +
 site/news/spark-and-shark-in-the-news.html  |   1 +
 site/news/spark-becomes-tlp.html|   1 +
 site/news/spark-featured-in-wired.html  |   1 +
 .../spark-mailing-lists-moving-to-apache.html   |   1 +
 site/news/spark-meetups.html|   1 +
 site/news/spark-screencasts-published.html  |   1 +
 site/news/spark-summit-2013-is-a-wrap.html  |   1 +
 site/news/spark-summit-2014-videos-posted.html  |   1 +
 site/news/spark-summit-2015-videos-posted.html  |   1 +
 site/news/spark-summit-agenda-posted.html   |   1 +
 .../spark-summit-east-2015-videos-posted.html   |   1 +
 .../spark-summit-east-2016-cfp-closing.html |   1 +
 .../spark-summit-east-2017-agenda-posted.html   |   1 +
 site/news/spark-summit-east-agenda-posted.html  |   1 +
 .../news/spark-summit-europe-agenda-posted.html |   1 +
 site/news/spark-summit-europe.html  |   1 +
 .../spark-summit-june-2016-agenda-posted.html   |   1 +
 site/news/spark-tips-from-quantifind.html   |   1 +
 .../sp

[1/2] spark-website git commit: add Spark Project Improvement Proposals doc

2017-03-12 Thread srowen
Repository: spark-website
Updated Branches:
  refs/heads/asf-site c1b9ad3cb -> 39838046c


http://git-wip-us.apache.org/repos/asf/spark-website/blob/39838046/site/news/spark-summit-east-2016-cfp-closing.html
--
diff --git a/site/news/spark-summit-east-2016-cfp-closing.html 
b/site/news/spark-summit-east-2016-cfp-closing.html
index cc43c32..74fde88 100644
--- a/site/news/spark-summit-east-2016-cfp-closing.html
+++ b/site/news/spark-summit-east-2016-cfp-closing.html
@@ -119,6 +119,7 @@
 
   Mailing Lists & Resources
   Contributing to Spark
+  Improvement Proposals 
(SPIP)
   https://issues.apache.org/jira/browse/SPARK";>Issue 
Tracker
   Powered By
   Project Committers

http://git-wip-us.apache.org/repos/asf/spark-website/blob/39838046/site/news/spark-summit-east-2017-agenda-posted.html
--
diff --git a/site/news/spark-summit-east-2017-agenda-posted.html 
b/site/news/spark-summit-east-2017-agenda-posted.html
index 58af016..65d3636 100644
--- a/site/news/spark-summit-east-2017-agenda-posted.html
+++ b/site/news/spark-summit-east-2017-agenda-posted.html
@@ -119,6 +119,7 @@
 
   Mailing Lists & Resources
   Contributing to Spark
+  Improvement Proposals 
(SPIP)
   https://issues.apache.org/jira/browse/SPARK";>Issue 
Tracker
   Powered By
   Project Committers

http://git-wip-us.apache.org/repos/asf/spark-website/blob/39838046/site/news/spark-summit-east-agenda-posted.html
--
diff --git a/site/news/spark-summit-east-agenda-posted.html 
b/site/news/spark-summit-east-agenda-posted.html
index 0bc68c7..9b3f5ba 100644
--- a/site/news/spark-summit-east-agenda-posted.html
+++ b/site/news/spark-summit-east-agenda-posted.html
@@ -119,6 +119,7 @@
 
   Mailing Lists & Resources
   Contributing to Spark
+  Improvement Proposals 
(SPIP)
   https://issues.apache.org/jira/browse/SPARK";>Issue 
Tracker
   Powered By
   Project Committers

http://git-wip-us.apache.org/repos/asf/spark-website/blob/39838046/site/news/spark-summit-europe-agenda-posted.html
--
diff --git a/site/news/spark-summit-europe-agenda-posted.html 
b/site/news/spark-summit-europe-agenda-posted.html
index dfbb20d..176f8f2 100644
--- a/site/news/spark-summit-europe-agenda-posted.html
+++ b/site/news/spark-summit-europe-agenda-posted.html
@@ -119,6 +119,7 @@
 
   Mailing Lists & Resources
   Contributing to Spark
+  Improvement Proposals 
(SPIP)
   https://issues.apache.org/jira/browse/SPARK";>Issue 
Tracker
   Powered By
   Project Committers

http://git-wip-us.apache.org/repos/asf/spark-website/blob/39838046/site/news/spark-summit-europe.html
--
diff --git a/site/news/spark-summit-europe.html 
b/site/news/spark-summit-europe.html
index 9728b5f..a122ab0 100644
--- a/site/news/spark-summit-europe.html
+++ b/site/news/spark-summit-europe.html
@@ -119,6 +119,7 @@
 
   Mailing Lists & Resources
   Contributing to Spark
+  Improvement Proposals 
(SPIP)
   https://issues.apache.org/jira/browse/SPARK";>Issue 
Tracker
   Powered By
   Project Committers

http://git-wip-us.apache.org/repos/asf/spark-website/blob/39838046/site/news/spark-summit-june-2016-agenda-posted.html
--
diff --git a/site/news/spark-summit-june-2016-agenda-posted.html 
b/site/news/spark-summit-june-2016-agenda-posted.html
index 969edb4..6b5e8c5 100644
--- a/site/news/spark-summit-june-2016-agenda-posted.html
+++ b/site/news/spark-summit-june-2016-agenda-posted.html
@@ -119,6 +119,7 @@
 
   Mailing Lists & Resources
   Contributing to Spark
+  Improvement Proposals 
(SPIP)
   https://issues.apache.org/jira/browse/SPARK";>Issue 
Tracker
   Powered By
   Project Committers

http://git-wip-us.apache.org/repos/asf/spark-website/blob/39838046/site/news/spark-tips-from-quantifind.html
--
diff --git a/site/news/spark-tips-from-quantifind.html 
b/site/news/spark-tips-from-quantifind.html
index 546feda..246eb4e 100644
--- a/site/news/spark-tips-from-quantifind.html
+++ b/site/news/spark-tips-from-quantifind.html
@@ -119,6 +119,7 @@
 
   Mailing Lists & Resources
   Contributing to Spark
+  Improvement Proposals 
(SPIP)
   https://issues.apache.org/jira/browse/SPARK";>Issue 
Tracker
   Powered By
   Project Committers

http://git-wip-us.apache.org/repos/asf/

spark git commit: [SPARK-19922][ML] small speedups to findSynonyms

2017-03-14 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 1c7275efa -> 5e96a57b2


[SPARK-19922][ML] small speedups to findSynonyms

Currently, generating synonyms using a large model (I've tested with 3M words) is very slow. These efficiencies have sped things up for us by ~17%.

I wasn't sure if such small changes were worthy of a JIRA, but the guidelines seemed to suggest that is the preferred approach.

## What changes were proposed in this pull request?

Address a few small issues in the findSynonyms logic:
1) remove usage of ``Array.fill`` to zero out the ``cosineVec`` array. The 
default float value in Scala and Java is 0.0f, so explicitly setting the values 
to zero is not needed
2) use Floats throughout. The conversion to Doubles before doing the 
``priorityQueue`` is totally superfluous, since all the similarity computations 
are done using Floats anyway. Creating a second large array just serves to put 
extra strain on the GC
3) convert the slow ``for(i <- cosVec.indices)`` to an ugly, but faster, 
``while`` loop

These efficiencies are really only apparent when working with a large model.
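
For context, the public API being sped up is `Word2VecModel.findSynonyms`; a minimal usage sketch, with a toy corpus standing in for a real one (`sc` is an existing SparkContext):

```scala
import org.apache.spark.mllib.feature.Word2Vec
import org.apache.spark.rdd.RDD

// A tiny tokenized corpus, only for illustration.
val corpus: RDD[Seq[String]] = sc.parallelize(Seq(
  Seq("spark", "is", "a", "fast", "engine"),
  Seq("spark", "runs", "on", "a", "cluster")
))
val model = new Word2Vec().setVectorSize(10).setMinCount(1).fit(corpus)

// The optimized path: score every word vector against the query vector and
// return the top matches with their cosine similarities.
model.findSynonyms("spark", 2).foreach { case (word, sim) => println(s"$word: $sim") }
```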
## How was this patch tested?

Existing unit tests + some in-house tests to time the difference

cc jkbradley MLNick srowen

Author: Asher Krim 
Author: Asher Krim 

Closes #17263 from Krimit/fasterFindSynonyms.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5e96a57b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5e96a57b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5e96a57b

Branch: refs/heads/master
Commit: 5e96a57b2f383d4b33735681b41cd3ec06570671
Parents: 1c7275e
Author: Asher Krim 
Authored: Tue Mar 14 13:08:11 2017 +
Committer: Sean Owen 
Committed: Tue Mar 14 13:08:11 2017 +

--
 .../apache/spark/mllib/feature/Word2Vec.scala   | 34 +++-
 1 file changed, 19 insertions(+), 15 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/5e96a57b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
index 531c8b0..6f96813 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
@@ -491,8 +491,8 @@ class Word2VecModel private[spark] (
 
   // wordVecNorms: Array of length numWords, each value being the Euclidean 
norm
   //   of the wordVector.
-  private val wordVecNorms: Array[Double] = {
-val wordVecNorms = new Array[Double](numWords)
+  private val wordVecNorms: Array[Float] = {
+val wordVecNorms = new Array[Float](numWords)
 var i = 0
 while (i < numWords) {
   val vec = wordVectors.slice(i * vectorSize, i * vectorSize + vectorSize)
@@ -570,7 +570,7 @@ class Word2VecModel private[spark] (
 require(num > 0, "Number of similar words should > 0")
 
 val fVector = vector.toArray.map(_.toFloat)
-val cosineVec = Array.fill[Float](numWords)(0)
+val cosineVec = new Array[Float](numWords)
 val alpha: Float = 1
 val beta: Float = 0
 // Normalize input vector before blas.sgemv to avoid Inf value
@@ -581,22 +581,23 @@ class Word2VecModel private[spark] (
 blas.sgemv(
   "T", vectorSize, numWords, alpha, wordVectors, vectorSize, fVector, 1, 
beta, cosineVec, 1)
 
-val cosVec = cosineVec.map(_.toDouble)
-var ind = 0
-while (ind < numWords) {
-  val norm = wordVecNorms(ind)
-  if (norm == 0.0) {
-cosVec(ind) = 0.0
+var i = 0
+while (i < numWords) {
+  val norm = wordVecNorms(i)
+  if (norm == 0.0f) {
+cosineVec(i) = 0.0f
   } else {
-cosVec(ind) /= norm
+cosineVec(i) /= norm
   }
-  ind += 1
+  i += 1
 }
 
-val pq = new BoundedPriorityQueue[(String, Double)](num + 
1)(Ordering.by(_._2))
+val pq = new BoundedPriorityQueue[(String, Float)](num + 
1)(Ordering.by(_._2))
 
-for(i <- cosVec.indices) {
-  pq += Tuple2(wordList(i), cosVec(i))
+var j = 0
+while (j < numWords) {
+  pq += Tuple2(wordList(j), cosineVec(j))
+  j += 1
 }
 
 val scored = pq.toSeq.sortBy(-_._2)
@@ -606,7 +607,10 @@ class Word2VecModel private[spark] (
   case None => scored
 }
 
-filtered.take(num).toArray
+filtered
+  .take(num)
+  .map { case (word, score) => (word, score.toDouble) }
+  .toArray
   }
 
   /**





spark git commit: [SPARK-16599][CORE] java.util.NoSuchElementException: None.get at at org.apache.spark.storage.BlockInfoManager.releaseAllLocksForTask

2017-03-18 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master ccba622e3 -> 54e61df26


[SPARK-16599][CORE] java.util.NoSuchElementException: None.get at at 
org.apache.spark.storage.BlockInfoManager.releaseAllLocksForTask

## What changes were proposed in this pull request?

Avoid a None.get exception in the (rare?) case that no read locks exist.
Note that while this resolves the immediate cause of the exception, it's not clear it is the root problem.
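
A standalone illustration of the change (not BlockInfoManager itself): `Option.get` on a missing entry throws `java.util.NoSuchElementException`, while `getOrElse` falls back to an empty collection.

```scala
import scala.collection.mutable

val readLocksByTask = mutable.HashMap[Long, Seq[String]]()
val taskAttemptId = 42L

// Before: throws java.util.NoSuchElementException: None.get when the task
// attempt never registered any read locks.
// val readLocks = readLocksByTask.remove(taskAttemptId).get

// After: fall back to an empty collection instead of failing.
val readLocks = readLocksByTask.remove(taskAttemptId).getOrElse(Seq.empty)
assert(readLocks.isEmpty)
```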

## How was this patch tested?

Existing tests

Author: Sean Owen 

Closes #17290 from srowen/SPARK-16599.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/54e61df2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/54e61df2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/54e61df2

Branch: refs/heads/master
Commit: 54e61df2634163382c7d01a2ad40ffb5e7270abc
Parents: ccba622
Author: Sean Owen 
Authored: Sat Mar 18 18:01:24 2017 +0100
Committer: Sean Owen 
Committed: Sat Mar 18 18:01:24 2017 +0100

--
 .../main/scala/org/apache/spark/storage/BlockInfoManager.scala   | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/54e61df2/core/src/main/scala/org/apache/spark/storage/BlockInfoManager.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/storage/BlockInfoManager.scala 
b/core/src/main/scala/org/apache/spark/storage/BlockInfoManager.scala
index dd8f5ba..490d45d 100644
--- a/core/src/main/scala/org/apache/spark/storage/BlockInfoManager.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockInfoManager.scala
@@ -23,7 +23,7 @@ import scala.collection.JavaConverters._
 import scala.collection.mutable
 import scala.reflect.ClassTag
 
-import com.google.common.collect.ConcurrentHashMultiset
+import com.google.common.collect.{ConcurrentHashMultiset, ImmutableMultiset}
 
 import org.apache.spark.{SparkException, TaskContext}
 import org.apache.spark.internal.Logging
@@ -340,7 +340,7 @@ private[storage] class BlockInfoManager extends Logging {
 val blocksWithReleasedLocks = mutable.ArrayBuffer[BlockId]()
 
 val readLocks = synchronized {
-  readLocksByTask.remove(taskAttemptId).get
+  
readLocksByTask.remove(taskAttemptId).getOrElse(ImmutableMultiset.of[BlockId]())
 }
 val writeLocks = synchronized {
   writeLocksByTask.remove(taskAttemptId).getOrElse(Seq.empty)





spark git commit: [SPARK-20011][ML][DOCS] Clarify documentation for ALS 'rank' parameter

2017-03-21 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master d2dcd6792 -> 7620aed82


[SPARK-20011][ML][DOCS] Clarify documentation for ALS 'rank' parameter

## What changes were proposed in this pull request?

API documentation and collaborative filtering documentation page changes to clarify the inconsistent description of the ALS `rank` parameter.

 - [DOCS] was previously: "rank is the number of latent factors in the model."
 - [API] was previously:  "rank - number of features to use"

This change describes rank in both places consistently as:

 - "Number of features to use (also referred to as the number of latent 
factors)"
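
A minimal `spark.mllib` sketch showing where `rank` enters; the toy ratings RDD is only for illustration and `sc` is an existing SparkContext:

```scala
import org.apache.spark.mllib.recommendation.{ALS, Rating}

val ratings = sc.parallelize(Seq(
  Rating(1, 10, 4.0), Rating(1, 20, 1.0), Rating(2, 10, 5.0)
))

val rank = 10        // number of features to use, i.e. the number of latent factors
val iterations = 10
val lambda = 0.01    // regularization parameter

val model = ALS.train(ratings, rank, iterations, lambda)
```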

Author: Chris Snow 

Author: christopher snow 

Closes #17345 from snowch/SPARK-20011.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7620aed8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7620aed8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7620aed8

Branch: refs/heads/master
Commit: 7620aed828d8baefc425b54684a83c81f1507b02
Parents: d2dcd67
Author: christopher snow 
Authored: Tue Mar 21 13:23:59 2017 +
Committer: Sean Owen 
Committed: Tue Mar 21 13:23:59 2017 +

--
 docs/mllib-collaborative-filtering.md   |  2 +-
 .../org/apache/spark/mllib/recommendation/ALS.scala | 16 
 python/pyspark/mllib/recommendation.py  |  4 ++--
 3 files changed, 11 insertions(+), 11 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/7620aed8/docs/mllib-collaborative-filtering.md
--
diff --git a/docs/mllib-collaborative-filtering.md 
b/docs/mllib-collaborative-filtering.md
index 0f891a0..d1bb6d6 100644
--- a/docs/mllib-collaborative-filtering.md
+++ b/docs/mllib-collaborative-filtering.md
@@ -20,7 +20,7 @@ algorithm to learn these latent factors. The implementation 
in `spark.mllib` has
 following parameters:
 
 * *numBlocks* is the number of blocks used to parallelize computation (set to 
-1 to auto-configure).
-* *rank* is the number of latent factors in the model.
+* *rank* is the number of features to use (also referred to as the number of 
latent factors).
 * *iterations* is the number of iterations of ALS to run. ALS typically 
converges to a reasonable
   solution in 20 iterations or less.
 * *lambda* specifies the regularization parameter in ALS.

http://git-wip-us.apache.org/repos/asf/spark/blob/7620aed8/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
--
diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
index 76b1bc1..1428822 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
@@ -301,7 +301,7 @@ object ALS {
* level of parallelism.
*
* @param ratingsRDD of [[Rating]] objects with userID, productID, and 
rating
-   * @param rank   number of features to use
+   * @param rank   number of features to use (also referred to as the 
number of latent factors)
* @param iterations number of iterations of ALS
* @param lambda regularization parameter
* @param blocks level of parallelism to split computation into
@@ -326,7 +326,7 @@ object ALS {
* level of parallelism.
*
* @param ratingsRDD of [[Rating]] objects with userID, productID, and 
rating
-   * @param rank   number of features to use
+   * @param rank   number of features to use (also referred to as the 
number of latent factors)
* @param iterations number of iterations of ALS
* @param lambda regularization parameter
* @param blocks level of parallelism to split computation into
@@ -349,7 +349,7 @@ object ALS {
* parallelism automatically based on the number of partitions in `ratings`.
*
* @param ratingsRDD of [[Rating]] objects with userID, productID, and 
rating
-   * @param rank   number of features to use
+   * @param rank   number of features to use (also referred to as the 
number of latent factors)
* @param iterations number of iterations of ALS
* @param lambda regularization parameter
*/
@@ -366,7 +366,7 @@ object ALS {
* parallelism automatically based on the number of partitions in `ratings`.
*
* @param ratingsRDD of [[Rating]] objects with userID, productID, and 
rating
-   * @param rank   number of features to use
+   * @param rank   number of features to use (also referred to as the 
number of latent factors)
* @param iterations number of iterations of ALS
*/
   @Since("0.8.0")
@@ -383,7 +383,7 @@ object ALS {
* a level of parallelism g

spark git commit: [SPARK-19998][BLOCK MANAGER] Change the exception log to add the RDD id of the related block

2017-03-21 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 7620aed82 -> 650d03cfc


[SPARK-19998][BLOCK MANAGER] Change the exception log to add the RDD id of the related block

## What changes were proposed in this pull request?

"java.lang.Exception: Could not compute split, block $blockId not found" doesn't include the RDD id, and "BlockManager: Removing RDD $id" has only the RDD id, so it is hard to tell that the removal is the reason for the exception. It's better for the block-not-found exception to also include the RDD id.

## How was this patch tested?

Existing tests

Author: jianran.tfh 
Author: jianran 

Closes #17334 from jianran/SPARK-19998.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/650d03cf
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/650d03cf
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/650d03cf

Branch: refs/heads/master
Commit: 650d03cfc9a609a2c603f9ced452d03ec8429b0d
Parents: 7620aed
Author: jianran.tfh 
Authored: Tue Mar 21 15:15:19 2017 +
Committer: Sean Owen 
Committed: Tue Mar 21 15:15:19 2017 +

--
 core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/650d03cf/core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala
--
diff --git a/core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala 
b/core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala
index d47b755..4e036c2 100644
--- a/core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala
@@ -47,7 +47,7 @@ class BlockRDD[T: ClassTag](sc: SparkContext, @transient val 
blockIds: Array[Blo
 blockManager.get[T](blockId) match {
   case Some(block) => block.data.asInstanceOf[Iterator[T]]
   case None =>
-throw new Exception("Could not compute split, block " + blockId + " 
not found")
+throw new Exception(s"Could not compute split, block $blockId of RDD 
$id not found")
 }
   }
 





spark git commit: [SPARK-20021][PYSPARK] Miss backslash in python code

2017-03-22 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 7343a0940 -> facfd6088


[SPARK-20021][PYSPARK] Miss backslash in python code

## What changes were proposed in this pull request?

Add backslashes for line continuation in the Python code.

## How was this patch tested?

Jenkins.

Author: uncleGen 
Author: dylon 

Closes #17352 from uncleGen/python-example-doc.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/facfd608
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/facfd608
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/facfd608

Branch: refs/heads/master
Commit: facfd608865c385c0dabfe09cffe5874532a9cdf
Parents: 7343a09
Author: uncleGen 
Authored: Wed Mar 22 11:10:08 2017 +
Committer: Sean Owen 
Committed: Wed Mar 22 11:10:08 2017 +

--
 docs/structured-streaming-programming-guide.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/facfd608/docs/structured-streaming-programming-guide.md
--
diff --git a/docs/structured-streaming-programming-guide.md 
b/docs/structured-streaming-programming-guide.md
index 7988472..ff07ad1 100644
--- a/docs/structured-streaming-programming-guide.md
+++ b/docs/structured-streaming-programming-guide.md
@@ -764,11 +764,11 @@ Dataset windowedCounts = words
 words = ...  # streaming DataFrame of schema { timestamp: Timestamp, word: 
String }
 
 # Group the data by window and word and compute the count of each group
-windowedCounts = words
-.withWatermark("timestamp", "10 minutes")
+windowedCounts = words \
+.withWatermark("timestamp", "10 minutes") \
 .groupBy(
 window(words.timestamp, "10 minutes", "5 minutes"),
-words.word)
+words.word) \
 .count()
 {% endhighlight %}
 





spark git commit: [SPARK-20021][PYSPARK] Miss backslash in python code

2017-03-22 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-2.1 277ed375b -> 56f997f13


[SPARK-20021][PYSPARK] Miss backslash in python code

## What changes were proposed in this pull request?

Add backslashes for line continuation in the Python code.

## How was this patch tested?

Jenkins.

Author: uncleGen 
Author: dylon 

Closes #17352 from uncleGen/python-example-doc.

(cherry picked from commit facfd608865c385c0dabfe09cffe5874532a9cdf)
Signed-off-by: Sean Owen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/56f997f1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/56f997f1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/56f997f1

Branch: refs/heads/branch-2.1
Commit: 56f997f1355dc119dfb038d269d8f2f5170f559a
Parents: 277ed37
Author: uncleGen 
Authored: Wed Mar 22 11:10:08 2017 +
Committer: Sean Owen 
Committed: Wed Mar 22 11:10:18 2017 +

--
 docs/structured-streaming-programming-guide.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/56f997f1/docs/structured-streaming-programming-guide.md
--
diff --git a/docs/structured-streaming-programming-guide.md 
b/docs/structured-streaming-programming-guide.md
index d316e04..f73cf93 100644
--- a/docs/structured-streaming-programming-guide.md
+++ b/docs/structured-streaming-programming-guide.md
@@ -788,11 +788,11 @@ Dataset windowedCounts = words
 words = ...  # streaming DataFrame of schema { timestamp: Timestamp, word: 
String }
 
 # Group the data by window and word and compute the count of each group
-windowedCounts = words
-.withWatermark("timestamp", "10 minutes")
+windowedCounts = words \
+.withWatermark("timestamp", "10 minutes") \
 .groupBy(
 window(words.timestamp, "10 minutes", "5 minutes"),
-words.word)
+words.word) \
 .count()
 {% endhighlight %}
 





spark git commit: [SPARK-20027][DOCS] Compilation fix in java docs.

2017-03-22 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master facfd6088 -> 0caade634


[SPARK-20027][DOCS] Compilation fix in java docs.

## What changes were proposed in this pull request?

During `build/sbt publish-local`, the build breaks due to javadoc errors. This patch 
fixes those errors.

## How was this patch tested?

Tested by running the sbt build.

Author: Prashant Sharma 

Closes #17358 from ScrapCodes/docs-fix.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0caade63
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0caade63
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0caade63

Branch: refs/heads/master
Commit: 0caade634076034182e22318eb09a6df1c560576
Parents: facfd60
Author: Prashant Sharma 
Authored: Wed Mar 22 13:52:03 2017 +
Committer: Sean Owen 
Committed: Wed Mar 22 13:52:03 2017 +

--
 .../java/org/apache/spark/network/crypto/ClientChallenge.java | 2 +-
 .../main/java/org/apache/spark/network/crypto/ServerResponse.java | 2 +-
 .../src/main/java/org/apache/spark/unsafe/types/UTF8String.java   | 2 +-
 .../spark/api/java/function/FlatMapGroupsWithStateFunction.java   | 3 ++-
 4 files changed, 5 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/0caade63/common/network-common/src/main/java/org/apache/spark/network/crypto/ClientChallenge.java
--
diff --git 
a/common/network-common/src/main/java/org/apache/spark/network/crypto/ClientChallenge.java
 
b/common/network-common/src/main/java/org/apache/spark/network/crypto/ClientChallenge.java
index 3312a5b..819b8a7 100644
--- 
a/common/network-common/src/main/java/org/apache/spark/network/crypto/ClientChallenge.java
+++ 
b/common/network-common/src/main/java/org/apache/spark/network/crypto/ClientChallenge.java
@@ -28,7 +28,7 @@ import org.apache.spark.network.protocol.Encoders;
 /**
  * The client challenge message, used to initiate authentication.
  *
- * @see README.md
+ * Please see crypto/README.md for more details of implementation.
  */
 public class ClientChallenge implements Encodable {
   /** Serialization tag used to catch incorrect payloads. */

http://git-wip-us.apache.org/repos/asf/spark/blob/0caade63/common/network-common/src/main/java/org/apache/spark/network/crypto/ServerResponse.java
--
diff --git 
a/common/network-common/src/main/java/org/apache/spark/network/crypto/ServerResponse.java
 
b/common/network-common/src/main/java/org/apache/spark/network/crypto/ServerResponse.java
index affdbf4..caf3a0f 100644
--- 
a/common/network-common/src/main/java/org/apache/spark/network/crypto/ServerResponse.java
+++ 
b/common/network-common/src/main/java/org/apache/spark/network/crypto/ServerResponse.java
@@ -28,7 +28,7 @@ import org.apache.spark.network.protocol.Encoders;
 /**
  * Server's response to client's challenge.
  *
- * @see README.md
+ * Please see crypto/README.md for more details.
  */
 public class ServerResponse implements Encodable {
   /** Serialization tag used to catch incorrect payloads. */

http://git-wip-us.apache.org/repos/asf/spark/blob/0caade63/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
--
diff --git 
a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java 
b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index 4c28075..5437e99 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -863,7 +863,7 @@ public final class UTF8String implements 
Comparable, Externalizable,
* This is done solely for better performance and is not expected to be used 
by end users.
*
* {@link LongWrapper} could have been used here but using `int` directly 
save the extra cost of
-   * conversion from `long` -> `int`
+   * conversion from `long` to `int`
*/
   public static class IntWrapper {
 public int value = 0;

http://git-wip-us.apache.org/repos/asf/spark/blob/0caade63/sql/core/src/main/java/org/apache/spark/api/java/function/FlatMapGroupsWithStateFunction.java
--
diff --git 
a/sql/core/src/main/java/org/apache/spark/api/java/function/FlatMapGroupsWithStateFunction.java
 
b/sql/core/src/main/java/org/apache/spark/api/java/function/FlatMapGroupsWithStateFunction.java
index 29af78c..bdda8aa 100644
--- 
a/sql/core/src/main/java/org/apache/spark/api/java/function/FlatMapGroupsWithStateFunction.java
+++ 
b/sql/core/src/main/java/org/apache/spark/api/java/function/FlatMapGroupsWithStateFunction.java
@@ -28,7 +28

spark git commit: [MINOR][BUILD] Fix javadoc8 break

2017-03-23 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 07c12c09a -> aefe79890


[MINOR][BUILD] Fix javadoc8 break

## What changes were proposed in this pull request?

Several javadoc8 breaks have been introduced. This PR proposes to fix those 
instances so that the Scala/Java API docs can be built.

```
[error] 
.../spark/sql/core/target/java/org/apache/spark/sql/streaming/GroupState.java:6:
 error: reference not found
[error]  * flatMapGroupsWithState operations on {link 
KeyValueGroupedDataset}.
[error] ^
[error] 
.../spark/sql/core/target/java/org/apache/spark/sql/streaming/GroupState.java:10:
 error: reference not found
[error]  * Both, mapGroupsWithState and 
flatMapGroupsWithState in {link KeyValueGroupedDataset}
[error] 
   ^
[error] 
.../spark/sql/core/target/java/org/apache/spark/sql/streaming/GroupState.java:51:
 error: reference not found
[error]  *{link GroupStateTimeout.ProcessingTimeTimeout}) or event time 
(i.e.
[error]  ^
[error] 
.../spark/sql/core/target/java/org/apache/spark/sql/streaming/GroupState.java:52:
 error: reference not found
[error]  *{link GroupStateTimeout.EventTimeTimeout}).
[error]  ^
[error] 
.../spark/sql/core/target/java/org/apache/spark/sql/streaming/GroupState.java:158:
 error: reference not found
[error]  *   Spark SQL types (see {link Encoder} for more details).
[error]  ^
[error] 
.../spark/mllib/target/java/org/apache/spark/ml/fpm/FPGrowthParams.java:26: 
error: bad use of '>'
[error]* Number of partitions (>=1) used by parallel FP-growth. By default 
the param is not set, and
[error]^
[error] 
.../spark/sql/core/src/main/java/org/apache/spark/api/java/function/FlatMapGroupsWithStateFunction.java:30:
 error: reference not found
[error]  * {link 
org.apache.spark.sql.KeyValueGroupedDataset#flatMapGroupsWithState(
[error]   ^
[error] 
.../spark/sql/core/target/java/org/apache/spark/sql/KeyValueGroupedDataset.java:211:
 error: reference not found
[error]* See {link GroupState} for more details.
[error] ^
[error] 
.../spark/sql/core/target/java/org/apache/spark/sql/KeyValueGroupedDataset.java:232:
 error: reference not found
[error]* See {link GroupState} for more details.
[error] ^
[error] 
.../spark/sql/core/target/java/org/apache/spark/sql/KeyValueGroupedDataset.java:254:
 error: reference not found
[error]* See {link GroupState} for more details.
[error] ^
[error] 
.../spark/sql/core/target/java/org/apache/spark/sql/KeyValueGroupedDataset.java:277:
 error: reference not found
[error]* See {link GroupState} for more details.
[error] ^
[error] .../spark/core/target/java/org/apache/spark/TaskContextImpl.java:10: 
error: reference not found
[error]  * {link TaskMetrics} & {link MetricsSystem} objects are not thread 
safe.
[error]   ^
[error] .../spark/core/target/java/org/apache/spark/TaskContextImpl.java:10: 
error: reference not found
[error]  * {link TaskMetrics} & {link MetricsSystem} objects are not thread 
safe.
[error] ^
[info] 13 errors
```

```
jekyll 3.3.1 | Error:  Unidoc generation failed
```

## How was this patch tested?

Manually via `jekyll build`

Author: hyukjinkwon 

Closes #17389 from HyukjinKwon/minor-javadoc8-fix.
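
For illustration only (not the exact hunks from this commit), a minimal Scala sketch of the kind of doc comment that survives the scaladoc-to-javadoc conversion: javadoc-style `{@link ...}` tags and bare `>=` characters are what trip javadoc 8, so references are safer written as scaladoc links or code font. The class name below is invented.

```scala
/**
 * Sketch of javadoc8-friendly scaladoc (illustrative class, not Spark code).
 *
 * Prefer scaladoc links such as [[org.apache.spark.sql.streaming.GroupState]]
 * or plain code font like `KeyValueGroupedDataset`, and write "greater than or
 * equal to 1" (or put `>=` in backticks) instead of leaving the operator bare
 * in running text.
 */
class JavadocFriendlyExample
```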


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/aefe7989
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/aefe7989
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/aefe7989

Branch: refs/heads/master
Commit: aefe79890541bc0829f184e03eb3961739ca8ef2
Parents: 07c12c0
Author: hyukjinkwon 
Authored: Thu Mar 23 08:41:30 2017 +
Committer: Sean Owen 
Committed: Thu Mar 23 08:41:30 2017 +

--
 .../org/apache/spark/TaskContextImpl.scala  |  2 +-
 .../org/apache/spark/ml/fpm/FPGrowth.scala  |  4 ++--
 .../FlatMapGroupsWithStateFunction.java |  2 +-
 .../spark/sql/KeyValueGroupedDataset.scala  |  8 +++
 .../apache/spark/sql/streaming/GroupState.scala | 22 ++--
 5 files changed, 19 insertions(+), 19 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/aefe7989/core/src/main/scala/org/apache/spark/TaskContextImpl.scala
--
diff --git a/core/src/main/scala/org/apache/spark/TaskContextImpl.scala 
b/core/src/main/scala/org/apache/spark/TaskContextImpl.scala
index ea8dcdf..f346cf8 100644
--- a/core/src/main/scala/org/apache/spark/TaskContextImpl.scala
+++ b/core/src/main/scala/org/apache/spark/TaskContextImpl.scala
@@ -38,7 +38

spark git commit: [INFRA] Close stale PRs

2017-03-23 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master aefe79890 -> b70c03a42


[INFRA] Close stale PRs

Closes #16819
Closes #13467
Closes #16083
Closes #17135
Closes #8785
Closes #16278
Closes #16997
Closes #17073
Closes #17220

Added:
Closes #12059
Closes #12524
Closes #12888
Closes #16061

Author: Sean Owen 

Closes #17386 from srowen/StalePRs.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b70c03a4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b70c03a4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b70c03a4

Branch: refs/heads/master
Commit: b70c03a42002e924e979acbc98a8b464830be532
Parents: aefe798
Author: Sean Owen 
Authored: Thu Mar 23 08:42:42 2017 +
Committer: Sean Owen 
Committed: Thu Mar 23 08:42:42 2017 +

--

--






spark git commit: [SPARK-20078][MESOS] Mesos executor configurability for task name and labels

2017-03-25 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master a2ce0a2e3 -> e8ddb91c7


[SPARK-20078][MESOS] Mesos executor configurability for task name and labels

## What changes were proposed in this pull request?

Adds configurable Mesos executor task names and labels using 
`spark.mesos.task.name` and `spark.mesos.task.labels`.

Labels are defined as `k1:v1,k2:v2`.

mgummelt

## How was this patch tested?

Added unit tests verifying that labels are added correctly and that malformed 
labels are ignored, plus a test for the executor task name.

Tested with: `./build/sbt -Pmesos mesos/test`

Please review http://spark.apache.org/contributing.html before opening a pull 
request.

Author: Kalvin Chau 

Closes #17404 from kalvinnchau/mesos-config.
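
As a hedged sketch of the `k1:v1,k2:v2` label format described above (malformed entries ignored), the helper below is illustrative only and is not the code added in this PR.

```scala
// Illustrative only: split a "k1:v1,k2:v2" label spec into key/value pairs,
// silently dropping entries that do not contain a ':' separator.
def parseLabels(spec: String): Map[String, String] =
  spec.split(",").toSeq
    .map(_.split(":", 2))
    .collect { case Array(k, v) if k.nonEmpty => k -> v }
    .toMap

// parseLabels("environment:prod,team:data,malformed")
// => Map(environment -> prod, team -> data)
```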


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e8ddb91c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e8ddb91c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e8ddb91c

Branch: refs/heads/master
Commit: e8ddb91c7ea5a0b4576cf47aaf969bcc82860b7c
Parents: a2ce0a2
Author: Kalvin Chau 
Authored: Sat Mar 25 10:42:15 2017 +
Committer: Sean Owen 
Committed: Sat Mar 25 10:42:15 2017 +

--
 .../mesos/MesosCoarseGrainedSchedulerBackend.scala   |  3 ++-
 .../mesos/MesosCoarseGrainedSchedulerBackendSuite.scala  | 11 +++
 2 files changed, 13 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/e8ddb91c/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala
--
diff --git 
a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala
 
b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala
index c049a32..5bdc2a2 100644
--- 
a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala
+++ 
b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala
@@ -403,7 +403,8 @@ private[spark] class MesosCoarseGrainedSchedulerBackend(
 .setTaskId(TaskID.newBuilder().setValue(taskId.toString).build())
 .setSlaveId(offer.getSlaveId)
 .setCommand(createCommand(offer, taskCPUs + extraCoresPerExecutor, 
taskId))
-.setName("Task " + taskId)
+.setName(s"${sc.appName} $taskId")
+
   taskBuilder.addAllResources(resourcesToUse.asJava)
   
taskBuilder.setContainer(MesosSchedulerBackendUtil.containerInfo(sc.conf))
 

http://git-wip-us.apache.org/repos/asf/spark/blob/e8ddb91c/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackendSuite.scala
--
diff --git 
a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackendSuite.scala
 
b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackendSuite.scala
index 98033be..eb83926 100644
--- 
a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackendSuite.scala
+++ 
b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackendSuite.scala
@@ -464,6 +464,17 @@ class MesosCoarseGrainedSchedulerBackendSuite extends 
SparkFunSuite
 assert(!uris.asScala.head.getCache)
   }
 
+  test("mesos sets task name to spark.app.name") {
+setBackend()
+
+val offers = List(Resources(backend.executorMemory(sc), 1))
+offerResources(offers)
+val launchedTasks = verifyTaskLaunched(driver, "o1")
+
+// Add " 0" to the taskName to match the executor number that is appended
+assert(launchedTasks.head.getName == "test-mesos-dynamic-alloc 0")
+  }
+
   test("mesos supports spark.mesos.network.name") {
 setBackend(Map(
   "spark.mesos.network.name" -> "test-network-name"





spark git commit: logging improvements

2017-03-26 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 93bb0b911 -> 362ee9329


logging improvements

## What changes were proposed in this pull request?
Adding additional information to existing logging messages:
  - YarnAllocator: log the executor ID together with the container id when a 
container for an executor is launched.
  - NettyRpcEnv: log the receiver address when there is a timeout waiting for 
an answer to a remote call.
  - ExecutorAllocationManager: fix a typo in the logging message for the list 
of executors to be removed.

## How was this patch tested?
Built Spark and submitted the word count example to a YARN cluster in cluster 
mode.

Author: Juan Rodriguez Hortala 

Closes #17411 from juanrh/logging-improvements.
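
The ExecutorAllocationManager fix above is essentially a missing `s` interpolator; a small hedged sketch of the difference (variable names made up):

```scala
val executorIdsToBeRemoved = Seq("3", "7")

// Without the s prefix the placeholder is logged literally:
val wrong = "Unable to kill executor/s ${executorIdsToBeRemoved.mkString(\",\")}"
// => Unable to kill executor/s ${executorIdsToBeRemoved.mkString(",")}

// With string interpolation the actual ids appear in the message:
val right = s"Unable to kill executor/s ${executorIdsToBeRemoved.mkString(",")}"
// => Unable to kill executor/s 3,7
```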


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/362ee932
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/362ee932
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/362ee932

Branch: refs/heads/master
Commit: 362ee93296a0de6342b4339e941e6a11f445c5b2
Parents: 93bb0b9
Author: Juan Rodriguez Hortala 
Authored: Sun Mar 26 10:39:05 2017 +0100
Committer: Sean Owen 
Committed: Sun Mar 26 10:39:05 2017 +0100

--
 .../main/scala/org/apache/spark/ExecutorAllocationManager.scala   | 2 +-
 core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala  | 3 ++-
 .../main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala   | 3 ++-
 3 files changed, 5 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/362ee932/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala 
b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala
index 1366251..261b332 100644
--- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala
+++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala
@@ -439,7 +439,7 @@ private[spark] class ExecutorAllocationManager(
   executorsRemoved
 } else {
   logWarning(s"Unable to reach the cluster manager to kill executor/s " +
-"executorIdsToBeRemoved.mkString(\",\") or no executor eligible to 
kill!")
+s"${executorIdsToBeRemoved.mkString(",")} or no executor eligible to 
kill!")
   Seq.empty[String]
 }
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/362ee932/core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala
--
diff --git a/core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala 
b/core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala
index ff5e39a..b316e54 100644
--- a/core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala
+++ b/core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala
@@ -236,7 +236,8 @@ private[netty] class NettyRpcEnv(
 
   val timeoutCancelable = timeoutScheduler.schedule(new Runnable {
 override def run(): Unit = {
-  onFailure(new TimeoutException(s"Cannot receive any reply in 
${timeout.duration}"))
+  onFailure(new TimeoutException(s"Cannot receive any reply from 
${remoteAddr} " +
+s"in ${timeout.duration}"))
 }
   }, timeout.duration.toNanos, TimeUnit.NANOSECONDS)
   promise.future.onComplete { v =>

http://git-wip-us.apache.org/repos/asf/spark/blob/362ee932/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
--
diff --git 
a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
 
b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
index abd2de7..2555676 100644
--- 
a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
+++ 
b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
@@ -494,7 +494,8 @@ private[yarn] class YarnAllocator(
   val containerId = container.getId
   val executorId = executorIdCounter.toString
   assert(container.getResource.getMemory >= resource.getMemory)
-  logInfo(s"Launching container $containerId on host $executorHostname")
+  logInfo(s"Launching container $containerId on host $executorHostname " +
+s"for executor with ID $executorId")
 
   def updateInternalState(): Unit = synchronized {
 numExecutorsRunning += 1





spark-website git commit: added section about mima

2017-03-27 Thread srowen
Repository: spark-website
Updated Branches:
  refs/heads/asf-site 39838046c -> 8b27c470c


added section about mima


Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/8b27c470
Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/8b27c470
Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/8b27c470

Branch: refs/heads/asf-site
Commit: 8b27c470c32b30f6bddafd4cdec2b75b5b975fb6
Parents: 3983804
Author: Benjamin Fradet 
Authored: Sat Mar 25 21:48:40 2017 +
Committer: Benjamin Fradet 
Committed: Sun Mar 26 18:14:45 2017 +0100

--
 developer-tools.md| 56 +-
 site/developer-tools.html | 51 +-
 2 files changed, 105 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark-website/blob/8b27c470/developer-tools.md
--
diff --git a/developer-tools.md b/developer-tools.md
index 88f3f36..547d8aa 100644
--- a/developer-tools.md
+++ b/developer-tools.md
@@ -111,6 +111,60 @@ To run individual Java tests, you can use the `-Dtest` 
flag:
 build/mvn test -DwildcardSuites=none 
-Dtest=org.apache.spark.streaming.JavaAPISuite test
 ```
 
+Binary compatibility
+
+To ensure binary compatibility, Spark uses 
[MiMa](https://github.com/typesafehub/migration-manager).
+
+Ensuring binary compatibility
+
+When working on an issue, it's always a good idea to check that your changes do
+not introduce binary incompatibilities before opening a pull request.
+
+You can do so by running the following command:
+
+```
+$ dev/mima
+```
+
+A binary incompatibility reported by MiMa might look like the following:
+
+```
+[error] method this(org.apache.spark.sql.Dataset)Unit in class 
org.apache.spark.SomeClass does not have a correspondent in current version
+[error] filter with: 
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.SomeClass.this")
+```
+
+If you open a pull request containing binary incompatibilities anyway, Jenkins
+will remind you by failing the test build with the following message:
+
+```
+Test build #xx has finished for PR yy at commit ff.
+
+  This patch fails MiMa tests.
+  This patch merges cleanly.
+  This patch adds no public classes.
+```
+
+Solving a binary incompatibility
+
+If you believe that your binary incompatibilities are justified or that MiMa
+reported false positives (e.g. the reported binary incompatibilities are about 
a
+non-user facing API), you can filter them out by adding an exclusion in
+[project/MimaExcludes.scala](https://github.com/apache/spark/blob/master/project/MimaExcludes.scala)
+containing what was suggested by the MiMa report and a comment containing the
+JIRA number of the issue you're working on as well as its title.
+
+For the problem described above, we might add the following:
+
+{% highlight scala %}
+// [SPARK-zz][CORE] Fix an issue
+ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.SomeClass.this")
+{% endhighlight %}
+
+Otherwise, you will have to resolve those incompatibilities before opening or
+updating your pull request. Usually, the problems reported by MiMa are
+self-explanatory and revolve around missing members (methods or fields) that
+you will have to add back in order to maintain binary compatibility.
+
 Checking Out Pull Requests
 
 Git provides a mechanism for fetching remote pull requests into your own local 
repository. 
@@ -181,7 +235,7 @@ It is due to an incorrect Scala library in the classpath. 
To fix it:
 - Remove `scala-library-2.10.4.jar - lib_managed\jars`
 
 In the event of "Could not find resource path for Web UI: 
org/apache/spark/ui/static", 
-it's due to a classpath issue (some classes were probably not compiled). To 
fix this, it 
+it's due to a classpath issue (some classes were probably not compiled). To 
fix this, it is
 sufficient to run a test from the command line:
 
 ```

http://git-wip-us.apache.org/repos/asf/spark-website/blob/8b27c470/site/developer-tools.html
--
diff --git a/site/developer-tools.html b/site/developer-tools.html
index d09815d..a44bfde 100644
--- a/site/developer-tools.html
+++ b/site/developer-tools.html
@@ -287,6 +287,55 @@ restart whenever build/mvn is called.
 build/mvn test -DwildcardSuites=none 
-Dtest=org.apache.spark.streaming.JavaAPISuite test
 
 
+Binary compatibility
+
+To ensure binary compatibility, Spark uses https://github.com/typesafehub/migration-manager";>MiMa.
+
+Ensuring binary compatibility
+
+When working on an issue, it’s always a good idea to check that your 
changes do
+not introduce binary incompatibilities before opening a pull request.
+
+You can do so by running the following command:

spark git commit: [SPARK-20107][DOC] Add spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version option to configuration.md

2017-03-30 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 471de5db5 -> edc87d76e


[SPARK-20107][DOC] Add 
spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version option to 
configuration.md

## What changes were proposed in this pull request?

Add `spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version` option to 
`configuration.md`.
Setting `spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version=2` can speed up 
[HadoopMapReduceCommitProtocol.commitJob](https://github.com/apache/spark/blob/v2.1.0/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala#L121)
 when a job writes many output files.

All Cloudera Hadoop versions from 2.6.0-cdh5.4.0 onwards (see: 
https://github.com/cloudera/hadoop-common/commit/1c1236182304d4075276c00c4592358f428bc433
 and 
https://github.com/cloudera/hadoop-common/commit/16b2de27321db7ce2395c08baccfdec5562017f0)
 and Apache Hadoop 2.7.0 or higher support this improvement.

More see:

1. [MAPREDUCE-4815](https://issues.apache.org/jira/browse/MAPREDUCE-4815): 
Speed up FileOutputCommitter#commitJob for many output files.
2. [MAPREDUCE-6406](https://issues.apache.org/jira/browse/MAPREDUCE-6406): 
Update the default version for the property 
mapreduce.fileoutputcommitter.algorithm.version to 2.

## How was this patch tested?

Manual test and existing tests.

Author: Yuming Wang 

Closes #17442 from wangyum/SPARK-20107.
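
A hedged usage sketch of the option being documented (SparkSession-based; whether v2 is appropriate depends on the failure-handling trade-off the doc change mentions):

```scala
import org.apache.spark.sql.SparkSession

// Opt in to the v2 commit algorithm; requires Apache Hadoop 2.7.0+ or a
// Cloudera build that backports MAPREDUCE-4815, as described above.
val spark = SparkSession.builder()
  .appName("fast-commit-example")
  .config("spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version", "2")
  .getOrCreate()

// spark.range(1000000).write.parquet("/tmp/many-files")  // commitJob is faster with many output files
```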


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/edc87d76
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/edc87d76
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/edc87d76

Branch: refs/heads/master
Commit: edc87d76efea7b4d19d9d0c4ddba274a3ccb8752
Parents: 471de5d
Author: Yuming Wang 
Authored: Thu Mar 30 10:39:57 2017 +0100
Committer: Sean Owen 
Committed: Thu Mar 30 10:39:57 2017 +0100

--
 docs/configuration.md | 9 +
 1 file changed, 9 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/edc87d76/docs/configuration.md
--
diff --git a/docs/configuration.md b/docs/configuration.md
index 4729f1b..a975392 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -1137,6 +1137,15 @@ Apart from these, the following properties are also 
available, and may be useful
 mapping has high overhead for blocks close to or below the page size of 
the operating system.
   
 
+
+  
spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version
+  1
+  
+The file output committer algorithm version, valid algorithm version 
number: 1 or 2.
+Version 2 may have better performance, but version 1 may handle failures 
better in certain situations,
+as per https://issues.apache.org/jira/browse/MAPREDUCE-4815";>MAPREDUCE-4815.
+  
+
 
 
 ### Networking





spark git commit: [DOCS] Docs-only improvements

2017-03-30 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master b454d4402 -> 0197262a3


[DOCS] Docs-only improvements

…adoc

## What changes were proposed in this pull request?

Use recommended values for row boundaries in Window's scaladoc, i.e. 
`Window.unboundedPreceding`, `Window.unboundedFollowing`, and 
`Window.currentRow` (that were introduced in 2.1.0).

## How was this patch tested?

Local build

Author: Jacek Laskowski 

Closes #17417 from jaceklaskowski/window-expression-scaladoc.
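
A hedged sketch of the recommended boundary constants mentioned above (column names are invented for illustration):

```scala
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.sum

// Running total per key, using the named constants instead of raw Long values.
val runningTotal = Window
  .partitionBy("key")
  .orderBy("ts")
  .rowsBetween(Window.unboundedPreceding, Window.currentRow)

// df.withColumn("running_sum", sum(df("amount")).over(runningTotal))
```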


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0197262a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0197262a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0197262a

Branch: refs/heads/master
Commit: 0197262a358fd174a188f8246ae777e53157610e
Parents: b454d44
Author: Jacek Laskowski 
Authored: Thu Mar 30 16:07:27 2017 +0100
Committer: Sean Owen 
Committed: Thu Mar 30 16:07:27 2017 +0100

--
 .../org/apache/spark/memory/MemoryConsumer.java |  2 --
 .../sort/BypassMergeSortShuffleWriter.java  |  5 ++---
 .../apache/spark/ExecutorAllocationClient.scala |  5 ++---
 .../scala/org/apache/spark/scheduler/Task.scala |  2 +-
 .../apache/spark/serializer/Serializer.scala|  2 +-
 .../spark/shuffle/BlockStoreShuffleReader.scala |  3 +--
 .../shuffle/IndexShuffleBlockResolver.scala |  4 ++--
 .../spark/shuffle/sort/SortShuffleManager.scala |  4 ++--
 .../org/apache/spark/util/AccumulatorV2.scala   |  2 +-
 .../spark/examples/ml/DataFrameExample.scala|  2 +-
 .../org/apache/spark/ml/stat/Correlation.scala  |  2 +-
 .../sql/catalyst/analysis/ResolveHints.scala|  2 +-
 .../catalyst/encoders/ExpressionEncoder.scala   |  6 ++---
 .../sql/catalyst/expressions/Expression.scala   |  2 +-
 .../expressions/windowExpressions.scala |  2 +-
 .../spark/sql/catalyst/optimizer/objects.scala  |  2 +-
 .../spark/sql/catalyst/parser/AstBuilder.scala  |  6 ++---
 .../spark/sql/catalyst/plans/QueryPlan.scala|  5 +++--
 .../catalyst/plans/logical/LogicalPlan.scala|  2 +-
 .../catalyst/parser/ExpressionParserSuite.scala |  3 ++-
 .../scala/org/apache/spark/sql/Column.scala | 18 +++
 .../org/apache/spark/sql/DatasetHolder.scala|  3 ++-
 .../org/apache/spark/sql/SparkSession.scala |  2 +-
 .../spark/sql/execution/command/databases.scala |  2 +-
 .../spark/sql/execution/streaming/Source.scala  |  2 +-
 .../apache/spark/sql/expressions/Window.scala   | 23 ++--
 .../spark/sql/expressions/WindowSpec.scala  | 20 -
 .../scala/org/apache/spark/sql/functions.scala  |  2 +-
 .../sql/hive/HiveSessionStateBuilder.scala  |  2 +-
 .../streaming/scheduler/InputInfoTracker.scala  |  2 +-
 30 files changed, 68 insertions(+), 71 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java
--
diff --git a/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java 
b/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java
index fc1f3a8..48cf4b9 100644
--- a/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java
+++ b/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java
@@ -60,8 +60,6 @@ public abstract class MemoryConsumer {
 
   /**
* Force spill during building.
-   *
-   * For testing.
*/
   public void spill() throws IOException {
 spill(Long.MAX_VALUE, this);

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java
--
diff --git 
a/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java
 
b/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java
index 4a15559..323a5d3 100644
--- 
a/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java
+++ 
b/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java
@@ -52,8 +52,7 @@ import org.apache.spark.util.Utils;
  * This class implements sort-based shuffle's hash-style shuffle fallback 
path. This write path
  * writes incoming records to separate files, one file per reduce partition, 
then concatenates these
  * per-partition files to form a single output file, regions of which are 
served to reducers.
- * Records are not buffered in memory. This is essentially identical to
- * {@link org.apache.spark.shuffle.hash.HashShuffleWriter}, except that it 
writes output in a format
+ * Records are not buffered in memory. It writes output in a format
  * that can be served / consumed via {@link 
org.apache.spark.shuffle.IndexShuffleBlockResolver}.
  * 
  * This write path is in

spark git commit: [SPARK-19999] Workaround JDK-8165231 to identify PPC64 architectures as supporting unaligned access

2017-03-30 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 0197262a3 -> 258bff2c3


[SPARK-19999] Workaround JDK-8165231 to identify PPC64 architectures as supporting unaligned access

 java.nio.Bits.unaligned() does not return true for the ppc64le arch.
see https://bugs.openjdk.java.net/browse/JDK-8165231
## What changes were proposed in this pull request?
check architecture

## How was this patch tested?

unit test

Author: samelamin 
Author: samelamin 

Closes #17472 from samelamin/SPARK-19999.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/258bff2c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/258bff2c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/258bff2c

Branch: refs/heads/master
Commit: 258bff2c3f54490ddca898e276029db9adf575d9
Parents: 0197262
Author: samelamin 
Authored: Thu Mar 30 16:08:26 2017 +0100
Committer: Sean Owen 
Committed: Thu Mar 30 16:08:26 2017 +0100

--
 .../java/org/apache/spark/unsafe/Platform.java  | 28 +++-
 1 file changed, 16 insertions(+), 12 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/258bff2c/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
--
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java 
b/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
index f13c24a..1321b83 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
@@ -46,18 +46,22 @@ public final class Platform {
   private static final boolean unaligned;
   static {
 boolean _unaligned;
-// use reflection to access unaligned field
-try {
-  Class bitsClass =
-Class.forName("java.nio.Bits", false, 
ClassLoader.getSystemClassLoader());
-  Method unalignedMethod = bitsClass.getDeclaredMethod("unaligned");
-  unalignedMethod.setAccessible(true);
-  _unaligned = Boolean.TRUE.equals(unalignedMethod.invoke(null));
-} catch (Throwable t) {
-  // We at least know x86 and x64 support unaligned access.
-  String arch = System.getProperty("os.arch", "");
-  //noinspection DynamicRegexReplaceableByCompiledPattern
-  _unaligned = arch.matches("^(i[3-6]86|x86(_64)?|x64|amd64|aarch64)$");
+String arch = System.getProperty("os.arch", "");
+if (arch.equals("ppc64le") || arch.equals("ppc64")) {
+  // Since java.nio.Bits.unaligned() doesn't return true on ppc (See 
JDK-8165231), but ppc64 and ppc64le support it
+  _unaligned = true;
+} else {
+  try {
+Class bitsClass =
+  Class.forName("java.nio.Bits", false, 
ClassLoader.getSystemClassLoader());
+Method unalignedMethod = bitsClass.getDeclaredMethod("unaligned");
+unalignedMethod.setAccessible(true);
+_unaligned = Boolean.TRUE.equals(unalignedMethod.invoke(null));
+  } catch (Throwable t) {
+// We at least know x86 and x64 support unaligned access.
+//noinspection DynamicRegexReplaceableByCompiledPattern
+_unaligned = arch.matches("^(i[3-6]86|x86(_64)?|x64|amd64|aarch64)$");
+  }
 }
 unaligned = _unaligned;
   }





spark git commit: [DOCS][MINOR] Fixed a few typos in the Structured Streaming documentation

2017-03-30 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master e9d268f63 -> 669a11b61


[DOCS][MINOR] Fixed a few typos in the Structured Streaming documentation

Fixed a few typos.

There is one more I'm not sure of:

```
Append mode uses watermark to drop old aggregation state. But the 
output of a
windowed aggregation is delayed the late threshold specified in 
`withWatermark()` as by
the modes semantics, rows can be added to the Result Table only once 
after they are
```

Not sure how to change `is delayed the late threshold`.

Author: Seigneurin, Alexis (CONT) 

Closes #17443 from aseigneurin/typos.
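
For reference, a hedged Scala rendering of the watermark semantics this guide section describes; the `words` schema and column names are assumptions taken from the guide's example, not code from this commit.

```scala
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.window

// `words` is assumed to be a streaming DataFrame with columns
// (timestamp: Timestamp, word: String), as in the guide.
def windowedWordCounts(words: DataFrame): DataFrame =
  words
    .withWatermark("timestamp", "10 minutes")   // state for a window starting at T is kept while
                                                // max event time seen - 10 min <= T
    .groupBy(window(words("timestamp"), "10 minutes", "5 minutes"), words("word"))
    .count()
```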


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/669a11b6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/669a11b6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/669a11b6

Branch: refs/heads/master
Commit: 669a11b61bc217a13217f1ef48d781329c45575e
Parents: e9d268f
Author: Seigneurin, Alexis (CONT) 
Authored: Thu Mar 30 16:12:17 2017 +0100
Committer: Sean Owen 
Committed: Thu Mar 30 16:12:17 2017 +0100

--
 docs/structured-streaming-programming-guide.md | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/669a11b6/docs/structured-streaming-programming-guide.md
--
diff --git a/docs/structured-streaming-programming-guide.md 
b/docs/structured-streaming-programming-guide.md
index ff07ad1..b5cf9f1 100644
--- a/docs/structured-streaming-programming-guide.md
+++ b/docs/structured-streaming-programming-guide.md
@@ -717,11 +717,11 @@ However, to run this query for days, it's necessary for 
the system to bound the
 intermediate in-memory state it accumulates. This means the system needs to 
know when an old 
 aggregate can be dropped from the in-memory state because the application is 
not going to receive 
 late data for that aggregate any more. To enable this, in Spark 2.1, we have 
introduced 
-**watermarking**, which let's the engine automatically track the current event 
time in the data and
+**watermarking**, which lets the engine automatically track the current event 
time in the data
 and attempt to clean up old state accordingly. You can define the watermark of 
a query by 
-specifying the event time column and the threshold on how late the data is 
expected be in terms of 
+specifying the event time column and the threshold on how late the data is 
expected to be in terms of 
 event time. For a specific window starting at time `T`, the engine will 
maintain state and allow late
-data to be update the state until `(max event time seen by the engine - late 
threshold > T)`. 
+data to update the state until `(max event time seen by the engine - late 
threshold > T)`. 
 In other words, late data within the threshold will be aggregated, 
 but data later than the threshold will be dropped. Let's understand this with 
an example. We can 
 easily define watermarking on the previous example using `withWatermark()` as 
shown below.
@@ -792,7 +792,7 @@ This watermark lets the engine maintain intermediate state 
for additional 10 min
 data to be counted. For example, the data `(12:09, cat)` is out of order and 
late, and it falls in
 windows `12:05 - 12:15` and `12:10 - 12:20`. Since, it is still ahead of the 
watermark `12:04` in 
 the trigger, the engine still maintains the intermediate counts as state and 
correctly updates the 
-counts of the related windows. However, when the watermark is updated to 
12:11, the intermediate 
+counts of the related windows. However, when the watermark is updated to 
`12:11`, the intermediate 
 state for window `(12:00 - 12:10)` is cleared, and all subsequent data (e.g. 
`(12:04, donkey)`) 
 is considered "too late" and therefore ignored. Note that after every trigger, 
 the updated counts (i.e. purple rows) are written to sink as the trigger 
output, as dictated by 
@@ -825,7 +825,7 @@ section for detailed explanation of the semantics of each 
output mode.
 same column as the timestamp column used in the aggregate. For example, 
 `df.withWatermark("time", "1 min").groupBy("time2").count()` is invalid 
 in Append output mode, as watermark is defined on a different column
-as the aggregation column.
+from the aggregation column.
 
 - `withWatermark` must be called before the aggregation for the watermark 
details to be used. 
 For example, `df.groupBy("time").count().withWatermark("time", "1 min")` is 
invalid in Append 
@@ -909,7 +909,7 @@ track of all the data received in the stream. This is 
therefore fundamentally ha
 efficiently.
 
 ## Starting Streaming Queries
-Once you have defined the final result DataFrame/Dataset, all that is left is 
for you start the streaming computation. To do that, you have

spark git commit: [SPARK-20127][CORE] few warning have been fixed which Intellij IDEA reported Intellij IDEA

2017-03-30 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 669a11b61 -> 5e00a5de1


[SPARK-20127][CORE] few warning have been fixed which Intellij IDEA reported 
Intellij IDEA

## What changes were proposed in this pull request?
A few changes related to IntelliJ IDEA inspections.

## How was this patch tested?
Changes were tested by existing unit tests

Author: Denis Bolshakov 

Closes #17458 from dbolshak/SPARK-20127.
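
One of the inspections addressed above collapses two identical catch cases into a single alternative pattern; below is a hedged, stand-alone sketch of the same idiom (not the CompressionCodec code itself).

```scala
// Returns None for either failure mode instead of duplicating the catch body.
def tryLoad(className: String): Option[Class[_]] =
  try {
    Some(Class.forName(className))
  } catch {
    case _: ClassNotFoundException | _: LinkageError => None
  }
```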


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5e00a5de
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5e00a5de
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5e00a5de

Branch: refs/heads/master
Commit: 5e00a5de14ae2d80471c6f38c30cc6fe63e05163
Parents: 669a11b
Author: Denis Bolshakov 
Authored: Thu Mar 30 16:15:40 2017 +0100
Committer: Sean Owen 
Committed: Thu Mar 30 16:15:40 2017 +0100

--
 .../java/org/apache/spark/memory/TaskMemoryManager.java   |  6 +-
 .../java/org/apache/spark/status/api/v1/TaskSorting.java  |  5 ++---
 .../main/scala/org/apache/spark/io/CompressionCodec.scala |  3 +--
 core/src/main/scala/org/apache/spark/ui/WebUI.scala   |  2 +-
 .../org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala |  2 +-
 .../scala/org/apache/spark/ui/exec/ExecutorsPage.scala|  3 +--
 .../scala/org/apache/spark/ui/exec/ExecutorsTab.scala |  4 ++--
 .../scala/org/apache/spark/ui/jobs/AllStagesPage.scala|  4 ++--
 .../scala/org/apache/spark/ui/jobs/ExecutorTable.scala|  4 ++--
 .../org/apache/spark/ui/jobs/JobProgressListener.scala|  4 ++--
 .../main/scala/org/apache/spark/ui/jobs/StagePage.scala   | 10 +-
 .../main/scala/org/apache/spark/ui/jobs/StageTable.scala  |  2 +-
 .../scala/org/apache/spark/ui/storage/StoragePage.scala   |  2 +-
 13 files changed, 22 insertions(+), 29 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/5e00a5de/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java
--
diff --git a/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java 
b/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java
index 39fb3b2..aa0b373 100644
--- a/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java
+++ b/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java
@@ -155,11 +155,7 @@ public class TaskMemoryManager {
 for (MemoryConsumer c: consumers) {
   if (c != consumer && c.getUsed() > 0 && c.getMode() == mode) {
 long key = c.getUsed();
-List list = sortedConsumers.get(key);
-if (list == null) {
-  list = new ArrayList<>(1);
-  sortedConsumers.put(key, list);
-}
+List list = sortedConsumers.computeIfAbsent(key, k 
-> new ArrayList<>(1));
 list.add(c);
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/5e00a5de/core/src/main/java/org/apache/spark/status/api/v1/TaskSorting.java
--
diff --git a/core/src/main/java/org/apache/spark/status/api/v1/TaskSorting.java 
b/core/src/main/java/org/apache/spark/status/api/v1/TaskSorting.java
index 9307eb9..b38639e 100644
--- a/core/src/main/java/org/apache/spark/status/api/v1/TaskSorting.java
+++ b/core/src/main/java/org/apache/spark/status/api/v1/TaskSorting.java
@@ -19,6 +19,7 @@ package org.apache.spark.status.api.v1;
 
 import org.apache.spark.util.EnumUtil;
 
+import java.util.Collections;
 import java.util.HashSet;
 import java.util.Set;
 
@@ -30,9 +31,7 @@ public enum TaskSorting {
   private final Set alternateNames;
   TaskSorting(String... names) {
 alternateNames = new HashSet<>();
-for (String n: names) {
-  alternateNames.add(n);
-}
+Collections.addAll(alternateNames, names);
   }
 
   public static TaskSorting fromString(String str) {

http://git-wip-us.apache.org/repos/asf/spark/blob/5e00a5de/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala
--
diff --git a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala 
b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala
index 2e991ce..c216fe4 100644
--- a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala
+++ b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala
@@ -71,8 +71,7 @@ private[spark] object CompressionCodec {
   val ctor = 
Utils.classForName(codecClass).getConstructor(classOf[SparkConf])
   Some(ctor.newInstance(conf).asInstanceOf[CompressionCodec])
 } catch {
-  case e: ClassNotFoundException => None
-  case e: IllegalArgumentException => None
+  case _: ClassNotFoundException | _: IllegalArgumentException => None
 }
 

spark git commit: [SPARK-20096][SPARK SUBMIT][MINOR] Expose the right queue name not null if set by --conf or configure file

2017-03-30 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 258bff2c3 -> e9d268f63


[SPARK-20096][SPARK SUBMIT][MINOR] Expose the right queue name not null if set 
by --conf or configure file

## What changes were proposed in this pull request?

While submitting apps with -v or --verbose, the right queue name should be printed, 
but if the queue is set with `spark.yarn.queue` via --conf or in 
spark-defaults.conf, we just get `null` for the queue in the parsed arguments:
```
bin/spark-shell -v --conf spark.yarn.queue=thequeue
Using properties file: 
/home/hadoop/spark-2.1.0-bin-apache-hdp2.7.3/conf/spark-defaults.conf

Adding default property: spark.yarn.queue=default
Parsed arguments:
  master  yarn
  deployMode  client
  ...
  queue   null
  
  verbose true
Spark properties used, including those specified through
 --conf and those from the properties file 
/home/hadoop/spark-2.1.0-bin-apache-hdp2.7.3/conf/spark-defaults.conf:
  spark.yarn.queue -> thequeue
  
```
## How was this patch tested?

Unit tests and local verification.

Author: Kent Yao 

Closes #17430 from yaooqinn/SPARK-20096.
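
A hedged sketch of the fallback the fix introduces: the explicit argument wins, otherwise the value comes from the parsed Spark properties (the map contents and variable names below are invented for illustration):

```scala
val sparkProperties = Map("spark.yarn.queue" -> "thequeue")  // from --conf / spark-defaults.conf
val queueArgument: String = null                             // --queue was not given

val queue = Option(queueArgument)
  .orElse(sparkProperties.get("spark.yarn.queue"))
  .orNull

// queue == "thequeue", so -v / --verbose now prints the real queue name
```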


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e9d268f6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e9d268f6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e9d268f6

Branch: refs/heads/master
Commit: e9d268f63e7308486739aa56ece02815bfb432d6
Parents: 258bff2
Author: Kent Yao 
Authored: Thu Mar 30 16:11:03 2017 +0100
Committer: Sean Owen 
Committed: Thu Mar 30 16:11:03 2017 +0100

--
 .../org/apache/spark/deploy/SparkSubmitArguments.scala   |  1 +
 .../scala/org/apache/spark/deploy/SparkSubmitSuite.scala | 11 +++
 2 files changed, 12 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/e9d268f6/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala 
b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index 0614d80..0144fd1 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -190,6 +190,7 @@ private[deploy] class SparkSubmitArguments(args: 
Seq[String], env: Map[String, S
   .orNull
 numExecutors = Option(numExecutors)
   .getOrElse(sparkProperties.get("spark.executor.instances").orNull)
+queue = 
Option(queue).orElse(sparkProperties.get("spark.yarn.queue")).orNull
 keytab = 
Option(keytab).orElse(sparkProperties.get("spark.yarn.keytab")).orNull
 principal = 
Option(principal).orElse(sparkProperties.get("spark.yarn.principal")).orNull
 

http://git-wip-us.apache.org/repos/asf/spark/blob/e9d268f6/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
--
diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala 
b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
index a591b98..7c2ec01 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
@@ -148,6 +148,17 @@ class SparkSubmitSuite
 appArgs.childArgs should be (Seq("--master", "local", "some", "--weird", 
"args"))
   }
 
+  test("print the right queue name") {
+val clArgs = Seq(
+  "--name", "myApp",
+  "--class", "Foo",
+  "--conf", "spark.yarn.queue=thequeue",
+  "userjar.jar")
+val appArgs = new SparkSubmitArguments(clArgs)
+appArgs.queue should be ("thequeue")
+appArgs.toString should include ("thequeue")
+  }
+
   test("specify deploy mode through configuration") {
 val clArgs = Seq(
   "--master", "yarn",





spark git commit: [SPARK-20177] Document about compression way has some little detail ch…

2017-04-01 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 567a50acf -> cf5963c96


[SPARK-20177] Document about compression way has some little detail ch…

…anges.

## What changes were proposed in this pull request?

Small documentation changes for the compression-related options:
1. spark.eventLog.compress: add 'Compression will use spark.io.compression.codec.'
2. spark.broadcast.compress: add 'Compression will use spark.io.compression.codec.'
3. spark.rdd.compress: add 'Compression will use spark.io.compression.codec.'
4. spark.io.compression.codec: mention that it also covers the event log.

For example, from the existing documentation it was not clear which codec is used to 
compress the event log.

## How was this patch tested?

manual tests

Please review http://spark.apache.org/contributing.html before opening a pull 
request.

Author: 郭小龙 10207633 

Closes #17498 from guoxiaolongzte/SPARK-20177.
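
A hedged sketch tying the documented options together: event log, broadcast, and serialized RDD compression all use the codec selected by `spark.io.compression.codec` (the values shown are illustrative defaults, not part of this commit):

```scala
import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.eventLog.enabled", "true")
  .set("spark.eventLog.compress", "true")   // compressed with spark.io.compression.codec
  .set("spark.broadcast.compress", "true")  // likewise
  .set("spark.rdd.compress", "true")        // likewise, for serialized RDD blocks
  .set("spark.io.compression.codec", "lz4") // codec shared by all of the above
```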


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cf5963c9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cf5963c9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cf5963c9

Branch: refs/heads/master
Commit: cf5963c961e7eba37bdd58658ed4dfff66ce3c72
Parents: 567a50a
Author: 郭小龙 10207633 
Authored: Sat Apr 1 11:48:58 2017 +0100
Committer: Sean Owen 
Committed: Sat Apr 1 11:48:58 2017 +0100

--
 docs/configuration.md | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/cf5963c9/docs/configuration.md
--
diff --git a/docs/configuration.md b/docs/configuration.md
index a975392..2687f54 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -639,6 +639,7 @@ Apart from these, the following properties are also 
available, and may be useful
   false
   
 Whether to compress logged events, if spark.eventLog.enabled 
is true.
+Compression will use spark.io.compression.codec.
   
 
 
@@ -773,14 +774,15 @@ Apart from these, the following properties are also 
available, and may be useful
   true
   
 Whether to compress broadcast variables before sending them. Generally a 
good idea.
+Compression will use spark.io.compression.codec.
   
 
 
   spark.io.compression.codec
   lz4
   
-The codec used to compress internal data such as RDD partitions, broadcast 
variables and
-shuffle outputs. By default, Spark provides three codecs: 
lz4, lzf,
+The codec used to compress internal data such as RDD partitions, event 
log, broadcast variables
+and shuffle outputs. By default, Spark provides three codecs: 
lz4, lzf,
 and snappy. You can also use fully qualified class names to 
specify the codec,
 e.g.
 org.apache.spark.io.LZ4CompressionCodec,
@@ -881,6 +883,7 @@ Apart from these, the following properties are also 
available, and may be useful
 StorageLevel.MEMORY_ONLY_SER in Java
 and Scala or StorageLevel.MEMORY_ONLY in Python).
 Can save substantial space at the cost of some extra CPU time.
+Compression will use spark.io.compression.codec.
   
 
 





spark git commit: [SPARK-20123][BUILD] SPARK_HOME variable might have spaces in it(e.g. $SPARK…

2017-04-02 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master d40cbb861 -> 76de2d115


[SPARK-20123][BUILD] SPARK_HOME variable might have spaces in it(e.g. $SPARK…

JIRA Issue: https://issues.apache.org/jira/browse/SPARK-20123

## What changes were proposed in this pull request?

If the $SPARK_HOME or $FWDIR variable contains spaces, then building Spark with 
"./dev/make-distribution.sh --name custom-spark --tgz -Psparkr -Phadoop-2.7 
-Phive -Phive-thriftserver -Pmesos -Pyarn" will fail.

## How was this patch tested?

manual tests

Author: zuotingbing 

Closes #17452 from zuotingbing/spark-bulid.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/76de2d11
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/76de2d11
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/76de2d11

Branch: refs/heads/master
Commit: 76de2d115364aa6a1fdaacdfae05f0c695c953b8
Parents: d40cbb8
Author: zuotingbing 
Authored: Sun Apr 2 15:31:13 2017 +0100
Committer: Sean Owen 
Committed: Sun Apr 2 15:31:13 2017 +0100

--
 R/check-cran.sh | 20 ++--
 R/create-docs.sh| 10 +-
 R/create-rd.sh  |  8 
 R/install-dev.sh| 14 +++---
 R/install-source-package.sh | 20 ++--
 dev/make-distribution.sh| 32 
 6 files changed, 52 insertions(+), 52 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/76de2d11/R/check-cran.sh
--
diff --git a/R/check-cran.sh b/R/check-cran.sh
index a188b14..22cc9c6 100755
--- a/R/check-cran.sh
+++ b/R/check-cran.sh
@@ -20,18 +20,18 @@
 set -o pipefail
 set -e
 
-FWDIR="$(cd `dirname "${BASH_SOURCE[0]}"`; pwd)"
-pushd $FWDIR > /dev/null
+FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
+pushd "$FWDIR" > /dev/null
 
-. $FWDIR/find-r.sh
+. "$FWDIR/find-r.sh"
 
 # Install the package (this is required for code in vignettes to run when 
building it later)
 # Build the latest docs, but not vignettes, which is built with the package 
next
-. $FWDIR/install-dev.sh
+. "$FWDIR/install-dev.sh"
 
 # Build source package with vignettes
 SPARK_HOME="$(cd "${FWDIR}"/..; pwd)"
-. "${SPARK_HOME}"/bin/load-spark-env.sh
+. "${SPARK_HOME}/bin/load-spark-env.sh"
 if [ -f "${SPARK_HOME}/RELEASE" ]; then
   SPARK_JARS_DIR="${SPARK_HOME}/jars"
 else
@@ -40,16 +40,16 @@ fi
 
 if [ -d "$SPARK_JARS_DIR" ]; then
   # Build a zip file containing the source package with vignettes
-  SPARK_HOME="${SPARK_HOME}" "$R_SCRIPT_PATH/"R CMD build $FWDIR/pkg
+  SPARK_HOME="${SPARK_HOME}" "$R_SCRIPT_PATH/R" CMD build "$FWDIR/pkg"
 
   find pkg/vignettes/. -not -name '.' -not -name '*.Rmd' -not -name '*.md' 
-not -name '*.pdf' -not -name '*.html' -delete
 else
-  echo "Error Spark JARs not found in $SPARK_HOME"
+  echo "Error Spark JARs not found in '$SPARK_HOME'"
   exit 1
 fi
 
 # Run check as-cran.
-VERSION=`grep Version $FWDIR/pkg/DESCRIPTION | awk '{print $NF}'`
+VERSION=`grep Version "$FWDIR/pkg/DESCRIPTION" | awk '{print $NF}'`
 
 CRAN_CHECK_OPTIONS="--as-cran"
 
@@ -67,10 +67,10 @@ echo "Running CRAN check with $CRAN_CHECK_OPTIONS options"
 
 if [ -n "$NO_TESTS" ] && [ -n "$NO_MANUAL" ]
 then
-  "$R_SCRIPT_PATH/"R CMD check $CRAN_CHECK_OPTIONS SparkR_"$VERSION".tar.gz
+  "$R_SCRIPT_PATH/R" CMD check $CRAN_CHECK_OPTIONS "SparkR_$VERSION.tar.gz"
 else
   # This will run tests and/or build vignettes, and require SPARK_HOME
-  SPARK_HOME="${SPARK_HOME}" "$R_SCRIPT_PATH/"R CMD check $CRAN_CHECK_OPTIONS 
SparkR_"$VERSION".tar.gz
+  SPARK_HOME="${SPARK_HOME}" "$R_SCRIPT_PATH/R" CMD check $CRAN_CHECK_OPTIONS 
"SparkR_$VERSION.tar.gz"
 fi
 
 popd > /dev/null

http://git-wip-us.apache.org/repos/asf/spark/blob/76de2d11/R/create-docs.sh
--
diff --git a/R/create-docs.sh b/R/create-docs.sh
index 6bef7e7..310dbc5 100755
--- a/R/create-docs.sh
+++ b/R/create-docs.sh
@@ -33,15 +33,15 @@ export FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
 export SPARK_HOME="$(cd "`dirname "${BASH_SOURCE[0]}"`"/..; pwd)"
 
 # Required for setting SPARK_SCALA_VERSION
-. "${SPARK_HOME}"/bin/load-spark-env.sh
+. "${SPARK_HOME}/bin/load-spark-env.sh"
 
 echo "Using Scala $SPARK_SCALA_VERSION"
 
-pushd $FWDIR > /dev/null
-. $FWDIR/find-r.sh
+pushd "$FWDIR" > /dev/null
+. "$FWDIR/find-r.sh"
 
 # Install the package (this will also generate the Rd files)
-. $FWDIR/install-dev.sh
+. "$FWDIR/install-dev.sh"
 
 # Now create HTML files
 
@@ -49,7 +49,7 @@ pushd $FWDIR > /dev/null
 mkdir -p pkg/html
 pushd pkg/html
 
-"$R_SCRIPT_PATH/"Rscript -e 'libDir <- "../../lib"; library(SparkR, 
lib.loc=libDir); library(knitr); knit_rd("SparkR", links = 
tools::findHTMLlinks(paste(libDir, "SparkR", sep="/")))'
+"$R_S

spark git commit: [SPARK-19999][BACKPORT-2.1][CORE] Workaround JDK-8165231 to identify PPC64 architectures as supporting unaligned access

2017-04-02 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-2.1 e3cec18e1 -> 968eace85


[SPARK-19999][BACKPORT-2.1][CORE] Workaround JDK-8165231 to identify PPC64 
architectures as supporting unaligned access

## What changes were proposed in this pull request?

This PR is backport of #17472 to Spark 2.1

java.nio.Bits.unaligned() does not return true for the ppc64le arch.
see 
[https://bugs.openjdk.java.net/browse/JDK-8165231](https://bugs.openjdk.java.net/browse/JDK-8165231)
Check architecture in Platform.java

## How was this patch tested?

unit test

Author: Kazuaki Ishizaki 

Closes #17509 from kiszk/branch-2.1.
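
For illustration, here is a minimal Scala sketch of the check this patch performs: special-case ppc64/ppc64le first, then fall back to the reflective `java.nio.Bits.unaligned()` call and the architecture allow-list. The object and method names are illustrative; only the JDK calls and the regex mirror the patch below.

```scala
// Minimal sketch (illustrative names): decide whether unaligned memory access is supported.
// ppc64/ppc64le are special-cased because java.nio.Bits.unaligned() reports false there
// (JDK-8165231); otherwise ask Bits.unaligned() via reflection and fall back to an arch list.
object UnalignedCheck {
  def unalignedSupported(): Boolean = {
    val arch = System.getProperty("os.arch", "")
    if (arch == "ppc64le" || arch == "ppc64") {
      true
    } else {
      try {
        val bitsClass = Class.forName("java.nio.Bits", false, ClassLoader.getSystemClassLoader)
        val unalignedMethod = bitsClass.getDeclaredMethod("unaligned")
        unalignedMethod.setAccessible(true)
        java.lang.Boolean.TRUE == unalignedMethod.invoke(null)
      } catch {
        case _: Throwable =>
          // We at least know x86 and x64 support unaligned access.
          arch.matches("^(i[3-6]86|x86(_64)?|x64|amd64|aarch64)$")
      }
    }
  }

  def main(args: Array[String]): Unit =
    println(s"unaligned access supported: ${unalignedSupported()}")
}
```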


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/968eace8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/968eace8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/968eace8

Branch: refs/heads/branch-2.1
Commit: 968eace85005d265cb8ff9d3f4aa2d20db58f8fe
Parents: e3cec18
Author: Kazuaki Ishizaki 
Authored: Sun Apr 2 15:33:48 2017 +0100
Committer: Sean Owen 
Committed: Sun Apr 2 15:33:48 2017 +0100

--
 .../java/org/apache/spark/unsafe/Platform.java  | 28 +++-
 1 file changed, 16 insertions(+), 12 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/968eace8/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
--
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java 
b/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
index 671b8c7..ba35cf2 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
@@ -46,18 +46,22 @@ public final class Platform {
   private static final boolean unaligned;
   static {
 boolean _unaligned;
-// use reflection to access unaligned field
-try {
-  Class bitsClass =
-Class.forName("java.nio.Bits", false, 
ClassLoader.getSystemClassLoader());
-  Method unalignedMethod = bitsClass.getDeclaredMethod("unaligned");
-  unalignedMethod.setAccessible(true);
-  _unaligned = Boolean.TRUE.equals(unalignedMethod.invoke(null));
-} catch (Throwable t) {
-  // We at least know x86 and x64 support unaligned access.
-  String arch = System.getProperty("os.arch", "");
-  //noinspection DynamicRegexReplaceableByCompiledPattern
-  _unaligned = arch.matches("^(i[3-6]86|x86(_64)?|x64|amd64|aarch64)$");
+String arch = System.getProperty("os.arch", "");
+if (arch.equals("ppc64le") || arch.equals("ppc64")) {
+  // Since java.nio.Bits.unaligned() doesn't return true on ppc (See 
JDK-8165231), but ppc64 and ppc64le support it
+  _unaligned = true;
+} else {
+  try {
+Class bitsClass =
+  Class.forName("java.nio.Bits", false, 
ClassLoader.getSystemClassLoader());
+Method unalignedMethod = bitsClass.getDeclaredMethod("unaligned");
+unalignedMethod.setAccessible(true);
+_unaligned = Boolean.TRUE.equals(unalignedMethod.invoke(null));
+  } catch (Throwable t) {
+// We at least know x86 and x64 support unaligned access.
+//noinspection DynamicRegexReplaceableByCompiledPattern
+_unaligned = arch.matches("^(i[3-6]86|x86(_64)?|x64|amd64|aarch64)$");
+  }
 }
 unaligned = _unaligned;
   }





spark git commit: [SPARK-20173][SQL][HIVE-THRIFTSERVER] Throw NullPointerException when HiveThriftServer2 is shutdown

2017-04-02 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 76de2d115 -> 657cb9541


[SPARK-20173][SQL][HIVE-THRIFTSERVER] Throw NullPointerException when 
HiveThriftServer2 is shutdown

## What changes were proposed in this pull request?

If the shutdown hook is called before the variable `uiTab` is set, it will throw 
a NullPointerException.

## How was this patch tested?

manual tests

Author: zuotingbing 

Closes #17496 from zuotingbing/SPARK-HiveThriftServer2.
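
To see why the one-line change matters: in Scala, a `var` field initialized with `_` gets the JVM default value `null`, not `None`, so any access before the field is assigned (for example from a shutdown hook) dereferences null. A small self-contained sketch with illustrative names, not the actual HiveThriftServer2 code:

```scala
// Illustrative sketch: a var Option field initialized with `_` is null until assigned,
// while `= None` is safe to use immediately.
object UiTabInitDemo {
  class Server {
    var uiTabUnderscore: Option[String] = _   // underlying field starts as null
    var uiTabNone: Option[String] = None      // safe default
  }

  def main(args: Array[String]): Unit = {
    val s = new Server
    s.uiTabNone.foreach(println)              // fine: no-op on None
    try {
      s.uiTabUnderscore.foreach(println)      // NullPointerException before assignment
    } catch {
      case _: NullPointerException => println("NPE: uiTab was still null")
    }
  }
}
```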


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/657cb954
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/657cb954
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/657cb954

Branch: refs/heads/master
Commit: 657cb9541db8508ce64d08cc3de14cd02adf16b5
Parents: 76de2d1
Author: zuotingbing 
Authored: Sun Apr 2 15:39:51 2017 +0100
Committer: Sean Owen 
Committed: Sun Apr 2 15:39:51 2017 +0100

--
 .../org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/657cb954/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala
--
diff --git 
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala
 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala
index 13c6f11..1455360 100644
--- 
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala
+++ 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala
@@ -46,7 +46,7 @@ import org.apache.spark.util.{ShutdownHookManager, Utils}
  */
 object HiveThriftServer2 extends Logging {
   var LOG = LogFactory.getLog(classOf[HiveServer2])
-  var uiTab: Option[ThriftServerTab] = _
+  var uiTab: Option[ThriftServerTab] = None
   var listener: HiveThriftServer2Listener = _
 
   /**


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-20166][SQL] Use XXX for ISO 8601 timezone instead of ZZ (FastDateFormat specific) in CSV/JSON timeformat options

2017-04-03 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 2a903a1ee -> cff11fd20


[SPARK-20166][SQL] Use XXX for ISO 8601 timezone instead of ZZ (FastDateFormat 
specific) in CSV/JSON timeformat options

## What changes were proposed in this pull request?

This PR proposes to use the `XXX` format instead of `ZZ`. `ZZ` appears to be 
`FastDateFormat`-specific.

`ZZ` supports "ISO 8601 extended format time zones", but it seems to be a 
`FastDateFormat`-specific option.
I misunderstood it as a format compatible with `SimpleDateFormat` when this 
change was introduced.
Please see [SimpleDateFormat documentation]( 
https://docs.oracle.com/javase/7/docs/api/java/text/SimpleDateFormat.html#iso8601timezone)
 and [FastDateFormat 
documentation](https://commons.apache.org/proper/commons-lang/apidocs/org/apache/commons/lang3/time/FastDateFormat.html).

It seems we had better replace `ZZ` with `XXX` because they appear to use the same 
strategy - 
[FastDateParser.java#L930](https://github.com/apache/commons-lang/blob/8767cd4f1a6af07093c1e6c422dae8e574be7e5e/src/main/java/org/apache/commons/lang3/time/FastDateParser.java#L930),
 [FastDateParser.java#L932-L951 
](https://github.com/apache/commons-lang/blob/8767cd4f1a6af07093c1e6c422dae8e574be7e5e/src/main/java/org/apache/commons/lang3/time/FastDateParser.java#L932-L951)
 and 
[FastDateParser.java#L596-L601](https://github.com/apache/commons-lang/blob/8767cd4f1a6af07093c1e6c422dae8e574be7e5e/src/main/java/org/apache/commons/lang3/time/FastDateParser.java#L596-L601).

I also checked the code and debugged it manually to be sure. It seems both cases 
use the same pattern `( Z|(?:[+-]\\d{2}(?::)\\d{2}))`.

_Note that this should be regarded as a documentation fix rather than a behaviour 
change, because `ZZ` appears to be an invalid date format in `SimpleDateFormat` 
as documented in `DataFrameReader` etc., and both `ZZ` and `XXX` appear to work 
identically with `FastDateFormat`._

Current documentation is as below:

```
   * `timestampFormat` (default `yyyy-MM-dd'T'HH:mm:ss.SSSZZ`): sets the 
string that
   * indicates a timestamp format. Custom date formats follow the formats at
   * `java.text.SimpleDateFormat`. This applies to timestamp type.
```

## How was this patch tested?

Existing tests should cover this. Also, manually tested as below (BTW, I don't 
think these are worth being added as tests within Spark):

**Parse**

```scala
scala> new 
java.text.SimpleDateFormat("-MM-dd'T'HH:mm:ss.SSSXXX").parse("2017-03-21T00:00:00.000-11:00")
res4: java.util.Date = Tue Mar 21 20:00:00 KST 2017

scala>  new 
java.text.SimpleDateFormat("-MM-dd'T'HH:mm:ss.SSSXXX").parse("2017-03-21T00:00:00.000Z")
res10: java.util.Date = Tue Mar 21 09:00:00 KST 2017

scala> new 
java.text.SimpleDateFormat("-MM-dd'T'HH:mm:ss.SSSZZ").parse("2017-03-21T00:00:00.000-11:00")
java.text.ParseException: Unparseable date: "2017-03-21T00:00:00.000-11:00"
  at java.text.DateFormat.parse(DateFormat.java:366)
  ... 48 elided
scala>  new 
java.text.SimpleDateFormat("-MM-dd'T'HH:mm:ss.SSSZZ").parse("2017-03-21T00:00:00.000Z")
java.text.ParseException: Unparseable date: "2017-03-21T00:00:00.000Z"
  at java.text.DateFormat.parse(DateFormat.java:366)
  ... 48 elided
```

```scala
scala> 
org.apache.commons.lang3.time.FastDateFormat.getInstance("yyyy-MM-dd'T'HH:mm:ss.SSSXXX").parse("2017-03-21T00:00:00.000-11:00")
res7: java.util.Date = Tue Mar 21 20:00:00 KST 2017

scala> 
org.apache.commons.lang3.time.FastDateFormat.getInstance("yyyy-MM-dd'T'HH:mm:ss.SSSXXX").parse("2017-03-21T00:00:00.000Z")
res1: java.util.Date = Tue Mar 21 09:00:00 KST 2017

scala> 
org.apache.commons.lang3.time.FastDateFormat.getInstance("yyyy-MM-dd'T'HH:mm:ss.SSSZZ").parse("2017-03-21T00:00:00.000-11:00")
res8: java.util.Date = Tue Mar 21 20:00:00 KST 2017

scala> 
org.apache.commons.lang3.time.FastDateFormat.getInstance("yyyy-MM-dd'T'HH:mm:ss.SSSZZ").parse("2017-03-21T00:00:00.000Z")
res2: java.util.Date = Tue Mar 21 09:00:00 KST 2017
```

**Format**

```scala
scala> new 
java.text.SimpleDateFormat("-MM-dd'T'HH:mm:ss.SSSXXX").format(new 
java.text.SimpleDateFormat("-MM-dd'T'HH:mm:ss.SSSXXX").parse("2017-03-21T00:00:00.000-11:00"))
res6: String = 2017-03-21T20:00:00.000+09:00
```

```scala
scala> val fd = 
org.apache.commons.lang3.time.FastDateFormat.getInstance("yyyy-MM-dd'T'HH:mm:ss.SSSZZ")
fd: org.apache.commons.lang3.time.FastDateFormat = 
FastDateFormat[yyyy-MM-dd'T'HH:mm:ss.SSSZZ,ko_KR,Asia/Seoul]

scala> fd.format(fd.parse("2017-03-21T00:00:00.000-11:00"))
res1: String = 2017-03-21T20:00:00.000+09:00

scala> val fd = 
org.apache.commons.lang3.time.FastDateFormat.getInstance("yyyy-MM-dd'T'HH:mm:ss.SSSXXX")
fd: org.apache.commons.lang3.time.FastDateFormat = 
FastDateFormat[yyyy-MM-dd'T'HH:mm:ss.SSSXXX,ko_KR,Asia/Seoul]

scala> fd.format(fd.parse("2017-03-21T00:00:00.000-11:00"))
res2: String = 2017-03-21T20:00:00.000+09:00
```

Author: hyukjinkwon 

Closes #17489 from HyukjinKwon/SPARK-20166.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
C

spark git commit: [MINOR][DOCS] Replace non-breaking space to normal spaces that breaks rendering markdown

2017-04-03 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master cff11fd20 -> 364b0db75


[MINOR][DOCS] Replace non-breaking space to normal spaces that breaks rendering 
markdown

## What changes were proposed in this pull request?

It seems several non-breaking spaces were inserted into several 
`.md`s, and they appear to break the rendering of the markdown files.

A non-breaking space and a normal space are different characters. For example, this can be checked via `python` as below:

```python
>>> " "
'\xc2\xa0'
>>> " "
' '
```

_Note that it seems this PR description automatically replaces non-breaking 
spaces into normal spaces. Please open a `vi` and copy and paste it into 
`python` to verify this (do not copy the characters here)._

I checked the output below in Safari and Chrome on Mac OS, and Internet 
Explorer on Windows 10.

**Before**

![2017-04-03 12 37 
17](https://cloud.githubusercontent.com/assets/6477701/24594655/50aaba02-186a-11e7-80bb-d34b17a3398a.png)
![2017-04-03 12 36 
57](https://cloud.githubusercontent.com/assets/6477701/24594654/50a855e6-186a-11e7-94e2-661e56544b0f.png)

**After**

![2017-04-03 12 36 
46](https://cloud.githubusercontent.com/assets/6477701/24594657/53c2545c-186a-11e7-9a73-00529afbfd75.png)
![2017-04-03 12 36 
31](https://cloud.githubusercontent.com/assets/6477701/24594658/53c286c0-186a-11e7-99c9-e66b1f510fe7.png)

## How was this patch tested?

Manually checking.

These instances were found via

```
grep --include=*.scala --include=*.python --include=*.java --include=*.r 
--include=*.R --include=*.md --include=*.r -r -I " " .
```

in Mac OS.

It seems there are several more instances, as below:

```
./docs/sql-programming-guide.md:│   ├── ...
./docs/sql-programming-guide.md:│   │
./docs/sql-programming-guide.md:│   ├── country=US
./docs/sql-programming-guide.md:│   │   └── data.parquet
./docs/sql-programming-guide.md:│   ├── country=CN
./docs/sql-programming-guide.md:│   │   └── data.parquet
./docs/sql-programming-guide.md:│   └── ...
./docs/sql-programming-guide.md:    ├── ...
./docs/sql-programming-guide.md:    │
./docs/sql-programming-guide.md:    ├── country=US
./docs/sql-programming-guide.md:    │   └── data.parquet
./docs/sql-programming-guide.md:    ├── country=CN
./docs/sql-programming-guide.md:    │   └── data.parquet
./docs/sql-programming-guide.md:    └── ...
./sql/core/src/test/README.md:│   ├── *.avdl  # 
Testing Avro IDL(s)
./sql/core/src/test/README.md:│   └── *.avpr  # !! NO 
TOUCH !! Protocol files generated from Avro IDL(s)
./sql/core/src/test/README.md:│   ├── gen-avro.sh # 
Script used to generate Java code for Avro
./sql/core/src/test/README.md:│   └── gen-thrift.sh   # 
Script used to generate Java code for Thrift
```

These seem to have been generated via the `tree` command, which inserts non-breaking spaces. 
They do not appear to cause any rendering problem within code blocks, and I did 
not fix them, to reduce the overhead of manually replacing them whenever the output is 
regenerated via the `tree` command in the future.

Author: hyukjinkwon 

Closes #17517 from HyukjinKwon/non-breaking-space.
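
For anyone without the `grep` setup above, a small Scala sketch (the file path is a placeholder) that reports lines containing the U+00A0 non-breaking space this commit replaces:

```scala
// Sketch: scan a file for non-breaking spaces (U+00A0) and print the offending lines.
import scala.io.Source

object FindNonBreakingSpaces {
  def main(args: Array[String]): Unit = {
    val path = args.headOption.getOrElse("README.md")  // placeholder default
    val src = Source.fromFile(path, "UTF-8")
    try {
      src.getLines().zipWithIndex.foreach { case (line, i) =>
        if (line.exists(_ == '\u00A0')) println(s"$path:${i + 1}: $line")
      }
    } finally {
      src.close()
    }
  }
}
```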


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/364b0db7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/364b0db7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/364b0db7

Branch: refs/heads/master
Commit: 364b0db75308ddd346b4ab1e032680e8eb4c1753
Parents: cff11fd
Author: hyukjinkwon 
Authored: Mon Apr 3 10:09:11 2017 +0100
Committer: Sean Owen 
Committed: Mon Apr 3 10:09:11 2017 +0100

--
 README.md  | 2 +-
 docs/building-spark.md | 2 +-
 docs/monitoring.md | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/364b0db7/README.md
--
diff --git a/README.md b/README.md
index d0eca1d..1e521a7 100644
--- a/README.md
+++ b/README.md
@@ -97,7 +97,7 @@ building for particular Hive and Hive Thriftserver 
distributions.
 Please refer to the [Configuration 
Guide](http://spark.apache.org/docs/latest/configuration.html)
 in the online documentation for an overview on how to configure Spark.
 
-## Contributing
+## Contributing
 
 Please review the [Contribution to Spark 
guide](http://spark.apache.org/contributing.html)
 for information on how to get started contributing to the project.

http://git-wip-us.apache.org/repos/asf/spark/blob/364b0db7/docs/building-spark.md
--
diff --git a/docs/building-

spark git commit: [MINOR][DOCS] Replace non-breaking space to normal spaces that breaks rendering markdown

2017-04-03 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-2.1 ca144106b -> 77700ea38


[MINOR][DOCS] Replace non-breaking space to normal spaces that breaks rendering 
markdown

## What changes were proposed in this pull request?

It seems several non-breaking spaces were inserted into several 
`.md`s, and they appear to break the rendering of the markdown files.

A non-breaking space and a normal space are different characters. For example, this can be checked via `python` as below:

```python
>>> " "
'\xc2\xa0'
>>> " "
' '
```

_Note that it seems this PR description automatically replaces non-breaking 
spaces into normal spaces. Please open a `vi` and copy and paste it into 
`python` to verify this (do not copy the characters here)._

I checked the output below in Safari and Chrome on Mac OS, and Internet 
Explorer on Windows 10.

**Before**

![2017-04-03 12 37 
17](https://cloud.githubusercontent.com/assets/6477701/24594655/50aaba02-186a-11e7-80bb-d34b17a3398a.png)
![2017-04-03 12 36 
57](https://cloud.githubusercontent.com/assets/6477701/24594654/50a855e6-186a-11e7-94e2-661e56544b0f.png)

**After**

![2017-04-03 12 36 
46](https://cloud.githubusercontent.com/assets/6477701/24594657/53c2545c-186a-11e7-9a73-00529afbfd75.png)
![2017-04-03 12 36 
31](https://cloud.githubusercontent.com/assets/6477701/24594658/53c286c0-186a-11e7-99c9-e66b1f510fe7.png)

## How was this patch tested?

Manually checking.

These instances were found via

```
grep --include=*.scala --include=*.python --include=*.java --include=*.r 
--include=*.R --include=*.md --include=*.r -r -I " " .
```

in Mac OS.

It seems there are several more instances, as below:

```
./docs/sql-programming-guide.md:│   ├── ...
./docs/sql-programming-guide.md:│   │
./docs/sql-programming-guide.md:│   ├── country=US
./docs/sql-programming-guide.md:│   │   └── data.parquet
./docs/sql-programming-guide.md:│   ├── country=CN
./docs/sql-programming-guide.md:│   │   └── data.parquet
./docs/sql-programming-guide.md:│   └── ...
./docs/sql-programming-guide.md:    ├── ...
./docs/sql-programming-guide.md:    │
./docs/sql-programming-guide.md:    ├── country=US
./docs/sql-programming-guide.md:    │   └── data.parquet
./docs/sql-programming-guide.md:    ├── country=CN
./docs/sql-programming-guide.md:    │   └── data.parquet
./docs/sql-programming-guide.md:    └── ...
./sql/core/src/test/README.md:│   ├── *.avdl  # 
Testing Avro IDL(s)
./sql/core/src/test/README.md:│   └── *.avpr  # !! NO 
TOUCH !! Protocol files generated from Avro IDL(s)
./sql/core/src/test/README.md:│   ├── gen-avro.sh # 
Script used to generate Java code for Avro
./sql/core/src/test/README.md:│   └── gen-thrift.sh   # 
Script used to generate Java code for Thrift
```

These seem to have been generated via the `tree` command, which inserts non-breaking spaces. 
They do not appear to cause any rendering problem within code blocks, and I did 
not fix them, to reduce the overhead of manually replacing them whenever the output is 
regenerated via the `tree` command in the future.

Author: hyukjinkwon 

Closes #17517 from HyukjinKwon/non-breaking-space.

(cherry picked from commit 364b0db75308ddd346b4ab1e032680e8eb4c1753)
Signed-off-by: Sean Owen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/77700ea3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/77700ea3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/77700ea3

Branch: refs/heads/branch-2.1
Commit: 77700ea38540b8326c37623abeebabf3d2497418
Parents: ca14410
Author: hyukjinkwon 
Authored: Mon Apr 3 10:09:11 2017 +0100
Committer: Sean Owen 
Committed: Mon Apr 3 10:09:21 2017 +0100

--
 README.md  | 2 +-
 docs/building-spark.md | 2 +-
 docs/monitoring.md | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/77700ea3/README.md
--
diff --git a/README.md b/README.md
index f598323..d861e9f 100644
--- a/README.md
+++ b/README.md
@@ -98,7 +98,7 @@ building for particular Hive and Hive Thriftserver 
distributions.
 Please refer to the [Configuration 
Guide](http://spark.apache.org/docs/latest/configuration.html)
 in the online documentation for an overview on how to configure Spark.
 
-## Contributing
+## Contributing
 
 Please review the [Contribution to Spark 
guide](http://spark.apache.org/contributing.html)
 for information on how to get started contributing to the project.

http://git-wip-us.apache.org/repos/asf/spark/blob/77700ea3/docs/building-spa

spark git commit: [SPARK-9002][CORE] KryoSerializer initialization does not include 'Array[Int]'

2017-04-03 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 364b0db75 -> fb5869f2c


[SPARK-9002][CORE] KryoSerializer initialization does not include 'Array[Int]'

## What changes were proposed in this pull request?

Array[Int] has been registered in KryoSerializer.
The following file has been changed:
core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala

## How was this patch tested?

First, the issue was reproduced by a new unit test.
Then, the issue was fixed so that the failing test passes.

Author: Denis Bolshakov 

Closes #17482 from dbolshak/SPARK-9002.
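
A short sketch of what the new registrations buy, assuming the same kind of local `SparkConf`/`KryoSerializer` setup as the test changes below: with `spark.kryo.registrationRequired=true`, round-tripping an `Array[Int]` only works once the class is registered.

```scala
// Sketch, assuming a local SparkConf/KryoSerializer setup similar to the test below:
// with registrationRequired=true, serializing an unregistered class throws, so Array[Int]
// (and the other primitive-array types added here) must be registered up front.
import org.apache.spark.SparkConf
import org.apache.spark.serializer.KryoSerializer

object KryoArrayIntDemo {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf(false).set("spark.kryo.registrationRequired", "true")
    val ser = new KryoSerializer(conf).newInstance()
    // With Array[Int] registered (as of this patch) the round trip succeeds:
    val bytes = ser.serialize(Array(1, 2, 3))
    val back  = ser.deserialize[Array[Int]](bytes)
    println(back.mkString(","))  // 1,2,3
  }
}
```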


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fb5869f2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fb5869f2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fb5869f2

Branch: refs/heads/master
Commit: fb5869f2cf94217b3e254e2d0820507dc83a25cc
Parents: 364b0db
Author: Denis Bolshakov 
Authored: Mon Apr 3 10:16:07 2017 +0100
Committer: Sean Owen 
Committed: Mon Apr 3 10:16:07 2017 +0100

--
 .../org/apache/spark/serializer/KryoSerializer.scala  |  7 +++
 .../org/apache/spark/serializer/KryoSerializerSuite.scala | 10 ++
 2 files changed, 17 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/fb5869f2/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala 
b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala
index 0381563..6fc66e2 100644
--- a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala
+++ b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala
@@ -384,9 +384,16 @@ private[serializer] object KryoSerializer {
 classOf[HighlyCompressedMapStatus],
 classOf[CompactBuffer[_]],
 classOf[BlockManagerId],
+classOf[Array[Boolean]],
 classOf[Array[Byte]],
 classOf[Array[Short]],
+classOf[Array[Int]],
 classOf[Array[Long]],
+classOf[Array[Float]],
+classOf[Array[Double]],
+classOf[Array[Char]],
+classOf[Array[String]],
+classOf[Array[Array[String]]],
 classOf[BoundedPriorityQueue[_]],
 classOf[SparkConf]
   )

http://git-wip-us.apache.org/repos/asf/spark/blob/fb5869f2/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala
--
diff --git 
a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala 
b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala
index a30653b..7c3922e 100644
--- a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala
@@ -76,6 +76,9 @@ class KryoSerializerSuite extends SparkFunSuite with 
SharedSparkContext {
   }
 
   test("basic types") {
+val conf = new SparkConf(false)
+conf.set("spark.kryo.registrationRequired", "true")
+
 val ser = new KryoSerializer(conf).newInstance()
 def check[T: ClassTag](t: T) {
   assert(ser.deserialize[T](ser.serialize(t)) === t)
@@ -106,6 +109,9 @@ class KryoSerializerSuite extends SparkFunSuite with 
SharedSparkContext {
   }
 
   test("pairs") {
+val conf = new SparkConf(false)
+conf.set("spark.kryo.registrationRequired", "true")
+
 val ser = new KryoSerializer(conf).newInstance()
 def check[T: ClassTag](t: T) {
   assert(ser.deserialize[T](ser.serialize(t)) === t)
@@ -130,12 +136,16 @@ class KryoSerializerSuite extends SparkFunSuite with 
SharedSparkContext {
   }
 
   test("Scala data structures") {
+val conf = new SparkConf(false)
+conf.set("spark.kryo.registrationRequired", "true")
+
 val ser = new KryoSerializer(conf).newInstance()
 def check[T: ClassTag](t: T) {
   assert(ser.deserialize[T](ser.serialize(t)) === t)
 }
 check(List[Int]())
 check(List[Int](1, 2, 3))
+check(Seq[Int](1, 2, 3))
 check(List[String]())
 check(List[String]("x", "y", "z"))
 check(None)





spark git commit: [SPARK-20190][APP-ID] applications//jobs' in rest api,status should be [running|s…

2017-04-04 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master b34f7665d -> c95fbea68


[SPARK-20190][APP-ID] applications//jobs' in rest api,status should be 
[running|s…

…ucceeded|failed|unknown]

## What changes were proposed in this pull request?

For '/applications/[app-id]/jobs' in the REST API, the status values should 
be '[running|succeeded|failed|unknown]'.
Currently the documented status values are '[complete|succeeded|failed]',
but for '/applications/[app-id]/jobs?status=complete' the server returns 'HTTP ERROR 
404'.
Added '?status=running' and '?status=unknown'.
The relevant code:

public enum JobExecutionStatus {
  RUNNING,
  SUCCEEDED,
  FAILED,
  UNKNOWN;
}

## How was this patch tested?

 manual tests

Please review http://spark.apache.org/contributing.html before opening a pull 
request.

Author: guoxiaolongzte 

Closes #17507 from guoxiaolongzte/SPARK-20190.
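
A minimal sketch of exercising the documented endpoint with one of the valid status values; the host, port and application id below are placeholders for a locally running application:

```scala
// Sketch with placeholder host/port/app-id: list jobs filtered by status via the REST API.
// Valid status values match JobExecutionStatus: running, succeeded, failed, unknown.
import scala.io.Source

object JobsByStatus {
  def main(args: Array[String]): Unit = {
    val appId = "app-20170404000000-0001"  // placeholder application id
    val url   = s"http://localhost:4040/api/v1/applications/$appId/jobs?status=running"
    val src   = Source.fromURL(url)
    try {
      println(src.mkString)  // JSON array of running jobs
    } finally {
      src.close()
    }
  }
}
```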


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c95fbea6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c95fbea6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c95fbea6

Branch: refs/heads/master
Commit: c95fbea68e9dfb2c96a1d13dde17d80a37066ae6
Parents: b34f766
Author: guoxiaolongzte 
Authored: Tue Apr 4 09:56:17 2017 +0100
Committer: Sean Owen 
Committed: Tue Apr 4 09:56:17 2017 +0100

--
 docs/monitoring.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/c95fbea6/docs/monitoring.md
--
diff --git a/docs/monitoring.md b/docs/monitoring.md
index 6cbc666..4d0617d 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -289,7 +289,7 @@ can be identified by their `[attempt-id]`. In the API 
listed below, when running
 /applications/[app-id]/jobs
 
   A list of all jobs for a given application.
-  ?status=[complete|succeeded|failed] list only jobs in 
the specific state.
+  ?status=[running|succeeded|failed|unknown] list only 
jobs in the specific state.
 
   
   





spark git commit: [SPARK-20190][APP-ID] applications//jobs' in rest api,status should be [running|s…

2017-04-04 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-2.1 77700ea38 -> f9546dacb


[SPARK-20190][APP-ID] applications//jobs' in rest api,status should be 
[running|s…

…ucceeded|failed|unknown]

## What changes were proposed in this pull request?

For '/applications/[app-id]/jobs' in the REST API, the status values should 
be '[running|succeeded|failed|unknown]'.
Currently the documented status values are '[complete|succeeded|failed]',
but for '/applications/[app-id]/jobs?status=complete' the server returns 'HTTP ERROR 
404'.
Added '?status=running' and '?status=unknown'.
The relevant code:

public enum JobExecutionStatus {
  RUNNING,
  SUCCEEDED,
  FAILED,
  UNKNOWN;
}

## How was this patch tested?

 manual tests

Please review http://spark.apache.org/contributing.html before opening a pull 
request.

Author: guoxiaolongzte 

Closes #17507 from guoxiaolongzte/SPARK-20190.

(cherry picked from commit c95fbea68e9dfb2c96a1d13dde17d80a37066ae6)
Signed-off-by: Sean Owen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f9546dac
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f9546dac
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f9546dac

Branch: refs/heads/branch-2.1
Commit: f9546dacb6c7d25b93d952aa421a80acc6532c11
Parents: 77700ea
Author: guoxiaolongzte 
Authored: Tue Apr 4 09:56:17 2017 +0100
Committer: Sean Owen 
Committed: Tue Apr 4 09:56:26 2017 +0100

--
 docs/monitoring.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/f9546dac/docs/monitoring.md
--
diff --git a/docs/monitoring.md b/docs/monitoring.md
index 5c8539d..be59350 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -278,7 +278,7 @@ can be identified by their `[attempt-id]`. In the API 
listed below, when running
 /applications/[app-id]/jobs
 
   A list of all jobs for a given application.
-  ?status=[complete|succeeded|failed] list only jobs in 
the specific state.
+  ?status=[running|succeeded|failed|unknown] list only 
jobs in the specific state.
 
   
   





spark git commit: [SPARK-20042][WEB UI] Fix log page buttons for reverse proxy mode

2017-04-05 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master dad499f32 -> 6f09dc70d


[SPARK-20042][WEB UI] Fix log page buttons for reverse proxy mode

With spark.ui.reverseProxy=true, full path URLs like /log will point to
the master web endpoint, which is serving the worker UI as a reverse proxy.
To access a REST endpoint in the worker in reverse proxy mode, the
leading /proxy/"target"/ part of the base URI must be retained.

Added logic to log-view.js to handle this, similar to executorspage.js.

The patch was tested manually.

Author: Oliver Köth 

Closes #17370 from okoethibm/master.
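
The JavaScript below is what actually ships; purely for illustration, here is the same prefix-retention logic sketched in Scala (names and sample URLs are made up):

```scala
// Illustrative Scala sketch of the log-view.js logic: when the page is served through the
// master's reverse proxy, keep everything up to and including /proxy/<target> and append
// "/log"; otherwise fall back to the plain "/log" endpoint.
object LogRestEndpoint {
  def logEndpoint(baseUri: String): String = {
    val words = baseUri.split('/')
    val ind = words.indexOf("proxy")
    if (ind > 0) words.slice(0, ind + 2).mkString("/") + "/log" else "/log"
  }

  def main(args: Array[String]): Unit = {
    println(logEndpoint("http://master:8080/proxy/worker-20170405-0001/logPage"))
    // -> http://master:8080/proxy/worker-20170405-0001/log
    println(logEndpoint("http://worker:8081/logPage"))
    // -> /log
  }
}
```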


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6f09dc70
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6f09dc70
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6f09dc70

Branch: refs/heads/master
Commit: 6f09dc70d9808cae004ceda9ad615aa9be50f43d
Parents: dad499f
Author: Oliver Köth 
Authored: Wed Apr 5 08:09:42 2017 +0100
Committer: Sean Owen 
Committed: Wed Apr 5 08:09:42 2017 +0100

--
 .../org/apache/spark/ui/static/log-view.js   | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/6f09dc70/core/src/main/resources/org/apache/spark/ui/static/log-view.js
--
diff --git a/core/src/main/resources/org/apache/spark/ui/static/log-view.js 
b/core/src/main/resources/org/apache/spark/ui/static/log-view.js
index 1782b4f..b5c43e5 100644
--- a/core/src/main/resources/org/apache/spark/ui/static/log-view.js
+++ b/core/src/main/resources/org/apache/spark/ui/static/log-view.js
@@ -51,13 +51,26 @@ function noNewAlert() {
   window.setTimeout(function () {alert.css("display", "none");}, 4000);
 }
 
+
+function getRESTEndPoint() {
+  // If the worker is served from the master through a proxy (see doc on 
spark.ui.reverseProxy), 
+  // we need to retain the leading ../proxy// part of the URL when 
making REST requests.
+  // Similar logic is contained in executorspage.js function 
createRESTEndPoint.
+  var words = document.baseURI.split('/');
+  var ind = words.indexOf("proxy");
+  if (ind > 0) {
+  return words.slice(0, ind + 2).join('/') + "/log";
+  }
+  return "/log"
+}
+
 function loadMore() {
   var offset = Math.max(startByte - byteLength, 0);
   var moreByteLength = Math.min(byteLength, startByte);
 
   $.ajax({
 type: "GET",
-url: "/log" + baseParams + "&offset=" + offset + "&byteLength=" + 
moreByteLength,
+url: getRESTEndPoint() + baseParams + "&offset=" + offset + "&byteLength=" 
+ moreByteLength,
 success: function (data) {
   var oldHeight = $(".log-content")[0].scrollHeight;
   var newlineIndex = data.indexOf('\n');
@@ -83,14 +96,14 @@ function loadMore() {
 function loadNew() {
   $.ajax({
 type: "GET",
-url: "/log" + baseParams + "&byteLength=0",
+url: getRESTEndPoint() + baseParams + "&byteLength=0",
 success: function (data) {
   var dataInfo = data.substring(0, data.indexOf('\n')).match(/\d+/g);
   var newDataLen = dataInfo[2] - totalLogLength;
   if (newDataLen != 0) {
 $.ajax({
   type: "GET",
-  url: "/log" + baseParams + "&byteLength=" + newDataLen,
+  url: getRESTEndPoint() + baseParams + "&byteLength=" + newDataLen,
   success: function (data) {
 var newlineIndex = data.indexOf('\n');
 var dataInfo = data.substring(0, newlineIndex).match(/\d+/g);





spark git commit: [SPARK-20042][WEB UI] Fix log page buttons for reverse proxy mode

2017-04-05 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-2.1 00c124884 -> efc72dcc3


[SPARK-20042][WEB UI] Fix log page buttons for reverse proxy mode

With spark.ui.reverseProxy=true, full path URLs like /log will point to
the master web endpoint, which is serving the worker UI as a reverse proxy.
To access a REST endpoint in the worker in reverse proxy mode, the
leading /proxy/"target"/ part of the base URI must be retained.

Added logic to log-view.js to handle this, similar to executorspage.js.

The patch was tested manually.

Author: Oliver Köth 

Closes #17370 from okoethibm/master.

(cherry picked from commit 6f09dc70d9808cae004ceda9ad615aa9be50f43d)
Signed-off-by: Sean Owen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/efc72dcc
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/efc72dcc
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/efc72dcc

Branch: refs/heads/branch-2.1
Commit: efc72dcc3f964ea9931fb47a454db253556d0f8a
Parents: 00c1248
Author: Oliver Köth 
Authored: Wed Apr 5 08:09:42 2017 +0100
Committer: Sean Owen 
Committed: Wed Apr 5 08:09:52 2017 +0100

--
 .../org/apache/spark/ui/static/log-view.js   | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/efc72dcc/core/src/main/resources/org/apache/spark/ui/static/log-view.js
--
diff --git a/core/src/main/resources/org/apache/spark/ui/static/log-view.js 
b/core/src/main/resources/org/apache/spark/ui/static/log-view.js
index 1782b4f..b5c43e5 100644
--- a/core/src/main/resources/org/apache/spark/ui/static/log-view.js
+++ b/core/src/main/resources/org/apache/spark/ui/static/log-view.js
@@ -51,13 +51,26 @@ function noNewAlert() {
   window.setTimeout(function () {alert.css("display", "none");}, 4000);
 }
 
+
+function getRESTEndPoint() {
+  // If the worker is served from the master through a proxy (see doc on 
spark.ui.reverseProxy), 
+  // we need to retain the leading ../proxy// part of the URL when 
making REST requests.
+  // Similar logic is contained in executorspage.js function 
createRESTEndPoint.
+  var words = document.baseURI.split('/');
+  var ind = words.indexOf("proxy");
+  if (ind > 0) {
+  return words.slice(0, ind + 2).join('/') + "/log";
+  }
+  return "/log"
+}
+
 function loadMore() {
   var offset = Math.max(startByte - byteLength, 0);
   var moreByteLength = Math.min(byteLength, startByte);
 
   $.ajax({
 type: "GET",
-url: "/log" + baseParams + "&offset=" + offset + "&byteLength=" + 
moreByteLength,
+url: getRESTEndPoint() + baseParams + "&offset=" + offset + "&byteLength=" 
+ moreByteLength,
 success: function (data) {
   var oldHeight = $(".log-content")[0].scrollHeight;
   var newlineIndex = data.indexOf('\n');
@@ -83,14 +96,14 @@ function loadMore() {
 function loadNew() {
   $.ajax({
 type: "GET",
-url: "/log" + baseParams + "&byteLength=0",
+url: getRESTEndPoint() + baseParams + "&byteLength=0",
 success: function (data) {
   var dataInfo = data.substring(0, data.indexOf('\n')).match(/\d+/g);
   var newDataLen = dataInfo[2] - totalLogLength;
   if (newDataLen != 0) {
 $.ajax({
   type: "GET",
-  url: "/log" + baseParams + "&byteLength=" + newDataLen,
+  url: getRESTEndPoint() + baseParams + "&byteLength=" + newDataLen,
   success: function (data) {
 var newlineIndex = data.indexOf('\n');
 var dataInfo = data.substring(0, newlineIndex).match(/\d+/g);





spark git commit: [SPARK-19807][WEB UI] Add reason for cancellation when a stage is killed using web UI

2017-04-05 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 6f09dc70d -> 71c3c4815


[SPARK-19807][WEB UI] Add reason for cancellation when a stage is killed using 
web UI

## What changes were proposed in this pull request?

When a user kills a stage using the web UI (on the Stages page), 
StagesTab.handleKillRequest asks SparkContext to cancel the stage without 
giving a reason. SparkContext has cancelStage(stageId: Int, reason: String), 
which Spark can use to pass that information along for monitoring/debugging purposes.

## How was this patch tested?

manual tests

Please review http://spark.apache.org/contributing.html before opening a pull 
request.

Author: shaolinliu 
Author: lvdongr 

Closes #17258 from shaolinliu/SPARK-19807.
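
A hedged sketch of the SparkContext call the Stages page now makes, using a throwaway local job so there is an active stage to cancel; the reason string is the one added by this change, everything else is illustrative:

```scala
// Sketch, assuming a local SparkContext: cancel an active stage with an explicit reason,
// which then shows up in the failure message for monitoring/debugging.
import org.apache.spark.{SparkConf, SparkContext}

object CancelStageDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("cancel-demo"))
    val runner = new Thread(new Runnable {
      override def run(): Unit = {
        try {
          sc.parallelize(1 to 1000000, 4).map { i => Thread.sleep(1); i }.count()
        } catch {
          case e: Exception => println("job failed: " + e.getMessage)
        }
      }
    })
    runner.start()
    Thread.sleep(2000)  // give the stage time to start
    sc.statusTracker.getActiveStageIds.foreach { id =>
      sc.cancelStage(id, "killed via the Web UI")
    }
    runner.join()
    sc.stop()
  }
}
```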


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/71c3c481
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/71c3c481
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/71c3c481

Branch: refs/heads/master
Commit: 71c3c48159fe7eb4a46fc2a1b78b72088ccfa824
Parents: 6f09dc7
Author: shaolinliu 
Authored: Wed Apr 5 13:47:44 2017 +0100
Committer: Sean Owen 
Committed: Wed Apr 5 13:47:44 2017 +0100

--
 core/src/main/scala/org/apache/spark/ui/jobs/StagesTab.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/71c3c481/core/src/main/scala/org/apache/spark/ui/jobs/StagesTab.scala
--
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagesTab.scala 
b/core/src/main/scala/org/apache/spark/ui/jobs/StagesTab.scala
index c1f2511..181465b 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/StagesTab.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagesTab.scala
@@ -42,7 +42,7 @@ private[ui] class StagesTab(parent: SparkUI) extends 
SparkUITab(parent, "stages"
   val stageId = Option(request.getParameter("id")).map(_.toInt)
   stageId.foreach { id =>
 if (progressListener.activeStages.contains(id)) {
-  sc.foreach(_.cancelStage(id))
+  sc.foreach(_.cancelStage(id, "killed via the Web UI"))
   // Do a quick pause here to give Spark time to kill the stage so it 
shows up as
   // killed after the refresh. Note that this will block the serving 
thread so the
   // time should be limited in duration.





spark git commit: [SPARK-11242][SQL] In conf/spark-env.sh.template SPARK_DRIVER_MEMORY is documented incorrectly

2015-10-22 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master d4950e6be -> 188ea348f


[SPARK-11242][SQL] In conf/spark-env.sh.template SPARK_DRIVER_MEMORY is 
documented incorrectly

Minor fix on the comment

Author: guoxi 

Closes #9201 from xguo27/SPARK-11242.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/188ea348
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/188ea348
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/188ea348

Branch: refs/heads/master
Commit: 188ea348fdcf877d86f3c433cd15f6468fe3b42a
Parents: d4950e6
Author: guoxi 
Authored: Thu Oct 22 13:56:18 2015 -0700
Committer: Sean Owen 
Committed: Thu Oct 22 13:56:18 2015 -0700

--
 conf/spark-env.sh.template | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/188ea348/conf/spark-env.sh.template
--
diff --git a/conf/spark-env.sh.template b/conf/spark-env.sh.template
index 990ded4..771251f 100755
--- a/conf/spark-env.sh.template
+++ b/conf/spark-env.sh.template
@@ -36,10 +36,10 @@
 
 # Options read in YARN client mode
 # - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
-# - SPARK_EXECUTOR_INSTANCES, Number of workers to start (Default: 2)
-# - SPARK_EXECUTOR_CORES, Number of cores for the workers (Default: 1).
-# - SPARK_EXECUTOR_MEMORY, Memory per Worker (e.g. 1000M, 2G) (Default: 1G)
-# - SPARK_DRIVER_MEMORY, Memory for Master (e.g. 1000M, 2G) (Default: 1G)
+# - SPARK_EXECUTOR_INSTANCES, Number of executors to start (Default: 2)
+# - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1).
+# - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G)
+# - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G)
 # - SPARK_YARN_APP_NAME, The name of your application (Default: Spark)
 # - SPARK_YARN_QUEUE, The hadoop queue to use for allocation requests 
(Default: ‘default’)
 # - SPARK_YARN_DIST_FILES, Comma separated list of files to be distributed 
with the job.





spark git commit: [SPARK-11245] update twitter4j to 4.0.4 version

2015-10-24 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master ffed00493 -> e5bc8c275


[SPARK-11245] update twitter4j to 4.0.4 version

update twitter4j to 4.0.4 version
https://issues.apache.org/jira/browse/SPARK-11245

Author: dima 

Closes #9221 from pronix/twitter4j_update.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e5bc8c27
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e5bc8c27
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e5bc8c27

Branch: refs/heads/master
Commit: e5bc8c27577f96c1ae5dc8cf9bf41cbe2877ffe3
Parents: ffed004
Author: dima 
Authored: Sat Oct 24 18:16:45 2015 +0100
Committer: Sean Owen 
Committed: Sat Oct 24 18:16:45 2015 +0100

--
 external/twitter/pom.xml   | 2 +-
 .../org/apache/spark/streaming/twitter/TwitterInputDStream.scala   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/e5bc8c27/external/twitter/pom.xml
--
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index 4c22ec8..087270d 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -51,7 +51,7 @@
 
   org.twitter4j
   twitter4j-stream
-  3.0.3
+  4.0.4
 
 
   org.scalacheck

http://git-wip-us.apache.org/repos/asf/spark/blob/e5bc8c27/external/twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
--
diff --git 
a/external/twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
 
b/external/twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
index d7de74b..9a85a65 100644
--- 
a/external/twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
+++ 
b/external/twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala
@@ -87,7 +87,7 @@ class TwitterReceiver(
 
   val query = new FilterQuery
   if (filters.size > 0) {
-query.track(filters.toArray)
+query.track(filters.mkString(","))
 newTwitterStream.filter(query)
   } else {
 newTwitterStream.sample()





spark git commit: [SPARK-11264] bin/spark-class can't find assembly jars with certain GREP_OPTIONS set

2015-10-24 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-1.5 56f0bb6ed -> 1cd2d9ced


[SPARK-11264] bin/spark-class can't find assembly jars with certain 
GREP_OPTIONS set

Temporarily remove GREP_OPTIONS if set in bin/spark-class.

Some GREP_OPTIONS will modify the output of the grep commands that are looking 
for the assembly jars.
For example, if the -n option is specified, the grep output will look like:
5:spark-assembly-1.5.1-hadoop2.4.0.jar

This will not match the regular expressions, and so the jar files will not be 
found.  We could improve the regular expression to handle this case and trim 
off extra characters, but it is difficult to know which options may or may not 
be set.  Unsetting GREP_OPTIONS within the script handles all the cases and 
gives the desired output.

Author: Jeffrey Naisbitt 

Closes #9231 from naisbitt/unset-GREP_OPTIONS.

(cherry picked from commit 28132ceb10d0c127495ce8cb36135e1cb54164d7)
Signed-off-by: Sean Owen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1cd2d9ce
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1cd2d9ce
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1cd2d9ce

Branch: refs/heads/branch-1.5
Commit: 1cd2d9ced55ab52899a4e57b5c3da6b444ec9ae4
Parents: 56f0bb6
Author: Jeffrey Naisbitt 
Authored: Sat Oct 24 18:21:36 2015 +0100
Committer: Sean Owen 
Committed: Sat Oct 24 18:21:47 2015 +0100

--
 bin/spark-class | 1 +
 1 file changed, 1 insertion(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/1cd2d9ce/bin/spark-class
--
diff --git a/bin/spark-class b/bin/spark-class
index 2b59e5d..7db8cb5 100755
--- a/bin/spark-class
+++ b/bin/spark-class
@@ -42,6 +42,7 @@ else
   ASSEMBLY_DIR="$SPARK_HOME/assembly/target/scala-$SPARK_SCALA_VERSION"
 fi
 
+GREP_OPTIONS=
 num_jars="$(ls -1 "$ASSEMBLY_DIR" | grep "^spark-assembly.*hadoop.*\.jar$" | 
wc -l)"
 if [ "$num_jars" -eq "0" -a -z "$SPARK_ASSEMBLY_JAR" ]; then
   echo "Failed to find Spark assembly in $ASSEMBLY_DIR." 1>&2





spark git commit: [SPARK-11264] bin/spark-class can't find assembly jars with certain GREP_OPTIONS set

2015-10-24 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master e5bc8c275 -> 28132ceb1


[SPARK-11264] bin/spark-class can't find assembly jars with certain 
GREP_OPTIONS set

Temporarily remove GREP_OPTIONS if set in bin/spark-class.

Some GREP_OPTIONS will modify the output of the grep commands that are looking 
for the assembly jars.
For example, if the -n option is specified, the grep output will look like:
5:spark-assembly-1.5.1-hadoop2.4.0.jar

This will not match the regular expressions, and so the jar files will not be 
found.  We could improve the regular expression to handle this case and trim 
off extra characters, but it is difficult to know which options may or may not 
be set.  Unsetting GREP_OPTIONS within the script handles all the cases and 
gives the desired output.

Author: Jeffrey Naisbitt 

Closes #9231 from naisbitt/unset-GREP_OPTIONS.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/28132ceb
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/28132ceb
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/28132ceb

Branch: refs/heads/master
Commit: 28132ceb10d0c127495ce8cb36135e1cb54164d7
Parents: e5bc8c2
Author: Jeffrey Naisbitt 
Authored: Sat Oct 24 18:21:36 2015 +0100
Committer: Sean Owen 
Committed: Sat Oct 24 18:21:36 2015 +0100

--
 bin/spark-class | 1 +
 1 file changed, 1 insertion(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/28132ceb/bin/spark-class
--
diff --git a/bin/spark-class b/bin/spark-class
index e38e08d..8cae6cc 100755
--- a/bin/spark-class
+++ b/bin/spark-class
@@ -42,6 +42,7 @@ else
   ASSEMBLY_DIR="$SPARK_HOME/assembly/target/scala-$SPARK_SCALA_VERSION"
 fi
 
+GREP_OPTIONS=
 num_jars="$(ls -1 "$ASSEMBLY_DIR" | grep "^spark-assembly.*hadoop.*\.jar$" | 
wc -l)"
 if [ "$num_jars" -eq "0" -a -z "$SPARK_ASSEMBLY_JAR" -a 
"$SPARK_PREPEND_CLASSES" != "1" ]; then
   echo "Failed to find Spark assembly in $ASSEMBLY_DIR." 1>&2





spark git commit: Fix typos

2015-10-24 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 28132ceb1 -> 146da0d81


Fix typos

Two typos squashed.

BTW, let me know how to proceed with other typos if I run across any. I am not 
comfortable leaving them aside, nor with sending pull requests for such tiny 
changes. Guide me.

Author: Jacek Laskowski 

Closes #9250 from jaceklaskowski/typos-hunting.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/146da0d8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/146da0d8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/146da0d8

Branch: refs/heads/master
Commit: 146da0d8100490a6e49a6c076ec253cdaf9f8905
Parents: 28132ce
Author: Jacek Laskowski 
Authored: Sun Oct 25 01:33:22 2015 +0100
Committer: Sean Owen 
Committed: Sun Oct 25 01:33:22 2015 +0100

--
 core/src/main/scala/org/apache/spark/SparkConf.scala  | 2 +-
 core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala  | 2 +-
 .../src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala | 3 ++-
 core/src/main/scala/org/apache/spark/util/ThreadUtils.scala   | 2 +-
 4 files changed, 5 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/146da0d8/core/src/main/scala/org/apache/spark/SparkConf.scala
--
diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala 
b/core/src/main/scala/org/apache/spark/SparkConf.scala
index 58d3b84..f023e4b 100644
--- a/core/src/main/scala/org/apache/spark/SparkConf.scala
+++ b/core/src/main/scala/org/apache/spark/SparkConf.scala
@@ -621,7 +621,7 @@ private[spark] object SparkConf extends Logging {
   /**
* Return whether the given config should be passed to an executor on 
start-up.
*
-   * Certain akka and authentication configs are required of the executor when 
it connects to
+   * Certain akka and authentication configs are required from the executor 
when it connects to
* the scheduler, while the rest of the spark configs can be inherited from 
the driver later.
*/
   def isExecutorStartupConf(name: String): Boolean = {

http://git-wip-us.apache.org/repos/asf/spark/blob/146da0d8/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala
--
diff --git a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala 
b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala
index 48afe3a..fdf76d3 100644
--- a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala
+++ b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala
@@ -197,7 +197,7 @@ private[spark] class MetricsSystem private (
   }
 } catch {
   case e: Exception => {
-logError("Sink class " + classPath + " cannot be instantialized")
+logError("Sink class " + classPath + " cannot be instantiated")
 throw e
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/146da0d8/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala
--
diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala 
b/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala
index f25f3ed..cb9a300 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala
@@ -22,7 +22,8 @@ import org.apache.spark.executor.TaskMetrics
 import org.apache.spark.storage.BlockManagerId
 
 /**
- * Low-level task scheduler interface, currently implemented exclusively by 
TaskSchedulerImpl.
+ * Low-level task scheduler interface, currently implemented exclusively by
+ * [[org.apache.spark.scheduler.TaskSchedulerImpl]].
  * This interface allows plugging in different task schedulers. Each 
TaskScheduler schedules tasks
  * for a single SparkContext. These schedulers get sets of tasks submitted to 
them from the
  * DAGScheduler for each stage, and are responsible for sending the tasks to 
the cluster, running

http://git-wip-us.apache.org/repos/asf/spark/blob/146da0d8/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala
--
diff --git a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala 
b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala
index 15e7519..5328344 100644
--- a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala
+++ b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala
@@ -80,7 +80,7 @@ private[spark] object ThreadUtils {
   }
 
   /**
-   * Wrapper over newSingleThreadScheduledExecutor.
+   * Wrapper over ScheduledThreadPoolExecutor.
*/
   def ne

spark git commit: Fix typos

2015-10-24 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-1.5 1cd2d9ced -> 5200a6e29


Fix typos

Two typos squashed.

BTW, let me know how to proceed with other typos if I run across any. I am not 
comfortable leaving them aside, nor with sending pull requests for such tiny 
changes. Guide me.

Author: Jacek Laskowski 

Closes #9250 from jaceklaskowski/typos-hunting.

(cherry picked from commit 146da0d8100490a6e49a6c076ec253cdaf9f8905)
Signed-off-by: Sean Owen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5200a6e2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5200a6e2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5200a6e2

Branch: refs/heads/branch-1.5
Commit: 5200a6e2919bf9c0acc129b4db7c15546d157351
Parents: 1cd2d9c
Author: Jacek Laskowski 
Authored: Sun Oct 25 01:33:22 2015 +0100
Committer: Sean Owen 
Committed: Sun Oct 25 01:33:37 2015 +0100

--
 core/src/main/scala/org/apache/spark/SparkConf.scala  | 2 +-
 core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala  | 2 +-
 .../src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala | 3 ++-
 core/src/main/scala/org/apache/spark/util/ThreadUtils.scala   | 2 +-
 4 files changed, 5 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/5200a6e2/core/src/main/scala/org/apache/spark/SparkConf.scala
--
diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala 
b/core/src/main/scala/org/apache/spark/SparkConf.scala
index b344b5e..cad0ded 100644
--- a/core/src/main/scala/org/apache/spark/SparkConf.scala
+++ b/core/src/main/scala/org/apache/spark/SparkConf.scala
@@ -600,7 +600,7 @@ private[spark] object SparkConf extends Logging {
   /**
* Return whether the given config should be passed to an executor on 
start-up.
*
-   * Certain akka and authentication configs are required of the executor when 
it connects to
+   * Certain akka and authentication configs are required from the executor 
when it connects to
* the scheduler, while the rest of the spark configs can be inherited from 
the driver later.
*/
   def isExecutorStartupConf(name: String): Boolean = {

http://git-wip-us.apache.org/repos/asf/spark/blob/5200a6e2/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala
--
diff --git a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala 
b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala
index 4517f46..d972fa7 100644
--- a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala
+++ b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala
@@ -197,7 +197,7 @@ private[spark] class MetricsSystem private (
   }
 } catch {
   case e: Exception => {
-logError("Sink class " + classPath + " cannot be instantialized")
+logError("Sink class " + classPath + " cannot be instantiated")
 throw e
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/5200a6e2/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala
--
diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala 
b/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala
index f25f3ed..cb9a300 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala
@@ -22,7 +22,8 @@ import org.apache.spark.executor.TaskMetrics
 import org.apache.spark.storage.BlockManagerId
 
 /**
- * Low-level task scheduler interface, currently implemented exclusively by 
TaskSchedulerImpl.
+ * Low-level task scheduler interface, currently implemented exclusively by
+ * [[org.apache.spark.scheduler.TaskSchedulerImpl]].
  * This interface allows plugging in different task schedulers. Each 
TaskScheduler schedules tasks
  * for a single SparkContext. These schedulers get sets of tasks submitted to 
them from the
  * DAGScheduler for each stage, and are responsible for sending the tasks to 
the cluster, running

http://git-wip-us.apache.org/repos/asf/spark/blob/5200a6e2/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala
--
diff --git a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala 
b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala
index 22e291a..06976f8 100644
--- a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala
+++ b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala
@@ -81,7 +81,7 @@ private[spark] object ThreadUtils {
   }
 
   /**
-   * Wrapper

spark git commit: [SPARK-11287] Fixed class name to properly start TestExecutor from deploy.client.TestClient

2015-10-25 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 92b9c5edd -> 80279ac18


[SPARK-11287] Fixed class name to properly start TestExecutor from 
deploy.client.TestClient

Executing deploy.client.TestClient fails due to bad class name for TestExecutor 
in ApplicationDescription.

Author: Bryan Cutler 

Closes #9255 from BryanCutler/fix-TestClient-classname-SPARK-11287.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/80279ac1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/80279ac1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/80279ac1

Branch: refs/heads/master
Commit: 80279ac1875d488f7000f352a958a35536bd4c2e
Parents: 92b9c5e
Author: Bryan Cutler 
Authored: Sun Oct 25 19:05:45 2015 +
Committer: Sean Owen 
Committed: Sun Oct 25 19:05:45 2015 +

--
 .../main/scala/org/apache/spark/deploy/client/TestClient.scala| 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/80279ac1/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala 
b/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala
index 1c79089..adb3f02 100644
--- a/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala
@@ -48,8 +48,9 @@ private[spark] object TestClient {
 val url = args(0)
 val conf = new SparkConf
 val rpcEnv = RpcEnv.create("spark", Utils.localHostName(), 0, conf, new 
SecurityManager(conf))
+val executorClassname = 
TestExecutor.getClass.getCanonicalName.stripSuffix("$")
 val desc = new ApplicationDescription("TestClient", Some(1), 512,
-  Command("spark.deploy.client.TestExecutor", Seq(), Map(), Seq(), Seq(), 
Seq()), "ignored")
+  Command(executorClassname, Seq(), Map(), Seq(), Seq(), Seq()), "ignored")
 val listener = new TestListener
 val client = new AppClient(rpcEnv, Array(url), desc, listener, new 
SparkConf)
 client.start()
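
The one-line fix above derives the executor's class name from the TestExecutor object instead of hard-coding it. As an aside, a minimal standalone sketch (not Spark code; the `Main` object is made up) of why the trailing `$` has to be stripped from a Scala object's class name:

```scala
object Main {
  def main(args: Array[String]): Unit = {
    // For a Scala `object`, getClass is the singleton's class, whose canonical
    // name carries a trailing "$" (e.g. "Main$").
    val singletonName = Main.getClass.getCanonicalName
    // The class the JVM can actually launch (the one holding the static
    // main forwarder) is the same name without the "$".
    val runnableName = singletonName.stripSuffix("$")
    println(s"singleton class: $singletonName, runnable class: $runnableName")
  }
}
```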


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-11287] Fixed class name to properly start TestExecutor from deploy.client.TestClient

2015-10-25 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-1.5 36fddb07a -> 74921c219


[SPARK-11287] Fixed class name to properly start TestExecutor from 
deploy.client.TestClient

Executing deploy.client.TestClient fails due to bad class name for TestExecutor 
in ApplicationDescription.

Author: Bryan Cutler 

Closes #9255 from BryanCutler/fix-TestClient-classname-SPARK-11287.

(cherry picked from commit 80279ac1875d488f7000f352a958a35536bd4c2e)
Signed-off-by: Sean Owen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/74921c21
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/74921c21
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/74921c21

Branch: refs/heads/branch-1.5
Commit: 74921c219ba1c8917a6b69f36a44c488d10804e4
Parents: 36fddb0
Author: Bryan Cutler 
Authored: Sun Oct 25 19:05:45 2015 +
Committer: Sean Owen 
Committed: Sun Oct 25 19:05:55 2015 +

--
 .../main/scala/org/apache/spark/deploy/client/TestClient.scala| 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/74921c21/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala 
b/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala
index 1c79089..adb3f02 100644
--- a/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala
@@ -48,8 +48,9 @@ private[spark] object TestClient {
 val url = args(0)
 val conf = new SparkConf
 val rpcEnv = RpcEnv.create("spark", Utils.localHostName(), 0, conf, new 
SecurityManager(conf))
+val executorClassname = 
TestExecutor.getClass.getCanonicalName.stripSuffix("$")
 val desc = new ApplicationDescription("TestClient", Some(1), 512,
-  Command("spark.deploy.client.TestExecutor", Seq(), Map(), Seq(), Seq(), 
Seq()), "ignored")
+  Command(executorClassname, Seq(), Map(), Seq(), Seq(), Seq()), "ignored")
 val listener = new TestListener
 val client = new AppClient(rpcEnv, Array(url), desc, listener, new 
SparkConf)
 client.start()


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-5966][WIP] Spark-submit deploy-mode cluster is not compatible with master local>

2015-10-26 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 05c4bdb57 -> 616be29c7


[SPARK-5966][WIP] Spark-submit deploy-mode cluster is not compatible with 
master local>

… master local>

Author: Kevin Yu 

Closes #9220 from kevinyu98/working_on_spark-5966.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/616be29c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/616be29c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/616be29c

Branch: refs/heads/master
Commit: 616be29c7f2ebc184bd5ec97210da36a2174d80c
Parents: 05c4bdb
Author: Kevin Yu 
Authored: Mon Oct 26 09:34:15 2015 +
Committer: Sean Owen 
Committed: Mon Oct 26 09:35:19 2015 +

--
 core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 2 ++
 1 file changed, 2 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/616be29c/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
--
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala 
b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index 640cc32..84ae122 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -328,6 +328,8 @@ object SparkSubmit {
   case (STANDALONE, CLUSTER) if args.isR =>
 printErrorAndExit("Cluster deploy mode is currently not supported for 
R " +
   "applications on standalone clusters.")
+  case (LOCAL, CLUSTER) =>
+printErrorAndExit("Cluster deploy mode is not compatible with master 
\"local\"")
   case (_, CLUSTER) if isShell(args.primaryResource) =>
 printErrorAndExit("Cluster deploy mode is not applicable to Spark 
shells.")
   case (_, CLUSTER) if isSqlShell(args.mainClass) =>
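
The change simply adds one more arm to SparkSubmit's match over the (cluster manager, deploy mode) pair. A self-contained sketch of the same validation pattern, with made-up types and a hypothetical `validate` helper that only mirrors the shape of the real code:

```scala
object DeployModeCheckSketch {
  sealed trait Master
  case object Local extends Master
  case object Standalone extends Master

  sealed trait DeployMode
  case object Client extends DeployMode
  case object Cluster extends DeployMode

  // Reject unsupported combinations up front, as SparkSubmit does.
  def validate(master: Master, mode: DeployMode): Either[String, Unit] =
    (master, mode) match {
      case (Local, Cluster) =>
        Left("Cluster deploy mode is not compatible with master \"local\"")
      case _ =>
        Right(())
    }

  def main(args: Array[String]): Unit = {
    println(validate(Local, Cluster))      // Left(error message)
    println(validate(Standalone, Cluster)) // Right(())
  }
}
```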


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-5966][WIP] Spark-submit deploy-mode cluster is not compatible with master local>

2015-10-26 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-1.5 74921c219 -> a355d0d0d


[SPARK-5966][WIP] Spark-submit deploy-mode cluster is not compatible with 
master local>

… master local>

Author: Kevin Yu 

Closes #9220 from kevinyu98/working_on_spark-5966.

(cherry picked from commit 616be29c7f2ebc184bd5ec97210da36a2174d80c)
Signed-off-by: Sean Owen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a355d0d0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a355d0d0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a355d0d0

Branch: refs/heads/branch-1.5
Commit: a355d0d0d1dad72ccdffcb41c29b50f4aa051a48
Parents: 74921c2
Author: Kevin Yu 
Authored: Mon Oct 26 09:34:15 2015 +
Committer: Sean Owen 
Committed: Mon Oct 26 09:35:52 2015 +

--
 core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 2 ++
 1 file changed, 2 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/a355d0d0/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
--
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala 
b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index 86fcf94..fefbba9 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -328,6 +328,8 @@ object SparkSubmit {
   case (STANDALONE, CLUSTER) if args.isR =>
 printErrorAndExit("Cluster deploy mode is currently not supported for 
R " +
   "applications on standalone clusters.")
+  case (LOCAL, CLUSTER) =>
+printErrorAndExit("Cluster deploy mode is not compatible with master 
\"local\"")
   case (_, CLUSTER) if isShell(args.primaryResource) =>
 printErrorAndExit("Cluster deploy mode is not applicable to Spark 
shells.")
   case (_, CLUSTER) if isSqlShell(args.mainClass) =>


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-11276][CORE] SizeEstimator prevents class unloading

2015-10-27 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master d77d198fc -> feb8d6a44


[SPARK-11276][CORE] SizeEstimator prevents class unloading

The SizeEstimator keeps a cache of ClassInfos, but this cache uses Class objects as keys, which
results in strong references to those Class objects. If the classes are created dynamically, this
prevents the corresponding ClassLoader from being GCed, leading to PermGen exhaustion.

We use a Map with weak keys to prevent this issue.

Author: Sem Mulder 

Closes #9244 from SemMulder/fix-sizeestimator-classunloading.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/feb8d6a4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/feb8d6a4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/feb8d6a4

Branch: refs/heads/master
Commit: feb8d6a44fbfc31a880c0cfcaadc91786073
Parents: d77d198
Author: Sem Mulder 
Authored: Tue Oct 27 07:55:10 2015 +
Committer: Sean Owen 
Committed: Tue Oct 27 07:55:10 2015 +

--
 core/src/main/scala/org/apache/spark/util/SizeEstimator.scala | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/feb8d6a4/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala
--
diff --git a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala 
b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala
index 14b1f2a..23ee4ef 100644
--- a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala
+++ b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.util
 
+import com.google.common.collect.MapMaker
+
 import java.lang.management.ManagementFactory
 import java.lang.reflect.{Field, Modifier}
 import java.util.{IdentityHashMap, Random}
@@ -29,7 +31,6 @@ import org.apache.spark.Logging
 import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.util.collection.OpenHashSet
 
-
 /**
  * :: DeveloperApi ::
  * Estimates the sizes of Java objects (number of bytes of memory they 
occupy), for use in
@@ -73,7 +74,8 @@ object SizeEstimator extends Logging {
   private val ALIGN_SIZE = 8
 
   // A cache of ClassInfo objects for each class
-  private val classInfos = new ConcurrentHashMap[Class[_], ClassInfo]
+  // We use weakKeys to allow GC of dynamically created classes
+  private val classInfos = new MapMaker().weakKeys().makeMap[Class[_], 
ClassInfo]()
 
   // Object and pointer sizes are arch dependent
   private var is64bit = false
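
The diff swaps a plain ConcurrentHashMap for a Guava MapMaker map with weak keys. A minimal sketch of that idea outside Spark (assuming only Guava on the classpath; the ClassInfo case class here is a stand-in for Spark's private one):

```scala
import java.util.concurrent.ConcurrentMap

import com.google.common.collect.MapMaker

object WeakKeyCacheSketch {
  final case class ClassInfo(shellSize: Long) // placeholder for the real ClassInfo

  // Keys are held only weakly: once nothing else references a Class[_], it (and
  // its ClassLoader) can be garbage-collected and the entry silently disappears.
  val classInfos: ConcurrentMap[Class[_], ClassInfo] =
    new MapMaker().weakKeys().makeMap[Class[_], ClassInfo]()

  def main(args: Array[String]): Unit = {
    classInfos.put(classOf[String], ClassInfo(40L))
    println(classInfos.get(classOf[String])) // ClassInfo(40)
  }
}
```

Note that weakKeys() also switches key comparison to identity, which is what you want for Class objects anyway.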


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-11318] Include hive profile in make-distribution.sh command

2015-10-29 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master f79ebf2a9 -> f304f9c9a


[SPARK-11318] Include hive profile in make-distribution.sh command

Author: tedyu 

Closes #9281 from tedyu/master.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f304f9c9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f304f9c9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f304f9c9

Branch: refs/heads/master
Commit: f304f9c9a1c954b3b5786f90bb13f543637d3192
Parents: f79ebf2
Author: tedyu 
Authored: Thu Oct 29 15:02:13 2015 +0100
Committer: Sean Owen 
Committed: Thu Oct 29 15:02:13 2015 +0100

--
 docs/building-spark.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/f304f9c9/docs/building-spark.md
--
diff --git a/docs/building-spark.md b/docs/building-spark.md
index 743643c..4f73adb 100644
--- a/docs/building-spark.md
+++ b/docs/building-spark.md
@@ -38,7 +38,7 @@ To create a Spark distribution like those distributed by the
 to be runnable, use `make-distribution.sh` in the project root directory. It 
can be configured 
 with Maven profile settings and so on like the direct Maven build. Example:
 
-./make-distribution.sh --name custom-spark --tgz -Phadoop-2.4 -Pyarn
+./make-distribution.sh --name custom-spark --tgz -Psparkr -Phadoop-2.4 
-Phive -Phive-thriftserver -Pyarn
 
 For more information on usage, run `./make-distribution.sh --help`
 


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-11388][BUILD] Fix self closing tags.

2015-10-29 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master f304f9c9a -> 3bb2a8d75


[SPARK-11388][BUILD] Fix self closing tags.

Java 8 javadoc does not like self-closing tags such as `<p/>`.

This PR fixes those.

Author: Herman van Hovell 

Closes #9339 from hvanhovell/SPARK-11388.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3bb2a8d7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3bb2a8d7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3bb2a8d7

Branch: refs/heads/master
Commit: 3bb2a8d7508b507edfcc21bd20912b0ff4a0a248
Parents: f304f9c
Author: Herman van Hovell 
Authored: Thu Oct 29 15:11:00 2015 +0100
Committer: Sean Owen 
Committed: Thu Oct 29 15:11:00 2015 +0100

--
 .../main/java/org/apache/spark/launcher/SparkAppHandle.java  | 4 ++--
 .../main/java/org/apache/spark/launcher/SparkLauncher.java   | 8 
 2 files changed, 6 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/3bb2a8d7/launcher/src/main/java/org/apache/spark/launcher/SparkAppHandle.java
--
diff --git 
a/launcher/src/main/java/org/apache/spark/launcher/SparkAppHandle.java 
b/launcher/src/main/java/org/apache/spark/launcher/SparkAppHandle.java
index 2896a91..13dd9f1 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/SparkAppHandle.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/SparkAppHandle.java
@@ -19,7 +19,7 @@ package org.apache.spark.launcher;
 
 /**
  * A handle to a running Spark application.
- * 
+ * 
  * Provides runtime information about the underlying Spark application, and 
actions to control it.
  *
  * @since 1.6.0
@@ -110,7 +110,7 @@ public interface SparkAppHandle {
  * Callback for changes in the handle's state.
  *
  * @param handle The updated handle.
- * @see {@link SparkAppHandle#getState()}
+ * @see SparkAppHandle#getState()
  */
 void stateChanged(SparkAppHandle handle);
 

http://git-wip-us.apache.org/repos/asf/spark/blob/3bb2a8d7/launcher/src/main/java/org/apache/spark/launcher/SparkLauncher.java
--
diff --git 
a/launcher/src/main/java/org/apache/spark/launcher/SparkLauncher.java 
b/launcher/src/main/java/org/apache/spark/launcher/SparkLauncher.java
index 5d74b37..dd1c93a 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/SparkLauncher.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/SparkLauncher.java
@@ -350,7 +350,7 @@ public class SparkLauncher {
 
   /**
* Launches a sub-process that will start the configured Spark application.
-   * 
+   * 
* The {@link #startApplication(SparkAppHandle.Listener...)} method is 
preferred when launching
* Spark, since it provides better control of the child application.
*
@@ -362,16 +362,16 @@ public class SparkLauncher {
 
   /**
* Starts a Spark application.
-   * 
+   * 
* This method returns a handle that provides information about the running 
application and can
* be used to do basic interaction with it.
-   * 
+   * 
* The returned handle assumes that the application will instantiate a 
single SparkContext
* during its lifetime. Once that context reports a final state (one that 
indicates the
* SparkContext has stopped), the handle will not perform new state 
transitions, so anything
* that happens after that cannot be monitored. If the underlying 
application is launched as
* a child process, {@link SparkAppHandle#kill()} can still be used to kill 
the child process.
-   * 
+   * 
* Currently, all applications are launched as child processes. The child's 
stdout and stderr
* are merged and written to a logger (see java.util.logging). 
The logger's name
* can be defined by setting {@link #CHILD_PROCESS_LOGGER_NAME} in the app's 
configuration. If


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-10986][MESOS] Set the context class loader in the Mesos executor backend.

2015-10-30 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 14d08b990 -> 0451b0014


[SPARK-10986][MESOS] Set the context class loader in the Mesos executor backend.

See [SPARK-10986](https://issues.apache.org/jira/browse/SPARK-10986) for 
details.

This fixes the `ClassNotFoundException` for Spark classes in the serializer.

I am not sure this is the right way to handle the class loader, but I couldn't 
find any documentation on how the context class loader is used and who relies 
on it. It seems at least the serializer uses it to instantiate classes during 
deserialization.

I am open to suggestions (I tried this fix on a real Mesos cluster and it 
*does* fix the issue).

tnachen andrewor14

Author: Iulian Dragos 

Closes #9282 from dragos/issue/mesos-classloader.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0451b001
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0451b001
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0451b001

Branch: refs/heads/master
Commit: 0451b00148a294c665146563242d2fe2de943a02
Parents: 14d08b9
Author: Iulian Dragos 
Authored: Fri Oct 30 16:51:32 2015 +
Committer: Sean Owen 
Committed: Fri Oct 30 16:51:32 2015 +

--
 .../scala/org/apache/spark/executor/MesosExecutorBackend.scala  | 5 +
 1 file changed, 5 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/0451b001/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala 
b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
index 0474fd2..c9f18eb 100644
--- a/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
+++ b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
@@ -63,6 +63,11 @@ private[spark] class MesosExecutorBackend
 
 logInfo(s"Registered with Mesos as executor ID $executorId with 
$cpusPerTask cpus")
 this.driver = driver
+// Set a context class loader to be picked up by the serializer. Without 
this call
+// the serializer would default to the null class loader, and fail to find 
Spark classes
+// See SPARK-10986.
+Thread.currentThread().setContextClassLoader(this.getClass.getClassLoader)
+
 val properties = Utils.deserialize[Array[(String, 
String)]](executorInfo.getData.toByteArray) ++
   Seq[(String, String)](("spark.app.id", frameworkInfo.getId.getValue))
 val conf = new SparkConf(loadDefaults = true).setAll(properties)
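
For context, a tiny standalone sketch of what setting the context class loader buys you; the `loadViaContextClassLoader` helper is hypothetical and only imitates how serializers commonly resolve class names:

```scala
object ContextClassLoaderSketch {
  // Serializers typically resolve classes through the calling thread's
  // context class loader, roughly like this:
  def loadViaContextClassLoader(name: String): Class[_] =
    Class.forName(name, true, Thread.currentThread().getContextClassLoader)

  def main(args: Array[String]): Unit = {
    // A thread created by native code (as in the Mesos executor) may not have a
    // useful context class loader; point it at the loader that knows our classes.
    Thread.currentThread().setContextClassLoader(getClass.getClassLoader)
    println(loadViaContextClassLoader("scala.Option"))
  }
}
```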


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-11342][TESTS] Allow to set hadoop profile when running dev/ru…

2015-10-30 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 40c77fb23 -> 729f983e6


[SPARK-11342][TESTS] Allow to set hadoop profile when running dev/ru…

…n_tests

Author: Jeff Zhang 

Closes #9295 from zjffdu/SPARK-11342.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/729f983e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/729f983e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/729f983e

Branch: refs/heads/master
Commit: 729f983e66cf65da2e8f48c463ccde2b355240c4
Parents: 40c77fb
Author: Jeff Zhang 
Authored: Fri Oct 30 18:50:12 2015 +
Committer: Sean Owen 
Committed: Fri Oct 30 18:50:12 2015 +

--
 dev/run-tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/729f983e/dev/run-tests.py
--
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 6b4b710..9e1abb0 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -486,7 +486,7 @@ def main():
 else:
 # else we're running locally and can use local settings
 build_tool = "sbt"
-hadoop_version = "hadoop2.3"
+hadoop_version = os.environ.get("HADOOP_PROFILE", "hadoop2.3")
 test_env = "local"
 
 print("[info] Using build tool", build_tool, "with Hadoop profile", 
hadoop_version,


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-11226][SQL] Empty line in json file should be skipped

2015-10-31 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 3c471885d -> 97b3c8fb4


[SPARK-11226][SQL] Empty line in json file should be skipped

Currently an empty line in a JSON file is parsed into a Row with all null field values. But in
JSON only "{}" represents an empty object; an empty line should simply be skipped.

This makes a trivial change to do exactly that.

Author: Jeff Zhang 

Closes #9211 from zjffdu/SPARK-11226.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/97b3c8fb
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/97b3c8fb
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/97b3c8fb

Branch: refs/heads/master
Commit: 97b3c8fb470f0d3c1cdb1aeb27f675e695442e87
Parents: 3c47188
Author: Jeff Zhang 
Authored: Sat Oct 31 11:10:37 2015 +
Committer: Sean Owen 
Committed: Sat Oct 31 11:10:37 2015 +

--
 .../datasources/json/JacksonParser.scala| 46 +++-
 .../org/apache/spark/sql/SQLQuerySuite.scala| 11 +
 .../execution/datasources/json/JsonSuite.scala  |  3 --
 3 files changed, 36 insertions(+), 24 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/97b3c8fb/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala
index b2e5201..4f53eeb 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala
@@ -245,29 +245,33 @@ private[sql] object JacksonParser {
   val factory = new JsonFactory()
 
   iter.flatMap { record =>
-try {
-  Utils.tryWithResource(factory.createParser(record)) { parser =>
-parser.nextToken()
-
-convertField(factory, parser, schema) match {
-  case null => failedRecord(record)
-  case row: InternalRow => row :: Nil
-  case array: ArrayData =>
-if (array.numElements() == 0) {
-  Nil
-} else {
-  array.toArray[InternalRow](schema)
-}
-  case _ =>
-sys.error(
-  s"Failed to parse record $record. Please make sure that each 
line of the file " +
-"(or each string in the RDD) is a valid JSON object or " +
-"an array of JSON objects.")
+if (record.trim.isEmpty) {
+  Nil
+} else {
+  try {
+Utils.tryWithResource(factory.createParser(record)) { parser =>
+  parser.nextToken()
+
+  convertField(factory, parser, schema) match {
+case null => failedRecord(record)
+case row: InternalRow => row :: Nil
+case array: ArrayData =>
+  if (array.numElements() == 0) {
+Nil
+  } else {
+array.toArray[InternalRow](schema)
+  }
+case _ =>
+  sys.error(
+s"Failed to parse record $record. Please make sure that 
each line of " +
+  "the file (or each string in the RDD) is a valid JSON 
object or " +
+  "an array of JSON objects.")
+  }
 }
+  } catch {
+case _: JsonProcessingException =>
+  failedRecord(record)
   }
-} catch {
-  case _: JsonProcessingException =>
-failedRecord(record)
 }
   }
 }
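
Stripped of Jackson and Spark internals, the essence of the change is to drop blank records before they ever reach the parser. A small self-contained sketch (`parseRecord` is a made-up stand-in for the real parsing path):

```scala
object SkipEmptyLinesSketch {
  // Hypothetical parser that, like a real JSON parser, cannot handle empty input.
  def parseRecord(record: String): Map[String, String] = {
    require(record.nonEmpty, "empty record")
    Map("raw" -> record)
  }

  def main(args: Array[String]): Unit = {
    val lines = Iterator("""{"a": "1"}""", "", "   ", """{"a": "2"}""")
    // Blank lines now produce no rows at all, instead of rows full of nulls.
    val rows = lines.flatMap { record =>
      if (record.trim.isEmpty) Nil else List(parseRecord(record))
    }
    rows.foreach(println) // two parsed records
  }
}
```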

http://git-wip-us.apache.org/repos/asf/spark/blob/97b3c8fb/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 5a616fa..5413ef1 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -225,6 +225,17 @@ class SQLQuerySuite extends QueryTest with 
SharedSQLContext {
   Seq(Row("1"), Row("2")))
   }
 
+  test("SPARK-11226 Skip empty line in json file") {
+sqlContext.read.json(
+  sparkContext.parallelize(
+Seq("{\"a\": \"1\"}}", "{\"a\": \"2\"}}", "{\"a\": \"3\"}}", "")))
+  .registerTempTable("d")
+
+checkAnswer(
+  sql("select count(1) from d"),
+  Seq(Row(3)))
+  }
+
   test("SPARK-8828 sum should return null if all input va

spark git commit: [SPARK-11305][DOCS] Remove Third-Party Hadoop Distributions Doc Page

2015-11-01 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master aa494a9c2 -> 643c49c75


[SPARK-11305][DOCS] Remove Third-Party Hadoop Distributions Doc Page

Remove Hadoop third party distro page, and move Hadoop cluster config info to 
configuration page

CC pwendell

Author: Sean Owen 

Closes #9298 from srowen/SPARK-11305.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/643c49c7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/643c49c7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/643c49c7

Branch: refs/heads/master
Commit: 643c49c75ee95243fd19ae73b5170e6e6e212b8d
Parents: aa494a9
Author: Sean Owen 
Authored: Sun Nov 1 12:25:49 2015 +
Committer: Sean Owen 
Committed: Sun Nov 1 12:25:49 2015 +

--
 README.md|   5 +-
 docs/_layouts/global.html|   1 -
 docs/configuration.md|  15 
 docs/hadoop-third-party-distributions.md | 117 --
 docs/index.md|   1 -
 docs/programming-guide.md|   9 +-
 6 files changed, 19 insertions(+), 129 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/643c49c7/README.md
--
diff --git a/README.md b/README.md
index 4116ef3..c0d6a94 100644
--- a/README.md
+++ b/README.md
@@ -87,10 +87,7 @@ Hadoop, you must build Spark against the same version that 
your cluster runs.
 Please refer to the build documentation at
 ["Specifying the Hadoop 
Version"](http://spark.apache.org/docs/latest/building-spark.html#specifying-the-hadoop-version)
 for detailed guidance on building for a particular distribution of Hadoop, 
including
-building for particular Hive and Hive Thriftserver distributions. See also
-["Third Party Hadoop 
Distributions"](http://spark.apache.org/docs/latest/hadoop-third-party-distributions.html)
-for guidance on building a Spark application that works with a particular
-distribution.
+building for particular Hive and Hive Thriftserver distributions.
 
 ## Configuration
 

http://git-wip-us.apache.org/repos/asf/spark/blob/643c49c7/docs/_layouts/global.html
--
diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html
index b4952fe..467ff7a 100755
--- a/docs/_layouts/global.html
+++ b/docs/_layouts/global.html
@@ -112,7 +112,6 @@
 Job 
Scheduling
 Security
 Hardware Provisioning
-3rd-Party Hadoop 
Distros
 
 Building 
Spark
 https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark";>Contributing
 to Spark

http://git-wip-us.apache.org/repos/asf/spark/blob/643c49c7/docs/configuration.md
--
diff --git a/docs/configuration.md b/docs/configuration.md
index 682384d..c276e8e 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -1674,3 +1674,18 @@ Spark uses [log4j](http://logging.apache.org/log4j/) for 
logging. You can config
 To specify a different configuration directory other than the default 
"SPARK_HOME/conf",
 you can set SPARK_CONF_DIR. Spark will use the the configuration files 
(spark-defaults.conf, spark-env.sh, log4j.properties, etc)
 from this directory.
+
+# Inheriting Hadoop Cluster Configuration
+
+If you plan to read and write from HDFS using Spark, there are two Hadoop 
configuration files that
+should be included on Spark's classpath:
+
+* `hdfs-site.xml`, which provides default behaviors for the HDFS client.
+* `core-site.xml`, which sets the default filesystem name.
+
+The location of these configuration files varies across CDH and HDP versions, 
but
+a common location is inside of `/etc/hadoop/conf`. Some tools, such as 
Cloudera Manager, create
+configurations on-the-fly, but offer a mechanisms to download copies of them.
+
+To make these files visible to Spark, set `HADOOP_CONF_DIR` in 
`$SPARK_HOME/spark-env.sh`
+to a location containing the configuration files.

http://git-wip-us.apache.org/repos/asf/spark/blob/643c49c7/docs/hadoop-third-party-distributions.md
--
diff --git a/docs/hadoop-third-party-distributions.md 
b/docs/hadoop-third-party-distributions.md
deleted file mode 100644
index 795dd82..000
--- a/docs/hadoop-third-party-distributions.md
+++ /dev/null
@@ -1,117 +0,0 @@

-layout: global
-title: Third-Party Hadoop Distributions

-
-Spark can run against all versions of Cloudera's D

spark git commit: [SPARK-11271][SPARK-11016][CORE] Use Spark BitSet instead of RoaringBitmap to reduce memory usage

2015-11-02 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master e963070c1 -> e209fa271


[SPARK-11271][SPARK-11016][CORE] Use Spark BitSet instead of RoaringBitmap to 
reduce memory usage

JIRA: https://issues.apache.org/jira/browse/SPARK-11271

As reported in the JIRA ticket, when there are too many tasks, the memory usage of MapStatus
becomes a problem. Using Spark's own BitSet instead of RoaringBitmap should be more
memory-efficient.

Author: Liang-Chi Hsieh 

Closes #9243 from viirya/mapstatus-bitset.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e209fa27
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e209fa27
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e209fa27

Branch: refs/heads/master
Commit: e209fa271ae57dc8849f8b1241bf1ea7d6d3d62c
Parents: e963070
Author: Liang-Chi Hsieh 
Authored: Mon Nov 2 08:52:52 2015 +
Committer: Sean Owen 
Committed: Mon Nov 2 08:52:52 2015 +

--
 core/pom.xml|  4 --
 .../org/apache/spark/scheduler/MapStatus.scala  | 13 +++---
 .../spark/serializer/KryoSerializer.scala   | 10 +---
 .../apache/spark/util/collection/BitSet.scala   | 28 +--
 .../spark/serializer/KryoSerializerSuite.scala  |  6 ---
 .../spark/util/collection/BitSetSuite.scala | 49 
 pom.xml |  5 --
 7 files changed, 82 insertions(+), 33 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/e209fa27/core/pom.xml
--
diff --git a/core/pom.xml b/core/pom.xml
index 319a500..1b6b135 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -174,10 +174,6 @@
   lz4
 
 
-  org.roaringbitmap
-  RoaringBitmap
-
-
   commons-net
   commons-net
 

http://git-wip-us.apache.org/repos/asf/spark/blob/e209fa27/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala
--
diff --git a/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala 
b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala
index 1efce12..180c8d1 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala
@@ -19,9 +19,8 @@ package org.apache.spark.scheduler
 
 import java.io.{Externalizable, ObjectInput, ObjectOutput}
 
-import org.roaringbitmap.RoaringBitmap
-
 import org.apache.spark.storage.BlockManagerId
+import org.apache.spark.util.collection.BitSet
 import org.apache.spark.util.Utils
 
 /**
@@ -133,7 +132,7 @@ private[spark] class CompressedMapStatus(
 private[spark] class HighlyCompressedMapStatus private (
 private[this] var loc: BlockManagerId,
 private[this] var numNonEmptyBlocks: Int,
-private[this] var emptyBlocks: RoaringBitmap,
+private[this] var emptyBlocks: BitSet,
 private[this] var avgSize: Long)
   extends MapStatus with Externalizable {
 
@@ -146,7 +145,7 @@ private[spark] class HighlyCompressedMapStatus private (
   override def location: BlockManagerId = loc
 
   override def getSizeForBlock(reduceId: Int): Long = {
-if (emptyBlocks.contains(reduceId)) {
+if (emptyBlocks.get(reduceId)) {
   0
 } else {
   avgSize
@@ -161,7 +160,7 @@ private[spark] class HighlyCompressedMapStatus private (
 
   override def readExternal(in: ObjectInput): Unit = Utils.tryOrIOException {
 loc = BlockManagerId(in)
-emptyBlocks = new RoaringBitmap()
+emptyBlocks = new BitSet
 emptyBlocks.readExternal(in)
 avgSize = in.readLong()
   }
@@ -177,15 +176,15 @@ private[spark] object HighlyCompressedMapStatus {
 // From a compression standpoint, it shouldn't matter whether we track 
empty or non-empty
 // blocks. From a performance standpoint, we benefit from tracking empty 
blocks because
 // we expect that there will be far fewer of them, so we will perform 
fewer bitmap insertions.
-val emptyBlocks = new RoaringBitmap()
 val totalNumBlocks = uncompressedSizes.length
+val emptyBlocks = new BitSet(totalNumBlocks)
 while (i < totalNumBlocks) {
   var size = uncompressedSizes(i)
   if (size > 0) {
 numNonEmptyBlocks += 1
 totalSize += size
   } else {
-emptyBlocks.add(i)
+emptyBlocks.set(i)
   }
   i += 1
 }
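
The compression idea behind HighlyCompressedMapStatus is to keep one bit per block saying whether it is empty, plus a single average size for the non-empty ones. A runnable sketch using java.util.BitSet (the real code uses Spark's own BitSet, but the logic has the same shape):

```scala
import java.util.BitSet

object MapStatusSketch {
  // sizes(i) is the uncompressed size of shuffle block i.
  def compress(sizes: Array[Long]): (BitSet, Long) = {
    val emptyBlocks = new BitSet(sizes.length)
    var nonEmpty = 0L
    var total = 0L
    sizes.zipWithIndex.foreach { case (size, i) =>
      if (size > 0) { nonEmpty += 1; total += size } else emptyBlocks.set(i)
    }
    val avgSize = if (nonEmpty > 0) total / nonEmpty else 0L
    (emptyBlocks, avgSize)
  }

  // Empty blocks report 0; everything else reports the shared average.
  def sizeForBlock(emptyBlocks: BitSet, avgSize: Long, reduceId: Int): Long =
    if (emptyBlocks.get(reduceId)) 0L else avgSize

  def main(args: Array[String]): Unit = {
    val (empty, avg) = compress(Array(0L, 100L, 0L, 300L))
    println((0 until 4).map(sizeForBlock(empty, avg, _))) // Vector(0, 200, 0, 200)
  }
}
```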

http://git-wip-us.apache.org/repos/asf/spark/blob/e209fa27/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala 
b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala
index c5195c1..bc51d4f 100644
--- a/core/src/main/sc

spark git commit: [SPARK-11413][BUILD] Bump joda-time version to 2.9 for java 8 and s3

2015-11-02 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master e209fa271 -> ea4a3e7d0


[SPARK-11413][BUILD] Bump joda-time version to 2.9 for java 8 and s3

It's a known issue that joda-time before 2.8.1 is incompatible with Java 8u60 or later, which
causes S3 requests to fail. This affects Spark when using S3 as a data source.
https://github.com/aws/aws-sdk-java/issues/444

Author: Yongjia Wang 

Closes #9379 from yongjiaw/SPARK-11413.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ea4a3e7d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ea4a3e7d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ea4a3e7d

Branch: refs/heads/master
Commit: ea4a3e7d06dd4a0f669460513b27469c468214fb
Parents: e209fa2
Author: Yongjia Wang 
Authored: Mon Nov 2 08:59:35 2015 +
Committer: Sean Owen 
Committed: Mon Nov 2 08:59:35 2015 +

--
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ea4a3e7d/pom.xml
--
diff --git a/pom.xml b/pom.xml
index 50c8f29..762bfc7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -176,7 +176,7 @@
 3.2.10
 2.7.8
 1.9
-2.5
+2.9
 3.5.2
 1.3.9
 0.9.2


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-11344] Made ApplicationDescription and DriverDescription case classes

2015-11-03 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master b86f2cab6 -> 233e534ac


[SPARK-11344] Made ApplicationDescription and DriverDescription case classes

DriverDescription was refactored into a case class because it included no mutable fields.

ApplicationDescription had one mutable field, appUiUrl, which the driver set to point to the
driver web UI. The Master was modifying this field when an application was removed, in order to
redirect requests to the history server. This was wrong because objects sent over the wire should
be immutable. Now appUiUrl is immutable in ApplicationDescription and always points to the driver
UI, even if it has already shut down. The UI URL which the Master exposes to the user and modifies
dynamically is now part of ApplicationInfo, a data object which describes the application state
internally in the Master. That URL in ApplicationInfo is initialised with the value from
ApplicationDescription.

ApplicationDescription also included the value user, which is now one of the case class fields.

Author: Jacek Lewandowski 

Closes #9299 from jacek-lewandowski/SPARK-11344.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/233e534a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/233e534a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/233e534a

Branch: refs/heads/master
Commit: 233e534ac43ea25ac1b0e6a985f6928d46c5d03a
Parents: b86f2ca
Author: Jacek Lewandowski 
Authored: Tue Nov 3 12:46:11 2015 +
Committer: Sean Owen 
Committed: Tue Nov 3 12:46:11 2015 +

--
 .../spark/deploy/ApplicationDescription.scala   | 33 ++--
 .../apache/spark/deploy/DriverDescription.scala | 21 -
 .../spark/deploy/master/ApplicationInfo.scala   |  7 +
 .../org/apache/spark/deploy/master/Master.scala | 12 ---
 .../deploy/master/ui/ApplicationPage.scala  |  2 +-
 .../spark/deploy/master/ui/MasterPage.scala |  2 +-
 .../apache/spark/deploy/DeployTestUtils.scala   |  3 +-
 7 files changed, 34 insertions(+), 46 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/233e534a/core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala 
b/core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala
index ae99432..78bbd5c 100644
--- a/core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala
@@ -19,30 +19,17 @@ package org.apache.spark.deploy
 
 import java.net.URI
 
-private[spark] class ApplicationDescription(
-val name: String,
-val maxCores: Option[Int],
-val memoryPerExecutorMB: Int,
-val command: Command,
-var appUiUrl: String,
-val eventLogDir: Option[URI] = None,
+private[spark] case class ApplicationDescription(
+name: String,
+maxCores: Option[Int],
+memoryPerExecutorMB: Int,
+command: Command,
+appUiUrl: String,
+eventLogDir: Option[URI] = None,
 // short name of compression codec used when writing event logs, if any 
(e.g. lzf)
-val eventLogCodec: Option[String] = None,
-val coresPerExecutor: Option[Int] = None)
-  extends Serializable {
-
-  val user = System.getProperty("user.name", "")
-
-  def copy(
-  name: String = name,
-  maxCores: Option[Int] = maxCores,
-  memoryPerExecutorMB: Int = memoryPerExecutorMB,
-  command: Command = command,
-  appUiUrl: String = appUiUrl,
-  eventLogDir: Option[URI] = eventLogDir,
-  eventLogCodec: Option[String] = eventLogCodec): ApplicationDescription =
-new ApplicationDescription(
-  name, maxCores, memoryPerExecutorMB, command, appUiUrl, eventLogDir, 
eventLogCodec)
+eventLogCodec: Option[String] = None,
+coresPerExecutor: Option[Int] = None,
+user: String = System.getProperty("user.name", "")) {
 
   override def toString: String = "ApplicationDescription(" + name + ")"
 }
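
The design point is worth spelling out: anything sent over the wire stays an immutable case class, and the one field the master needs to change lives in a separate, master-internal object. A simplified sketch with made-up names (AppDescription, AppInfo) that only mirror the classes in the diff:

```scala
object ImmutableDescriptionSketch {
  // Immutable message: safe to serialize and share between processes.
  final case class AppDescription(name: String, appUiUrl: String)

  // Mutable bookkeeping that never leaves the master.
  final class AppInfo(val desc: AppDescription) {
    var currentUiUrl: String = desc.appUiUrl // starts out pointing at the driver UI
    def redirectToHistoryServer(url: String): Unit = currentUiUrl = url
  }

  def main(args: Array[String]): Unit = {
    val desc = AppDescription("TestClient", "http://driver-host:4040")
    val info = new AppInfo(desc)
    info.redirectToHistoryServer("http://history-host:18080/app-1")
    println(desc.appUiUrl)     // unchanged: the wire object stays immutable
    println(info.currentUiUrl) // the master-local view was updated
  }
}
```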

http://git-wip-us.apache.org/repos/asf/spark/blob/233e534a/core/src/main/scala/org/apache/spark/deploy/DriverDescription.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/deploy/DriverDescription.scala 
b/core/src/main/scala/org/apache/spark/deploy/DriverDescription.scala
index 659fb43..1f5626a 100644
--- a/core/src/main/scala/org/apache/spark/deploy/DriverDescription.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/DriverDescription.scala
@@ -17,21 +17,12 @@
 
 package org.apache.spark.deploy
 
-private[deploy] class DriverDescription(
-val jarUrl: String,
-val mem: Int,
-val cores: Int,
-val supervise: Boolean,
-val c

spark git commit: [SPARK-2960][DEPLOY] Support executing Spark from symlinks (reopen)

2015-11-04 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 2692bdb7d -> 8aff36e91


[SPARK-2960][DEPLOY] Support executing Spark from symlinks (reopen)

This PR is based on the work of roji to support running Spark scripts from symlinks. Thanks for
the great work, roji. Would you mind taking a look at this PR? Thanks a lot.

Distributions such as HDP normally expose the Spark executables as symlinks on the `PATH`, but
Spark's current scripts do not resolve the real path behind a symlink recursively, so Spark fails
to execute when launched through a symlink. This PR tries to solve the issue by finding the
absolute path behind the symlink.

Unlike the earlier PR (https://github.com/apache/spark/pull/2386), this change does not use
`readlink -f`, since the `-f` option is not supported on Mac; instead the path is resolved
manually in a loop.

I've tested on Mac and Linux (CentOS) and it looks fine.

This PR did not fix the scripts under the `sbin` folder; not sure whether they need to be fixed
as well.

Please help to review; any comment is greatly appreciated.

Author: jerryshao 
Author: Shay Rojansky 

Closes #8669 from jerryshao/SPARK-2960.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8aff36e9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8aff36e9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8aff36e9

Branch: refs/heads/master
Commit: 8aff36e91de0fee2f3f56c6d240bb203b5bb48ba
Parents: 2692bdb
Author: jerryshao 
Authored: Wed Nov 4 10:49:34 2015 +
Committer: Sean Owen 
Committed: Wed Nov 4 10:49:34 2015 +

--
 bin/beeline |  8 +---
 bin/load-spark-env.sh   | 32 ++--
 bin/pyspark | 14 --
 bin/run-example | 18 ++
 bin/spark-class | 15 ---
 bin/spark-shell |  9 ++---
 bin/spark-sql   |  7 +--
 bin/spark-submit|  6 --
 bin/sparkR  |  9 ++---
 sbin/slaves.sh  |  9 +
 sbin/spark-config.sh| 23 +++
 sbin/spark-daemon.sh| 23 ---
 sbin/spark-daemons.sh   |  9 +
 sbin/start-all.sh   | 11 ++-
 sbin/start-history-server.sh| 11 ++-
 sbin/start-master.sh| 17 +
 sbin/start-mesos-dispatcher.sh  | 11 ++-
 sbin/start-mesos-shuffle-service.sh | 11 ++-
 sbin/start-shuffle-service.sh   | 11 ++-
 sbin/start-slave.sh | 18 +-
 sbin/start-slaves.sh| 19 +--
 sbin/start-thriftserver.sh  | 11 ++-
 sbin/stop-all.sh| 14 +++---
 sbin/stop-history-server.sh |  7 ---
 sbin/stop-master.sh | 13 +++--
 sbin/stop-mesos-dispatcher.sh   |  9 +
 sbin/stop-mesos-shuffle-service.sh  |  7 ---
 sbin/stop-shuffle-service.sh|  7 ---
 sbin/stop-slave.sh  | 15 ---
 sbin/stop-slaves.sh | 15 ---
 sbin/stop-thriftserver.sh   |  7 ---
 31 files changed, 213 insertions(+), 183 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/8aff36e9/bin/beeline
--
diff --git a/bin/beeline b/bin/beeline
index 3fcb6df..1627626 100755
--- a/bin/beeline
+++ b/bin/beeline
@@ -23,8 +23,10 @@
 # Enter posix mode for bash
 set -o posix
 
-# Figure out where Spark is installed
-FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
+# Figure out if SPARK_HOME is set
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
 
 CLASS="org.apache.hive.beeline.BeeLine"
-exec "$FWDIR/bin/spark-class" $CLASS "$@"
+exec "${SPARK_HOME}/bin/spark-class" $CLASS "$@"

http://git-wip-us.apache.org/repos/asf/spark/blob/8aff36e9/bin/load-spark-env.sh
--
diff --git a/bin/load-spark-env.sh b/bin/load-spark-env.sh
index 95779e9..eaea964 100644
--- a/bin/load-spark-env.sh
+++ b/bin/load-spark-env.sh
@@ -20,13 +20,17 @@
 # This script loads spark-env.sh if it exists, and ensures it is only loaded 
once.
 # spark-env.sh is loaded from SPARK_CONF_DIR if set, or within the current 
directory's
 # conf/ subdirectory.
-FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
+
+# Figure out where Spark is installed
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
 
 if [ -z "$SPARK_ENV_LOADED" ]; then
   export SPARK_ENV_LOADED=1
 
   # Returns the parent of the directory this 

spark git commit: [SPARK-11442] Reduce numSlices for local metrics test of SparkListenerSuite

2015-11-04 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 8aff36e91 -> c09e51398


[SPARK-11442] Reduce numSlices for local metrics test of SparkListenerSuite

In the thread, 
http://search-hadoop.com/m/q3RTtcQiFSlTxeP/test+failed+due+to+OOME&subj=test+failed+due+to+OOME,
 it was discussed that memory consumption for SparkListenerSuite should be 
brought down.

This is an attempt in that direction, reducing numSlices for the local metrics test.

Author: tedyu 

Closes #9384 from tedyu/master.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c09e5139
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c09e5139
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c09e5139

Branch: refs/heads/master
Commit: c09e5139874fb3626e005c8240cca5308b902ef3
Parents: 8aff36e
Author: tedyu 
Authored: Wed Nov 4 10:51:40 2015 +
Committer: Sean Owen 
Committed: Wed Nov 4 10:51:40 2015 +

--
 .../org/apache/spark/scheduler/SparkListenerSuite.scala | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/c09e5139/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala
--
diff --git 
a/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala 
b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala
index a9652d7..53102b9 100644
--- a/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala
@@ -212,14 +212,15 @@ class SparkListenerSuite extends SparkFunSuite with 
LocalSparkContext with Match
   i
 }
 
-val d = sc.parallelize(0 to 1e4.toInt, 64).map(w)
+val numSlices = 16
+val d = sc.parallelize(0 to 1e3.toInt, numSlices).map(w)
 d.count()
 sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
 listener.stageInfos.size should be (1)
 
 val d2 = d.map { i => w(i) -> i * 2 }.setName("shuffle input 1")
 val d3 = d.map { i => w(i) -> (0 to (i % 5)) }.setName("shuffle input 2")
-val d4 = d2.cogroup(d3, 64).map { case (k, (v1, v2)) =>
+val d4 = d2.cogroup(d3, numSlices).map { case (k, (v1, v2)) =>
   w(k) -> (v1.size, v2.size)
 }
 d4.setName("A Cogroup")
@@ -258,8 +259,8 @@ class SparkListenerSuite extends SparkFunSuite with 
LocalSparkContext with Match
 if (stageInfo.rddInfos.exists(_.name == d4.name)) {
   taskMetrics.shuffleReadMetrics should be ('defined)
   val sm = taskMetrics.shuffleReadMetrics.get
-  sm.totalBlocksFetched should be (128)
-  sm.localBlocksFetched should be (128)
+  sm.totalBlocksFetched should be (2*numSlices)
+  sm.localBlocksFetched should be (2*numSlices)
   sm.remoteBlocksFetched should be (0)
   sm.remoteBytesRead should be (0L)
 }


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-11440][CORE][STREAMING][BUILD] Declare rest of @Experimental items non-experimental if they've existed since 1.2.0

2015-11-05 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 81498dd5c -> 6f81eae24


[SPARK-11440][CORE][STREAMING][BUILD] Declare rest of @Experimental items 
non-experimental if they've existed since 1.2.0

Remove `Experimental` annotations in core, streaming for items that existed in 
1.2.0 or before. The changes are:

* SparkContext
  * binary{Files,Records} : 1.2.0
  * submitJob : 1.0.0
* JavaSparkContext
  * binary{Files,Records} : 1.2.0
* DoubleRDDFunctions, JavaDoubleRDD
  * {mean,sum}Approx : 1.0.0
* PairRDDFunctions, JavaPairRDD
  * sampleByKeyExact : 1.2.0
  * countByKeyApprox : 1.0.0
* PairRDDFunctions
  * countApproxDistinctByKey : 1.1.0
* RDD
  * countApprox, countByValueApprox, countApproxDistinct : 1.0.0
* JavaRDDLike
  * countApprox : 1.0.0
* PythonHadoopUtil.Converter : 1.1.0
* PortableDataStream : 1.2.0 (related to binaryFiles)
* BoundedDouble : 1.0.0
* PartialResult : 1.0.0
* StreamingContext, JavaStreamingContext
  * binaryRecordsStream : 1.2.0
* HiveContext
  * analyze : 1.2.0

Author: Sean Owen 

Closes #9396 from srowen/SPARK-11440.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6f81eae2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6f81eae2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6f81eae2

Branch: refs/heads/master
Commit: 6f81eae24f83df51a99d4bb2629dd7daadc01519
Parents: 81498dd
Author: Sean Owen 
Authored: Thu Nov 5 09:08:53 2015 +
Committer: Sean Owen 
Committed: Thu Nov 5 09:08:53 2015 +

--
 core/src/main/scala/org/apache/spark/SparkContext.scala   | 10 +-
 .../scala/org/apache/spark/api/java/JavaDoubleRDD.scala   |  7 ---
 .../scala/org/apache/spark/api/java/JavaPairRDD.scala |  9 -
 .../scala/org/apache/spark/api/java/JavaRDDLike.scala |  5 -
 .../org/apache/spark/api/java/JavaSparkContext.scala  |  7 ---
 .../org/apache/spark/api/python/PythonHadoopUtil.scala|  3 ---
 .../scala/org/apache/spark/input/PortableDataStream.scala |  2 --
 .../scala/org/apache/spark/partial/BoundedDouble.scala|  4 
 .../scala/org/apache/spark/partial/PartialResult.scala|  3 ---
 .../scala/org/apache/spark/rdd/DoubleRDDFunctions.scala   |  4 
 .../scala/org/apache/spark/rdd/PairRDDFunctions.scala |  7 ---
 core/src/main/scala/org/apache/spark/rdd/RDD.scala|  8 +---
 .../scala/org/apache/spark/sql/hive/HiveContext.scala |  2 --
 .../org/apache/spark/streaming/StreamingContext.scala |  3 ---
 .../spark/streaming/api/java/JavaStreamingContext.scala   |  3 ---
 15 files changed, 2 insertions(+), 75 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/6f81eae2/core/src/main/scala/org/apache/spark/SparkContext.scala
--
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala 
b/core/src/main/scala/org/apache/spark/SparkContext.scala
index a6857b4..7421821 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -45,7 +45,7 @@ import org.apache.hadoop.mapreduce.lib.input.{FileInputFormat 
=> NewFileInputFor
 
 import org.apache.mesos.MesosNativeLibrary
 
-import org.apache.spark.annotation.{DeveloperApi, Experimental}
+import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.deploy.{LocalSparkCluster, SparkHadoopUtil}
 import org.apache.spark.executor.{ExecutorEndpoint, TriggerThreadDump}
@@ -870,8 +870,6 @@ class SparkContext(config: SparkConf) extends Logging with 
ExecutorAllocationCli
   }
 
   /**
-   * :: Experimental ::
-   *
* Get an RDD for a Hadoop-readable dataset as PortableDataStream for each 
file
* (useful for binary data)
*
@@ -902,7 +900,6 @@ class SparkContext(config: SparkConf) extends Logging with 
ExecutorAllocationCli
* list of inputs.
* @param minPartitions A suggestion value of the minimal splitting number 
for input data.
*/
-  @Experimental
   def binaryFiles(
   path: String,
   minPartitions: Int = defaultMinPartitions): RDD[(String, 
PortableDataStream)] = withScope {
@@ -922,8 +919,6 @@ class SparkContext(config: SparkConf) extends Logging with 
ExecutorAllocationCli
   }
 
   /**
-   * :: Experimental ::
-   *
* Load data from a flat binary file, assuming the length of each record is 
constant.
*
* '''Note:''' We ensure that the byte array for each record in the 
resulting RDD
@@ -936,7 +931,6 @@ class SparkContext(config: SparkConf) extends Logging with 
ExecutorAllocationCli
*
* @return An RDD of data with values, represented as byte arrays
*/
-  @Experimental
   def binaryRecords(
   path: String,

spark git commit: [SPARK-11378][STREAMING] make StreamingContext.awaitTerminationOrTimeout return properly

2015-11-05 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 6f81eae24 -> 859dff56e


[SPARK-11378][STREAMING] make StreamingContext.awaitTerminationOrTimeout return 
properly

This adds a failing test checking that `awaitTerminationOrTimeout` returns the 
expected value, and then fixes that failing test with the addition of a 
`return`.

tdas zsxwing

Author: Nick Evans 

Closes #9336 from manygrams/fix_await_termination_or_timeout.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/859dff56
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/859dff56
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/859dff56

Branch: refs/heads/master
Commit: 859dff56eb0f8c63c86e7e900a12340c199e6247
Parents: 6f81eae
Author: Nick Evans 
Authored: Thu Nov 5 09:18:20 2015 +
Committer: Sean Owen 
Committed: Thu Nov 5 09:18:20 2015 +

--
 python/pyspark/streaming/context.py | 2 +-
 python/pyspark/streaming/tests.py   | 7 +++
 2 files changed, 8 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/859dff56/python/pyspark/streaming/context.py
--
diff --git a/python/pyspark/streaming/context.py 
b/python/pyspark/streaming/context.py
index 975c754..8be56c9 100644
--- a/python/pyspark/streaming/context.py
+++ b/python/pyspark/streaming/context.py
@@ -218,7 +218,7 @@ class StreamingContext(object):
 
 @param timeout: time to wait in seconds
 """
-self._jssc.awaitTerminationOrTimeout(int(timeout * 1000))
+return self._jssc.awaitTerminationOrTimeout(int(timeout * 1000))
 
 def stop(self, stopSparkContext=True, stopGraceFully=False):
 """

http://git-wip-us.apache.org/repos/asf/spark/blob/859dff56/python/pyspark/streaming/tests.py
--
diff --git a/python/pyspark/streaming/tests.py 
b/python/pyspark/streaming/tests.py
index f7fa481..1794796 100644
--- a/python/pyspark/streaming/tests.py
+++ b/python/pyspark/streaming/tests.py
@@ -596,6 +596,13 @@ class StreamingContextTests(PySparkStreamingTestCase):
 self.ssc = StreamingContext.getActiveOrCreate(None, setupFunc)
 self.assertTrue(self.setupCalled)
 
+def test_await_termination_or_timeout(self):
+self._add_input_stream()
+self.ssc.start()
+self.assertFalse(self.ssc.awaitTerminationOrTimeout(0.001))
+self.ssc.stop(False)
+self.assertTrue(self.ssc.awaitTerminationOrTimeout(0.001))
+
 
 class CheckpointTests(unittest.TestCase):
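
The fix is just a missing `return`, but it matters because callers rely on the Boolean to distinguish "timed out" from "terminated". The Scala API has the same contract; a hedged usage sketch (a bare context with no streams, just to show the call shape):

```scala
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

object AwaitTerminationSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[2]").setAppName("await-sketch")
    val ssc = new StreamingContext(conf, Seconds(1))
    // (a real job would define DStreams and call ssc.start() here)

    // Returns false if the timeout elapsed and true if the context terminated;
    // this is the value the Python wrapper now propagates to its caller.
    val terminated = ssc.awaitTerminationOrTimeout(2000L) // milliseconds
    if (!terminated) {
      println("Still running after 2s; stopping explicitly")
      ssc.stop(stopSparkContext = true, stopGracefully = false)
    }
  }
}
```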
 


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-11378][STREAMING] make StreamingContext.awaitTerminationOrTimeout return properly

2015-11-05 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-1.5 d31b312fc -> 9522dd23d


[SPARK-11378][STREAMING] make StreamingContext.awaitTerminationOrTimeout return 
properly

This adds a failing test checking that `awaitTerminationOrTimeout` returns the 
expected value, and then fixes that failing test with the addition of a 
`return`.

tdas zsxwing

Author: Nick Evans 

Closes #9336 from manygrams/fix_await_termination_or_timeout.

(cherry picked from commit 859dff56eb0f8c63c86e7e900a12340c199e6247)
Signed-off-by: Sean Owen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9522dd23
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9522dd23
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9522dd23

Branch: refs/heads/branch-1.5
Commit: 9522dd23d5b059c76f32b0a288389b03b6c971a8
Parents: d31b312
Author: Nick Evans 
Authored: Thu Nov 5 09:18:20 2015 +
Committer: Sean Owen 
Committed: Thu Nov 5 09:18:33 2015 +

--
 python/pyspark/streaming/context.py | 2 +-
 python/pyspark/streaming/tests.py   | 7 +++
 2 files changed, 8 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/9522dd23/python/pyspark/streaming/context.py
--
diff --git a/python/pyspark/streaming/context.py 
b/python/pyspark/streaming/context.py
index a8c9ffc..3a8f949 100644
--- a/python/pyspark/streaming/context.py
+++ b/python/pyspark/streaming/context.py
@@ -256,7 +256,7 @@ class StreamingContext(object):
 
 @param timeout: time to wait in seconds
 """
-self._jssc.awaitTerminationOrTimeout(int(timeout * 1000))
+return self._jssc.awaitTerminationOrTimeout(int(timeout * 1000))
 
 def stop(self, stopSparkContext=True, stopGraceFully=False):
 """

http://git-wip-us.apache.org/repos/asf/spark/blob/9522dd23/python/pyspark/streaming/tests.py
--
diff --git a/python/pyspark/streaming/tests.py b/python/pyspark/streaming/tests.py
index a8c7b51..824f356 100644
--- a/python/pyspark/streaming/tests.py
+++ b/python/pyspark/streaming/tests.py
@@ -585,6 +585,13 @@ class StreamingContextTests(PySparkStreamingTestCase):
 self.ssc = StreamingContext.getActiveOrCreate(None, setupFunc)
 self.assertTrue(self.setupCalled)
 
+def test_await_termination_or_timeout(self):
+self._add_input_stream()
+self.ssc.start()
+self.assertFalse(self.ssc.awaitTerminationOrTimeout(0.001))
+self.ssc.stop(False)
+self.assertTrue(self.ssc.awaitTerminationOrTimeout(0.001))
+
 
 class CheckpointTests(unittest.TestCase):
 





spark git commit: [SPARK-11449][CORE] PortableDataStream should be a factory

2015-11-05 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 859dff56e -> 7bdc92197


[SPARK-11449][CORE] PortableDataStream should be a factory

```PortableDataStream``` maintains some internal state. This makes it tricky to 
reuse a stream (one needs to call ```close``` on both the 
```PortableDataStream``` and the ```InputStream``` it produces).

This PR removes all state from ```PortableDataStream``` and effectively turns 
it into an ```InputStream```/```Array[Byte]``` factory. This makes the user 
responsible for managing the ```InputStream``` it returns.
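
For callers, the practical effect is owning the stream returned by ```open()```. A minimal sketch of that usage, assuming a local SparkContext and a hypothetical `data/bin` directory of binary files:

```scala
import java.io.DataInputStream

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.input.PortableDataStream

object PortableDataStreamUsage {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("pds-sketch").setMaster("local[2]"))

    // binaryFiles yields (path, PortableDataStream) pairs; the stream object is now
    // just a factory, so the caller opens and closes the real InputStream itself.
    val firstBytes = sc.binaryFiles("data/bin").map { case (path, pds: PortableDataStream) =>
      val in: DataInputStream = pds.open()   // the caller owns this stream
      try {
        (path, in.read())                    // read the first byte as an example
      } finally {
        in.close()                           // must be closed by the caller; pds.close() is now a deprecated no-op
      }
    }.collect()

    firstBytes.foreach(println)
    sc.stop()
  }
}
```

Alternatively, ```toArray()``` reads the whole file and closes the stream internally, so no cleanup is needed on the caller's side.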

cc srowen

Author: Herman van Hovell 

Closes #9417 from hvanhovell/SPARK-11449.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7bdc9219
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7bdc9219
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7bdc9219

Branch: refs/heads/master
Commit: 7bdc92197cce0edc0110dc9c2158e6e3f42c72ee
Parents: 859dff5
Author: Herman van Hovell 
Authored: Thu Nov 5 09:23:09 2015 +
Committer: Sean Owen 
Committed: Thu Nov 5 09:23:09 2015 +

--
 .../apache/spark/input/PortableDataStream.scala | 45 +++-
 1 file changed, 16 insertions(+), 29 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/7bdc9219/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala
--
diff --git a/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala b/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala
index 33e4ee0..280e7a5 100644
--- a/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala
+++ b/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala
@@ -21,7 +21,7 @@ import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, Da
 
 import scala.collection.JavaConverters._
 
-import com.google.common.io.ByteStreams
+import com.google.common.io.{Closeables, ByteStreams}
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.Path
 import org.apache.hadoop.mapreduce.{InputSplit, JobContext, RecordReader, TaskAttemptContext}
@@ -82,7 +82,6 @@ private[spark] abstract class StreamBasedRecordReader[T](
 if (!processed) {
   val fileIn = new PortableDataStream(split, context, index)
   value = parseStream(fileIn)
-  fileIn.close() // if it has not been open yet, close does nothing
   key = fileIn.getPath
   processed = true
   true
@@ -134,12 +133,6 @@ class PortableDataStream(
 index: Integer)
   extends Serializable {
 
-  // transient forces file to be reopened after being serialization
-  // it is also used for non-serializable classes
-
-  @transient private var fileIn: DataInputStream = null
-  @transient private var isOpen = false
-
   private val confBytes = {
 val baos = new ByteArrayOutputStream()
 SparkHadoopUtil.get.getConfigurationFromJobContext(context).
@@ -175,40 +168,34 @@ class PortableDataStream(
   }
 
   /**
-   * Create a new DataInputStream from the split and context
+   * Create a new DataInputStream from the split and context. The user of this method is responsible
+   * for closing the stream after usage.
*/
   def open(): DataInputStream = {
-if (!isOpen) {
-  val pathp = split.getPath(index)
-  val fs = pathp.getFileSystem(conf)
-  fileIn = fs.open(pathp)
-  isOpen = true
-}
-fileIn
+val pathp = split.getPath(index)
+val fs = pathp.getFileSystem(conf)
+fs.open(pathp)
   }
 
   /**
* Read the file as a byte array
*/
   def toArray(): Array[Byte] = {
-open()
-val innerBuffer = ByteStreams.toByteArray(fileIn)
-close()
-innerBuffer
+val stream = open()
+try {
+  ByteStreams.toByteArray(stream)
+} finally {
+  Closeables.close(stream, true)
+}
   }
 
   /**
-   * Close the file (if it is currently open)
+   * Closing the PortableDataStream is not needed anymore. The user either can use the
+   * PortableDataStream to get a DataInputStream (which the user needs to close after usage),
+   * or a byte array.
*/
+  @deprecated("Closing the PortableDataStream is not needed anymore.", "1.6.0")
   def close(): Unit = {
-if (isOpen) {
-  try {
-fileIn.close()
-isOpen = false
-  } catch {
-case ioe: java.io.IOException => // do nothing
-  }
-}
   }
 
   def getPath(): String = path





spark git commit: [SPARK-11506][MLLIB] Removed redundant operation in Online LDA implementation

2015-11-05 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 7bdc92197 -> a94671a02


[SPARK-11506][MLLIB] Removed redundant operation in Online LDA implementation

In file LDAOptimizer.scala:

line 441: since "idx" was never used, replaced the unnecessary zipWithIndex.foreach with foreach.

-  nonEmptyDocs.zipWithIndex.foreach { case ((_, termCounts: Vector), idx: Int) =>
+  nonEmptyDocs.foreach { case (_, termCounts: Vector) =>
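
The same simplification in isolation, as a small self-contained Scala sketch with made-up data:

```scala
object ZipWithIndexExample {
  def main(args: Array[String]): Unit = {
    // Made-up (docId, termCount) pairs standing in for nonEmptyDocs.
    val docs = Seq(("doc1", 3), ("doc2", 1), ("doc3", 2))

    // Before: zipWithIndex binds an index that is never used.
    docs.zipWithIndex.foreach { case ((id, count), idx) =>
      println(s"$id has $count terms")
    }

    // After: identical output, without building the intermediate (element, index) pairs.
    docs.foreach { case (id, count) =>
      println(s"$id has $count terms")
    }
  }
}
```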

Author: a1singh 

Closes #9456 from a1singh/master.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a94671a0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a94671a0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a94671a0

Branch: refs/heads/master
Commit: a94671a027c29bacea37f56b95eccb115638abff
Parents: 7bdc921
Author: a1singh 
Authored: Thu Nov 5 12:51:10 2015 +
Committer: Sean Owen 
Committed: Thu Nov 5 12:51:10 2015 +

--
 .../scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/a94671a0/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
index 38486e9..17c0609 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
@@ -438,7 +438,7 @@ final class OnlineLDAOptimizer extends LDAOptimizer {
 
   val stat = BDM.zeros[Double](k, vocabSize)
   var gammaPart = List[BDV[Double]]()
-  nonEmptyDocs.zipWithIndex.foreach { case ((_, termCounts: Vector), idx: Int) =>
+  nonEmptyDocs.foreach { case (_, termCounts: Vector) =>
 val ids: List[Int] = termCounts match {
   case v: DenseVector => (0 until v.size).toList
   case v: SparseVector => v.indices.toList





spark git commit: [SPARK-11511][STREAMING] Fix NPE when an InputDStream is not used

2015-11-06 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 253e87e8a -> cf69ce136


[SPARK-11511][STREAMING] Fix NPE when an InputDStream is not used

Just ignored `InputDStream`s that have null `rememberDuration` in 
`DStreamGraph.getMaxInputStreamRememberDuration`.
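
The guard in isolation, as a sketch over made-up `Duration` values (an `InputDStream` that was never hooked up to an output keeps a null `rememberDuration`):

```scala
import org.apache.spark.streaming.{Duration, Seconds}

object MaxRememberDuration {
  def main(args: Array[String]): Unit = {
    // One unused input contributes null, the others real durations (made-up values).
    val rememberDurations: Seq[Duration] = Seq(Seconds(20), null, Seconds(60))

    // Before the fix, maxBy(_.milliseconds) hit the null entry and threw an NPE;
    // filtering the nulls out first yields the largest real duration.
    val max = rememberDurations.filter(_ != null).maxBy(_.milliseconds)
    println(max) // the largest non-null duration, 60 seconds
  }
}
```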

Author: Shixiong Zhu 

Closes #9476 from zsxwing/SPARK-11511.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cf69ce13
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cf69ce13
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cf69ce13

Branch: refs/heads/master
Commit: cf69ce136590fea51843bc54f44f0f45c7d0ac36
Parents: 253e87e
Author: Shixiong Zhu 
Authored: Fri Nov 6 14:51:53 2015 +
Committer: Sean Owen 
Committed: Fri Nov 6 14:51:53 2015 +

--
 .../org/apache/spark/streaming/DStreamGraph.scala   |  3 ++-
 .../spark/streaming/StreamingContextSuite.scala | 16 
 2 files changed, 18 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/cf69ce13/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala
--
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala b/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala
index 1b0b789..7829f5e 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala
@@ -167,7 +167,8 @@ final private[streaming] class DStreamGraph extends Serializable with Logging {
* safe remember duration which can be used to perform cleanup operations.
*/
   def getMaxInputStreamRememberDuration(): Duration = {
-inputStreams.map { _.rememberDuration }.maxBy { _.milliseconds }
+// If an InputDStream is not used, its `rememberDuration` will be null and we can ignore them
+inputStreams.map(_.rememberDuration).filter(_ != null).maxBy(_.milliseconds)
   }
 
   @throws(classOf[IOException])

http://git-wip-us.apache.org/repos/asf/spark/blob/cf69ce13/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala
--
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala
index c7a8771..860fac2 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala
@@ -780,6 +780,22 @@ class StreamingContextSuite extends SparkFunSuite with BeforeAndAfter with Timeo
   "Please don't use queueStream when checkpointing is enabled."))
   }
 
+  test("Creating an InputDStream but not using it should not crash") {
+ssc = new StreamingContext(master, appName, batchDuration)
+val input1 = addInputStream(ssc)
+val input2 = addInputStream(ssc)
+val output = new TestOutputStream(input2)
+output.register()
+val batchCount = new BatchCounter(ssc)
+ssc.start()
+// Just wait for completing 2 batches to make sure it triggers
+// `DStream.getMaxInputStreamRememberDuration`
+batchCount.waitUntilBatchesCompleted(2, 1)
+// Throw the exception if crash
+ssc.awaitTerminationOrTimeout(1)
+ssc.stop()
+  }
+
   def addInputStream(s: StreamingContext): DStream[Int] = {
 val input = (1 to 100).map(i => 1 to i)
 val inputStream = new TestInputStream(s, input, 1)





spark git commit: [SPARK-11511][STREAMING] Fix NPE when an InputDStream is not used

2015-11-06 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 1cfad7d55 -> 0a430f04e


[SPARK-11511][STREAMING] Fix NPE when an InputDStream is not used

Just ignored `InputDStream`s that have null `rememberDuration` in 
`DStreamGraph.getMaxInputStreamRememberDuration`.

Author: Shixiong Zhu 

Closes #9476 from zsxwing/SPARK-11511.

(cherry picked from commit cf69ce136590fea51843bc54f44f0f45c7d0ac36)
Signed-off-by: Sean Owen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0a430f04
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0a430f04
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0a430f04

Branch: refs/heads/branch-1.6
Commit: 0a430f04eef3445fb0095adc806d91759eea5d32
Parents: 1cfad7d
Author: Shixiong Zhu 
Authored: Fri Nov 6 14:51:53 2015 +
Committer: Sean Owen 
Committed: Fri Nov 6 14:52:08 2015 +

--
 .../org/apache/spark/streaming/DStreamGraph.scala   |  3 ++-
 .../spark/streaming/StreamingContextSuite.scala | 16 
 2 files changed, 18 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/0a430f04/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala
--
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala b/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala
index 1b0b789..7829f5e 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala
@@ -167,7 +167,8 @@ final private[streaming] class DStreamGraph extends Serializable with Logging {
* safe remember duration which can be used to perform cleanup operations.
*/
   def getMaxInputStreamRememberDuration(): Duration = {
-inputStreams.map { _.rememberDuration }.maxBy { _.milliseconds }
+// If an InputDStream is not used, its `rememberDuration` will be null and we can ignore them
+inputStreams.map(_.rememberDuration).filter(_ != null).maxBy(_.milliseconds)
   }
 
   @throws(classOf[IOException])

http://git-wip-us.apache.org/repos/asf/spark/blob/0a430f04/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala
--
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala
index c7a8771..860fac2 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala
@@ -780,6 +780,22 @@ class StreamingContextSuite extends SparkFunSuite with BeforeAndAfter with Timeo
   "Please don't use queueStream when checkpointing is enabled."))
   }
 
+  test("Creating an InputDStream but not using it should not crash") {
+ssc = new StreamingContext(master, appName, batchDuration)
+val input1 = addInputStream(ssc)
+val input2 = addInputStream(ssc)
+val output = new TestOutputStream(input2)
+output.register()
+val batchCount = new BatchCounter(ssc)
+ssc.start()
+// Just wait for completing 2 batches to make sure it triggers
+// `DStream.getMaxInputStreamRememberDuration`
+batchCount.waitUntilBatchesCompleted(2, 1)
+// Throw the exception if crash
+ssc.awaitTerminationOrTimeout(1)
+ssc.stop()
+  }
+
   def addInputStream(s: StreamingContext): DStream[Int] = {
 val input = (1 to 100).map(i => 1 to i)
 val inputStream = new TestInputStream(s, input, 1)




