spark git commit: [SPARK-8468] [ML] Take the negative of some metrics in RegressionEvaluator to get correct cross validation

2015-06-20 Thread jkbradley
Repository: spark
Updated Branches:
  refs/heads/master 1b6fe9b1a -> 0b8995168


[SPARK-8468] [ML] Take the negative of some metrics in RegressionEvaluator to 
get correct cross validation

JIRA: https://issues.apache.org/jira/browse/SPARK-8468

Author: Liang-Chi Hsieh vii...@gmail.com

Closes #6905 from viirya/cv_min and squashes the following commits:

930d3db [Liang-Chi Hsieh] Fix python unit test and add document.
d632135 [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into 
cv_min
16e3b2c [Liang-Chi Hsieh] Take the negative instead of reciprocal.
c3dd8d9 [Liang-Chi Hsieh] For comments.
b5f52c1 [Liang-Chi Hsieh] Add param to CrossValidator for choosing whether to 
maximize evaulation value.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0b899516
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0b899516
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0b899516

Branch: refs/heads/master
Commit: 0b8995168f02bb55afb0a5b7dbdb941c3c89cb4c
Parents: 1b6fe9b
Author: Liang-Chi Hsieh vii...@gmail.com
Authored: Sat Jun 20 13:01:59 2015 -0700
Committer: Joseph K. Bradley jos...@databricks.com
Committed: Sat Jun 20 13:01:59 2015 -0700

--
 .../ml/evaluation/RegressionEvaluator.scala | 10 --
 .../org/apache/spark/ml/param/params.scala  |  2 +-
 .../evaluation/RegressionEvaluatorSuite.scala   |  4 +--
 .../spark/ml/tuning/CrossValidatorSuite.scala   | 35 ++--
 python/pyspark/ml/evaluation.py |  8 +++--
 5 files changed, 48 insertions(+), 11 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/0b899516/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala
--
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala 
b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala
index 8670e96..01c000b 100644
--- 
a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala
@@ -37,6 +37,10 @@ final class RegressionEvaluator(override val uid: String)
 
   /**
* param for metric name in evaluation (supports `rmse` (default), 
`mse`, `r2`, and `mae`)
+   *
+   * Because we will maximize evaluation value (ref: `CrossValidator`),
+   * when we evaluate a metric that is needed to minimize (e.g., `rmse`, 
`mse`, `mae`),
+   * we take and output the negative of this metric.
* @group param
*/
   val metricName: Param[String] = {
@@ -70,13 +74,13 @@ final class RegressionEvaluator(override val uid: String)
 val metrics = new RegressionMetrics(predictionAndLabels)
 val metric = $(metricName) match {
   case "rmse" =>
-metrics.rootMeanSquaredError
+-metrics.rootMeanSquaredError
   case "mse" =>
-metrics.meanSquaredError
+-metrics.meanSquaredError
   case "r2" =>
 metrics.r2
   case "mae" =>
-metrics.meanAbsoluteError
+-metrics.meanAbsoluteError
 }
 metric
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/0b899516/mllib/src/main/scala/org/apache/spark/ml/param/params.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala 
b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala
index 15ebad8..50c0d85 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala
@@ -297,7 +297,7 @@ class DoubleArrayParam(parent: Params, name: String, doc: 
String, isValid: Array
 
 /**
  * :: Experimental ::
- * A param amd its value.
+ * A param and its value.
  */
 @Experimental
 case class ParamPair[T](param: Param[T], value: T) {

http://git-wip-us.apache.org/repos/asf/spark/blob/0b899516/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala
--
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala
index aa722da..5b20378 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala
@@ -63,7 +63,7 @@ class RegressionEvaluatorSuite extends SparkFunSuite with 
MLlibTestSparkContext
 
 // default = rmse
 val evaluator = new RegressionEvaluator()
-assert(evaluator.evaluate(predictions) ~== 0.1019382 absTol 0.001)
+assert(evaluator.evaluate(predictions) ~== -0.1019382 absTol 0.001)
 
 

spark git commit: [SPARK-8468] [ML] Take the negative of some metrics in RegressionEvaluator to get correct cross validation

2015-06-20 Thread jkbradley
Repository: spark
Updated Branches:
  refs/heads/branch-1.4 9b16508d2 -> fe59a4a5f


[SPARK-8468] [ML] Take the negative of some metrics in RegressionEvaluator to 
get correct cross validation

JIRA: https://issues.apache.org/jira/browse/SPARK-8468

Author: Liang-Chi Hsieh vii...@gmail.com

Closes #6905 from viirya/cv_min and squashes the following commits:

930d3db [Liang-Chi Hsieh] Fix python unit test and add document.
d632135 [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into 
cv_min
16e3b2c [Liang-Chi Hsieh] Take the negative instead of reciprocal.
c3dd8d9 [Liang-Chi Hsieh] For comments.
b5f52c1 [Liang-Chi Hsieh] Add param to CrossValidator for choosing whether to 
maximize evaulation value.

(cherry picked from commit 0b8995168f02bb55afb0a5b7dbdb941c3c89cb4c)
Signed-off-by: Joseph K. Bradley jos...@databricks.com


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fe59a4a5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fe59a4a5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fe59a4a5

Branch: refs/heads/branch-1.4
Commit: fe59a4a5f5d7d48747a4cd0b0464df0ff93271ef
Parents: 9b16508
Author: Liang-Chi Hsieh vii...@gmail.com
Authored: Sat Jun 20 13:01:59 2015 -0700
Committer: Joseph K. Bradley jos...@databricks.com
Committed: Sat Jun 20 13:02:14 2015 -0700

--
 .../ml/evaluation/RegressionEvaluator.scala | 10 --
 .../org/apache/spark/ml/param/params.scala  |  2 +-
 .../evaluation/RegressionEvaluatorSuite.scala   |  4 +--
 .../spark/ml/tuning/CrossValidatorSuite.scala   | 35 ++--
 python/pyspark/ml/evaluation.py |  8 +++--
 5 files changed, 48 insertions(+), 11 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/fe59a4a5/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala
--
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala 
b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala
index 8670e96..01c000b 100644
--- 
a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala
@@ -37,6 +37,10 @@ final class RegressionEvaluator(override val uid: String)
 
   /**
* param for metric name in evaluation (supports `rmse` (default), 
`mse`, `r2`, and `mae`)
+   *
+   * Because we will maximize evaluation value (ref: `CrossValidator`),
+   * when we evaluate a metric that is needed to minimize (e.g., `rmse`, 
`mse`, `mae`),
+   * we take and output the negative of this metric.
* @group param
*/
   val metricName: Param[String] = {
@@ -70,13 +74,13 @@ final class RegressionEvaluator(override val uid: String)
 val metrics = new RegressionMetrics(predictionAndLabels)
 val metric = $(metricName) match {
   case "rmse" =>
-metrics.rootMeanSquaredError
+-metrics.rootMeanSquaredError
   case "mse" =>
-metrics.meanSquaredError
+-metrics.meanSquaredError
   case "r2" =>
 metrics.r2
   case "mae" =>
-metrics.meanAbsoluteError
+-metrics.meanAbsoluteError
 }
 metric
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/fe59a4a5/mllib/src/main/scala/org/apache/spark/ml/param/params.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala 
b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala
index 15ebad8..50c0d85 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala
@@ -297,7 +297,7 @@ class DoubleArrayParam(parent: Params, name: String, doc: 
String, isValid: Array
 
 /**
  * :: Experimental ::
- * A param amd its value.
+ * A param and its value.
  */
 @Experimental
 case class ParamPair[T](param: Param[T], value: T) {

http://git-wip-us.apache.org/repos/asf/spark/blob/fe59a4a5/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala
--
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala
index aa722da..5b20378 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala
@@ -63,7 +63,7 @@ class RegressionEvaluatorSuite extends SparkFunSuite with 
MLlibTestSparkContext
 
 // default = rmse
 val evaluator = new RegressionEvaluator()
-

spark git commit: [SPARK-8301] [SQL] Improve UTF8String substring/startsWith/endsWith/contains performance

2015-06-20 Thread davies
Repository: spark
Updated Branches:
  refs/heads/master 004f57374 -> 41ab2853f


[SPARK-8301] [SQL] Improve UTF8String substring/startsWith/endsWith/contains 
performance

Jira: https://issues.apache.org/jira/browse/SPARK-8301

Added the private method startsWith(prefix, offset) to implement startsWith, 
endsWith and contains without copying the array

I hope that the component SQL is still correct. I copied it from the Jira 
ticket.

Author: Tarek Auel tarek.a...@googlemail.com
Author: Tarek Auel tarek.a...@gmail.com

Closes #6804 from tarekauel/SPARK-8301 and squashes the following commits:

f5d6b9a [Tarek Auel] fixed parentheses and annotation
6d7b068 [Tarek Auel] [SPARK-8301] removed null checks
9ca0473 [Tarek Auel] [SPARK-8301] removed null checks
1c327eb [Tarek Auel] [SPARK-8301] removed new
9f17cc8 [Tarek Auel] [SPARK-8301] fixed conversion byte to string in codegen
3a0040f [Tarek Auel] [SPARK-8301] changed call of UTF8String.set to 
UTF8String.from
e4530d2 [Tarek Auel] [SPARK-8301] changed call of UTF8String.set to 
UTF8String.from
a5f853a [Tarek Auel] [SPARK-8301] changed visibility of set to protected. 
Changed annotation of bytes from Nullable to Nonnull
d2fb05f [Tarek Auel] [SPARK-8301] added additional null checks
79cb55b [Tarek Auel] [SPARK-8301] null check. Added test cases for null check.
b17909e [Tarek Auel] [SPARK-8301] removed unnecessary copying of UTF8String. 
Added a private function startsWith(prefix, offset) to implement the check for 
startsWith, endsWith and contains.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/41ab2853
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/41ab2853
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/41ab2853

Branch: refs/heads/master
Commit: 41ab2853f41de2abc415358b69671f37a0653533
Parents: 004f573
Author: Tarek Auel tarek.a...@googlemail.com
Authored: Sat Jun 20 20:03:59 2015 -0700
Committer: Davies Liu dav...@databricks.com
Committed: Sat Jun 20 20:03:59 2015 -0700

--
 .../sql/catalyst/expressions/UnsafeRow.java |  4 +--
 .../spark/sql/catalyst/expressions/Cast.scala   |  6 ++--
 .../apache/spark/unsafe/types/UTF8String.java   | 30 
 3 files changed, 22 insertions(+), 18 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/41ab2853/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
--
diff --git 
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
 
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
index c4b7f84..ed04d2e 100644
--- 
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
+++ 
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
@@ -312,7 +312,6 @@ public final class UnsafeRow extends BaseMutableRow {
 
   public UTF8String getUTF8String(int i) {
 assertIndexIsValid(i);
-final UTF8String str = new UTF8String();
 final long offsetToStringSize = getLong(i);
 final int stringSizeInBytes =
   (int) PlatformDependent.UNSAFE.getLong(baseObject, baseOffset + 
offsetToStringSize);
@@ -324,8 +323,7 @@ public final class UnsafeRow extends BaseMutableRow {
   PlatformDependent.BYTE_ARRAY_OFFSET,
   stringSizeInBytes
 );
-str.set(strBytes);
-return str;
+return UTF8String.fromBytes(strBytes);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/spark/blob/41ab2853/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index b20086b..ad920f2 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -438,17 +438,17 @@ case class Cast(child: Expression, dataType: DataType) 
extends UnaryExpression w
 
   case (BinaryType, StringType) =>
 defineCodeGen (ctx, ev, c =>
-  s"new ${ctx.stringType}().set($c)")
+  s"${ctx.stringType}.fromBytes($c)")
   case (DateType, StringType) =>
 defineCodeGen(ctx, ev, c =>
-  s"new ${ctx.stringType}().set(
+  s"${ctx.stringType}.fromString(
 org.apache.spark.sql.catalyst.util.DateUtils.toString($c)))
   // Special handling required for timestamps in hive test cases since the 
toString function
   // does not match the expected output.
   case (TimestampType, StringType) =>
 super.genCode(ctx, ev)
  

spark git commit: [SPARK-8422] [BUILD] [PROJECT INFRA] Add a module abstraction to dev/run-tests

2015-06-20 Thread davies
Repository: spark
Updated Branches:
  refs/heads/master 0b8995168 -> 7a3c424ec


[SPARK-8422] [BUILD] [PROJECT INFRA] Add a module abstraction to dev/run-tests

This patch builds upon #5694 to add a 'module' abstraction to the 
`dev/run-tests` script which groups together the per-module test logic, 
including the mapping from file paths to modules, the mapping from modules to 
test goals and build profiles, and the dependencies / relationships between 
modules.

This refactoring makes it much easier to increase the granularity of test 
modules, which will let us skip even more tests.  It's also a prerequisite for 
other changes that will reduce test time, such as running subsets of the Python 
tests based on which files / modules have changed.

This patch also adds doctests for the new graph traversal / change mapping code.

Author: Josh Rosen joshro...@databricks.com

Closes #6866 from JoshRosen/more-dev-run-tests-refactoring and squashes the 
following commits:

75de450 [Josh Rosen] Use module system to determine which build profiles to 
enable.
4224da5 [Josh Rosen] Add documentation to Module.
a86a953 [Josh Rosen] Clean up modules; add new modules for streaming external 
projects
e46539f [Josh Rosen] Fix camel-cased endswith()
35a3052 [Josh Rosen] Enable Hive tests when running all tests
df10e23 [Josh Rosen] update to reflect fact that no module depends on root
3670d50 [Josh Rosen] mllib should depend on streaming
dc6f1c6 [Josh Rosen] Use changed files' extensions to decide whether to run 
style checks
7092d3e [Josh Rosen] Skip SBT tests if no test goals are specified
43a0ced [Josh Rosen] Minor fixes
3371441 [Josh Rosen] Test everything if nothing has changed (needed for non-PRB 
builds)
37f3fb3 [Josh Rosen] Remove doc profiles option, since it's not actually needed 
(see #6865)
f53864b [Josh Rosen] Finish integrating module changes
f0249bd [Josh Rosen] WIP


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7a3c424e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7a3c424e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7a3c424e

Branch: refs/heads/master
Commit: 7a3c424ecf815b9d5e06e222dd875e5a31a26400
Parents: 0b89951
Author: Josh Rosen joshro...@databricks.com
Authored: Sat Jun 20 16:04:45 2015 -0700
Committer: Davies Liu dav...@databricks.com
Committed: Sat Jun 20 16:05:54 2015 -0700

--
 dev/run-tests.py | 567 --
 1 file changed, 411 insertions(+), 156 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/7a3c424e/dev/run-tests.py
--
diff --git a/dev/run-tests.py b/dev/run-tests.py
index c64c71f..2cccfed 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -17,6 +17,7 @@
 # limitations under the License.
 #
 
+import itertools
 import os
 import re
 import sys
@@ -28,6 +29,361 @@ SPARK_HOME = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..")
 USER_HOME = os.environ.get("HOME")
 
 
+# 
-
+# Test module definitions and functions for traversing module dependency graph
+# 
-
+
+
+all_modules = []
+
+
+class Module(object):
+
+A module is the basic abstraction in our test runner script. Each module 
consists of a set of
+source files, a set of test commands, and a set of dependencies on other 
modules. We use modules
+to define a dependency graph that lets determine which tests to run based 
on which files have
+changed.
+
+
+def __init__(self, name, dependencies, source_file_regexes, 
build_profile_flags=(),
+ sbt_test_goals=(), should_run_python_tests=False, 
should_run_r_tests=False):
+
+Define a new module.
+
+:param name: A short module name, for display in logging and error 
messages.
+:param dependencies: A set of dependencies for this module. This 
should only include direct
+dependencies; transitive dependencies are resolved automatically.
+:param source_file_regexes: a set of regexes that match source files 
belonging to this
+module. These regexes are applied by attempting to match at the 
beginning of the
+filename strings.
+:param build_profile_flags: A set of profile flags that should be 
passed to Maven or SBT in
+order to build and test this module (e.g. '-PprofileName').
+:param sbt_test_goals: A set of SBT test goals for testing this module.
+:param should_run_python_tests: If true, changes in this module will 
trigger Python tests.
+For now, this has the effect of causing 

spark git commit: [SPARK-8495] [SPARKR] Add a `.lintr` file to validate the SparkR files and the `lint-r` script

2015-06-20 Thread shivaram
Repository: spark
Updated Branches:
  refs/heads/master 7a3c424ec -> 004f57374


[SPARK-8495] [SPARKR] Add a `.lintr` file to validate the SparkR files and the 
`lint-r` script

Thank Shivaram Venkataraman for your support. This is a prototype script to 
validate the R files.

Author: Yu ISHIKAWA yuu.ishik...@gmail.com

Closes #6922 from yu-iskw/SPARK-6813 and squashes the following commits:

c1ffe6b [Yu ISHIKAWA] Modify to save result to a log file and add a rule to 
validate
5520806 [Yu ISHIKAWA] Exclude the .lintr file not to check Apache lincence
8f94680 [Yu ISHIKAWA] [SPARK-8495][SparkR] Add a `.lintr` file to validate the 
SparkR files and the `lint-r` script


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/004f5737
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/004f5737
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/004f5737

Branch: refs/heads/master
Commit: 004f57374b98c4df32d9f1e19221f68e92639a49
Parents: 7a3c424
Author: Yu ISHIKAWA yuu.ishik...@gmail.com
Authored: Sat Jun 20 16:10:14 2015 -0700
Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu
Committed: Sat Jun 20 16:10:14 2015 -0700

--
 .gitignore|  1 +
 .rat-excludes |  1 +
 R/pkg/.lintr  |  2 ++
 dev/lint-r| 30 ++
 dev/lint-r.R  | 29 +
 5 files changed, 63 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/004f5737/.gitignore
--
diff --git a/.gitignore b/.gitignore
index 3624d12..debad77 100644
--- a/.gitignore
+++ b/.gitignore
@@ -66,6 +66,7 @@ scalastyle-output.xml
 R-unit-tests.log
 R/unit-tests.out
 python/lib/pyspark.zip
+lint-r-report.log
 
 # For Hive
 metastore_db/

http://git-wip-us.apache.org/repos/asf/spark/blob/004f5737/.rat-excludes
--
diff --git a/.rat-excludes b/.rat-excludes
index aa008e6..c24667c 100644
--- a/.rat-excludes
+++ b/.rat-excludes
@@ -86,3 +86,4 @@ local-1430917381535_2
 DESCRIPTION
 NAMESPACE
 test_support/*
+.lintr

http://git-wip-us.apache.org/repos/asf/spark/blob/004f5737/R/pkg/.lintr
--
diff --git a/R/pkg/.lintr b/R/pkg/.lintr
new file mode 100644
index 000..b10ebd3
--- /dev/null
+++ b/R/pkg/.lintr
@@ -0,0 +1,2 @@
+linters: with_defaults(line_length_linter(100), camel_case_linter = NULL)
+exclusions: list("inst/profile/general.R" = 1, "inst/profile/shell.R")

http://git-wip-us.apache.org/repos/asf/spark/blob/004f5737/dev/lint-r
--
diff --git a/dev/lint-r b/dev/lint-r
new file mode 100755
index 000..7d5f4cd
--- /dev/null
+++ b/dev/lint-r
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the License); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )"
+SPARK_ROOT_DIR="$(dirname $SCRIPT_DIR)"
+LINT_R_REPORT_FILE_NAME="$SPARK_ROOT_DIR/dev/lint-r-report.log"
+
+
+if ! type "Rscript" > /dev/null; then
+  echo "ERROR: You should install R"
+  exit
+fi
+
+`which Rscript` --vanilla "$SPARK_ROOT_DIR/dev/lint-r.R" "$SPARK_ROOT_DIR" | tee "$LINT_R_REPORT_FILE_NAME"

http://git-wip-us.apache.org/repos/asf/spark/blob/004f5737/dev/lint-r.R
--
diff --git a/dev/lint-r.R b/dev/lint-r.R
new file mode 100644
index 000..dcb1a18
--- /dev/null
+++ b/dev/lint-r.R
@@ -0,0 +1,29 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the License); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the