spark git commit: [SPARK-20596][ML][TEST] Consolidate and improve ALS recommendAll test cases

2017-05-08 Thread mlnick
Repository: spark
Updated Branches:
  refs/heads/master 15526653a -> 58518d070


[SPARK-20596][ML][TEST] Consolidate and improve ALS recommendAll test cases

Existing test cases for `recommendForAllX` methods (added in
[SPARK-19535](https://issues.apache.org/jira/browse/SPARK-19535)) test
`k < num items` and `k = num items`. Technically we should also test that
`k > num items` returns the same results as `k = num items`.

## How was this patch tested?

Updated existing unit tests.

Author: Nick Pentreath 

Closes #17860 from MLnick/SPARK-20596-als-rec-tests.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/58518d07
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/58518d07
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/58518d07

Branch: refs/heads/master
Commit: 58518d070777fc0665c4d02bad8adf910807df98
Parents: 1552665
Author: Nick Pentreath 
Authored: Mon May 8 12:45:00 2017 +0200
Committer: Nick Pentreath 
Committed: Mon May 8 12:45:00 2017 +0200

--
 .../spark/ml/recommendation/ALSSuite.scala  | 63 
 1 file changed, 25 insertions(+), 38 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/58518d07/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
--
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala 
b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
index 7574af3..9d31e79 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
@@ -671,58 +671,45 @@ class ALSSuite
   .setItemCol("item")
   }
 
-  test("recommendForAllUsers with k < num_items") {
-val topItems = getALSModel.recommendForAllUsers(2)
-assert(topItems.count() == 3)
-assert(topItems.columns.contains("user"))
-
-val expected = Map(
-  0 -> Array((3, 54f), (4, 44f)),
-  1 -> Array((3, 39f), (5, 33f)),
-  2 -> Array((3, 51f), (5, 45f))
-)
-checkRecommendations(topItems, expected, "item")
-  }
-
-  test("recommendForAllUsers with k = num_items") {
-val topItems = getALSModel.recommendForAllUsers(4)
-assert(topItems.count() == 3)
-assert(topItems.columns.contains("user"))
-
+  test("recommendForAllUsers with k <, = and > num_items") {
+val model = getALSModel
+val numUsers = model.userFactors.count
+val numItems = model.itemFactors.count
 val expected = Map(
   0 -> Array((3, 54f), (4, 44f), (5, 42f), (6, 28f)),
   1 -> Array((3, 39f), (5, 33f), (4, 26f), (6, 16f)),
   2 -> Array((3, 51f), (5, 45f), (4, 30f), (6, 18f))
 )
-checkRecommendations(topItems, expected, "item")
-  }
 
-  test("recommendForAllItems with k < num_users") {
-val topUsers = getALSModel.recommendForAllItems(2)
-assert(topUsers.count() == 4)
-assert(topUsers.columns.contains("item"))
-
-val expected = Map(
-  3 -> Array((0, 54f), (2, 51f)),
-  4 -> Array((0, 44f), (2, 30f)),
-  5 -> Array((2, 45f), (0, 42f)),
-  6 -> Array((0, 28f), (2, 18f))
-)
-checkRecommendations(topUsers, expected, "user")
+Seq(2, 4, 6).foreach { k =>
+  val n = math.min(k, numItems).toInt
+  val expectedUpToN = expected.mapValues(_.slice(0, n))
+  val topItems = model.recommendForAllUsers(k)
+  assert(topItems.count() == numUsers)
+  assert(topItems.columns.contains("user"))
+  checkRecommendations(topItems, expectedUpToN, "item")
+}
   }
 
-  test("recommendForAllItems with k = num_users") {
-val topUsers = getALSModel.recommendForAllItems(3)
-assert(topUsers.count() == 4)
-assert(topUsers.columns.contains("item"))
-
+  test("recommendForAllItems with k <, = and > num_users") {
+val model = getALSModel
+val numUsers = model.userFactors.count
+val numItems = model.itemFactors.count
 val expected = Map(
   3 -> Array((0, 54f), (2, 51f), (1, 39f)),
   4 -> Array((0, 44f), (2, 30f), (1, 26f)),
   5 -> Array((2, 45f), (0, 42f), (1, 33f)),
   6 -> Array((0, 28f), (2, 18f), (1, 16f))
 )
-checkRecommendations(topUsers, expected, "user")
+
+Seq(2, 3, 4).foreach { k =>
+  val n = math.min(k, numUsers).toInt
+  val expectedUpToN = expected.mapValues(_.slice(0, n))
+  val topUsers = getALSModel.recommendForAllItems(k)
+  assert(topUsers.count() == numItems)
+  assert(topUsers.columns.contains("item"))
+  checkRecommendations(topUsers, expectedUpToN, "user")
+}
   }
 
   private def checkRecommendations(


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commi

spark git commit: [SPARK-20596][ML][TEST] Consolidate and improve ALS recommendAll test cases

2017-05-08 Thread mlnick
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 d8a5a0d34 -> 7b9d05ad0


[SPARK-20596][ML][TEST] Consolidate and improve ALS recommendAll test cases

Existing test cases for `recommendForAllX` methods (added in
[SPARK-19535](https://issues.apache.org/jira/browse/SPARK-19535)) test
`k < num items` and `k = num items`. Technically we should also test that
`k > num items` returns the same results as `k = num items`.

## How was this patch tested?

Updated existing unit tests.

Author: Nick Pentreath 

Closes #17860 from MLnick/SPARK-20596-als-rec-tests.

(cherry picked from commit 58518d070777fc0665c4d02bad8adf910807df98)
Signed-off-by: Nick Pentreath 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7b9d05ad
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7b9d05ad
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7b9d05ad

Branch: refs/heads/branch-2.2
Commit: 7b9d05ad00455daa53ae4ef1a602a6c64c2c95a4
Parents: d8a5a0d
Author: Nick Pentreath 
Authored: Mon May 8 12:45:00 2017 +0200
Committer: Nick Pentreath 
Committed: Mon May 8 12:45:17 2017 +0200

--
 .../spark/ml/recommendation/ALSSuite.scala  | 63 
 1 file changed, 25 insertions(+), 38 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/7b9d05ad/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
--
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala 
b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
index 7574af3..9d31e79 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
@@ -671,58 +671,45 @@ class ALSSuite
   .setItemCol("item")
   }
 
-  test("recommendForAllUsers with k < num_items") {
-val topItems = getALSModel.recommendForAllUsers(2)
-assert(topItems.count() == 3)
-assert(topItems.columns.contains("user"))
-
-val expected = Map(
-  0 -> Array((3, 54f), (4, 44f)),
-  1 -> Array((3, 39f), (5, 33f)),
-  2 -> Array((3, 51f), (5, 45f))
-)
-checkRecommendations(topItems, expected, "item")
-  }
-
-  test("recommendForAllUsers with k = num_items") {
-val topItems = getALSModel.recommendForAllUsers(4)
-assert(topItems.count() == 3)
-assert(topItems.columns.contains("user"))
-
+  test("recommendForAllUsers with k <, = and > num_items") {
+val model = getALSModel
+val numUsers = model.userFactors.count
+val numItems = model.itemFactors.count
 val expected = Map(
   0 -> Array((3, 54f), (4, 44f), (5, 42f), (6, 28f)),
   1 -> Array((3, 39f), (5, 33f), (4, 26f), (6, 16f)),
   2 -> Array((3, 51f), (5, 45f), (4, 30f), (6, 18f))
 )
-checkRecommendations(topItems, expected, "item")
-  }
 
-  test("recommendForAllItems with k < num_users") {
-val topUsers = getALSModel.recommendForAllItems(2)
-assert(topUsers.count() == 4)
-assert(topUsers.columns.contains("item"))
-
-val expected = Map(
-  3 -> Array((0, 54f), (2, 51f)),
-  4 -> Array((0, 44f), (2, 30f)),
-  5 -> Array((2, 45f), (0, 42f)),
-  6 -> Array((0, 28f), (2, 18f))
-)
-checkRecommendations(topUsers, expected, "user")
+Seq(2, 4, 6).foreach { k =>
+  val n = math.min(k, numItems).toInt
+  val expectedUpToN = expected.mapValues(_.slice(0, n))
+  val topItems = model.recommendForAllUsers(k)
+  assert(topItems.count() == numUsers)
+  assert(topItems.columns.contains("user"))
+  checkRecommendations(topItems, expectedUpToN, "item")
+}
   }
 
-  test("recommendForAllItems with k = num_users") {
-val topUsers = getALSModel.recommendForAllItems(3)
-assert(topUsers.count() == 4)
-assert(topUsers.columns.contains("item"))
-
+  test("recommendForAllItems with k <, = and > num_users") {
+val model = getALSModel
+val numUsers = model.userFactors.count
+val numItems = model.itemFactors.count
 val expected = Map(
   3 -> Array((0, 54f), (2, 51f), (1, 39f)),
   4 -> Array((0, 44f), (2, 30f), (1, 26f)),
   5 -> Array((2, 45f), (0, 42f), (1, 33f)),
   6 -> Array((0, 28f), (2, 18f), (1, 16f))
 )
-checkRecommendations(topUsers, expected, "user")
+
+Seq(2, 3, 4).foreach { k =>
+  val n = math.min(k, numUsers).toInt
+  val expectedUpToN = expected.mapValues(_.slice(0, n))
+  val topUsers = getALSModel.recommendForAllItems(k)
+  assert(topUsers.count() == numItems)
+  assert(topUsers.columns.contains("item"))
+  checkRecommendations(topUsers, expectedUpToN, "user")
+}
   }
 
   private def checkRecommendations(