spark git commit: [SPARK-20596][ML][TEST] Consolidate and improve ALS recommendAll test cases
Repository: spark Updated Branches: refs/heads/master 15526653a -> 58518d070 [SPARK-20596][ML][TEST] Consolidate and improve ALS recommendAll test cases Existing test cases for `recommendForAllX` methods (added in [SPARK-19535](https://issues.apache.org/jira/browse/SPARK-19535)) test `k < num items` and `k = num items`. Technically we should also test that `k > num items` returns the same results as `k = num items`. ## How was this patch tested? Updated existing unit tests. Author: Nick Pentreath Closes #17860 from MLnick/SPARK-20596-als-rec-tests. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/58518d07 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/58518d07 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/58518d07 Branch: refs/heads/master Commit: 58518d070777fc0665c4d02bad8adf910807df98 Parents: 1552665 Author: Nick Pentreath Authored: Mon May 8 12:45:00 2017 +0200 Committer: Nick Pentreath Committed: Mon May 8 12:45:00 2017 +0200 -- .../spark/ml/recommendation/ALSSuite.scala | 63 1 file changed, 25 insertions(+), 38 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/58518d07/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala -- diff --git a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala index 7574af3..9d31e79 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala @@ -671,58 +671,45 @@ class ALSSuite .setItemCol("item") } - test("recommendForAllUsers with k < num_items") { -val topItems = getALSModel.recommendForAllUsers(2) -assert(topItems.count() == 3) -assert(topItems.columns.contains("user")) - -val expected = Map( - 0 -> Array((3, 54f), (4, 44f)), - 1 -> Array((3, 39f), (5, 33f)), - 2 -> Array((3, 51f), (5, 45f)) -) -checkRecommendations(topItems, expected, "item") - } - - test("recommendForAllUsers with k = num_items") { -val topItems = getALSModel.recommendForAllUsers(4) -assert(topItems.count() == 3) -assert(topItems.columns.contains("user")) - + test("recommendForAllUsers with k <, = and > num_items") { +val model = getALSModel +val numUsers = model.userFactors.count +val numItems = model.itemFactors.count val expected = Map( 0 -> Array((3, 54f), (4, 44f), (5, 42f), (6, 28f)), 1 -> Array((3, 39f), (5, 33f), (4, 26f), (6, 16f)), 2 -> Array((3, 51f), (5, 45f), (4, 30f), (6, 18f)) ) -checkRecommendations(topItems, expected, "item") - } - test("recommendForAllItems with k < num_users") { -val topUsers = getALSModel.recommendForAllItems(2) -assert(topUsers.count() == 4) -assert(topUsers.columns.contains("item")) - -val expected = Map( - 3 -> Array((0, 54f), (2, 51f)), - 4 -> Array((0, 44f), (2, 30f)), - 5 -> Array((2, 45f), (0, 42f)), - 6 -> Array((0, 28f), (2, 18f)) -) -checkRecommendations(topUsers, expected, "user") +Seq(2, 4, 6).foreach { k => + val n = math.min(k, numItems).toInt + val expectedUpToN = expected.mapValues(_.slice(0, n)) + val topItems = model.recommendForAllUsers(k) + assert(topItems.count() == numUsers) + assert(topItems.columns.contains("user")) + checkRecommendations(topItems, expectedUpToN, "item") +} } - test("recommendForAllItems with k = num_users") { -val topUsers = getALSModel.recommendForAllItems(3) -assert(topUsers.count() == 4) -assert(topUsers.columns.contains("item")) - + test("recommendForAllItems with k <, = and > num_users") { +val model = getALSModel +val numUsers = model.userFactors.count +val numItems = model.itemFactors.count val expected = Map( 3 -> Array((0, 54f), (2, 51f), (1, 39f)), 4 -> Array((0, 44f), (2, 30f), (1, 26f)), 5 -> Array((2, 45f), (0, 42f), (1, 33f)), 6 -> Array((0, 28f), (2, 18f), (1, 16f)) ) -checkRecommendations(topUsers, expected, "user") + +Seq(2, 3, 4).foreach { k => + val n = math.min(k, numUsers).toInt + val expectedUpToN = expected.mapValues(_.slice(0, n)) + val topUsers = getALSModel.recommendForAllItems(k) + assert(topUsers.count() == numItems) + assert(topUsers.columns.contains("item")) + checkRecommendations(topUsers, expectedUpToN, "user") +} } private def checkRecommendations( - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commi
spark git commit: [SPARK-20596][ML][TEST] Consolidate and improve ALS recommendAll test cases
Repository: spark Updated Branches: refs/heads/branch-2.2 d8a5a0d34 -> 7b9d05ad0 [SPARK-20596][ML][TEST] Consolidate and improve ALS recommendAll test cases Existing test cases for `recommendForAllX` methods (added in [SPARK-19535](https://issues.apache.org/jira/browse/SPARK-19535)) test `k < num items` and `k = num items`. Technically we should also test that `k > num items` returns the same results as `k = num items`. ## How was this patch tested? Updated existing unit tests. Author: Nick Pentreath Closes #17860 from MLnick/SPARK-20596-als-rec-tests. (cherry picked from commit 58518d070777fc0665c4d02bad8adf910807df98) Signed-off-by: Nick Pentreath Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7b9d05ad Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7b9d05ad Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7b9d05ad Branch: refs/heads/branch-2.2 Commit: 7b9d05ad00455daa53ae4ef1a602a6c64c2c95a4 Parents: d8a5a0d Author: Nick Pentreath Authored: Mon May 8 12:45:00 2017 +0200 Committer: Nick Pentreath Committed: Mon May 8 12:45:17 2017 +0200 -- .../spark/ml/recommendation/ALSSuite.scala | 63 1 file changed, 25 insertions(+), 38 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7b9d05ad/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala -- diff --git a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala index 7574af3..9d31e79 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala @@ -671,58 +671,45 @@ class ALSSuite .setItemCol("item") } - test("recommendForAllUsers with k < num_items") { -val topItems = getALSModel.recommendForAllUsers(2) -assert(topItems.count() == 3) -assert(topItems.columns.contains("user")) - -val expected = Map( - 0 -> Array((3, 54f), (4, 44f)), - 1 -> Array((3, 39f), (5, 33f)), - 2 -> Array((3, 51f), (5, 45f)) -) -checkRecommendations(topItems, expected, "item") - } - - test("recommendForAllUsers with k = num_items") { -val topItems = getALSModel.recommendForAllUsers(4) -assert(topItems.count() == 3) -assert(topItems.columns.contains("user")) - + test("recommendForAllUsers with k <, = and > num_items") { +val model = getALSModel +val numUsers = model.userFactors.count +val numItems = model.itemFactors.count val expected = Map( 0 -> Array((3, 54f), (4, 44f), (5, 42f), (6, 28f)), 1 -> Array((3, 39f), (5, 33f), (4, 26f), (6, 16f)), 2 -> Array((3, 51f), (5, 45f), (4, 30f), (6, 18f)) ) -checkRecommendations(topItems, expected, "item") - } - test("recommendForAllItems with k < num_users") { -val topUsers = getALSModel.recommendForAllItems(2) -assert(topUsers.count() == 4) -assert(topUsers.columns.contains("item")) - -val expected = Map( - 3 -> Array((0, 54f), (2, 51f)), - 4 -> Array((0, 44f), (2, 30f)), - 5 -> Array((2, 45f), (0, 42f)), - 6 -> Array((0, 28f), (2, 18f)) -) -checkRecommendations(topUsers, expected, "user") +Seq(2, 4, 6).foreach { k => + val n = math.min(k, numItems).toInt + val expectedUpToN = expected.mapValues(_.slice(0, n)) + val topItems = model.recommendForAllUsers(k) + assert(topItems.count() == numUsers) + assert(topItems.columns.contains("user")) + checkRecommendations(topItems, expectedUpToN, "item") +} } - test("recommendForAllItems with k = num_users") { -val topUsers = getALSModel.recommendForAllItems(3) -assert(topUsers.count() == 4) -assert(topUsers.columns.contains("item")) - + test("recommendForAllItems with k <, = and > num_users") { +val model = getALSModel +val numUsers = model.userFactors.count +val numItems = model.itemFactors.count val expected = Map( 3 -> Array((0, 54f), (2, 51f), (1, 39f)), 4 -> Array((0, 44f), (2, 30f), (1, 26f)), 5 -> Array((2, 45f), (0, 42f), (1, 33f)), 6 -> Array((0, 28f), (2, 18f), (1, 16f)) ) -checkRecommendations(topUsers, expected, "user") + +Seq(2, 3, 4).foreach { k => + val n = math.min(k, numUsers).toInt + val expectedUpToN = expected.mapValues(_.slice(0, n)) + val topUsers = getALSModel.recommendForAllItems(k) + assert(topUsers.count() == numItems) + assert(topUsers.columns.contains("item")) + checkRecommendations(topUsers, expectedUpToN, "user") +} } private def checkRecommendations(