Repository: spark Updated Branches: refs/heads/master 63c340a71 -> c1b4ce432
[SPARK-10535] Sync up API for matrix factorization model between Scala and PySpark Support for recommendUsersForProducts and recommendProductsForUsers in matrix factorization model for PySpark Author: Vladimir Vladimirov <[email protected]> Closes #8700 from smartkiwi/SPARK-10535_. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c1b4ce43 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c1b4ce43 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c1b4ce43 Branch: refs/heads/master Commit: c1b4ce43264fa8b9945df3c599a51d4d2a675705 Parents: 63c340a Author: Vladimir Vladimirov <[email protected]> Authored: Fri Oct 9 14:16:13 2015 -0700 Committer: Davies Liu <[email protected]> Committed: Fri Oct 9 14:16:13 2015 -0700 ---------------------------------------------------------------------- .../MatrixFactorizationModelWrapper.scala | 8 +++++ python/pyspark/mllib/recommendation.py | 32 +++++++++++++++++--- 2 files changed, 36 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/c1b4ce43/mllib/src/main/scala/org/apache/spark/mllib/api/python/MatrixFactorizationModelWrapper.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/MatrixFactorizationModelWrapper.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/MatrixFactorizationModelWrapper.scala index 534edac..eeb7cba 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/MatrixFactorizationModelWrapper.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/MatrixFactorizationModelWrapper.scala @@ -42,4 +42,12 @@ private[python] class MatrixFactorizationModelWrapper(model: MatrixFactorization case (product, feature) => (product, Vectors.dense(feature)) }.asInstanceOf[RDD[(Any, Any)]]) } + + def wrappedRecommendProductsForUsers(num: Int): RDD[Array[Any]] = { + SerDe.fromTuple2RDD(recommendProductsForUsers(num).asInstanceOf[RDD[(Any, Any)]]) + } + + def wrappedRecommendUsersForProducts(num: Int): RDD[Array[Any]] = { + SerDe.fromTuple2RDD(recommendUsersForProducts(num).asInstanceOf[RDD[(Any, Any)]]) + } } http://git-wip-us.apache.org/repos/asf/spark/blob/c1b4ce43/python/pyspark/mllib/recommendation.py ---------------------------------------------------------------------- diff --git a/python/pyspark/mllib/recommendation.py b/python/pyspark/mllib/recommendation.py index 95047b5..b9442b0 100644 --- a/python/pyspark/mllib/recommendation.py +++ b/python/pyspark/mllib/recommendation.py @@ -76,16 +76,28 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader): >>> first_user = model.userFeatures().take(1)[0] >>> latents = first_user[1] - >>> len(latents) == 4 - True + >>> len(latents) + 4 >>> model.productFeatures().collect() [(1, array('d', [...])), (2, array('d', [...]))] >>> first_product = model.productFeatures().take(1)[0] >>> latents = first_product[1] - >>> len(latents) == 4 - True + >>> len(latents) + 4 + + >>> products_for_users = model.recommendProductsForUsers(1).collect() + >>> len(products_for_users) + 2 + >>> products_for_users[0] + (1, (Rating(user=1, product=2, rating=...),)) + + >>> users_for_products = model.recommendUsersForProducts(1).collect() + >>> len(users_for_products) + 2 + >>> users_for_products[0] + (1, (Rating(user=2, product=1, rating=...),)) >>> model = ALS.train(ratings, 1, nonnegative=True, seed=10) >>> model.predict(2, 2) @@ -166,6 +178,18 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader): """ return list(self.call("recommendProducts", user, num)) + def recommendProductsForUsers(self, num): + """ + Recommends top "num" products for all users. The number returned may be less than this. + """ + return self.call("wrappedRecommendProductsForUsers", num) + + def recommendUsersForProducts(self, num): + """ + Recommends top "num" users for all products. The number returned may be less than this. + """ + return self.call("wrappedRecommendUsersForProducts", num) + @property @since("1.4.0") def rank(self): --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
