This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new 6085c4b869 [SYSTEMDS-3329] PageRank builtin function
6085c4b869 is described below

commit 6085c4b869d708705904a3e88fab36efe2fbffe2
Author: MarcNic <[email protected]>
AuthorDate: Sat Mar 16 18:41:21 2024 +0100

    [SYSTEMDS-3329] PageRank builtin function
    
    Closes #1904.
---
 scripts/builtin/pageRank.dml                       | 50 ++++++++++++++++++++++
 .../java/org/apache/sysds/common/Builtins.java     |  1 +
 .../sysds/test/applications/PageRankTest.java      |  4 +-
 .../scripts/applications/page_rank/PageRank.dml    | 17 ++------
 .../functions/codegen/SystemDS-config-codegen.xml  |  2 +-
 5 files changed, 57 insertions(+), 17 deletions(-)

diff --git a/scripts/builtin/pageRank.dml b/scripts/builtin/pageRank.dml
new file mode 100644
index 0000000000..bd024e77b2
--- /dev/null
+++ b/scripts/builtin/pageRank.dml
@@ -0,0 +1,50 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# DML builtin method for PageRank algorithm (power iterations)
+#
+# INPUT:
+# ------------------------------------------------------------------------------
+# G           Input Matrix
+# p           initial page rank vector (number of nodes), e.g., rand initialized
+# e           additional customization, e.g., a vector of ones (no default value)
+# u           personalization vector (number of nodes)
+# alpha       teleport probability
+# max_iter    maximum number of iterations
+# ------------------------------------------------------------------------------
+#
+# OUTPUT:
+# ---------------------------------------------------------------------------
+# pprime      computed pagerank
+# ---------------------------------------------------------------------------
+
+m_pageRank = function (Matrix[Double] G, Matrix[Double] p,
+  Matrix[Double] e, Matrix[Double] u, Double alpha = 0.85, Int max_iter = 20)
+  return (Matrix[double] pprime)
+{
+  i = 0; # number of completed update steps
+  while( i < max_iter ) { # always runs max_iter steps; there is no convergence check
+    p = alpha * (G %*% p) + (1 - alpha) * (e %*% u %*% p); # blend of G %*% p and e %*% u %*% p, weighted by alpha and (1 - alpha)
+    i += 1;
+  }
+  pprime = p # p after the last update (the unchanged input p if max_iter <= 0)
+}
+
diff --git a/src/main/java/org/apache/sysds/common/Builtins.java b/src/main/java/org/apache/sysds/common/Builtins.java
index 3dae7a80ae..4d0e13791f 100644
--- a/src/main/java/org/apache/sysds/common/Builtins.java
+++ b/src/main/java/org/apache/sysds/common/Builtins.java
@@ -253,6 +253,7 @@ public enum Builtins {
        OUTLIER_IQR_APPLY("outlierByIQRApply", true),
        OUTLIER_SD("outlierBySd", true),
        OUTLIER_SD_APPLY("outlierBySdApply", true),
+       PAGERANK("pageRank", true),
        PCA("pca", true),
        PCAINVERSE("pcaInverse", true),
        PCATRANSFORM("pcaTransform", true),
diff --git a/src/test/java/org/apache/sysds/test/applications/PageRankTest.java b/src/test/java/org/apache/sysds/test/applications/PageRankTest.java
index ed322f00a9..d606243ac0 100644
--- a/src/test/java/org/apache/sysds/test/applications/PageRankTest.java
+++ b/src/test/java/org/apache/sysds/test/applications/PageRankTest.java
@@ -49,7 +49,7 @@ public class PageRankTest extends AutomatedTestBase {
 
        @Parameters
        public static Collection<Object[]> data() {
-               Object[][] data = new Object[][] { { 50, 50 }, { 1500, 1500 }, { 7500, 7500 } };
+               Object[][] data = new Object[][] { { 50, 50 }, { 1500, 1500 }, { 7500, 7500 }, {10000,10000} };
                return Arrays.asList(data);
        }
 
@@ -83,7 +83,7 @@ public class PageRankTest extends AutomatedTestBase {
                
                fullDMLScriptName = getScript();
 
-               double[][] g = getRandomMatrix(rows, cols, 1, 1, 0.000374962, -1);
+               double[][] g = getRandomMatrix(rows, cols, 1, 1, 0.0000042159, -1);
                double[][] p = getRandomMatrix(rows, 1, 1, 1, 1, -1);
                double[][] e = getRandomMatrix(rows, 1, 1, 1, 1, -1);
                double[][] u = getRandomMatrix(1, cols, 1, 1, 1, -1);
diff --git a/src/test/scripts/applications/page_rank/PageRank.dml b/src/test/scripts/applications/page_rank/PageRank.dml
index b3293cf9f6..cf974f25e7 100644
--- a/src/test/scripts/applications/page_rank/PageRank.dml
+++ b/src/test/scripts/applications/page_rank/PageRank.dml
@@ -19,25 +19,14 @@
 #
 #-------------------------------------------------------------
 
-# How to invoke this dml script PageRank.dml?
-# Assume PAGE_RANK_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume rows = 1000 and cols = 1000 for g,  rows = 1000 and cols = 1 for p, rows = 1000 and cols = 1 for e, rows = 1 and cols = 1000 for u,
-# Assume alpha = 0.85, max_iteration = 3
-# hadoop jar SystemDS.jar -f $PAGE_RANK_HOME/PageRank.dml -args "$INPUT_DIR/g" "$INPUT_DIR/p" "$INPUT_DIR/e" "$INPUT_DIR/u" 0.85 3 "$OUPUT_DIR/w"
-
 G = read($1);
-# dense
 p = read($2);
 e = read($3);
 u = read($4);
 alpha = $5;
-max_iteration = $6;
-i = 0;
+maxi = $6;
 
-while(i < max_iteration) {
-       p = alpha * (G %*% p) + (1 - alpha) * (e %*% u %*% p);
-       i = i + 1;
-}
+p = pageRank(G=G, p=p, e=e, u=u, alpha=alpha, max_iter=maxi);
 
 write(p, $7, format="text");
+
diff --git a/src/test/scripts/functions/codegen/SystemDS-config-codegen.xml b/src/test/scripts/functions/codegen/SystemDS-config-codegen.xml
index f77d94dd4e..0bc25e014f 100644
--- a/src/test/scripts/functions/codegen/SystemDS-config-codegen.xml
+++ b/src/test/scripts/functions/codegen/SystemDS-config-codegen.xml
@@ -25,7 +25,7 @@
    <sysds.codegen.plancache>true</sysds.codegen.plancache>
    <sysds.codegen.literals>1</sysds.codegen.literals>
 
-   <!-- The number of theads for the spark instance artificially selected-->
+   <!-- The number of threads for the spark instance artificially selected-->
    <sysds.local.spark.number.threads>16</sysds.local.spark.number.threads>
 
    <sysds.codegen.api>auto</sysds.codegen.api>

Reply via email to