This is an automated email from the ASF dual-hosted git repository. baunsgaard pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
commit ddcc1867788d21dc2da607e5b858109b217c1dc7 Author: baunsgaard <[email protected]> AuthorDate: Wed May 10 10:53:10 2023 +0200 [MINOR] Update DecisionTree and RandomForest docs This commit updates the documentation of Decision Tree and Random Forest to work with the python docs. --- scripts/builtin/decisionTree.dml | 31 +++++++++-------- scripts/builtin/randomForest.dml | 39 ++++++++++++---------- .../operator/algorithm/builtin/decisionTree.py | 29 ++++++++++------ .../operator/algorithm/builtin/randomForest.py | 36 +++++++++++--------- 4 files changed, 78 insertions(+), 57 deletions(-) diff --git a/scripts/builtin/decisionTree.dml b/scripts/builtin/decisionTree.dml index 85e414c61d..41ba72e024 100644 --- a/scripts/builtin/decisionTree.dml +++ b/scripts/builtin/decisionTree.dml @@ -24,6 +24,23 @@ # regression tree) decision trees depending on the provided labels y, either # classification (majority vote per leaf) or regression (average per leaf). # +# .. code-block:: +# +# For example, give a feature matrix with features [a,b,c,d] +# and the following trees, M would look as follows: +# +# (L1) |d<5| +# / \ +# (L2) P1:2 |a<7| +# / \ +# (L3) P2:2 P3:1 +# +# --> M := +# [[4, 5, 0, 2, 1, 7, 0, 0, 0, 0, 0, 2, 0, 1]] +# |(L1)| | (L2) | | (L3) | +# +# +# # INPUT: # ------------------------------------------------------------------------------ # X Feature matrix in recoded/binned representation @@ -45,19 +62,7 @@ # # OUTPUT: # ------------------------------------------------------------------------------ -# M Matrix M containing the learne trees, in linearized form -# For example, give a feature matrix with features [a,b,c,d] -# and the following trees, M would look as follows: -# -# (L1) |d<5| -# / \ -# (L2) P1:2 |a<7| -# / \ -# (L3) P2:2 P3:1 -# -# --> M := -# [[4, 5, 0, 2, 1, 7, 0, 0, 0, 0, 0, 2, 0, 1]] -# |(L1)| | (L2) | | (L3) | +# M Matrix M containing the learned trees, in linearized form # 
------------------------------------------------------------------------------ m_decisionTree = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] ctypes, diff --git a/scripts/builtin/randomForest.dml b/scripts/builtin/randomForest.dml index 7e39c9064e..ccebd59d86 100644 --- a/scripts/builtin/randomForest.dml +++ b/scripts/builtin/randomForest.dml @@ -26,6 +26,26 @@ # and optionally subset of features (columns). During tree construction, split # candidates are additionally chosen on a sample of remaining features. # +# .. code-block:: +# +# For example, given a feature matrix with features [a,b,c,d] +# and the following two trees, M (the output) would look as follows: +# +# (L1) |a<7| |d<5| +# / \ / \ +# (L2) |c<3| |b<4| |a<7| P3:2 +# / \ / \ / \ +# (L3) P1:2 P2:1 P3:1 P4:2 P1:2 P2:1 +# --> M := +# [[1, 7, 3, 3, 2, 4, 0, 2, 0, 1, 0, 1, 0, 2], (1st tree) +# [4, 5, 1, 7, 0, 2, 0, 2, 0, 1, 0, 0, 0, 0]] (2nd tree) +# |(L1)| | (L2) | | (L3) | +# +# With feature sampling (feature_frac < 1), each tree is +# prefixed by a one-hot vector of sampled features +# (e.g., [1,1,1,0] if we sampled a,b,c of the four features) +# +# # INPUT: # ------------------------------------------------------------------------------ # X Feature matrix in recoded/binned representation @@ -49,24 +69,7 @@ # # OUTPUT: # ------------------------------------------------------------------------------ -# M Matrix M containing the learned trees, in linearized form -# For example, give a feature matrix with features [a,b,c,d] -# and the following two trees, M would look as follows: -# -# (L1) |a<7| |d<5| -# / \ / \ -# (L2) |c<3| |b<4| |a<7| P3:2 -# / \ / \ / \ -# (L3) P1:2 P2:1 P3:1 P4:2 P1:2 P2:1 -# -# --> M := -# [[1, 7, 3, 3, 2, 4, 0, 2, 0, 1, 0, 1, 0, 2], (1st tree) -# [4, 5, 1, 7, 0, 2, 0, 2, 0, 1, 0, 0, 0, 0]] (2nd tree) -# |(L1)| | (L2) | | (L3) | -# -# With feature sampling (feature_frac < 1), each tree is -# prefixed by a one-hot vector of sampled features -# (e.g., [1,1,1,0] if we 
sampled a,b,c of the four features) +# M Matrix M containing the learned trees, in linearized form. # ------------------------------------------------------------------------------ m_randomForest = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] ctypes, diff --git a/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py b/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py index 399a21fd50..38ab517a8a 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py +++ b/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py @@ -39,6 +39,23 @@ def decisionTree(X: Matrix, regression tree) decision trees depending on the provided labels y, either classification (majority vote per leaf) or regression (average per leaf). + .. code-block:: + + For example, give a feature matrix with features [a,b,c,d] + and the following trees, M would look as follows: + + (L1) |d<5| + / \ + (L2) P1:2 |a<7| + / \ + (L3) P2:2 P3:1 + + --> M := + [[4, 5, 0, 2, 1, 7, 0, 0, 0, 0, 0, 2, 0, 1]] + |(L1)| | (L2) | | (L3) | + + + :param X: Feature matrix in recoded/binned representation @@ -56,17 +73,7 @@ def decisionTree(X: Matrix, :param impurity: Impurity measure: entropy, gini (default), rss (regression) :param seed: Fixed seed for randomization of samples and split candidates :param verbose: Flag indicating verbose debug output - :return: Matrix M containing the learne trees, in linearized form - For example, give a feature matrix with features [a,b,c,d] - and the following trees, M would look as follows: - (L1) |d<5| - / \ - (L2) P1:2 |a<7| - / \ - (L3) P2:2 P3:1 - --> M := - [[4, 5, 0, 2, 1, 7, 0, 0, 0, 0, 0, 2, 0, 1]] - |(L1)| | (L2) | | (L3) | + :return: Matrix M containing the learned trees, in linearized form """ params_dict = {'X': X, 'y': y, 'ctypes': ctypes} diff --git a/src/main/python/systemds/operator/algorithm/builtin/randomForest.py b/src/main/python/systemds/operator/algorithm/builtin/randomForest.py index 
5c4bb0438a..c0659d47bd 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/randomForest.py +++ b/src/main/python/systemds/operator/algorithm/builtin/randomForest.py @@ -41,6 +41,26 @@ def randomForest(X: Matrix, and optionally subset of features (columns). During tree construction, split candidates are additionally chosen on a sample of remaining features. + .. code-block:: + + For example, given a feature matrix with features [a,b,c,d] + and the following two trees, M (the output) would look as follows: + + (L1) |a<7| |d<5| + / \ / \ + (L2) |c<3| |b<4| |a<7| P3:2 + / \ / \ / \ + (L3) P1:2 P2:1 P3:1 P4:2 P1:2 P2:1 + --> M := + [[1, 7, 3, 3, 2, 4, 0, 2, 0, 1, 0, 1, 0, 2], (1st tree) + [4, 5, 1, 7, 0, 2, 0, 2, 0, 1, 0, 0, 0, 0]] (2nd tree) + |(L1)| | (L2) | | (L3) | + + With feature sampling (feature_frac < 1), each tree is + prefixed by a one-hot vector of sampled features + (e.g., [1,1,1,0] if we sampled a,b,c of the four features) + + :param X: Feature matrix in recoded/binned representation @@ -60,21 +80,7 @@ def randomForest(X: Matrix, :param impurity: Impurity measure: entropy, gini (default), rss (regression) :param seed: Fixed seed for randomization of samples and split candidates :param verbose: Flag indicating verbose debug output - :return: Matrix M containing the learned trees, in linearized form - For example, give a feature matrix with features [a,b,c,d] - and the following two trees, M would look as follows: - (L1) |a<7| |d<5| - / \ / \ - (L2) |c<3| |b<4| |a<7| P3:2 - / \ / \ / \ - (L3) P1:2 P2:1 P3:1 P4:2 P1:2 P2:1 - --> M := - [[1, 7, 3, 3, 2, 4, 0, 2, 0, 1, 0, 1, 0, 2], (1st tree) - [4, 5, 1, 7, 0, 2, 0, 2, 0, 1, 0, 0, 0, 0]] (2nd tree) - |(L1)| | (L2) | | (L3) | - With feature sampling (feature_frac < 1), each tree is - prefixed by a one-hot vector of sampled features - (e.g., [1,1,1,0] if we sampled a,b,c of the four features) + :return: Matrix M containing the learned trees, in linearized form. 
""" params_dict = {'X': X, 'y': y, 'ctypes': ctypes}
