This is an automated email from the ASF dual-hosted git repository. baunsgaard pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
commit 23177b7779f1868fd51c7af8f98e2ab8221b9011 Author: Sebastian Baunsgaard <[email protected]> AuthorDate: Thu Oct 19 12:03:45 2023 +0200 [MINOR] Various Builtin Algorithm Cleanups This commit modifies our LM built-in, to no longer do a prediction and accuracy test if set to verbose. The modification removes a matrix multiplication of the bias matrix trained with the X input, reducing the overall execution time of our linear models when verbose is true. The logic of the statistics printing is now in lmPredictStats. Also modified is our normalizeApply, which now does not divide by zero in edge cases of constant columns. I also fixed the spelling in various other built-in scripts. Closes #1921 --- docs/site/builtins-reference.md | 34 ---- scripts/builtin/confusionMatrix.dml | 2 +- scripts/builtin/csplineCG.dml | 4 +- scripts/builtin/dbscan.dml | 77 +++---- scripts/builtin/decisionTree.dml | 2 +- scripts/builtin/dist.dml | 2 +- scripts/builtin/l2svmPredict.dml | 21 +- scripts/builtin/lm.dml | 18 +- scripts/builtin/lmCG.dml | 225 +++++++++------------ scripts/builtin/lmDS.dml | 156 +++++--------- scripts/builtin/lmPredictStats.dml | 59 ++++-- scripts/builtin/multiLogRegPredict.dml | 17 +- scripts/builtin/normalizeApply.dml | 10 +- scripts/builtin/scaleApply.dml | 8 +- scripts/builtin/scaleMinMax.dml | 14 +- .../java/org/apache/sysds/parser/DMLProgram.java | 1 + .../sysds/parser/FunctionStatementBlock.java | 14 +- .../federated/algorithms/FederatedLmPipeline.java | 9 +- src/test/scripts/functions/builtin/dbscan.dml | 2 +- src/test/scripts/functions/builtin/dbscanApply.dml | 2 +- 20 files changed, 311 insertions(+), 366 deletions(-) diff --git a/docs/site/builtins-reference.md b/docs/site/builtins-reference.md index 6977dadfd1..22b335866c 100644 --- a/docs/site/builtins-reference.md +++ b/docs/site/builtins-reference.md @@ -400,40 +400,6 @@ y = X %*% rand(rows = ncol(X), cols = 1) [predict, beta] = cvlm(X = X, y = y, k = 4) ``` - -## `DBSCAN`-Function - -The dbscan() 
implements the DBSCAN Clustering algorithm using Euclidian distance. - -### Usage - -```r -Y = dbscan(X = X, eps = 2.5, minPts = 5) -``` - -### Arguments - -| Name | Type | Default | Description | -| :--------- | :-------------- | :--------- | :---------- | -| X | Matrix[Double] | required | The input Matrix to do DBSCAN on. | -| eps | Double | `0.5` | Maximum distance between two points for one to be considered reachable for the other. | -| minPts | Int | `5` | Number of points in a neighborhood for a point to be considered as a core point (includes the point itself). | - -### Returns - -| Type | Description | -| :-----------| :---------- | -| Matrix[Integer] | The mapping of records to clusters | -| Matrix[Double] | The coordinates of all points considered part of a cluster | - -### Example - -```r -X = rand(rows=1780, cols=180, min=1, max=20) -[indices, model] = dbscan(X = X, eps = 2.5, minPts = 360) -``` - - ## `decisionTree`-Function The `decisionTree()` implements the classification tree with both scale and categorical diff --git a/scripts/builtin/confusionMatrix.dml b/scripts/builtin/confusionMatrix.dml index 652f04076e..18228d14c2 100644 --- a/scripts/builtin/confusionMatrix.dml +++ b/scripts/builtin/confusionMatrix.dml @@ -57,6 +57,6 @@ m_confusionMatrix = function(Matrix[Double] P, Matrix[Double] Y) dim = max(max(Y),max(P)) confusionSum = table(P, Y, dim, dim) - # max to avoid devision by 0, in case a colum contain no entries. + # max to avoid division by 0, in case a colum contain no entries. confusionAvg = confusionSum / max(1,colSums(confusionSum)) } diff --git a/scripts/builtin/csplineCG.dml b/scripts/builtin/csplineCG.dml index 37d557b8a1..a6e8b2077e 100644 --- a/scripts/builtin/csplineCG.dml +++ b/scripts/builtin/csplineCG.dml @@ -27,7 +27,7 @@ # monotonically increasing and there is no duplicates points in X # Y 1-column matrix of corresponding y values knots # inp_x the given input x, for which the cspline will find predicted y. 
-# tol Tolerance (epsilon); conjugate graduent procedure terminates early if +# tol Tolerance (epsilon); conjugate gradient procedure terminates early if # L2 norm of the beta-residual is less than tolerance * its initial norm # maxi Maximum number of conjugate gradient iterations, 0 = no maximum # ---------------------------------------------------------------------------------------------- @@ -51,7 +51,7 @@ m_csplineCG = function (Matrix[Double] X, Matrix[Double] Y, Double inp_x, Double #given X<nx1> and corresponding Y<nx1> values for the function. where each (xi,yi) represents a knot. -#it calculates the first derivates i.e k of the interpolated polynomial +#it calculates the first derivatives i.e k of the interpolated polynomial calcKnotsDerivKsCG = function ( matrix[Double] X, matrix[Double] Y, Integer max_iteration, diff --git a/scripts/builtin/dbscan.dml b/scripts/builtin/dbscan.dml index 69c6887e67..5fb652287c 100644 --- a/scripts/builtin/dbscan.dml +++ b/scripts/builtin/dbscan.dml @@ -19,64 +19,67 @@ # #------------------------------------------------------------- -# Implements the DBSCAN clustering algorithm using Euclidian distance matrix +# Implements the DBSCAN clustering algorithm using an Euclidean +# distance matrix. # # INPUT: -# ------------------------------------------------- +# ------------------------------------------------------------ # X The input Matrix to do DBSCAN on. -# eps Maximum distance between two points for one to be considered reachable for the other. -# minPts Number of points in a neighborhood for a point to be considered as a core point +# eps Maximum distance between two points for one to +# be considered reachable for the other. +# minPts Number of points in a neighborhood for a point to +# be considered as a core point # (includes the point itself). 
-# ------------------------------------------------------------------------------------------- +# ------------------------------------------------------------ # # OUTPUT: -# ------------------------------------------------------------------------------------------------ -# clusterMembers clustering Matrix -# ------------------------------------------------------------------------------------------------ +# ------------------------------------------------------------ +# clusterMembers The clustering matrix +# clusterMembers The cluster model +# ------------------------------------------------------------ m_dbscan = function (Matrix[Double] X, Double eps = 0.5, Integer minPts = 5) - return (Matrix[Double] X, Matrix[Double] clusterModel, Double eps) + return (Matrix[Double] clusterMembers, Matrix[Double] clusterModel) { #check input parameter assertions - if(minPts < 0) { stop("DBSCAN: Stopping due to invalid inputs: minPts should be greater than 0"); } - if(eps < 0) - { - print("DBSCAN: Epsilon (eps) should be greater than 0. Setting eps = 0.5"); + if(minPts < 0){ + stop("DBSCAN: Stopping due to invalid inputs: minPts should be greater than 0") + } + if(eps < 0){ + print("DBSCAN: Epsilon (eps) should be greater than 0. Setting eps = 0.5") eps = 0.5 } - UNASSIGNED = 0; - - num_records = nrow(X); - num_features = ncol(X); + num_records = nrow(X) + num_features = ncol(X) - neighbors = dist(X); + # Construct the full all to all distance matrix. 
+ neighbors = dist(X) #find same pts and set their distance to the smallest double representation neighbors = replace(target = neighbors, pattern = 0, replacement = 2.225e-307) - neighbors = neighbors - diag(diag(neighbors)); + neighbors = neighbors - diag(diag(neighbors)) # neighbors within eps - withinEps = ((neighbors <= eps) * (0 < neighbors)); - corePts = rowSums(withinEps) + 1 >= minPts; + withinEps = ((neighbors <= eps) * (0 < neighbors)) + corePts = rowSums(withinEps) + 1 >= minPts - clusterMembers = matrix(UNASSIGNED, num_records, 1); - clusterModel = matrix(UNASSIGNED, 0, num_features); + if (sum(corePts) <= 0) { + stop("Invalid DBScan there was no neighbors within epsilon") + } - if (sum(corePts) != 0) { - # leave only density reachable pts - neighbors = (neighbors * corePts * withinEps) > 0; - # border pts of multiple clusters - border = neighbors * (t(corePts) == 0 & colSums(neighbors) > 1) * seq(num_records, 1); - border = (border - colMaxs(border)) == 0; - neighbors = neighbors * border; + # leave only density reachable pts + neighbors = (neighbors * corePts * withinEps) > 0 + # border pts of multiple clusters + border = neighbors * (t(corePts) == 0 & colSums(neighbors) > 1) * seq(num_records, 1) + border = (border - colMaxs(border)) == 0 + neighbors = neighbors * border - adjacency = (neighbors + t(neighbors)) > 0; + adjacency = (neighbors + t(neighbors)) > 0 - clusterMembers = components(G=adjacency, verbose=FALSE); - # noise to 0 - clusterMembers = clusterMembers * (rowSums(adjacency) > 0); - clusterModel = removeEmpty(target=X, margin="rows", select = (clusterMembers > 0)) - X = clusterMembers - } + clusterMembers = components(G=adjacency, verbose=FALSE) + # noise to 0 + clusterMembers = clusterMembers * (rowSums(adjacency) > 0) + clusterModel = removeEmpty(target=X, margin="rows", select = (clusterMembers > 0)) + } diff --git a/scripts/builtin/decisionTree.dml b/scripts/builtin/decisionTree.dml index c128f094c9..69bf12af90 100644 --- 
a/scripts/builtin/decisionTree.dml +++ b/scripts/builtin/decisionTree.dml @@ -106,7 +106,7 @@ m_decisionTree = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] cty if( verbose ) { print("decisionTree: initialize with max_depth=" + max_depth + ", max_features=" - + max_features +", max_dataratio" + max_dataratio + ", impurity=" + + max_features +", max_dataratio=" + max_dataratio + ", impurity=" + impurity + ", seed=" + seed + "."); print("decisionTree: basic statistics:"); print("-- impurity: " + as.scalar(computeImpurity(y2, I, impurity))); diff --git a/scripts/builtin/dist.dml b/scripts/builtin/dist.dml index 197f9d8b07..26ded9a197 100644 --- a/scripts/builtin/dist.dml +++ b/scripts/builtin/dist.dml @@ -34,5 +34,5 @@ m_dist = function(Matrix[Double] X) return (Matrix[Double] Y) { G = X %*% t(X); Y = sqrt(-2 * G + outer(diag(G), t(diag(G)), "+")); - Y = replace(target = Y, pattern=0/0, replacement = 0); + Y = replace(target = Y, pattern=NaN, replacement = 0); } diff --git a/scripts/builtin/l2svmPredict.dml b/scripts/builtin/l2svmPredict.dml index 88b1497c06..9281861b81 100644 --- a/scripts/builtin/l2svmPredict.dml +++ b/scripts/builtin/l2svmPredict.dml @@ -38,15 +38,24 @@ m_l2svmPredict = function(Matrix[Double] X, Matrix[Double] W, Boolean verbose = FALSE) return(Matrix[Double] YRaw, Matrix[Double] Y) { - if(ncol(X) != nrow(W)){ - if(ncol(X) + 1 != nrow(W)){ - stop("l2svm Predict: Invalid shape of W ["+ncol(W)+","+nrow(W)+"] or X ["+ncol(X)+","+nrow(X)+"]") + n = nrow(X) + m = ncol(X) + wn = nrow(W) + wm = ncol(W) + if(m != wn){ + # If intercept was enabled + if(m + 1 != wn){ + stop("l2svm Predict: Invalid shape of W [" + + wm + "," + wn + "] or X [" + m + "," + n + "]") } - YRaw = X %*% W[1:ncol(X),] + W[ncol(X)+1,] - Y = rowIndexMax(YRaw) + # We could append a 1 column to X and multiply + # But slicing W seems faster. + YRaw = X %*% W[1:m,] + W[m+1,] } else{ + # If intercept was disabled YRaw = X %*% W - Y = rowIndexMax(YRaw) } + # Predict Y. 
+ Y = rowIndexMax(YRaw) } diff --git a/scripts/builtin/lm.dml b/scripts/builtin/lm.dml index 9c8fe5b401..b7fc55e0ff 100644 --- a/scripts/builtin/lm.dml +++ b/scripts/builtin/lm.dml @@ -19,11 +19,12 @@ # #------------------------------------------------------------- -# The lm-function solves linear regression using either the direct solve method or the conjugate gradient -# algorithm depending on the input size of the matrices (See lmDS-function and lmCG-function respectively). +# The lm-function solves linear regression using either the direct solve +# method or the conjugate gradient algorithm depending on the input size +# of the matrices (See lmDS-function and lmCG-function respectively). # # INPUT: -# -------------------------------------------------------------------------------------- +# -------------------------------------------------------------------- # X Matrix of feature vectors. # y 1-column matrix of response values. # icpt Intercept presence, shifting and rescaling the columns of X @@ -33,14 +34,15 @@ # norm of the beta-residual is less than tolerance * its initial norm # maxi Maximum number of conjugate gradient iterations. 
0 = no maximum # verbose If TRUE print messages are activated -# -------------------------------------------------------------------------------------- +# -------------------------------------------------------------------- # # OUTPUT: -# -------------------------------------------------------------------------------------------- -# B The model fit -# -------------------------------------------------------------------------------------------- +# --------------------------------------------------------------- +# B The model fit beta that can be used as input in lmPredict +# --------------------------------------------------------------- -m_lm = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0, Double reg = 1e-7, Double tol = 1e-7, Integer maxi = 0, Boolean verbose = TRUE) +m_lm = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0, + Double reg = 1e-7, Double tol = 1e-7, Integer maxi = 0, Boolean verbose = TRUE) return (Matrix[Double] B) { if( ncol(X) <= 1024 ) B = lmDS(X, y, icpt, reg, verbose) diff --git a/scripts/builtin/lmCG.dml b/scripts/builtin/lmCG.dml index cbc6870899..5fa9b0fefc 100644 --- a/scripts/builtin/lmCG.dml +++ b/scripts/builtin/lmCG.dml @@ -5,7 +5,7 @@ # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance +# "License") you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 @@ -22,62 +22,62 @@ # The lmCG function solves linear regression using the conjugate gradient algorithm # # INPUT: -# -------------------------------------------------------------------------------------- +# --------------------------------------------------------------- # X Matrix of feature vectors. # y 1-column matrix of response values. 
# icpt Intercept presence, shifting and rescaling the columns of X # reg Regularization constant (lambda) for L2-regularization. set to nonzero # for highly dependant/sparse/numerous features -# tol Tolerance (epsilon); conjugate gradient procedure terminates early if L2 +# tol Tolerance (epsilon) conjugate gradient procedure terminates early if L2 # norm of the beta-residual is less than tolerance * its initial norm # maxi Maximum number of conjugate gradient iterations. 0 = no maximum # verbose If TRUE print messages are activated -# -------------------------------------------------------------------------------------- +# --------------------------------------------------------------- # # OUTPUT: -# --------------------------------------------------------------------------------------------- -# B The model fit -# --------------------------------------------------------------------------------------------- - -m_lmCG = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0, Double reg = 1e-7, Double tol = 1e-7, - Integer maxi = 0, Boolean verbose = TRUE) return (Matrix[Double] B) { - intercept_status = icpt; - regularization = reg; - tolerance = tol; - max_iteration = maxi; - - n = nrow (X); - m = ncol (X); - ones_n = matrix (1, rows = n, cols = 1); - zero_cell = matrix (0, rows = 1, cols = 1); +# --------------------------------------------------------------- +# B The model fit beta that can be used as input in lmPredict +# --------------------------------------------------------------- - # Introduce the intercept, shift and rescale the columns of X if needed +m_lmCG = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0, + Double reg = 1e-7, Double tol = 1e-7, Integer maxi = 0, Boolean verbose = TRUE) + return (Matrix[Double] B) { + intercept_status = icpt + regularization = reg + tolerance = tol + max_iteration = maxi - m_ext = m; - if (intercept_status == 1 | intercept_status == 2) # add the intercept column - { - X = cbind (X, ones_n); - m_ext 
= ncol (X); - } + n = nrow(X) + m = ncol(X) - scale_lambda = matrix (1, rows = m_ext, cols = 1); - if (intercept_status == 1 | intercept_status == 2) - { - scale_lambda [m_ext, 1] = 0; + # Introduce the intercept, shift and rescale the columns of X if needed + # add the intercept column + if(intercept_status == 1 | intercept_status == 2){ + ones_n = matrix(1, rows = n, cols = 1) + X = cbind(X, ones_n) + m_ext = ncol(X) + scale_lambda = matrix(1, rows = m_ext, cols = 1) + scale_lambda [m_ext, 1] = 0 } + else{ + scale_lambda = matrix(1, rows = m, cols = 1) + m_ext = m + } + - if (intercept_status == 2) # scale-&-shift X columns to mean 0, variance 1 - { # Important assumption: X [, m_ext] = ones_n - avg_X_cols = t(colSums(X)) / n; - var_X_cols = (t(colSums (X ^ 2)) - n * (avg_X_cols ^ 2)) / (n - 1); - is_unsafe = (var_X_cols <= 0); - scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe); - scale_X [m_ext, 1] = 1; - shift_X = - avg_X_cols * scale_X; - shift_X [m_ext, 1] = 0; - } else { - scale_X = matrix (1, rows = m_ext, cols = 1); - shift_X = matrix (0, rows = m_ext, cols = 1); + # scale-&-shift X columns to mean 0, variance 1 + # Important assumption: X [, m_ext] = ones_n + if(intercept_status == 2){ + avg_X_cols = t(colSums(X)) / n + var_X_cols = (t(colSums(X ^ 2)) - n * (avg_X_cols ^ 2)) / (n - 1) + is_unsafe = (var_X_cols <= 0) + scale_X = 1.0 / sqrt(var_X_cols * (1 - is_unsafe) + is_unsafe) + scale_X [m_ext, 1] = 1 + shift_X = - avg_X_cols * scale_X + shift_X [m_ext, 1] = 0 + }else{ + scale_X = matrix(1, rows = m_ext, cols = 1) + shift_X = matrix(0, rows = m_ext, cols = 1) } # Henceforth, if intercept_status == 2, we use "X %*% (SHIFT/SCALE TRANSFORM)" @@ -86,117 +86,78 @@ m_lmCG = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0, Double r # in which it occurs. 
To avoid materializing a large matrix, we rewrite it: # # ssX_A = (SHIFT/SCALE TRANSFORM) %*% A --- is rewritten as: - # ssX_A = diag (scale_X) %*% A; - # ssX_A [m_ext, ] = ssX_A [m_ext, ] + t(shift_X) %*% A; + # ssX_A = diag (scale_X) %*% A + # ssX_A [m_ext, ] = ssX_A [m_ext, ] + t(shift_X) %*% A # # tssX_A = t(SHIFT/SCALE TRANSFORM) %*% A --- is rewritten as: - # tssX_A = diag (scale_X) %*% A + shift_X %*% A [m_ext, ]; + # tssX_A = diag (scale_X) %*% A + shift_X %*% A [m_ext, ] - lambda = scale_lambda * regularization; - beta_unscaled = matrix (0, rows = m_ext, cols = 1); + lambda = scale_lambda * regularization + beta_unscaled = matrix(0, rows = m_ext, cols = 1) - if (max_iteration == 0) { - max_iteration = m_ext; + if(max_iteration == 0){ + max_iteration = m_ext } - i = 0; + i = 0 # BEGIN THE CONJUGATE GRADIENT ALGORITHM - if (verbose) print ("Running the CG algorithm..."); + if(verbose) print("Running the CG algorithm...") - r = - t(X) %*% y; + r = - t(X) %*% y - if (intercept_status == 2) { - r = scale_X * r + shift_X %*% r [m_ext, ]; + if(intercept_status == 2){ + r = scale_X * r + shift_X %*% r [m_ext, ] } - p = - r; - norm_r2 = sum (r ^ 2); - norm_r2_initial = norm_r2; - norm_r2_target = norm_r2_initial * tolerance ^ 2; - if (verbose) print ("||r|| initial value = " + sqrt (norm_r2_initial) + ", target value = " + sqrt (norm_r2_target)); - - while (i < max_iteration & norm_r2 > norm_r2_target) - { - if (intercept_status == 2) { - ssX_p = scale_X * p; - ssX_p [m_ext, ] = ssX_p [m_ext, ] + t(shift_X) %*% p; - } else { - ssX_p = p; + p = - r + norm_r2 = sum(r ^ 2) + norm_r2_initial = norm_r2 + norm_r2_target = norm_r2_initial * tolerance ^ 2 + if(verbose){ + print("||r|| initial value = " + sqrt(norm_r2_initial) + + ", target value = " + sqrt(norm_r2_target)) + } + + while(i < max_iteration & norm_r2 > norm_r2_target){ + if(intercept_status == 2){ + ssX_p = scale_X * p + ssX_p [m_ext, ] = ssX_p [m_ext, ] + t(shift_X) %*% p + }else{ + ssX_p = p } - q = t(X) 
%*% (X %*% ssX_p); + q = t(X) %*% (X %*% ssX_p) - if (intercept_status == 2) { - q = scale_X * q + shift_X %*% q [m_ext, ]; + if(intercept_status == 2) { + q = scale_X * q + shift_X %*% q [m_ext, ] } - q += lambda * p; - a = norm_r2 / sum (p * q); - beta_unscaled += a * p; - r += a * q; - old_norm_r2 = norm_r2; - norm_r2 = sum (r ^ 2); - p = -r + (norm_r2 / old_norm_r2) * p; - i = i + 1; - if (verbose) print ("Iteration " + i + ": ||r|| / ||r init|| = " + sqrt (norm_r2 / norm_r2_initial)); + q += lambda * p + a = norm_r2 / sum(p * q) + beta_unscaled += a * p + r += a * q + old_norm_r2 = norm_r2 + norm_r2 = sum(r ^ 2) + p = -r + (norm_r2 / old_norm_r2) * p + i = i + 1 + if(verbose){ + print("Iteration " + i + ": ||r|| / ||r init|| = " + + sqrt(norm_r2 / norm_r2_initial)) + } } - if (i >= max_iteration) { - if (verbose) print ("Warning: the maximum number of iterations has been reached."); + if(verbose & i >= max_iteration){ + print("Warning: the maximum number of iterations has been reached.") } # END THE CONJUGATE GRADIENT ALGORITHM - if (intercept_status == 2) { - beta = scale_X * beta_unscaled; - beta [m_ext, ] = beta [m_ext, ] + t(shift_X) %*% beta_unscaled; - } else { - beta = beta_unscaled; - } - - if (verbose) { - print ("Computing the statistics..."); - - avg_tot = sum (y) / n; - ss_tot = sum (y ^ 2); - ss_avg_tot = ss_tot - n * avg_tot ^ 2; - var_tot = ss_avg_tot / (n - 1); - y_residual = y - X %*% beta; - avg_res = sum (y_residual) / n; - ss_res = sum (y_residual ^ 2); - ss_avg_res = ss_res - n * avg_res ^ 2; - - R2 = 1 - ss_res / ss_avg_tot; - dispersion = ifelse(n > m_ext, ss_res / (n - m_ext), NaN); - adjusted_R2 = ifelse(n > m_ext, 1 - dispersion / (ss_avg_tot / (n - 1)), NaN); - - R2_nobias = 1 - ss_avg_res / ss_avg_tot; - deg_freedom = n - m - 1; - if (deg_freedom > 0) { - var_res = ss_avg_res / deg_freedom; - adjusted_R2_nobias = 1 - var_res / (ss_avg_tot / (n - 1)); - } else { - var_res = NaN; - adjusted_R2_nobias = NaN; - print ("Warning: zero or 
negative number of degrees of freedom."); - } - - R2_vs_0 = 1 - ss_res / ss_tot; - adjusted_R2_vs_0 = ifelse(n > m, 1 - (ss_res / (n - m)) / (ss_tot / n), NaN); - - print ("AVG_TOT_Y, " + avg_tot + # Average of the response value Y - "\nSTDEV_TOT_Y, " + sqrt (var_tot) + # Standard Deviation of the response value Y - "\nAVG_RES_Y, " + avg_res + # Average of the residual Y - pred(Y|X), i.e. residual bias - "\nSTDEV_RES_Y, " + sqrt (var_res) + # Standard Deviation of the residual Y - pred(Y|X) - "\nDISPERSION, " + dispersion + # GLM-style dispersion, i.e. residual sum of squares / # d.f. - "\nR2, " + R2 + # R^2 of residual with bias included vs. total average - "\nADJUSTED_R2, " + adjusted_R2 + # Adjusted R^2 of residual with bias included vs. total average - "\nR2_NOBIAS, " + R2_nobias + # R^2 of residual with bias subtracted vs. total average<Paste> - "\nADJUSTED_R2_NOBIAS, " + adjusted_R2_nobias); # Adjusted R^2 of residual with bias subtracted vs. total average - if (intercept_status == 0) { - print ("R2_VS_0, " + R2_vs_0 + # R^2 of residual with bias included vs. zero constant - "\nADJUSTED_R2_VS_0, " + adjusted_R2_vs_0); # Adjusted R^2 of residual with bias included vs. zero constant - } + if(intercept_status == 2){ + beta = scale_X * beta_unscaled + beta[m_ext, ] = beta[m_ext, ] + t(shift_X) %*% beta_unscaled + }else{ + beta = beta_unscaled } - B = beta; + # Set output variable + B = beta } diff --git a/scripts/builtin/lmDS.dml b/scripts/builtin/lmDS.dml index d32e5cbe7a..9c60b33917 100644 --- a/scripts/builtin/lmDS.dml +++ b/scripts/builtin/lmDS.dml @@ -5,7 +5,7 @@ # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance +# "License") you may not use this file except in compliance # with the License. 
You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 @@ -28,54 +28,52 @@ # icpt Intercept presence, shifting and rescaling the columns of X # reg Regularization constant (lambda) for L2-regularization. set to nonzero # for highly dependant/sparse/numerous features -# tol Tolerance (epsilon); conjugate gradient procedure terminates early if L2 +# tol Tolerance (epsilon) conjugate gradient procedure terminates early if L2 # norm of the beta-residual is less than tolerance * its initial norm # maxi Maximum number of conjugate gradient iterations. 0 = no maximum # verbose If TRUE print messages are activated # -------------------------------------------------------------------------------------- # # OUTPUT: -# --------------------------------------------------------------------------------------------- -# B The model fit -# --------------------------------------------------------------------------------------------- +# --------------------------------------------------------------- +# B The model fit beta that can be used as input in lmPredict +# --------------------------------------------------------------- -m_lmDS = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0, Double reg = 1e-7, - Boolean verbose = TRUE) return (Matrix[Double] B) { - intercept_status = icpt; - regularization = reg; +m_lmDS = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0, + Double reg = 1e-7, Boolean verbose = TRUE) return (Matrix[Double] B) { + intercept_status = icpt + regularization = reg - n = nrow (X); - m = ncol (X); - ones_n = matrix (1, rows = n, cols = 1); - zero_cell = matrix (0, rows = 1, cols = 1); + n = nrow(X) + m = ncol(X) # Introduce the intercept, shift and rescale the columns of X if needed - - m_ext = m; - if (intercept_status == 1 | intercept_status == 2) # add the intercept column - { - X = cbind (X, ones_n); - m_ext = ncol (X); + # add the intercept column + if(intercept_status == 1 | intercept_status == 2){ + 
ones_n = matrix (1, rows = n, cols = 1) + X = cbind (X, ones_n) + m_ext = ncol (X) + scale_lambda = matrix (1, rows = m_ext, cols = 1) + scale_lambda [m_ext, 1] = 0 } - - scale_lambda = matrix (1, rows = m_ext, cols = 1); - if (intercept_status == 1 | intercept_status == 2) - { - scale_lambda [m_ext, 1] = 0; + else { + scale_lambda = matrix (1, rows = m, cols = 1) + m_ext = m } - if (intercept_status == 2) # scale-&-shift X columns to mean 0, variance 1 - { # Important assumption: X [, m_ext] = ones_n - avg_X_cols = t(colSums(X)) / n; - var_X_cols = (t(colSums (X ^ 2)) - n * (avg_X_cols ^ 2)) / (n - 1); - is_unsafe = (var_X_cols <= 0); - scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe); - scale_X [m_ext, 1] = 1; - shift_X = - avg_X_cols * scale_X; - shift_X [m_ext, 1] = 0; - } else { - scale_X = matrix (1, rows = m_ext, cols = 1); - shift_X = matrix (0, rows = m_ext, cols = 1); + # scale-&-shift X columns to mean 0, variance 1 + # Important assumption: X [, m_ext] = ones_n + if(intercept_status == 2){ + avg_X_cols = t(colSums(X)) / n + var_X_cols = (t(colSums(X ^ 2)) - n * (avg_X_cols ^ 2)) / (n - 1) + is_unsafe = (var_X_cols <= 0) + scale_X = 1.0 / sqrt(var_X_cols * (1 - is_unsafe) + is_unsafe) + scale_X [m_ext, 1] = 1 + shift_X = - avg_X_cols * scale_X + shift_X [m_ext, 1] = 0 + }else{ + scale_X = matrix(1, rows = m_ext, cols = 1) + shift_X = matrix(0, rows = m_ext, cols = 1) } # Henceforth, if intercept_status == 2, we use "X %*% (SHIFT/SCALE TRANSFORM)" @@ -84,79 +82,37 @@ m_lmDS = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0, Double r # in which it occurs. 
To avoid materializing a large matrix, we rewrite it: # # ssX_A = (SHIFT/SCALE TRANSFORM) %*% A --- is rewritten as: - # ssX_A = diag (scale_X) %*% A; - # ssX_A [m_ext, ] = ssX_A [m_ext, ] + t(shift_X) %*% A; + # ssX_A = diag (scale_X) %*% A + # ssX_A [m_ext, ] = ssX_A [m_ext, ] + t(shift_X) %*% A # # tssX_A = t(SHIFT/SCALE TRANSFORM) %*% A --- is rewritten as: - # tssX_A = diag (scale_X) %*% A + shift_X %*% A [m_ext, ]; + # tssX_A = diag (scale_X) %*% A + shift_X %*% A [m_ext, ] - lambda = scale_lambda * regularization; + lambda = scale_lambda * regularization # BEGIN THE DIRECT SOLVE ALGORITHM (EXTERNAL CALL) - A = t(X) %*% X; - b = t(X) %*% y; - if (intercept_status == 2) { - A = t(diag (scale_X) %*% A + shift_X %*% A [m_ext, ]); - A = diag (scale_X) %*% A + shift_X %*% A [m_ext, ]; - b = diag (scale_X) %*% b + shift_X %*% b [m_ext, ]; + A = t(X) %*% X + b = t(X) %*% y + if(intercept_status == 2){ + A = t(diag(scale_X) %*% A + shift_X %*% A[m_ext, ]) + A = diag(scale_X) %*% A + shift_X %*% A[m_ext, ] + b = diag(scale_X) %*% b + shift_X %*% b[m_ext, ] } - A = A + diag (lambda); + A = A + diag(lambda) - if (verbose) - print ("Calling the Direct Solver..."); + if(verbose){ + print("Calling the Direct Solver...") + } - beta_unscaled = solve (A, b); + beta_unscaled = solve(A, b) # END THE DIRECT SOLVE ALGORITHM - if (intercept_status == 2) { - beta = scale_X * beta_unscaled; - beta [m_ext, ] = beta [m_ext, ] + t(shift_X) %*% beta_unscaled; - } else { - beta = beta_unscaled; - } - - if (verbose) { - print ("Computing the statistics..."); - avg_tot = sum (y) / n; - ss_tot = sum (y ^ 2); - ss_avg_tot = ss_tot - n * avg_tot ^ 2; - var_tot = ss_avg_tot / (n - 1); - y_residual = y - X %*% beta; - avg_res = sum (y_residual) / n; - ss_res = sum (y_residual ^ 2); - ss_avg_res = ss_res - n * avg_res ^ 2; - - R2 = 1 - ss_res / ss_avg_tot; - dispersion = ifelse(n > m_ext, ss_res / (n - m_ext), NaN); - adjusted_R2 = ifelse(n > m_ext, 1 - dispersion / (ss_avg_tot / (n - 1)), NaN); 
- - R2_nobias = 1 - ss_avg_res / ss_avg_tot; - deg_freedom = n - m - 1; - if (deg_freedom > 0) { - var_res = ss_avg_res / deg_freedom; - adjusted_R2_nobias = 1 - var_res / (ss_avg_tot / (n - 1)); - } else { - var_res = NaN; - adjusted_R2_nobias = NaN; - print ("Warning: zero or negative number of degrees of freedom."); - } - - R2_vs_0 = 1 - ss_res / ss_tot; - adjusted_R2_vs_0 = ifelse(n > m, 1 - (ss_res / (n - m)) / (ss_tot / n), NaN); - - print ("AVG_TOT_Y, " + avg_tot + # Average of the response value Y - "\nSTDEV_TOT_Y, " + sqrt (var_tot) + # Standard Deviation of the response value Y - "\nAVG_RES_Y, " + avg_res + # Average of the residual Y - pred(Y|X), i.e. residual bias - "\nSTDEV_RES_Y, " + sqrt (var_res) + # Standard Deviation of the residual Y - pred(Y|X) - "\nDISPERSION, " + dispersion + # GLM-style dispersion, i.e. residual sum of squares / # d.f. - "\nR2, " + R2 + # R^2 of residual with bias included vs. total average - "\nADJUSTED_R2, " + adjusted_R2 + # Adjusted R^2 of residual with bias included vs. total average - "\nR2_NOBIAS, " + R2_nobias + # R^2 of residual with bias subtracted vs. total average<Paste> - "\nADJUSTED_R2_NOBIAS, " + adjusted_R2_nobias); # Adjusted R^2 of residual with bias subtracted vs. total average - if (intercept_status == 0) { - print ("R2_VS_0, " + R2_vs_0 + # R^2 of residual with bias included vs. zero constant - "\nADJUSTED_R2_VS_0, " + adjusted_R2_vs_0); # Adjusted R^2 of residual with bias included vs. zero constant - } + if(intercept_status == 2){ + beta = scale_X * beta_unscaled + beta[m_ext, ] = beta[m_ext, ] + t(shift_X) %*% beta_unscaled + }else{ + beta = beta_unscaled } - B = beta; + # Set output variable + B = beta } diff --git a/scripts/builtin/lmPredictStats.dml b/scripts/builtin/lmPredictStats.dml index 48f4b8c9a9..9a2d57c892 100644 --- a/scripts/builtin/lmPredictStats.dml +++ b/scripts/builtin/lmPredictStats.dml @@ -23,33 +23,56 @@ # measures for regression problems. 
# # INPUT: -# ------------------------------------------------------------------------------ -# yhat column vector of predicted response values y -# ytest column vector of actual response values y -# lm indicator if used for linear regression model -# ------------------------------------------------------------------------------ +# ------------------------------------------------------------ +# yhat A column vector of predicted response values y +# ytest A column vector of actual response values y +# lm An indicator if used for linear regression model +# ------------------------------------------------------------ # # OUTPUT: -# ------------------------------------------------------------------------------ -# R column vector holding avg_res, ss_avg_res, and R2 -# ------------------------------------------------------------------------------ +# ------------------------------------------------------------ +# R A column vector holding avg_res, ss_avg_res, and R2 +# ------------------------------------------------------------ m_lmPredictStats = function(Matrix[Double] yhat, Matrix[Double] ytest, Boolean lm) return (Matrix[Double] R) { + print ("\n\nComputing the statistics..."); + n = nrow(ytest) + + sum_y_test = sum(ytest) + mean_y_test = sum_y_test / n + sum_sq_y_test = sum(ytest^2) + y_residual = ytest - yhat; - avg_res = sum(y_residual) / nrow(ytest); + avg_res = sum(y_residual) / n; ss_res = sum(y_residual^2); - ss_avg_res = ss_res - nrow(ytest) * avg_res^2; + ss_avg_res = ss_res - n * avg_res^2; if( lm ) - R2 = 1 - ss_res / (sum(ytest^2) - nrow(ytest) * (sum(ytest)/nrow(ytest))^2); + R2 = 1 - ss_res / (sum_sq_y_test - n * (sum_y_test/n)^2); else - R2 = sum((yhat - mean(ytest))^2) / sum((ytest - mean(ytest))^2) - print("\nAccuracy:" + - "\n--sum(ytest) = " + sum(ytest) + - "\n--sum(yhat) = " + sum(yhat) + - "\n--AVG_RES_Y: " + avg_res + - "\n--SS_AVG_RES_Y: " + ss_avg_res + - "\n--R2: " + R2 ); + R2 = sum((yhat - mean_y_test)^2) / sum((ytest - mean_y_test)^2) + + 
avg_tot = sum_y_test / n; + ss_tot = sum_sq_y_test; + ss_avg_tot = ss_tot - n * avg_tot ^ 2; + var_tot = ss_avg_tot / (n - 1); + R2_nobias = 1 - ss_avg_res / ss_avg_tot; + + print("sum(ytest) = " + sum_y_test) + print("sum(yhat) = " + sum(yhat)) + print("SS_AVG_RES_Y: " + ss_avg_res) + # Average of the response value Y + print("AVG_TOT_Y, " + avg_tot) + # Standard Deviation of the response value Y + print("STDEV_TOT_Y, " + sqrt(var_tot)) + # Average of the residual Y - pred(Y|X), i.e. residual bias + print("AVG_RES_Y, " + avg_res) + # R^2 of residual with bias included vs. total average + print("R2, " + R2) + # R^2 of residual with bias subtracted vs. total average + print("R2_NOBIAS, " + R2_nobias) + # Adjusted R^2 of residual with bias subtracted vs. total average + R = as.matrix(list(avg_res, ss_avg_res, R2)); } diff --git a/scripts/builtin/multiLogRegPredict.dml b/scripts/builtin/multiLogRegPredict.dml index 16bf08316a..5f177c5730 100644 --- a/scripts/builtin/multiLogRegPredict.dml +++ b/scripts/builtin/multiLogRegPredict.dml @@ -37,14 +37,10 @@ # accuracy scalar value of accuracy # ---------------------------------------------------------------------------------------- -m_multiLogRegPredict = function(Matrix[Double] X, Matrix[Double] B, Matrix[Double] Y, Boolean verbose = FALSE) +m_multiLogRegPredict = function(Matrix[Double] X, Matrix[Double] B, + Matrix[Double] Y = matrix(0,1,1), Boolean verbose = FALSE) return(Matrix[Double] M, Matrix[Double] predicted_Y, Double accuracy) { - if(min(Y) <= 0) { - print("multiLogRegPredict: class labels should be greater than " - + "zero - converting all labels <= 0 to max(Y)+1"); - Y = ifelse(Y <= 0, max(Y) + 1, Y); - } if(ncol(X) < nrow(B)-1) stop("multiLogRegPredict: mismatching ncol(X) and nrow(B): "+ncol(X)+" "+nrow(B)); @@ -62,8 +58,15 @@ m_multiLogRegPredict = function(Matrix[Double] X, Matrix[Double] B, Matrix[Doubl M = probabilities(linear_terms); # compute the probabilities on unknown data predicted_Y = 
rowIndexMax(M); # extract the class labels - if(nrow(Y) != 0) + if(nrow(Y) != 0){ + if(min(Y) <= 0) { + print("multiLogRegPredict: class labels should be greater than " + + "zero - converting all labels <= 0 to max(Y)+1"); + Y = ifelse(Y <= 0, max(Y) + 1, Y); + } + accuracy = sum((predicted_Y - Y) == 0) / nrow(Y) * 100; + } if(verbose) print("Accuracy (%): " + accuracy); diff --git a/scripts/builtin/normalizeApply.dml b/scripts/builtin/normalizeApply.dml index e4c247ead4..abb3560b0a 100644 --- a/scripts/builtin/normalizeApply.dml +++ b/scripts/builtin/normalizeApply.dml @@ -27,8 +27,8 @@ # INPUT: # ------------------------------------------------ # X Input feature matrix of shape n-by-m -# cmin Colunm minima of shape 1-by-m -# cmax Column maxima of shape 1-by-m +# cmin Column min of shape 1-by-m +# cmax Column max of shape 1-by-m # ------------------------------------------------ # # OUTPUT: @@ -36,10 +36,12 @@ # Y Modified output feature matrix of shape n-by-m # ------------------------------------------------ - m_normalizeApply = function(Matrix[Double] X, Matrix[Double] cmin, Matrix[Double] cmax) return (Matrix[Double] Y) { + diff = (cmax - cmin) + # avoid division by zero and divide by 1 instead + diff = replace(target=diff, pattern=0, replacement=1); # normalize features to given range ([0,1] if indeed min/max) - Y = (X - cmin) / (cmax - cmin); + Y = (X - cmin) / diff; } diff --git a/scripts/builtin/scaleApply.dml b/scripts/builtin/scaleApply.dml index 18f5d729c4..d636cd212a 100644 --- a/scripts/builtin/scaleApply.dml +++ b/scripts/builtin/scaleApply.dml @@ -36,8 +36,10 @@ m_scaleApply = function(Matrix[Double] X, Matrix[Double] Centering, Matrix[Double] ScaleFactor) return (Matrix[Double] Y) { - centered = ifelse(nrow(Centering) > 0 & ncol(Centering) > 0, X - Centering, X) + if(nrow(Centering) > 0 & ncol(Centering) > 0) + Y = X - Centering + else + Y = X if(nrow(ScaleFactor) > 0 & ncol(ScaleFactor) > 0) - Y = centered / ScaleFactor - else Y = centered + Y = Y 
/ ScaleFactor } diff --git a/scripts/builtin/scaleMinMax.dml b/scripts/builtin/scaleMinMax.dml index df2c75d459..b420936501 100644 --- a/scripts/builtin/scaleMinMax.dml +++ b/scripts/builtin/scaleMinMax.dml @@ -20,6 +20,8 @@ #------------------------------------------------------------- # This function performs min-max normalization (rescaling to [0,1]). +# +# This function is deprecated, use normalize instead. # # INPUT: # --------------------------------------------- @@ -28,11 +30,19 @@ # # OUTPUT: # -------------------------------------------- -# Y Scaled output matrix +# Y Scaled output matrix # -------------------------------------------- m_scaleMinMax = function(Matrix[Double] X) return (Matrix[Double] Y) { - Y = (X - colMins(X)) / (colMaxs(X) - colMins(X)); + print("Deprecated scaleMinMax use normalize instead") + # compute feature ranges for transformations + cmin = colMins(X); + cmax = colMaxs(X); + diff = (cmax - cmin) + # avoid division by zero and divide by 1 instead + diff = replace(target=diff, pattern=0, replacement=1); + # normalize features to given range ([0,1] if indeed min/max) + Y = (X - cmin) / diff; } diff --git a/src/main/java/org/apache/sysds/parser/DMLProgram.java b/src/main/java/org/apache/sysds/parser/DMLProgram.java index 99de236651..5b5b5be60f 100644 --- a/src/main/java/org/apache/sysds/parser/DMLProgram.java +++ b/src/main/java/org/apache/sysds/parser/DMLProgram.java @@ -20,6 +20,7 @@ package org.apache.sysds.parser; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; diff --git a/src/main/java/org/apache/sysds/parser/FunctionStatementBlock.java b/src/main/java/org/apache/sysds/parser/FunctionStatementBlock.java index ed70c69e0b..73b63d336c 100644 --- a/src/main/java/org/apache/sysds/parser/FunctionStatementBlock.java +++ b/src/main/java/org/apache/sysds/parser/FunctionStatementBlock.java @@ -82,14 +82,16 @@ public class FunctionStatementBlock extends 
StatementBlock implements FunctionBl for (DataIdentifier returnValue : returnValues){ DataIdentifier curr = ids.getVariable(returnValue.getName()); if (curr == null){ - raiseValidateError("for function " + fstmt.getName() + ", return variable " + returnValue.getName() + " must be defined in function ", conditional); + raiseValidateError("for function " + fstmt.getName() + ", return variable " + + returnValue.getName() + " must be defined in function ", conditional); } - - if (curr.getDataType() != DataType.UNKNOWN && !curr.getDataType().equals(returnValue.getDataType()) ){ - raiseValidateError("for function " + fstmt.getName() + ", return variable " + curr.getName() + " data type of " + curr.getDataType() + " does not match data type in function signature of " + returnValue.getDataType(), conditional); + else if (curr.getDataType() != DataType.UNKNOWN && !curr.getDataType().equals(returnValue.getDataType()) ){ + raiseValidateError("for function " + fstmt.getName() + ", return variable " + + curr.getName() + " data type of " + curr.getDataType() + + " does not match data type in function signature of " + + returnValue.getDataType(), conditional); } - - if (curr.getValueType() != ValueType.UNKNOWN && returnValue.getValueType() != ValueType.UNKNOWN + else if (curr.getValueType() != ValueType.UNKNOWN && returnValue.getValueType() != ValueType.UNKNOWN && !curr.getValueType().equals(returnValue.getValueType())){ // attempt to convert value type: handle conversion from scalar DOUBLE or INT diff --git a/src/test/java/org/apache/sysds/test/functions/federated/algorithms/FederatedLmPipeline.java b/src/test/java/org/apache/sysds/test/functions/federated/algorithms/FederatedLmPipeline.java index 96c95f5d1e..8b02246e0a 100644 --- a/src/test/java/org/apache/sysds/test/functions/federated/algorithms/FederatedLmPipeline.java +++ b/src/test/java/org/apache/sysds/test/functions/federated/algorithms/FederatedLmPipeline.java @@ -19,6 +19,8 @@ package 
org.apache.sysds.test.functions.federated.algorithms; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.sysds.common.Types; import org.apache.sysds.common.Types.ExecMode; import org.apache.sysds.runtime.instructions.InstructionUtils; @@ -35,6 +37,8 @@ import org.junit.Test; @net.jcip.annotations.NotThreadSafe public class FederatedLmPipeline extends AutomatedTestBase { + protected static final Log LOG = LogFactory.getLog(FederatedLmPipeline.class.getName()); + private final static String TEST_DIR = "functions/federated/"; private final static String TEST_NAME1 = "FederatedLmPipeline"; private final static String TEST_NAME2 = "FederatedLmPipeline4Workers"; @@ -121,7 +125,7 @@ public class FederatedLmPipeline extends AutomatedTestBase { // Run actual dml script with federated matrix fullDMLScriptName = HOME + TEST_NAME + ".dml"; - programArgs = new String[] {"-stats", "-nvargs", "in_X1=" + TestUtils.federatedAddress(port1, input("X1")), + programArgs = new String[] {"-stats", "100", "-nvargs", "in_X1=" + TestUtils.federatedAddress(port1, input("X1")), "in_X2=" + TestUtils.federatedAddress(port2, input("X2")), "in_X3=" + TestUtils.federatedAddress(port3, input("X3")), "in_X4=" + TestUtils.federatedAddress(port4, input("X4")), "rows=" + rows, "cols=" + (cols + 1), @@ -132,9 +136,10 @@ public class FederatedLmPipeline extends AutomatedTestBase { compareResults(1e-2); TestUtils.shutdownThreads(t1, t2, t3, t4); + // check correct federated operations Assert.assertTrue(Statistics.getCPHeavyHitterCount("fed_mmchain")>10); - Assert.assertTrue(Statistics.getCPHeavyHitterCount("fed_ba+*")==3); + Assert.assertTrue(Statistics.getCPHeavyHitterCount("fed_ba+*") == 2); } finally { resetExecMode(oldExec); diff --git a/src/test/scripts/functions/builtin/dbscan.dml b/src/test/scripts/functions/builtin/dbscan.dml index c28eb59a55..bde15f5835 100644 --- a/src/test/scripts/functions/builtin/dbscan.dml +++ 
b/src/test/scripts/functions/builtin/dbscan.dml @@ -22,5 +22,5 @@ X = read($X); eps = as.double($eps); minPts = as.integer($minPts); -[Y, model, eps] = dbscan(X, eps, minPts); +[Y, model] = dbscan(X, eps, minPts); write(Y, $Y); \ No newline at end of file diff --git a/src/test/scripts/functions/builtin/dbscanApply.dml b/src/test/scripts/functions/builtin/dbscanApply.dml index 07a1f0d6b1..ee4837a975 100644 --- a/src/test/scripts/functions/builtin/dbscanApply.dml +++ b/src/test/scripts/functions/builtin/dbscanApply.dml @@ -24,6 +24,6 @@ Y = read($Y) eps = as.double($eps); minPts = as.integer($minPts); -[indices, clusterModel, eps] = dbscan(X = X, eps = eps, minPts = minPts); +[indices, clusterModel] = dbscan(X = X, eps = eps, minPts = minPts); [C, Z] = dbscanApply(X=Y, clusterModel = clusterModel, eps = eps); write(Z, $Z);
