This is an automated email from the ASF dual-hosted git repository. baunsgaard pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
commit a0254e5d2c204a1f841226eb399620f2b1bd97e6 Author: baunsgaard <[email protected]> AuthorDate: Fri Jun 10 16:18:33 2022 +0200 [DOCS] Update builtin scripts docs --- scripts/builtin/WoE.dml | 9 +-- scripts/builtin/WoEApply.dml | 8 +- scripts/builtin/abstain.dml | 16 ++-- scripts/builtin/confusionMatrix.dml | 7 +- scripts/builtin/correctTypos.dml | 12 +-- scripts/builtin/correctTyposApply.dml | 12 +-- scripts/builtin/dbscanApply.dml | 4 - scripts/builtin/denialConstraints.dml | 4 +- scripts/builtin/fit_pipeline.dml | 4 - scripts/builtin/fixInvalidLengthsApply.dml | 4 - scripts/builtin/glm.dml | 118 +++++++++++++++-------------- scripts/builtin/glmPredict.dml | 64 +++++++++------- scripts/builtin/hyperband.dml | 6 +- scripts/builtin/img_brightness.dml | 5 +- scripts/builtin/knn.dml | 4 +- scripts/builtin/matrixProfile.dml | 14 ++-- scripts/builtin/steplm.dml | 17 +++-- 17 files changed, 154 insertions(+), 154 deletions(-) diff --git a/scripts/builtin/WoE.dml b/scripts/builtin/WoE.dml index 003d9b48a6..7bc938eedd 100644 --- a/scripts/builtin/WoE.dml +++ b/scripts/builtin/WoE.dml @@ -30,13 +30,12 @@ # # OUTPUT: # ------------------------------------------------ -# X --- -# Y --- -# entropyMatrix --- +# F Weighted X matrix where the entropy mask is applied +# entropyMatrix A entropy matrix to apply to data # ------------------------------------------------ m_WoE = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] mask) -return (Matrix[Double] X, Matrix[Double] Y, Matrix[Double] entropyMatrix) { +return (Matrix[Double] F, Matrix[Double] entropyMatrix) { tempX = replace(target=X, pattern=NaN, replacement=1) entropyMatrix = matrix(0, rows=ncol(tempX), cols = max((tempX*mask))) @@ -53,7 +52,7 @@ return (Matrix[Double] X, Matrix[Double] Y, Matrix[Double] entropyMatrix) { } } - X = WoEApply(X, Y, entropyMatrix) + F = WoEApply(X, Y, entropyMatrix) } diff --git a/scripts/builtin/WoEApply.dml b/scripts/builtin/WoEApply.dml index c27fae0d05..6f86a266d3 100644 --- a/scripts/builtin/WoEApply.dml +++ b/scripts/builtin/WoEApply.dml @@ -30,12 +30,12 @@ # # OUTPUT: # ------------------------------------------------ -# X --- +# F Weighted X matrix where the entropy mask is applied # ------------------------------------------------ m_WoEApply = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] entropyMatrix) -return (Matrix[Double] X) { - +return (Matrix[Double] F) { + F = matrix(1, nRow(X), nCol(X)) # allocate dense output matrix for(i in 1:ncol(X)) { if(sum(abs(entropyMatrix[i])) > 0) @@ -46,7 +46,7 @@ return (Matrix[Double] X) { resp = matrix(0, nrow(L), idx) resp = (resp + t(seq(1, idx))) == L resp = resp * entropy - X[, i] = rowSums(resp) + F[, i] = rowSums(resp) } } diff --git a/scripts/builtin/abstain.dml b/scripts/builtin/abstain.dml index 6d9035101d..5a4c354327 100644 --- a/scripts/builtin/abstain.dml +++ b/scripts/builtin/abstain.dml @@ -24,16 +24,16 @@ # # INPUT: # ------------------------------------------------------------------------------------- -# X Location to read the matrix of feature vectors -# Y Location to read the matrix with category labels -# threshold --- +# X matrix of feature vectors +# Y matrix with category labels +# threshold threshold to clear otherwise return X and Y unmodified # verbose flag specifying if logging information should be printed # ------------------------------------------------------------------------------------- # # OUTPUT: # ------------------------------------------------------------------------------------- -# Xout --- -# Yout --- +# Xout abstained output X +# Yout abstained output Y # ------------------------------------------------------------------------------------- m_abstain = function(Matrix[Double] X, Matrix[Double] Y, Double threshold, Boolean verbose = FALSE) @@ -41,16 +41,14 @@ return (Matrix[Double] Xout, Matrix[Double] Yout) { Xout = X Yout = Y - # for(i in 1:100) { if(min(Y) != max(Y)) { - betas = multiLogReg(X=X, Y=Y, icpt=1, reg=1e-4, maxi=100, maxii=0, verbose=FALSE) + betas = multiLogReg(X=X, Y=Y, icpt=1, reg=1e-4, maxi=100, maxii=0, verbose=verbose) [prob, yhat, accuracy] = multiLogRegPredict(X, betas, Y, FALSE) - # abstain = cbind(X, Y) + inc = ((yhat != Y) & (rowMaxs(prob) > threshold)) if(sum(inc) > 0) { - # print("inc vector "+toString(inc)) Xout = removeEmpty(target = X, margin = "rows", select = (inc == 0) ) Yout = removeEmpty(target = Y, margin = "rows", select = (inc == 0) ) } diff --git a/scripts/builtin/confusionMatrix.dml b/scripts/builtin/confusionMatrix.dml index a56a936cc9..c15b82621c 100644 --- a/scripts/builtin/confusionMatrix.dml +++ b/scripts/builtin/confusionMatrix.dml @@ -24,17 +24,14 @@ # After which, it calculates and returns the sum of classifications # and the average of each true class. # +# .. code-block:: txt +# # True Labels # 1 2 # 1 TP | FP # Predictions ----+---- # 2 FN | TN # -# TP = True Positives -# FP = False Positives -# FN = False Negatives -# TN = True Negatives -# # INPUT: # -------------------------------------------------------------------------------- # P vector of Predictions diff --git a/scripts/builtin/correctTypos.dml b/scripts/builtin/correctTypos.dml index 2fddfaec1b..01d837a7a2 100644 --- a/scripts/builtin/correctTypos.dml +++ b/scripts/builtin/correctTypos.dml @@ -24,11 +24,13 @@ # and simply swaps strings that do not occur often with similar strings that # occur more often # -# References: -# Fred J. Damerau. 1964. -# A technique for computer detection and correction of spelling errors. -# Commun. ACM 7, 3 (March 1964), 171–176. -# DOI:https://doi.org/10.1145/363958.363994 +# .. code-block:: txt +# +# References: +# Fred J. Damerau. 1964. +# A technique for computer detection and correction of spelling errors. +# Commun. ACM 7, 3 (March 1964), 171–176. +# DOI:https://doi.org/10.1145/363958.363994 # # INPUT: # ---------------------------------------------------------------------------------------- diff --git a/scripts/builtin/correctTyposApply.dml b/scripts/builtin/correctTyposApply.dml index 050bb8132b..3ca4635bfa 100644 --- a/scripts/builtin/correctTyposApply.dml +++ b/scripts/builtin/correctTyposApply.dml @@ -24,11 +24,13 @@ # and simply swaps strings that do not occur often with similar strings that # occur more often # -# References: -# Fred J. Damerau. 1964. -# A technique for computer detection and correction of spelling errors. -# Commun. ACM 7, 3 (March 1964), 171–176. -# DOI:https://doi.org/10.1145/363958.363994 +# .. code-block:: txt +# +# References: +# Fred J. Damerau. 1964. +# A technique for computer detection and correction of spelling errors. +# Commun. ACM 7, 3 (March 1964), 171–176. +# DOI:https://doi.org/10.1145/363958.363994 # # TODO: future: add parameter for list of words that are sure to be correct # diff --git a/scripts/builtin/dbscanApply.dml b/scripts/builtin/dbscanApply.dml index 4a7eb7e6ed..e3ab9723cb 100644 --- a/scripts/builtin/dbscanApply.dml +++ b/scripts/builtin/dbscanApply.dml @@ -23,8 +23,6 @@ # # INPUT: # --------------------------------------------- -# NAME MEANING -# --------------------------------------------- # X The input Matrix to do outlier detection on. # clusterModel Model of clusters to predict outliers against. # eps Maximum distance between two points for one to be considered reachable for the other. @@ -32,8 +30,6 @@ # # OUTPUT: # ---------------------------------------------- -# NAME MEANING -# ---------------------------------------------- # outlierPoints Predicted outliers # ---------------------------------------------- diff --git a/scripts/builtin/denialConstraints.dml b/scripts/builtin/denialConstraints.dml index 23453979e1..d2dc3cfb40 100644 --- a/scripts/builtin/denialConstraints.dml +++ b/scripts/builtin/denialConstraints.dml @@ -21,6 +21,8 @@ # This function considers some constraints indicating statements that can NOT happen in the data (denial constraints). # +# .. code-block:: txt +# # EXAMPLE: # dataFrame: # @@ -48,7 +50,7 @@ # 4 variableCompare TRUE discipline B yrs.service > yrs.since.phd # # -# Example: explanation of constraint 2 --> it can't happen that one professor of rank Prof has more years of service than other, but lower salary. +# Example: explanation of constraint 2 --> it can't happen that one professor of rank Prof has more years of service than other, but lower salary. # # INPUT: # ---------------------------------------------------------------------------------------------------- diff --git a/scripts/builtin/fit_pipeline.dml b/scripts/builtin/fit_pipeline.dml index 4b4665e59e..96023f7b49 100644 --- a/scripts/builtin/fit_pipeline.dml +++ b/scripts/builtin/fit_pipeline.dml @@ -25,8 +25,6 @@ # # INPUT: # ------------------------------------------------------------------------------- -# NAME MEANING -# ------------------------------------------------------------------------------- # trainData --- # testData --- # metaData --- @@ -41,8 +39,6 @@ # # OUTPUT: # ------------------------------------------------------------------------------------------------ -# NAME MEANING -# ------------------------------------------------------------------------------------------------ # scores --- # ------------------------------------------------------------------------------------------------ diff --git a/scripts/builtin/fixInvalidLengthsApply.dml b/scripts/builtin/fixInvalidLengthsApply.dml index a8c10dc052..e566bcbc68 100644 --- a/scripts/builtin/fixInvalidLengthsApply.dml +++ b/scripts/builtin/fixInvalidLengthsApply.dml @@ -23,8 +23,6 @@ # # INPUT: # ------------------------ -# NAME MEANING -# ------------------------ # X --- # mask --- # ql --- @@ -33,8 +31,6 @@ # # OUTPUT: # ------------------------ -# NAME MEANING -# ------------------------ # out --- # M --- # ------------------------ diff --git a/scripts/builtin/glm.dml b/scripts/builtin/glm.dml index c07a98337a..44b0c8cb1e 100644 --- a/scripts/builtin/glm.dml +++ b/scripts/builtin/glm.dml @@ -25,67 +25,71 @@ # In addition, some GLM statistics are provided as console output by setting verbose=TRUE, one comma-separated name-value # pair per each line, as follows: # -# ---------------------------------------------------------------------------------------------------------------------- -# TERMINATION_CODE A positive integer indicating success/failure as follows: -# 1 = Converged successfully; 2 = Maximum number of iterations reached; -# 3 = Input (X, Y) out of range; 4 = Distribution/link is not supported -# BETA_MIN Smallest beta value (regression coefficient), excluding the intercept -# BETA_MIN_INDEX Column index for the smallest beta value -# BETA_MAX Largest beta value (regression coefficient), excluding the intercept -# BETA_MAX_INDEX Column index for the largest beta value -# INTERCEPT Intercept value, or NaN if there is no intercept (if icpt=0) -# DISPERSION Dispersion used to scale deviance, provided as "disp" input parameter -# or estimated (same as DISPERSION_EST) if the "disp" parameter is <= 0 -# DISPERSION_EST Dispersion estimated from the dataset -# DEVIANCE_UNSCALED Deviance from the saturated model, assuming dispersion == 1.0 -# DEVIANCE_SCALED Deviance from the saturated model, scaled by the DISPERSION value -# ---------------------------------------------------------------------------------------------------------------------- +# .. code-block:: txt # -# The Log file, when requested, contains the following per-iteration variables in CSV format, -# each line containing triple (NAME, ITERATION, VALUE) with ITERATION = 0 for initial values: -# -# ---------------------------------------------------------------------------------------------------------------------- -# NUM_CG_ITERS Number of inner (Conj.Gradient) iterations in this outer iteration -# IS_TRUST_REACHED 1 = trust region boundary was reached, 0 = otherwise -# POINT_STEP_NORM L2-norm of iteration step from old point (i.e. "beta") to new point -# OBJECTIVE The loss function we minimize (i.e. negative partial log-likelihood) -# OBJ_DROP_REAL Reduction in the objective during this iteration, actual value -# OBJ_DROP_PRED Reduction in the objective predicted by a quadratic approximation -# OBJ_DROP_RATIO Actual-to-predicted reduction ratio, used to update the trust region -# GRADIENT_NORM L2-norm of the loss function gradient (NOTE: sometimes omitted) -# LINEAR_TERM_MIN The minimum value of X %*% beta, used to check for overflows -# LINEAR_TERM_MAX The maximum value of X %*% beta, used to check for overflows -# IS_POINT_UPDATED 1 = new point accepted; 0 = new point rejected, old point restored -# TRUST_DELTA Updated trust region size, the "delta" -# ---------------------------------------------------------------------------------------------------------------------- +# -------------------------------------------------------------------------------------------- +# TERMINATION_CODE A positive integer indicating success/failure as follows: +# 1 = Converged successfully; 2 = Maximum number of iterations reached; +# 3 = Input (X, Y) out of range; 4 = Distribution/link is not supported +# BETA_MIN Smallest beta value (regression coefficient), excluding the intercept +# BETA_MIN_INDEX Column index for the smallest beta value +# BETA_MAX Largest beta value (regression coefficient), excluding the intercept +# BETA_MAX_INDEX Column index for the largest beta value +# INTERCEPT Intercept value, or NaN if there is no intercept (if icpt=0) +# DISPERSION Dispersion used to scale deviance, provided as "disp" input parameter +# or estimated (same as DISPERSION_EST) if the "disp" parameter is <= 0 +# DISPERSION_EST Dispersion estimated from the dataset +# DEVIANCE_UNSCALED Deviance from the saturated model, assuming dispersion == 1.0 +# DEVIANCE_SCALED Deviance from the saturated model, scaled by the DISPERSION value +# -------------------------------------------------------------------------------------------- +# +# The Log file, when requested, contains the following per-iteration variables in CSV format, +# each line containing triple (NAME, ITERATION, VALUE) with ITERATION = 0 for initial values: +# +# -------------------------------------------------------------------------------------------- +# NUM_CG_ITERS Number of inner (Conj.Gradient) iterations in this outer iteration +# IS_TRUST_REACHED 1 = trust region boundary was reached, 0 = otherwise +# POINT_STEP_NORM L2-norm of iteration step from old point (i.e. "beta") to new point +# OBJECTIVE The loss function we minimize (i.e. negative partial log-likelihood) +# OBJ_DROP_REAL Reduction in the objective during this iteration, actual value +# OBJ_DROP_PRED Reduction in the objective predicted by a quadratic approximation +# OBJ_DROP_RATIO Actual-to-predicted reduction ratio, used to update the trust region +# GRADIENT_NORM L2-norm of the loss function gradient (NOTE: sometimes omitted) +# LINEAR_TERM_MIN The minimum value of X %*% beta, used to check for overflows +# LINEAR_TERM_MAX The maximum value of X %*% beta, used to check for overflows +# IS_POINT_UPDATED 1 = new point accepted; 0 = new point rejected, old point restored +# TRUST_DELTA Updated trust region size, the "delta" +# -------------------------------------------------------------------------------------------- # # SOME OF THE SUPPORTED GLM DISTRIBUTION FAMILIES # AND LINK FUNCTIONS: # -# dfam vpow link lpow Distribution.link nical? -# ---------------------------------------------------------------------------------------------------------------------- -# 1 0.0 1 -1.0 Gaussian.inverse -# 1 0.0 1 0.0 Gaussian.log -# 1 0.0 1 1.0 Gaussian.id Yes -# 1 1.0 1 0.0 Poisson.log Yes -# 1 1.0 1 0.5 Poisson.sqrt -# 1 1.0 1 1.0 Poisson.id -# 1 2.0 1 -1.0 Gamma.inverse Yes -# 1 2.0 1 0.0 Gamma.log -# 1 2.0 1 1.0 Gamma.id -# 1 3.0 1 -2.0 InvGaussian.1/mu^2 Yes -# 1 3.0 1 -1.0 InvGaussian.inverse -# 1 3.0 1 0.0 InvGaussian.log -# 1 3.0 1 1.0 InvGaussian.id -# 1 * 1 * AnyVariance.AnyLink -# ---------------------------------------------------------------------------------------------------------------------- -# 2 * 1 0.0 Binomial.log -# 2 * 1 0.5 Binomial.sqrt -# 2 * 2 * Binomial.logit Yes -# 2 * 3 * Binomial.probit -# 2 * 4 * Binomial.cloglog -# 2 * 5 * Binomial.cauchit -# ---------------------------------------------------------------------------------------------------------------------- +# .. code-block:: txt +# +# dfam vpow link lpow Distribution.link nical? +# --------------------------------------------------- +# 1 0.0 1 -1.0 Gaussian.inverse +# 1 0.0 1 0.0 Gaussian.log +# 1 0.0 1 1.0 Gaussian.id Yes +# 1 1.0 1 0.0 Poisson.log Yes +# 1 1.0 1 0.5 Poisson.sqrt +# 1 1.0 1 1.0 Poisson.id +# 1 2.0 1 -1.0 Gamma.inverse Yes +# 1 2.0 1 0.0 Gamma.log +# 1 2.0 1 1.0 Gamma.id +# 1 3.0 1 -2.0 InvGaussian.1/mu^2 Yes +# 1 3.0 1 -1.0 InvGaussian.inverse +# 1 3.0 1 0.0 InvGaussian.log +# 1 3.0 1 1.0 InvGaussian.id +# 1 * 1 * AnyVariance.AnyLink +# --------------------------------------------------- +# 2 * 1 0.0 Binomial.log +# 2 * 1 0.5 Binomial.sqrt +# 2 * 2 * Binomial.logit Yes +# 2 * 3 * Binomial.probit +# 2 * 4 * Binomial.cloglog +# 2 * 5 * Binomial.cauchit +# --------------------------------------------------- # # INPUT: # -------------------------------------------------------------------------------------------- @@ -111,7 +115,7 @@ # mii Maximum number of inner (Conjugate Gradient) iterations, 0 = no maximum # verbose if the Algorithm should be verbose # ------------------------------------------------------------------------------------------ -# +# # OUTPUT: # -------------------------------------------------------------------------------------------- # beta Matrix beta, whose size depends on icpt: diff --git a/scripts/builtin/glmPredict.dml b/scripts/builtin/glmPredict.dml index 3c0e09c6ba..cde4e17ea8 100644 --- a/scripts/builtin/glmPredict.dml +++ b/scripts/builtin/glmPredict.dml @@ -22,35 +22,41 @@ # Applies the estimated parameters of a GLM type regression to a new dataset # # Additional statistics are printed one per each line, in the following -# CSV format: NAME,[COLUMN],[SCALED],VALUE -# --- -# NAME is the string identifier for the statistic, see the table below. -# COLUMN is an optional integer value that specifies the Y-column for per-column statistics; -# note that a Binomial/Multinomial one-column Y input is converted into multi-column. -# SCALED is an optional Boolean value (TRUE or FALSE) that tells us whether or not the input -# dispersion parameter (disp) scaling has been applied to this statistic. -# VALUE is the value of the statistic. -# --- -# NAME COLUMN SCALED MEANING -# --------------------------------------------------------------------------------------------- -# LOGLHOOD_Z + Log-Likelihood Z-score (in st.dev's from mean) -# LOGLHOOD_Z_PVAL + Log-Likelihood Z-score p-value -# PEARSON_X2 + Pearson residual X^2 statistic -# PEARSON_X2_BY_DF + Pearson X^2 divided by degrees of freedom -# PEARSON_X2_PVAL + Pearson X^2 p-value -# DEVIANCE_G2 + Deviance from saturated model G^2 statistic -# DEVIANCE_G2_BY_DF + Deviance G^2 divided by degrees of freedom -# DEVIANCE_G2_PVAL + Deviance G^2 p-value -# AVG_TOT_Y + Average of Y column for a single response value -# STDEV_TOT_Y + St.Dev. of Y column for a single response value -# AVG_RES_Y + Average of column residual, i.e. of Y - mean(Y|X) -# STDEV_RES_Y + St.Dev. of column residual, i.e. of Y - mean(Y|X) -# PRED_STDEV_RES + + Model-predicted St.Dev. of column residual -# R2 + R^2 of Y column residual with bias included -# ADJUSTED_R2 + Adjusted R^2 of Y column residual with bias included -# R2_NOBIAS + R^2 of Y column residual with bias subtracted -# ADJUSTED_R2_NOBIAS + Adjusted R^2 of Y column residual with bias subtracted -# --------------------------------------------------------------------------------------------- +# +# .. code-block:: txt +# +# CSV format: NAME,[COLUMN],[SCALED],VALUE +# --- +# NAME is the string identifier for the statistic, see the table below. +# COLUMN is an optional integer value that specifies the Y-column for per-column statistics; +# note that a Binomial/Multinomial one-column Y input is converted into multi-column. +# SCALED is an optional Boolean value (TRUE or FALSE) that tells us whether or not the input +# dispersion parameter (disp) scaling has been applied to this statistic. +# VALUE is the value of the statistic. +# --- +# +# .. code-block:: txt +# +# NAME COLUMN SCALED MEANING +# --------------------------------------------------------------------------------------------- +# LOGLHOOD_Z + Log-Likelihood Z-score (in st.dev's from mean) +# LOGLHOOD_Z_PVAL + Log-Likelihood Z-score p-value +# PEARSON_X2 + Pearson residual X^2 statistic +# PEARSON_X2_BY_DF + Pearson X^2 divided by degrees of freedom +# PEARSON_X2_PVAL + Pearson X^2 p-value +# DEVIANCE_G2 + Deviance from saturated model G^2 statistic +# DEVIANCE_G2_BY_DF + Deviance G^2 divided by degrees of freedom +# DEVIANCE_G2_PVAL + Deviance G^2 p-value +# AVG_TOT_Y + Average of Y column for a single response value +# STDEV_TOT_Y + St.Dev. of Y column for a single response value +# AVG_RES_Y + Average of column residual, i.e. of Y - mean(Y|X) +# STDEV_RES_Y + St.Dev. of column residual, i.e. of Y - mean(Y|X) +# PRED_STDEV_RES + + Model-predicted St.Dev. of column residual +# R2 + R^2 of Y column residual with bias included +# ADJUSTED_R2 + Adjusted R^2 of Y column residual with bias included +# R2_NOBIAS + R^2 of Y column residual with bias subtracted +# ADJUSTED_R2_NOBIAS + Adjusted R^2 of Y column residual with bias subtracted +# --------------------------------------------------------------------------------------------- # # INPUT: # ------------------------------------------------------------------- diff --git a/scripts/builtin/hyperband.dml b/scripts/builtin/hyperband.dml index 3c2614e41c..4eede73070 100644 --- a/scripts/builtin/hyperband.dml +++ b/scripts/builtin/hyperband.dml @@ -23,9 +23,9 @@ # elimination. Through multiple parallel brackets and consecutive trials it will return the hyper parameter combination # which performed best on a validation dataset. A set of hyper parameter combinations is drawn from uniform distributions # with given ranges; Those make up the candidates for hyperband. Notes: -# hyperband is hard-coded for lmCG, and uses lmPredict for validation -# hyperband is hard-coded to use the number of iterations as a resource -# hyperband can only optimize continuous hyperparameters +# hyperband is hard-coded for lmCG, and uses lmPredict for validation +# hyperband is hard-coded to use the number of iterations as a resource +# hyperband can only optimize continuous hyperparameters # # INPUT: # ------------------------------------------------------------------------------------------ diff --git a/scripts/builtin/img_brightness.dml b/scripts/builtin/img_brightness.dml index 965c0641cc..100ccb7588 100644 --- a/scripts/builtin/img_brightness.dml +++ b/scripts/builtin/img_brightness.dml @@ -22,7 +22,6 @@ # The img_brightness-function is an image data augmentation function. It changes the brightness of the image. # # INPUT: - # ----------------------------------------------------------------------------------------- # img_in Input matrix/image # value The amount of brightness to be changed for the image @@ -31,9 +30,7 @@ # # OUTPUT: # ---------------------------------------------------------------------------------------------------------------------- -# NAME TYPE MEANING -# ---------------------------------------------------------------------------------------------------------------------- -# img_out Matrix[Double] Output matrix/image +# img_out Output matrix/image # ---------------------------------------------------------------------------------------------------------------------- m_img_brightness = function(Matrix[Double] img_in, Double value, Integer channel_max) return (Matrix[Double] img_out) { diff --git a/scripts/builtin/knn.dml b/scripts/builtin/knn.dml index 6492e777e3..19d4cfffb0 100644 --- a/scripts/builtin/knn.dml +++ b/scripts/builtin/knn.dml @@ -47,8 +47,8 @@ # # OUTPUT: # --------------------------------------------------------------------------------------------- -# NNR_matrix --- -# CL_matrix --- +# NNR_matrix Applied clusters to X +# CL_matrix Cluster matrix # m_feature_importance Feature importance value # --------------------------------------------------------------------------------------------- diff --git a/scripts/builtin/matrixProfile.dml b/scripts/builtin/matrixProfile.dml index ad1f8d6cf6..a06a8ce57e 100644 --- a/scripts/builtin/matrixProfile.dml +++ b/scripts/builtin/matrixProfile.dml @@ -22,12 +22,14 @@ # Builtin function that computes the MatrixProfile of a time series efficiently # using the SCRIMP++ algorithm. # -# References: -# Yan Zhu et al.. 2018. -# Matrix Profile XI: SCRIMP++: Time Series Motif Discovery at Interactive Speeds. -# 2018 IEEE International Conference on Data Mining (ICDM), 2018, pp. 837-846. -# DOI: 10.1109/ICDM.2018.00099. -# https://www.cs.ucr.edu/~eamonn/SCRIMP_ICDM_camera_ready_updated.pdf +# .. code-block:: txt +# +# References: +# Yan Zhu et al.. 2018. +# Matrix Profile XI: SCRIMP++: Time Series Motif Discovery at Interactive Speeds. +# 2018 IEEE International Conference on Data Mining (ICDM), 2018, pp. 837-846. +# DOI: 10.1109/ICDM.2018.00099. +# https://www.cs.ucr.edu/~eamonn/SCRIMP_ICDM_camera_ready_updated.pdf # # INPUT: # ---------------------------------------------------------------------------------- diff --git a/scripts/builtin/steplm.dml b/scripts/builtin/steplm.dml index 6ed2fbb530..164fd54195 100644 --- a/scripts/builtin/steplm.dml +++ b/scripts/builtin/steplm.dml @@ -23,13 +23,16 @@ # This method iteratively runs what-if scenarios and greedily selects the next best feature # until the Akaike information criterion (AIC) does not improve anymore. Each configuration trains a regression model # via lm, which in turn calls either the closed form lmDS or iterative lmGC. -# -# return: Matrix of regression parameters (the betas) and its size depend on icpt input value: -# OUTPUT SIZE: OUTPUT CONTENTS: HOW TO PREDICT Y FROM X AND B: -# icpt=0: ncol(X) x 1 Betas for X only Y ~ X %*% B[1:ncol(X), 1], or just X %*% B -# icpt=1: ncol(X)+1 x 1 Betas for X and intercept Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1] -# icpt=2: ncol(X)+1 x 2 Col.1: betas for X & intercept Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1] -# Col.2: betas for shifted/rescaled X and intercept +# +# .. code-block:: txt +# +# return: Matrix of regression parameters (the betas) and its size depend on icpt input value: +# OUTPUT SIZE: OUTPUT CONTENTS: HOW TO PREDICT Y FROM X AND B: +# icpt=0: ncol(X) x 1 Betas for X only Y ~ X %*% B[1:ncol(X), 1], or just X %*% B +# icpt=1: ncol(X)+1 x 1 Betas for X and intercept Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1] +# icpt=2: ncol(X)+1 x 2 Col.1: betas for X & intercept Y ~ X %*% B[1:ncol(X), 1] + B[ncol(X)+1, 1] +# Col.2: betas for shifted/rescaled X and intercept +# # In addition, in the last run of linear regression some statistics are provided in CSV format, one comma-separated # name-value pair per each line, as follows: #
