http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/ctableStats/Binomial.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/ctableStats/Binomial.dml b/src/test/scripts/applications/ctableStats/Binomial.dml
index a3cbf5a..6403a66 100644
--- a/src/test/scripts/applications/ctableStats/Binomial.dml
+++ b/src/test/scripts/applications/ctableStats/Binomial.dml
@@ -41,7 +41,7 @@ binomQuantile =
     for (i in 1:27) { # Uses "division by half" method to solve equations
         p_new = (p_min + p_max) / 2.0;
         [alpha_p_new] = binomProb (n_vector, m_vector, p_new);
-        move_new_to_max = ppred (alpha_p_new, alpha_vector, "<");
+        move_new_to_max = (alpha_p_new < alpha_vector);
         p_max = (1 - move_new_to_max) * p_max + move_new_to_max * p_new;
         p_min = (1 - move_new_to_max) * p_new + move_new_to_max * p_min;
         alpha_p_max = (1 - move_new_to_max) * alpha_p_max + move_new_to_max * alpha_p_new;
@@ -64,15 +64,15 @@ binomProb =
     num_iterations = 100;
 
     mean_vector = p_vector * n_vector;
-    is_opposite = ppred (mean_vector, m_vector, "<");
+    is_opposite = (mean_vector < m_vector);
     l_vector = is_opposite * (n_vector - (m_vector + 1)) + (1 - is_opposite) * m_vector;
     q_vector = is_opposite * (1.0 - p_vector) + (1 - is_opposite) * p_vector;
     n_minus_l_vector = n_vector - l_vector;
-    is_result_zero1 = ppred (l_vector, - 0.0000000001, "<");
-    is_result_one1 = ppred (n_minus_l_vector, 0.0000000001, "<");
-    is_result_zero2 = ppred (q_vector, 0.9999999999, ">");
-    is_result_one2 = ppred (q_vector, 0.0000000001, "<");
+    is_result_zero1 = (l_vector < - 0.0000000001);
+    is_result_one1 = (n_minus_l_vector < 0.0000000001);
+    is_result_zero2 = (q_vector > 0.9999999999);
+    is_result_one2 = (q_vector < 0.0000000001);
     is_result_zero = is_result_zero1 + (1 - is_result_zero1) * is_result_zero2 * (1 - is_result_one1);
     is_result_one = (is_result_one1 + (1 - is_result_one1) * is_result_one2) * (1 - is_result_zero);
@@ -116,8 +116,8 @@ binomProb =
             denom = denom_new;
 
             abs_denom = abs (denom);
-            denom_too_big = ppred (abs_denom, 10000000000.0, ">");
-            denom_too_small = ppred (abs_denom, 0.0000000001, "<");
+            denom_too_big = (abs_denom > 10000000000.0);
+            denom_too_small = (abs_denom < 0.0000000001);
             denom_normal = 1.0 - denom_too_big - denom_too_small;
             rescale_vector = denom_too_big * 0.0000000001 + denom_too_small * 10000000000.0 + denom_normal;
             numer_old = numer_old * rescale_vector;
@@ -127,7 +127,7 @@ binomProb =
 
             convergence_check_left = abs (numer * denom_old - numer_old * denom);
             convergence_check_right = abs (numer * denom_old) * 0.000000001;
-            has_converged = ppred (convergence_check_left, convergence_check_right, "<=");
+            has_converged = (convergence_check_left <= convergence_check_right);
             has_converged = still_iterating * has_converged;
             still_iterating = still_iterating - has_converged;
             result = result + has_converged * numer / denom;
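A note on the pattern applied throughout this commit: ppred(X, s, op) evaluates the comparison op elementwise and returns a 0/1 indicator matrix, and the relational operators on matrices produce exactly the same indicator, so each rewrite below is purely syntactic. A minimal DML sketch of the equivalence (the matrix values here are hypothetical, chosen only for illustration):

    # compare the deprecated builtin against the operator form
    X = matrix ("1 2 3 4", rows = 2, cols = 2);
    old_mask = ppred (X, 2, "<=");   # deprecated builtin: 1.0 where X <= 2, else 0.0
    new_mask = (X <= 2);             # operator form introduced by this change
    print ("max abs difference: " + max (abs (old_mask - new_mask)));   # expect 0.0
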
http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/ctableStats/ctci_odds.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/ctableStats/ctci_odds.dml b/src/test/scripts/applications/ctableStats/ctci_odds.dml
index 3d97489..f13a25b 100644
--- a/src/test/scripts/applications/ctableStats/ctci_odds.dml
+++ b/src/test/scripts/applications/ctableStats/ctci_odds.dml
@@ -172,7 +172,7 @@ gaussian_probability = function (Matrix[double] vector_of_points)
         + t_gp * ( 1.421413741 # U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, p. 299
         + t_gp * (-1.453152027
         + t_gp *   1.061405429)))) * exp (- vector_of_points * vector_of_points / 2.0);
-    erf_gp = erf_gp * 2.0 * (ppred (vector_of_points, 0.0, ">") - 0.5);
+    erf_gp = erf_gp * 2.0 * ((vector_of_points > 0.0) - 0.5);
     vector_of_probabilities = 0.5 + 0.5 * erf_gp;
 }

http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/ctableStats/zipftest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/ctableStats/zipftest.dml b/src/test/scripts/applications/ctableStats/zipftest.dml
index 50ac5f4..5750c37 100644
--- a/src/test/scripts/applications/ctableStats/zipftest.dml
+++ b/src/test/scripts/applications/ctableStats/zipftest.dml
@@ -58,7 +58,7 @@ avg_density_records = rowSums (Probs);
 avg_density_features = colSums (Probs);
 
 Tosses = Rand (rows = num_records, cols = num_features, min = 0.0, max = 1.0);
-Data = ppred (Tosses, Probs, "<=");
+Data = (Tosses <= Probs);
 
 write (avg_density_records, "Zipf.AvgDensity.Rows", format="text");
 write (avg_density_features, "Zipf.AvgDensity.Cols", format="text");

http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/descriptivestats/Categorical.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/Categorical.dml b/src/test/scripts/applications/descriptivestats/Categorical.dml
index e4aa9a5..e0e9959 100644
--- a/src/test/scripts/applications/descriptivestats/Categorical.dml
+++ b/src/test/scripts/applications/descriptivestats/Categorical.dml
@@ -40,11 +40,11 @@ s = sum(Nc)
 Pc = Nc / s
 
 # all categorical values of a categorical variable
-C = ppred(Nc, 0, ">")
+C = (Nc > 0)
 
 # mode
 mx = max(Nc)
-Mode = ppred(Nc, mx, "==")
+Mode = (Nc == mx)
 
 write(Nc, $3, format="text")
 write(R, $4)

http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/descriptivestats/Scale.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/Scale.R b/src/test/scripts/applications/descriptivestats/Scale.R
index 45c9efb..ef9715a 100644
--- a/src/test/scripts/applications/descriptivestats/Scale.R
+++ b/src/test/scripts/applications/descriptivestats/Scale.R
@@ -114,7 +114,6 @@ iqm = iqm/(n*0.5)
 
 #print(paste("IQM ", iqm));
 
-# outliers use ppred to describe it
 out_minus = t(as.numeric(V< mu-5*std_dev)*V)
 out_plus = t(as.numeric(V> mu+5*std_dev)*V)

http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/descriptivestats/Scale.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/Scale.dml b/src/test/scripts/applications/descriptivestats/Scale.dml
index 2394270..eee643e 100644
--- a/src/test/scripts/applications/descriptivestats/Scale.dml
+++ b/src/test/scripts/applications/descriptivestats/Scale.dml
@@ -90,9 +90,8 @@ Q = quantile(V, P)
 # inter-quartile mean
 iqm = interQuartileMean(V)
 
-# outliers use ppred to describe it
-out_minus = ppred(V, mu-5*std_dev, "<")*V
-out_plus = ppred(V, mu+5*std_dev, ">")*V
+out_minus = (V < (mu-5*std_dev))*V
+out_plus = (V > (mu+5*std_dev))*V
 
 write(mu, $5);
 write(std_dev, $6);

http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/descriptivestats/WeightedCategoricalTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/WeightedCategoricalTest.dml b/src/test/scripts/applications/descriptivestats/WeightedCategoricalTest.dml
index b6ff41a..b9f8e3a 100644
--- a/src/test/scripts/applications/descriptivestats/WeightedCategoricalTest.dml
+++ b/src/test/scripts/applications/descriptivestats/WeightedCategoricalTest.dml
@@ -41,11 +41,11 @@ s = sum(Nc)
 Pc = Nc / s
 
 # all categorical values of a categorical variable
-C = ppred(Nc, 0, ">")
+C = (Nc > 0)
 
 # mode
 mx = max(Nc)
-Mode = ppred(Nc, mx, "==")
+Mode = (Nc == mx)
 
 write(Nc, $4, format="text")
 write(R, $5)

http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/descriptivestats/WeightedScaleTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/WeightedScaleTest.R b/src/test/scripts/applications/descriptivestats/WeightedScaleTest.R
index eba3d1c..f539889 100644
--- a/src/test/scripts/applications/descriptivestats/WeightedScaleTest.R
+++ b/src/test/scripts/applications/descriptivestats/WeightedScaleTest.R
@@ -130,7 +130,6 @@ iqm = iqm/(n*0.5)
 
 #print(paste("IQM ", iqm));
 
-# outliers use ppred to describe it
 out_minus = t(as.numeric(Temp < mu-5*std_dev)*Temp)
 out_plus = t(as.numeric(Temp > mu+5*std_dev)*Temp)

http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/descriptivestats/WeightedScaleTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/descriptivestats/WeightedScaleTest.dml b/src/test/scripts/applications/descriptivestats/WeightedScaleTest.dml
index d038328..bc4f9e4 100644
--- a/src/test/scripts/applications/descriptivestats/WeightedScaleTest.dml
+++ b/src/test/scripts/applications/descriptivestats/WeightedScaleTest.dml
@@ -92,9 +92,8 @@ g2 = (wt^2*(wt+1)*m4-3*m2^2*wt^2*(wt-1))/((wt-1)*(wt-2)*(wt-3)*std_dev^4)
 # Standard error of Kurtosis
 se_g2= sqrt( (4*(wt^2-1)*se_g1^2)/((wt+5)*(wt-3)) )
 
-# outliers use ppred to describe it
-out_minus = ppred(V, mu-5*std_dev, "<")*V
-out_plus = ppred(V, mu+5*std_dev, ">")*V
+out_minus = (V < (mu-5*std_dev))*V
+out_plus = (V > (mu+5*std_dev))*V
 
 # median
 md = median(V,W); #quantile(V, W, 0.5)

http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/glm/GLM.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/glm/GLM.dml b/src/test/scripts/applications/glm/GLM.dml
index dd5163d..9dc311d 100644
--- a/src/test/scripts/applications/glm/GLM.dml
+++ b/src/test/scripts/applications/glm/GLM.dml
@@ -198,7 +198,7 @@ if (intercept_status == 2)  # scale-&-shift X columns to mean 0, variance 1
 {                           # Important assumption: X [, num_features] = ones_r
     avg_X_cols = t(colSums(X)) / num_records;
     var_X_cols = (t(colSums (X ^ 2)) - num_records * (avg_X_cols ^ 2)) / (num_records - 1);
-    is_unsafe = ppred (var_X_cols, 0.0, "<=");
+    is_unsafe = (var_X_cols <= 0.0);
     scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
     scale_X [num_features, 1] = 1;
     shift_X = - avg_X_cols * scale_X;
@@ -233,7 +233,7 @@ if (max_iteration_CG == 0) {
 
 if (distribution_type == 2 & ncol(Y) == 1)
 {
-    is_Y_negative = ppred (Y, bernoulli_No_label, "==");
+    is_Y_negative = (Y == bernoulli_No_label);
     Y = cbind (1 - is_Y_negative, is_Y_negative);
     count_Y_negative = sum (is_Y_negative);
     if (count_Y_negative == 0) {
@@ -574,14 +574,14 @@ return (Matrix[double] beta, double saturated_log_l, int isNaN)
     y_corr = Y [, 1];
     if (dist_type == 2) {
         n_corr = rowSums (Y);
-        is_n_zero = ppred (n_corr, 0.0, "==");
+        is_n_zero = (n_corr == 0.0);
         y_corr = Y [, 1] / (n_corr + is_n_zero) + (0.5 - Y [, 1]) * is_n_zero;
     }
     linear_terms = y_corr;
     if (dist_type == 1 & link_type == 1) { # POWER DISTRIBUTION
         if (link_power == 0.0) {
-            if (sum (ppred (y_corr, 0.0, "<")) == 0) {
-                is_zero_y_corr = ppred (y_corr, 0.0, "==");
+            if (sum (y_corr < 0.0) == 0) {
+                is_zero_y_corr = (y_corr == 0.0);
                 linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
             } else { isNaN = 1; }
         } else { if (link_power == 1.0) {
@@ -589,16 +589,16 @@ return (Matrix[double] beta, double saturated_log_l, int isNaN)
         } else { if (link_power == -1.0) {
             linear_terms = 1.0 / y_corr;
         } else { if (link_power == 0.5) {
-            if (sum (ppred (y_corr, 0.0, "<")) == 0) {
+            if (sum (y_corr < 0.0) == 0) {
                 linear_terms = sqrt (y_corr);
             } else { isNaN = 1; }
         } else { if (link_power > 0.0) {
-            if (sum (ppred (y_corr, 0.0, "<")) == 0) {
-                is_zero_y_corr = ppred (y_corr, 0.0, "==");
+            if (sum (y_corr < 0.0) == 0) {
+                is_zero_y_corr = (y_corr == 0.0);
                 linear_terms = (y_corr + is_zero_y_corr) ^ link_power - is_zero_y_corr;
             } else { isNaN = 1; }
         } else {
-            if (sum (ppred (y_corr, 0.0, "<=")) == 0) {
+            if (sum (y_corr <= 0.0) == 0) {
                 linear_terms = y_corr ^ link_power;
             } else { isNaN = 1; }
         }}}}}
@@ -606,31 +606,31 @@ return (Matrix[double] beta, double saturated_log_l, int isNaN)
     if (dist_type == 2 & link_type >= 1 & link_type <= 5)
     { # BINOMIAL/BERNOULLI DISTRIBUTION
         if (link_type == 1 & link_power == 0.0) { # Binomial.log
-            if (sum (ppred (y_corr, 0.0, "<")) == 0) {
-                is_zero_y_corr = ppred (y_corr, 0.0, "==");
+            if (sum (y_corr < 0.0) == 0) {
+                is_zero_y_corr = (y_corr == 0.0);
                 linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
             } else { isNaN = 1; }
         } else { if (link_type == 1 & link_power > 0.0) { # Binomial.power_nonlog pos
-            if (sum (ppred (y_corr, 0.0, "<")) == 0) {
-                is_zero_y_corr = ppred (y_corr, 0.0, "==");
+            if (sum (y_corr < 0.0) == 0) {
+                is_zero_y_corr = (y_corr == 0.0);
                 linear_terms = (y_corr + is_zero_y_corr) ^ link_power - is_zero_y_corr;
             } else { isNaN = 1; }
         } else { if (link_type == 1) { # Binomial.power_nonlog neg
-            if (sum (ppred (y_corr, 0.0, "<=")) == 0) {
+            if (sum (y_corr <= 0.0) == 0) {
                 linear_terms = y_corr ^ link_power;
             } else { isNaN = 1; }
         } else {
-            is_zero_y_corr = ppred (y_corr, 0.0, "<=");
-            is_one_y_corr = ppred (y_corr, 1.0, ">=");
+            is_zero_y_corr = (y_corr <= 0.0);
+            is_one_y_corr = (y_corr >= 1.0);
             y_corr = y_corr * (1.0 - is_zero_y_corr) * (1.0 - is_one_y_corr) + 0.5 * (is_zero_y_corr + is_one_y_corr);
             if (link_type == 2) { # Binomial.logit
                 linear_terms = log (y_corr / (1.0 - y_corr)) + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
             } else { if (link_type == 3) { # Binomial.probit
-                y_below_half = y_corr + (1.0 - 2.0 * y_corr) * ppred (y_corr, 0.5, ">");
+                y_below_half = y_corr + (1.0 - 2.0 * y_corr) * (y_corr > 0.5);
                 t = sqrt (- 2.0 * log (y_below_half));
                 approx_inv_Gauss_CDF = - t + (2.515517 + t * (0.802853 + t * 0.010328)) / (1.0 + t * (1.432788 + t * (0.189269 + t * 0.001308)));
-                linear_terms = approx_inv_Gauss_CDF * (1.0 - 2.0 * ppred (y_corr, 0.5, ">"))
+                linear_terms = approx_inv_Gauss_CDF * (1.0 - 2.0 * (y_corr > 0.5))
                     + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
             } else { if (link_type == 4) { # Binomial.cloglog
                 linear_terms = log (- log (1.0 - y_corr))
@@ -739,19 +739,19 @@ glm_dist = function (Matrix[double] linear_terms, Matrix[double] Y,
             vec1 = zeros_r;
             if (link_power == 0.5) {
                 vec1 = 1 / (1 - linear_terms ^ 2);
-            } else { if (sum (ppred (linear_terms, 0.0, "<")) == 0) {
+            } else { if (sum (linear_terms < 0.0) == 0) {
                 vec1 = linear_terms ^ (- 2 + 1 / link_power) / (1 - linear_terms ^ (1 / link_power));
             } else {isNaN = 1;}}
             # We want a "zero-protected" version of
             #     vec2 = Y [, 1] / linear_terms;
-            is_y_0 = ppred (Y [, 1], 0.0, "==");
+            is_y_0 = ((Y [, 1]) == 0.0);
             vec2 = (Y [, 1] + is_y_0) / (linear_terms * (1 - is_y_0) + is_y_0) - is_y_0;
             g_Y = (vec2 - Y [, 2] * vec1 * linear_terms) / link_power;
             w = rowSums (Y) * vec1 / link_power ^ 2;
         }
     } else {
-        is_LT_pos_infinite = ppred (linear_terms, 1.0/0.0, "==");
-        is_LT_neg_infinite = ppred (linear_terms, -1.0/0.0, "==");
+        is_LT_pos_infinite = (linear_terms == (1.0/0.0));
+        is_LT_neg_infinite = (linear_terms == (-1.0/0.0));
         is_LT_infinite = is_LT_pos_infinite %*% one_zero + is_LT_neg_infinite %*% zero_one;
         finite_linear_terms = replace (target = linear_terms, pattern = 1.0/0.0, replacement = 0);
         finite_linear_terms = replace (target = finite_linear_terms, pattern = -1.0/0.0, replacement = 0);
@@ -762,7 +762,7 @@ glm_dist = function (Matrix[double] linear_terms, Matrix[double] Y,
             g_Y = rowSums (Y * (Y_prob %*% flip_neg));           ### = y_residual;
             w = rowSums (Y * (Y_prob %*% flip_pos) * Y_prob);    ### = y_variance;
         } else { if (link_type == 3) { # Binomial.probit
-            is_lt_pos = ppred (linear_terms, 0.0, ">=");
+            is_lt_pos = (linear_terms >= 0.0);
             t_gp = 1.0 / (1.0 + abs (finite_linear_terms) * 0.231641888); # 0.231641888 = 0.3275911 / sqrt (2.0)
             pt_gp = t_gp * ( 0.254829592
                  + t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
@@ -777,7 +777,7 @@ glm_dist = function (Matrix[double] linear_terms, Matrix[double] Y,
         } else { if (link_type == 4) { # Binomial.cloglog
             the_exp = exp (linear_terms)
             the_exp_exp = exp (- the_exp);
-            is_too_small = ppred (10000000 + the_exp, 10000000, "==");
+            is_too_small = ((10000000 + the_exp) == 10000000);
             the_exp_ratio = (1 - is_too_small) * (1 - the_exp_exp) / (the_exp + is_too_small) + is_too_small * (1 - the_exp / 2);
             g_Y = (rowSums (Y) * the_exp_exp - Y [, 2]) / the_exp_ratio;
             w = the_exp_exp * the_exp * rowSums (Y) / the_exp_ratio;
@@ -811,38 +811,38 @@ glm_log_likelihood_part = function (Matrix[double] linear_terms, Matrix[double]
     is_natural_parameter_log_zero = zeros_r;
     if (var_power == 1.0 & link_power == 0.0) { # Poisson.log
         b_cumulant = exp (linear_terms);
-        is_natural_parameter_log_zero = ppred (linear_terms, -1.0/0.0, "==");
+        is_natural_parameter_log_zero = (linear_terms == -1.0/0.0);
        natural_parameters = replace (target = linear_terms, pattern = -1.0/0.0, replacement = 0);
     } else { if (var_power == 1.0 & link_power == 1.0) { # Poisson.id
-        if (sum (ppred (linear_terms, 0.0, "<")) == 0) {
+        if (sum (linear_terms < 0.0) == 0) {
             b_cumulant = linear_terms;
-            is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
+            is_natural_parameter_log_zero = (linear_terms == 0.0);
             natural_parameters = log (linear_terms + is_natural_parameter_log_zero);
         } else {isNaN = 1;}
     } else { if (var_power == 1.0 & link_power == 0.5) { # Poisson.sqrt
-        if (sum (ppred (linear_terms, 0.0, "<")) == 0) {
+        if (sum (linear_terms < 0.0) == 0) {
             b_cumulant = linear_terms ^ 2;
-            is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
+            is_natural_parameter_log_zero = (linear_terms == 0.0);
             natural_parameters = 2.0 * log (linear_terms + is_natural_parameter_log_zero);
         } else {isNaN = 1;}
     } else { if (var_power == 1.0 & link_power > 0.0) { # Poisson.power_nonlog, pos
-        if (sum (ppred (linear_terms, 0.0, "<")) == 0) {
-            is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==");
+        if (sum (linear_terms < 0.0) == 0) {
+            is_natural_parameter_log_zero = (linear_terms == 0.0);
             b_cumulant = (linear_terms + is_natural_parameter_log_zero) ^ (1.0 / link_power) - is_natural_parameter_log_zero;
             natural_parameters = log (linear_terms + is_natural_parameter_log_zero) / link_power;
         } else {isNaN = 1;}
     } else { if (var_power == 1.0) { # Poisson.power_nonlog, neg
-        if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
+        if (sum (linear_terms <= 0.0) == 0) {
             b_cumulant = linear_terms ^ (1.0 / link_power);
             natural_parameters = log (linear_terms) / link_power;
         } else {isNaN = 1;}
     } else { if (var_power == 2.0 & link_power == -1.0) { # Gamma.inverse
-        if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
+        if (sum (linear_terms <= 0.0) == 0) {
             b_cumulant = - log (linear_terms);
             natural_parameters = - linear_terms;
         } else {isNaN = 1;}
     } else { if (var_power == 2.0 & link_power == 1.0) { # Gamma.id
-        if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
+        if (sum (linear_terms <= 0.0) == 0) {
             b_cumulant = log (linear_terms);
             natural_parameters = - 1.0 / linear_terms;
         } else {isNaN = 1;}
@@ -850,7 +850,7 @@ glm_log_likelihood_part = function (Matrix[double] linear_terms, Matrix[double]
         b_cumulant = linear_terms;
         natural_parameters = - exp (- linear_terms);
     } else { if (var_power == 2.0) { # Gamma.power_nonlog
-        if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
+        if (sum (linear_terms <= 0.0) == 0) {
             b_cumulant = log (linear_terms) / link_power;
             natural_parameters = - linear_terms ^ (- 1.0 / link_power);
         } else {isNaN = 1;}
@@ -867,7 +867,7 @@ glm_log_likelihood_part = function (Matrix[double] linear_terms, Matrix[double]
         } else { if ( 2 * link_power == 1.0 - var_power) {
             natural_parameters = linear_terms ^ 2 / (1.0 - var_power);
         } else {
-            if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
+            if (sum (linear_terms <= 0.0) == 0) {
                 power = (1.0 - var_power) / link_power;
                 natural_parameters = (linear_terms ^ power) / (1.0 - var_power);
             } else {isNaN = 1;}
@@ -881,7 +881,7 @@ glm_log_likelihood_part = function (Matrix[double] linear_terms, Matrix[double]
         } else { if ( 2 * link_power == 2.0 - var_power) {
             b_cumulant = linear_terms ^ 2 / (2.0 - var_power);
         } else {
-            if (sum (ppred (linear_terms, 0.0, "<=")) == 0) {
+            if (sum (linear_terms <= 0.0) == 0) {
                 power = (2.0 - var_power) / link_power;
                 b_cumulant = (linear_terms ^ power) / (2.0 - var_power);
             } else {isNaN = 1;}
@@ -905,7 +905,7 @@ glm_log_likelihood_part = function (Matrix[double] linear_terms, Matrix[double]
         [Y_prob, isNaN] = binomial_probability_two_column (linear_terms, link_type, link_power);
         if (isNaN == 0) {
-            does_prob_contradict = ppred (Y_prob, 0.0, "<=");
+            does_prob_contradict = (Y_prob <= 0.0);
             if (sum (does_prob_contradict * abs (Y)) == 0.0) {
                 log_l = sum (Y * log (Y_prob * (1 - does_prob_contradict) + does_prob_contradict));
                 if (log_l != log_l | (log_l == log_l + 1.0 & log_l == log_l * 2.0)) {
@@ -954,13 +954,13 @@ binomial_probability_two_column =
         } else { if (link_power == 0.5) { # Binomial.sqrt
             Y_prob = (linear_terms ^ 2) %*% p_one_m_one + ones_r %*% zero_one;
         } else { # Binomial.power_nonlog
-            if (sum (ppred (linear_terms, 0.0, "<")) == 0) {
+            if (sum (linear_terms < 0.0) == 0) {
                 Y_prob = (linear_terms ^ (1.0 / link_power)) %*% p_one_m_one + ones_r %*% zero_one;
             } else {isNaN = 1;}
         }}
     } else { # Binomial.non_power
-        is_LT_pos_infinite = ppred (linear_terms, 1.0/0.0, "==");
-        is_LT_neg_infinite = ppred (linear_terms, -1.0/0.0, "==");
+        is_LT_pos_infinite = (linear_terms == (1.0/0.0));
+        is_LT_neg_infinite = (linear_terms == (-1.0/0.0));
         is_LT_infinite = is_LT_pos_infinite %*% one_zero + is_LT_neg_infinite %*% zero_one;
         finite_linear_terms = replace (target = linear_terms, pattern = 1.0/0.0, replacement = 0);
         finite_linear_terms = replace (target = finite_linear_terms, pattern = -1.0/0.0, replacement = 0);
@@ -968,7 +968,7 @@ binomial_probability_two_column =
             Y_prob = exp (finite_linear_terms) %*% one_zero + ones_r %*% zero_one;
             Y_prob = Y_prob / (rowSums (Y_prob) %*% ones_2);
         } else { if (link_type == 3) { # Binomial.probit
-            lt_pos_neg = ppred (finite_linear_terms, 0.0, ">=") %*% p_one_m_one + ones_r %*% zero_one;
+            lt_pos_neg = (finite_linear_terms >= 0.0) %*% p_one_m_one + ones_r %*% zero_one;
             t_gp = 1.0 / (1.0 + abs (finite_linear_terms) * 0.231641888); # 0.231641888 = 0.3275911 / sqrt (2.0)
             pt_gp = t_gp * ( 0.254829592
                  + t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
@@ -980,7 +980,7 @@ binomial_probability_two_column =
         } else { if (link_type == 4) { # Binomial.cloglog
             the_exp = exp (finite_linear_terms);
             the_exp_exp = exp (- the_exp);
-            is_too_small = ppred (10000000 + the_exp, 10000000, "==");
+            is_too_small = ((10000000 + the_exp) == 10000000);
             Y_prob [, 1] = (1 - is_too_small) * (1 - the_exp_exp) + is_too_small * the_exp * (1 - the_exp / 2);
             Y_prob [, 2] = the_exp_exp;
         } else { if (link_type == 5) { # Binomial.cauchit

http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/glm/GLM.pydml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/glm/GLM.pydml b/src/test/scripts/applications/glm/GLM.pydml
index 5f8be13..1875f33 100644
--- a/src/test/scripts/applications/glm/GLM.pydml
+++ b/src/test/scripts/applications/glm/GLM.pydml
@@ -195,7 +195,7 @@ if (intercept_status == 2):  # scale-&-shift X columns to mean 0, variance 1
     # Important assumption: X [, num_features] = ones_r
     avg_X_cols = transpose(colSums(X)) / num_records
     var_X_cols = (transpose(colSums (X ** 2)) - num_records * (avg_X_cols ** 2)) / (num_records - 1)
-    is_unsafe = ppred (var_X_cols, 0.0, "<=")
+    is_unsafe = (var_X_cols <= 0.0)
     scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe)
     scale_X [num_features-1, 0] = 1
     shift_X = - avg_X_cols * scale_X
@@ -227,7 +227,7 @@ if (max_iteration_CG == 0):
 
 # In Bernoulli case, convert one-column "Y" into two-column
 if (distribution_type == 2 & ncol(Y) == 1):
-    is_Y_negative = ppred (Y, bernoulli_No_label, "==")
+    is_Y_negative = (Y == bernoulli_No_label)
     Y = cbind (1 - is_Y_negative, is_Y_negative)
     count_Y_negative = sum (is_Y_negative)
     if (count_Y_negative == 0):
@@ -644,14 +644,14 @@ def glm_initialize(X: matrix[float], Y: matrix[float], dist_type: int, var_power
     y_corr = Y [, 0]
     if (dist_type == 2):
         n_corr = rowSums (Y)
-        is_n_zero = ppred (n_corr, 0.0, "==")
+        is_n_zero = (n_corr == 0.0)
         y_corr = Y [, 0] / (n_corr + is_n_zero) + (0.5 - Y [, 0]) * is_n_zero
     linear_terms = y_corr
     if (dist_type == 1 & link_type == 1): # POWER DISTRIBUTION
         if (link_power == 0.0):
-            if (sum (ppred (y_corr, 0.0, "<")) == 0):
-                is_zero_y_corr = ppred (y_corr, 0.0, "==")
+            if (sum (y_corr < 0.0) == 0):
+                is_zero_y_corr = (y_corr == 0.0)
                 linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr)
             else:
                 isNaN = 1
@@ -664,21 +664,21 @@ def glm_initialize(X: matrix[float], Y: matrix[float], dist_type: int, var_power
                 linear_terms = 1.0 / y_corr
             else:
                 if (link_power == 0.5):
-                    if (sum (ppred (y_corr, 0.0, "<")) == 0):
+                    if (sum (y_corr < 0.0) == 0):
                         linear_terms = sqrt (y_corr)
                     else:
                         isNaN = 1
                 else:
                     if (link_power > 0.0):
-                        if (sum (ppred (y_corr, 0.0, "<")) == 0):
-                            is_zero_y_corr = ppred (y_corr, 0.0, "==")
+                        if (sum (y_corr < 0.0) == 0):
+                            is_zero_y_corr = (y_corr == 0.0)
                             linear_terms = (y_corr + is_zero_y_corr) ** link_power - is_zero_y_corr
                         else:
                             isNaN = 1
                     else:
-                        if (sum (ppred (y_corr, 0.0, "<=")) == 0):
+                        if (sum (y_corr <= 0.0) == 0):
                             linear_terms = y_corr ** link_power
                         else:
                             isNaN = 1
@@ -691,39 +691,39 @@ def glm_initialize(X: matrix[float], Y: matrix[float], dist_type: int, var_power
     if (dist_type == 2 & link_type >= 1 & link_type <= 5):
         # BINOMIAL/BERNOULLI DISTRIBUTION
         if (link_type == 1 & link_power == 0.0): # Binomial.log
-            if (sum (ppred (y_corr, 0.0, "<")) == 0):
-                is_zero_y_corr = ppred (y_corr, 0.0, "==")
+            if (sum (y_corr < 0.0) == 0):
+                is_zero_y_corr = (y_corr == 0.0)
                 linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr)
             else:
                 isNaN = 1
         else:
             if (link_type == 1 & link_power > 0.0): # Binomial.power_nonlog pos
-                if (sum (ppred (y_corr, 0.0, "<")) == 0):
-                    is_zero_y_corr = ppred (y_corr, 0.0, "==")
+                if (sum (y_corr < 0.0) == 0):
+                    is_zero_y_corr = (y_corr == 0.0)
                     linear_terms = (y_corr + is_zero_y_corr) ** link_power - is_zero_y_corr
                 else:
                     isNaN = 1
             else:
                 if (link_type == 1): # Binomial.power_nonlog neg
-                    if (sum (ppred (y_corr, 0.0, "<=")) == 0):
+                    if (sum (y_corr <= 0.0) == 0):
                         linear_terms = y_corr ** link_power
                     else:
                         isNaN = 1
                 else:
-                    is_zero_y_corr = ppred (y_corr, 0.0, "<=")
-                    is_one_y_corr = ppred (y_corr, 1.0, ">=")
+                    is_zero_y_corr = (y_corr <= 0.0)
+                    is_one_y_corr = (y_corr >= 1.0)
                     y_corr = y_corr * (1.0 - is_zero_y_corr) * (1.0 - is_one_y_corr) + 0.5 * (is_zero_y_corr + is_one_y_corr)
                     if (link_type == 2): # Binomial.logit
                         linear_terms = log (y_corr / (1.0 - y_corr)) + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr)
                     else:
                         if (link_type == 3): # Binomial.probit
-                            y_below_half = y_corr + (1.0 - 2.0 * y_corr) * ppred (y_corr, 0.5, ">")
+                            y_below_half = y_corr + (1.0 - 2.0 * y_corr) * (y_corr > 0.5)
                             t = sqrt (- 2.0 * log (y_below_half))
                             approx_inv_Gauss_CDF = - t + (2.515517 + t * (0.802853 + t * 0.010328)) / (1.0 + t * (1.432788 + t * (0.189269 + t * 0.001308)))
-                            linear_terms = approx_inv_Gauss_CDF * (1.0 - 2.0 * ppred (y_corr, 0.5, ">")) + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr)
+                            linear_terms = approx_inv_Gauss_CDF * (1.0 - 2.0 * (y_corr > 0.5)) + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr)
                         else:
                             if (link_type == 4): # Binomial.cloglog
                                 linear_terms = log (- log (1.0 - y_corr)) - log (- log (0.5)) * (is_zero_y_corr + is_one_y_corr) + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr)
@@ -838,7 +838,7 @@ def glm_dist(linear_terms: matrix[float], Y: matrix[float], dist_type: int, var_
             if (link_power == 0.5):
                 vec1 = 1 / (1 - linear_terms ** 2)
             else:
-                if (sum (ppred (linear_terms, 0.0, "<")) == 0):
+                if (sum (linear_terms < 0.0) == 0):
                     vec1 = linear_terms ** (- 2 + 1 / link_power) / (1 - linear_terms ** (1 / link_power))
                 else:
                     isNaN = 1
@@ -846,14 +846,14 @@ def glm_dist(linear_terms: matrix[float], Y: matrix[float], dist_type: int, var_
 
             # We want a "zero-protected" version of
             #     vec2 = Y [, 1] / linear_terms
-            is_y_0 = ppred (Y [, 0], 0.0, "==")
+            is_y_0 = (Y[, 0] == 0.0)
             vec2 = (Y [, 0] + is_y_0) / (linear_terms * (1 - is_y_0) + is_y_0) - is_y_0
             g_Y = (vec2 - Y [, 1] * vec1 * linear_terms) / link_power
             w = rowSums (Y) * vec1 / link_power ** 2
         else:
-            is_LT_pos_infinite = ppred (linear_terms, 1.0/0.0, "==")
-            is_LT_neg_infinite = ppred (linear_terms, -1.0/0.0, "==")
+            is_LT_pos_infinite = (linear_terms == (1.0/0.0))
+            is_LT_neg_infinite = (linear_terms == (-1.0/0.0))
             is_LT_infinite = dot(is_LT_pos_infinite, one_zero) + dot(is_LT_neg_infinite, zero_one)
             finite_linear_terms = replace (target = linear_terms, pattern = 1.0/0.0, replacement = 0)
             finite_linear_terms = replace (target = finite_linear_terms, pattern = -1.0/0.0, replacement = 0)
@@ -865,7 +865,7 @@ def glm_dist(linear_terms: matrix[float], Y: matrix[float], dist_type: int, var_
             w = rowSums (Y * (dot(Y_prob, flip_pos)) * Y_prob) # = y_variance
         else:
             if (link_type == 3): # Binomial.probit
-                is_lt_pos = ppred (linear_terms, 0.0, ">=")
+                is_lt_pos = (linear_terms >= 0.0)
                 t_gp = 1.0 / (1.0 + abs (finite_linear_terms) * 0.231641888) # 0.231641888 = 0.3275911 / sqrt (2.0)
                 pt_gp = t_gp * ( 0.254829592 + t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
@@ -881,7 +881,7 @@ def glm_dist(linear_terms: matrix[float], Y: matrix[float], dist_type: int, var_
             if (link_type == 4): # Binomial.cloglog
                 the_exp = exp (linear_terms)
                 the_exp_exp = exp (- the_exp)
-                is_too_small = ppred (10000000 + the_exp, 10000000, "==")
+                is_too_small = ((10000000 + the_exp) == 10000000)
                 the_exp_ratio = (1 - is_too_small) * (1 - the_exp_exp) / (the_exp + is_too_small) + is_too_small * (1 - the_exp / 2)
                 g_Y = (rowSums (Y) * the_exp_exp - Y [, 1]) / the_exp_ratio
                 w = the_exp_exp * the_exp * rowSums (Y) / the_exp_ratio
@@ -915,30 +915,30 @@ def glm_log_likelihood_part(linear_terms: matrix[float], Y: matrix[float],
     is_natural_parameter_log_zero = zeros_r
     if (var_power == 1.0 & link_power == 0.0): # Poisson.log
         b_cumulant = exp (linear_terms)
-        is_natural_parameter_log_zero = ppred (linear_terms, -1.0/0.0, "==")
+        is_natural_parameter_log_zero = (linear_terms == (-1.0/0.0))
         natural_parameters = replace (target = linear_terms, pattern = -1.0/0.0, replacement = 0)
     else:
         if (var_power == 1.0 & link_power == 1.0): # Poisson.id
-            if (sum (ppred (linear_terms, 0.0, "<")) == 0):
+            if (sum (linear_terms < 0.0) == 0):
                 b_cumulant = linear_terms
-                is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==")
+                is_natural_parameter_log_zero = (linear_terms == 0.0)
                 natural_parameters = log (linear_terms + is_natural_parameter_log_zero)
             else:
                 isNaN = 1
         else:
             if (var_power == 1.0 & link_power == 0.5): # Poisson.sqrt
-                if (sum (ppred (linear_terms, 0.0, "<")) == 0):
+                if (sum (linear_terms < 0.0) == 0):
                     b_cumulant = linear_terms ** 2
-                    is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==")
+                    is_natural_parameter_log_zero = (linear_terms == 0.0)
                     natural_parameters = 2.0 * log (linear_terms + is_natural_parameter_log_zero)
                 else:
                     isNaN = 1
             else:
                 if (var_power == 1.0 & link_power > 0.0): # Poisson.power_nonlog, pos
-                    if (sum (ppred (linear_terms, 0.0, "<")) == 0):
-                        is_natural_parameter_log_zero = ppred (linear_terms, 0.0, "==")
+                    if (sum (linear_terms < 0.0) == 0):
+                        is_natural_parameter_log_zero = (linear_terms == 0.0)
                         b_cumulant = (linear_terms + is_natural_parameter_log_zero) ** (1.0 / link_power) - is_natural_parameter_log_zero
                         natural_parameters = log (linear_terms + is_natural_parameter_log_zero) / link_power
                     else:
@@ -946,7 +946,7 @@ def glm_log_likelihood_part(linear_terms: matrix[float], Y: matrix[float],
                 else:
                     if (var_power == 1.0): # Poisson.power_nonlog, neg
-                        if (sum (ppred (linear_terms, 0.0, "<=")) == 0):
+                        if (sum (linear_terms <= 0.0) == 0):
                             b_cumulant = linear_terms ** (1.0 / link_power)
                             natural_parameters = log (linear_terms) / link_power
                         else:
@@ -954,7 +954,7 @@ def glm_log_likelihood_part(linear_terms: matrix[float], Y: matrix[float],
                     else:
                         if (var_power == 2.0 & link_power == -1.0): # Gamma.inverse
-                            if (sum (ppred (linear_terms, 0.0, "<=")) == 0):
+                            if (sum (linear_terms <= 0.0) == 0):
                                 b_cumulant = - log (linear_terms)
                                 natural_parameters = - linear_terms
                             else:
@@ -962,7 +962,7 @@ def glm_log_likelihood_part(linear_terms: matrix[float], Y: matrix[float],
                         else:
                             if (var_power == 2.0 & link_power == 1.0): # Gamma.id
-                                if (sum (ppred (linear_terms, 0.0, "<=")) == 0):
+                                if (sum (linear_terms <= 0.0) == 0):
                                     b_cumulant = log (linear_terms)
                                     natural_parameters = - 1.0 / linear_terms
                                 else:
@@ -974,7 +974,7 @@ def glm_log_likelihood_part(linear_terms: matrix[float], Y: matrix[float],
                                     natural_parameters = - exp (- linear_terms)
                                 else:
                                     if (var_power == 2.0): # Gamma.power_nonlog
-                                        if (sum (ppred (linear_terms, 0.0, "<=")) == 0):
+                                        if (sum(linear_terms <= 0.0) == 0):
                                             b_cumulant = log (linear_terms) / link_power
                                             natural_parameters = - linear_terms ** (- 1.0 / link_power)
                                         else:
@@ -997,7 +997,7 @@ def glm_log_likelihood_part(linear_terms: matrix[float], Y: matrix[float],
                 if ( 2 * link_power == 1.0 - var_power):
                     natural_parameters = linear_terms ** 2 / (1.0 - var_power)
                 else:
-                    if (sum (ppred (linear_terms, 0.0, "<=")) == 0):
+                    if (sum (linear_terms <= 0.0) == 0):
                         power = (1.0 - var_power) / link_power
                         natural_parameters = (linear_terms ** power) / (1.0 - var_power)
                     else:
@@ -1019,7 +1019,7 @@ def glm_log_likelihood_part(linear_terms: matrix[float], Y: matrix[float],
                 if ( 2 * link_power == 2.0 - var_power):
                     b_cumulant = linear_terms ** 2 / (2.0 - var_power)
                 else:
-                    if (sum (ppred (linear_terms, 0.0, "<=")) == 0):
+                    if (sum (linear_terms <= 0.0) == 0):
                         power = (2.0 - var_power) / link_power
                         b_cumulant = (linear_terms ** power) / (2.0 - var_power)
                     else:
@@ -1053,7 +1053,7 @@ def glm_log_likelihood_part(linear_terms: matrix[float], Y: matrix[float],
         [Y_prob, isNaN] = binomial_probability_two_column (linear_terms, link_type, link_power)
         if (isNaN == 0):
-            does_prob_contradict = ppred (Y_prob, 0.0, "<=")
+            does_prob_contradict = (Y_prob <= 0.0)
             if (sum (does_prob_contradict * abs (Y)) == 0.0):
                 log_l = sum (Y * log (Y_prob * (1 - does_prob_contradict) + does_prob_contradict))
                 if (log_l != log_l | (log_l == log_l + 1.0 & log_l == log_l * 2.0)):
@@ -1095,13 +1095,13 @@ def binomial_probability_two_column(linear_terms: matrix[float], link_type: int,
         if (link_power == 0.5): # Binomial.sqrt
             Y_prob = dot((linear_terms ** 2), p_one_m_one) + dot(ones_r, zero_one)
         else: # Binomial.power_nonlog
-            if (sum (ppred (linear_terms, 0.0, "<")) == 0):
+            if (sum(linear_terms < 0.0) == 0):
                 Y_prob = dot((linear_terms ** (1.0 / link_power)), p_one_m_one) + dot(ones_r, zero_one)
             else:
                 isNaN = 1
     else: # Binomial.non_power
-        is_LT_pos_infinite = ppred (linear_terms, 1.0/0.0, "==")
-        is_LT_neg_infinite = ppred (linear_terms, -1.0/0.0, "==")
+        is_LT_pos_infinite = (linear_terms == (1.0/0.0))
+        is_LT_neg_infinite = (linear_terms == (-1.0/0.0))
         is_LT_infinite = dot(is_LT_pos_infinite, one_zero) + dot(is_LT_neg_infinite, zero_one)
         finite_linear_terms = replace (target = linear_terms, pattern = 1.0/0.0, replacement = 0)
         finite_linear_terms = replace (target = finite_linear_terms, pattern = -1.0/0.0, replacement = 0)
@@ -1110,7 +1110,7 @@ def binomial_probability_two_column(linear_terms: matrix[float], link_type: int,
             Y_prob = Y_prob / (dot(rowSums (Y_prob), ones_2))
         else:
             if (link_type == 3): # Binomial.probit
-                lt_pos_neg = dot(ppred (finite_linear_terms, 0.0, ">="), p_one_m_one) + dot(ones_r, zero_one)
+                lt_pos_neg = dot((finite_linear_terms >= 0.0), p_one_m_one) + dot(ones_r, zero_one)
                 t_gp = 1.0 / (1.0 + abs (finite_linear_terms) * 0.231641888) # 0.231641888 = 0.3275911 / sqrt (2.0)
                 pt_gp = t_gp * ( 0.254829592 + t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
@@ -1123,7 +1123,7 @@ def binomial_probability_two_column(linear_terms: matrix[float], link_type: int,
                 if (link_type == 4): # Binomial.cloglog
                     the_exp = exp (finite_linear_terms)
                     the_exp_exp = exp (- the_exp)
-                    is_too_small = ppred (10000000 + the_exp, 10000000, "==")
+                    is_too_small = ((10000000 + the_exp) == 10000000)
                     Y_prob [, 0] = (1 - is_too_small) * (1 - the_exp_exp) + is_too_small * the_exp * (1 - the_exp / 2)
                     Y_prob [, 1] = the_exp_exp
                 else:

http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/id3/id3.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/id3/id3.dml b/src/test/scripts/applications/id3/id3.dml
index a127fc8..69b59e1 100644
--- a/src/test/scripts/applications/id3/id3.dml
+++ b/src/test/scripts/applications/id3/id3.dml
@@ -100,7 +100,7 @@ id3_learn = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] X_subset
     #with non-zero samples
     #and to pull out the most popular label
 
-    num_non_zero_labels = sum(ppred(hist_labels, 0, ">"));
+    num_non_zero_labels = sum(hist_labels > 0);
     most_popular_label = rowIndexMax(t(hist_labels));
     num_remaining_attrs = sum(attributes)
 
@@ -126,7 +126,7 @@ id3_learn = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] X_subset
         hist_labels2 = aggregate(target=X_subset, groups=y, fn="sum")
         num_samples2 = sum(X_subset)
         print(num_samples2+" #samples")
-        zero_entries_in_hist1 = ppred(hist_labels2, 0, "==")
+        zero_entries_in_hist1 = (hist_labels2 == 0)
         pi1 = hist_labels2/num_samples2
         log_term1 = zero_entries_in_hist1*1 + (1-zero_entries_in_hist1)*pi1
         entropy_vector1 = -pi1*log(log_term1)
@@ -144,12 +144,12 @@ id3_learn = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] X_subset
         for(j in 1:nrow(attr_domain), check=0){
             if(as.scalar(attr_domain[j,1]) != 0){
                 val = j
-                Tj = X_subset * ppred(X[,i], val, "==")
+                Tj = X_subset * (X[,i] == val)
 
                 #entropy = compute_entropy(Tj, y)
                 hist_labels1 = aggregate(target=Tj, groups=y, fn="sum")
                 num_samples1 = sum(Tj)
-                zero_entries_in_hist = ppred(hist_labels1, 0, "==")
+                zero_entries_in_hist = (hist_labels1 == 0)
                 pi = hist_labels1/num_samples1
                 log_term = zero_entries_in_hist*1 + (1-zero_entries_in_hist)*pi
                 entropy_vector = -pi*log(log_term)
@@ -203,7 +203,7 @@ id3_learn = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] X_subset
 
     for(i1 in 1:nrow(attr_domain), check=0){
-        Ti = X_subset * ppred(X[,best_attr], i1, "==")
+        Ti = X_subset * (X[,best_attr] == i1)
         num_nodes_Ti = sum(Ti)
 
         if(num_nodes_Ti > 0){
@@ -234,7 +234,7 @@ id3_learn = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] X_subset
 
     #edges from root to children
     if(1==1){
-        sz = sum(ppred(numSubtreeNodes, 0, ">")) + num_edges_in_subtrees
+        sz = sum(numSubtreeNodes > 0) + num_edges_in_subtrees
     }
     edges = matrix(1, rows=sz, cols=3)
     numEdges = 0
@@ -295,7 +295,7 @@ y = y + labelCorrection + 0
 [nodes, edges] = id3_learn(X, y, X_subset, attributes, minsplit)
 
 # decoding outputs
-nodes[,2] = nodes[,2] - labelCorrection * ppred(nodes[,1], -1, "==")
+nodes[,2] = nodes[,2] - labelCorrection * (nodes[,1] == -1)
 for(i3 in 1:nrow(edges)){
 #parfor(i3 in 1:nrow(edges)){
     e_parent = as.scalar(edges[i3,1])

http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/id3/id3.pydml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/id3/id3.pydml b/src/test/scripts/applications/id3/id3.pydml
index 02b80b5..9bfc70a 100644
--- a/src/test/scripts/applications/id3/id3.pydml
+++ b/src/test/scripts/applications/id3/id3.pydml
@@ -100,7 +100,7 @@ def id3_learn(X:matrix[float], y:matrix[float], X_subset:matrix[float], attribut
     #with non-zero samples
     #and to pull out the most popular label
 
-    num_non_zero_labels = sum(ppred(hist_labels, 0, ">"))
+    num_non_zero_labels = sum(hist_labels > 0)
     most_popular_label = rowIndexMax(t(hist_labels))
     num_remaining_attrs = sum(attributes)
 
@@ -126,7 +126,7 @@ def id3_learn(X:matrix[float], y:matrix[float], X_subset:matrix[float], attribut
         hist_labels2 = aggregate(target=X_subset, groups=y, fn="sum")
         num_samples2 = sum(X_subset)
         print(num_samples2+" #samples")
-        zero_entries_in_hist1 = ppred(hist_labels2, 0, "==")
+        zero_entries_in_hist1 = (hist_labels2 == 0)
         pi1 = hist_labels2/num_samples2
         log_term1 = zero_entries_in_hist1*1 + (1-zero_entries_in_hist1)*pi1
         entropy_vector1 = -pi1*log(log_term1)
@@ -144,12 +144,12 @@ def id3_learn(X:matrix[float], y:matrix[float], X_subset:matrix[float], attribut
         for(j in 1:nrow(attr_domain), check=0):
             if(scalar(attr_domain[j-1,0]) != 0):
                 val = j
-                Tj = X_subset * ppred(X[,i-1], val, "==")
+                Tj = X_subset * (X[,i-1] == val)
 
                 #entropy = compute_entropy(Tj, y)
                 hist_labels1 = aggregate(target=Tj, groups=y, fn="sum")
                 num_samples1 = sum(Tj)
-                zero_entries_in_hist = ppred(hist_labels1, 0, "==")
+                zero_entries_in_hist = (hist_labels1 == 0)
                 pi = hist_labels1/num_samples1
                 log_term = zero_entries_in_hist*1 + (1-zero_entries_in_hist)*pi
                 entropy_vector = -pi*log(log_term)
@@ -194,7 +194,7 @@ def id3_learn(X:matrix[float], y:matrix[float], X_subset:matrix[float], attribut
 
     for(i1 in 1:nrow(attr_domain), check=0):
-        Ti = X_subset * ppred(X[,best_attr-1], i1, "==")
+        Ti = X_subset * (X[,best_attr-1] == i1)
         num_nodes_Ti = sum(Ti)
 
         if(num_nodes_Ti > 0):
@@ -222,7 +222,7 @@ def id3_learn(X:matrix[float], y:matrix[float], X_subset:matrix[float], attribut
 
     #edges from root to children
     if(1==1):
-        sz = sum(ppred(numSubtreeNodes, 0, ">")) + num_edges_in_subtrees
+        sz = sum(numSubtreeNodes > 0) + num_edges_in_subtrees
 
     edges = full(1, rows=sz, cols=3)
     numEdges = 0
@@ -276,7 +276,7 @@ y = y + labelCorrection + 0
 [nodes, edges] = id3_learn(X, y, X_subset, attributes, minsplit)
 
 # decoding outputs
-nodes[,1] = nodes[,1] - labelCorrection * ppred(nodes[,0], -1, "==")
+nodes[,1] = nodes[,1] - labelCorrection * (nodes[,0] == -1)
 for(i3 in 1:nrow(edges)):
 #parfor(i3 in 1:nrow(edges)):
     e_parent = scalar(edges[i3-1,0])

http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/impute/tmp.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/impute/tmp.dml b/src/test/scripts/applications/impute/tmp.dml
index c852cce..605ab3e 100644
--- a/src/test/scripts/applications/impute/tmp.dml
+++ b/src/test/scripts/applications/impute/tmp.dml
@@ -108,8 +108,8 @@ atan_temporary =
 function (Matrix [double] Args) return (Matrix [double] AtanArgs)
 {
     AbsArgs = abs (Args);
-    Eks = AbsArgs + ppred (AbsArgs, 0.0, "==") * 0.000000000001;
-    Eks = ppred (AbsArgs, 1.0, "<=") * Eks + ppred (AbsArgs, 1.0, ">") / Eks;
+    Eks = AbsArgs + (AbsArgs == 0.0) * 0.000000000001;
+    Eks = (AbsArgs <= 1.0) * Eks + (AbsArgs > 1.0) / Eks;
     EksSq = Eks * Eks;
     AtanEks = Eks * ( 1.0000000000 +
@@ -122,7 +122,7 @@ atan_temporary =
         EksSq * (-0.0161657367 +
         EksSq *   0.0028662257 ))))))));
     pi_over_two = 1.5707963267948966192313216916398;
-    AtanAbsArgs = ppred (AbsArgs, 1.0, "<=") * AtanEks + ppred (AbsArgs, 1.0, ">") * (pi_over_two - AtanEks);
-    AtanArgs = (ppred (Args, 0.0, ">=") - ppred (Args, 0.0, "<")) * AtanAbsArgs;
+    AtanAbsArgs = (AbsArgs <= 1.0) * AtanEks + (AbsArgs > 1.0) * (pi_over_two - AtanEks);
+    AtanArgs = ((Args >= 0.0) - (Args < 0.0)) * AtanAbsArgs;
 }
 */
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/kmeans/Kmeans.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/kmeans/Kmeans.dml b/src/test/scripts/applications/kmeans/Kmeans.dml
index 368b98d..dd73dec 100644
--- a/src/test/scripts/applications/kmeans/Kmeans.dml
+++ b/src/test/scripts/applications/kmeans/Kmeans.dml
@@ -51,7 +51,7 @@ if (num_records > sample_size)
 {
     # Sample approximately 1000 records (Bernoulli sampling)
     P = Rand( rows = num_records, cols = 1, min = 0.0, max = 1.0 );
-    P = ppred( P * num_records, sample_size, "<=" );
+    P = ((P * num_records) <= sample_size);
     X_sample = X * (P %*% t( one_per_feature ));
     X_sample = removeEmpty( target = X_sample, margin = "rows" );
 }
@@ -91,7 +91,7 @@ while (centroid_change > eps)
         D = one_per_record %*% t(rowSums (Y * Y)) - 2.0 * X %*% t(Y);
     }
     # Find the closest centroid for each record
-    P = ppred (D, rowMins (D) %*% t(one_per_centroid), "<=");
+    P = (D <= (rowMins (D) %*% t(one_per_centroid)));
     # If some records belong to multiple centroids, share them equally
     P = P / (rowSums (P) %*% t(one_per_centroid));
     # Normalize the columns of P to compute record weights for new centroids

http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/l2svm/L2SVM.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/l2svm/L2SVM.dml b/src/test/scripts/applications/l2svm/L2SVM.dml
index d546d9a..3794827 100644
--- a/src/test/scripts/applications/l2svm/L2SVM.dml
+++ b/src/test/scripts/applications/l2svm/L2SVM.dml
@@ -40,8 +40,8 @@ Y = read($Y)
 
 check_min = min(Y)
 check_max = max(Y)
-num_min = sum(ppred(Y, check_min, "=="))
-num_max = sum(ppred(Y, check_max, "=="))
+num_min = sum(Y == check_min)
+num_max = sum(Y == check_max)
 if(num_min + num_max != nrow(Y))
     print("please check Y, it should contain only 2 labels")
 else{
     if(check_min != -1 | check_max != +1)
@@ -84,7 +84,7 @@ while(continue == 1 & iter < maxiterations) {
     while(continue1 == 1){
         tmp_Xw = Xw + step_sz*Xd
         out = 1 - Y * (tmp_Xw)
-        sv = ppred(out, 0, ">")
+        sv = (out > 0)
         out = out * sv
         g = wd + step_sz*dd - sum(out * Y * Xd)
         h = dd + sum(Xd * sv * Xd)
@@ -99,7 +99,7 @@ while(continue == 1 & iter < maxiterations) {
 
     Xw = Xw + step_sz*Xd
     out = 1 - Y * Xw
-    sv = ppred(out, 0, ">")
+    sv = (out > 0)
     out = sv * out
     obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w)
     g_new = t(X) %*% (out * Y) - lambda * w

http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/l2svm/L2SVM.pydml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/l2svm/L2SVM.pydml b/src/test/scripts/applications/l2svm/L2SVM.pydml
index b28e584..19f074f 100644
--- a/src/test/scripts/applications/l2svm/L2SVM.pydml
+++ b/src/test/scripts/applications/l2svm/L2SVM.pydml
@@ -40,8 +40,8 @@ Y = load($Y)
 
 check_min = min(Y)
 check_max = max(Y)
-num_min = sum(ppred(Y, check_min, "=="))
-num_max = sum(ppred(Y, check_max, "=="))
+num_min = sum(Y == check_min)
+num_max = sum(Y == check_max)
 if(num_min + num_max != nrow(Y)):
     print("please check Y, it should contain only 2 labels")
 else:
@@ -82,7 +82,7 @@ while(continue == 1 & iter < maxiterations):
     while(continue1 == 1):
         tmp_Xw = Xw + step_sz*Xd
         out = 1 - Y * (tmp_Xw)
-        sv = ppred(out, 0, ">")
+        sv = (out > 0)
         out = out * sv
         g = wd + step_sz*dd - sum(out * Y * Xd)
         h = dd + sum(Xd * sv * Xd)
@@ -95,7 +95,7 @@ while(continue == 1 & iter < maxiterations):
 
     Xw = Xw + step_sz*Xd
     out = 1 - Y * Xw
-    sv = ppred(out, 0, ">")
+    sv = (out > 0)
     out = sv * out
     obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w)
     g_new = dot(transpose(X), (out * Y)) - lambda * w

http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/linearLogReg/LinearLogReg.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/linearLogReg/LinearLogReg.dml b/src/test/scripts/applications/linearLogReg/LinearLogReg.dml
index bf47bd1..9bd98e8 100644
--- a/src/test/scripts/applications/linearLogReg/LinearLogReg.dml
+++ b/src/test/scripts/applications/linearLogReg/LinearLogReg.dml
@@ -215,7 +215,7 @@ while(!converge) {
 
     ot = Xt %*% w
     ot2 = yt * ot
-    correct = sum(ppred(ot2, 0, ">"))
+    correct = sum(ot2 > 0)
     accuracy = correct*100.0/Nt
     iter = iter + 1
     converge = (norm_grad < (tol * norm_grad_initial)) | (iter > maxiter)

http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/linearLogReg/LinearLogReg.pydml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/linearLogReg/LinearLogReg.pydml b/src/test/scripts/applications/linearLogReg/LinearLogReg.pydml
index 16160a3..1a94534 100644
--- a/src/test/scripts/applications/linearLogReg/LinearLogReg.pydml
+++ b/src/test/scripts/applications/linearLogReg/LinearLogReg.pydml
@@ -198,7 +198,7 @@ while(!converge):
 
     ot = dot(Xt, w)
     ot2 = yt * ot
-    correct = sum(ppred(ot2, 0, ">"))
+    correct = sum(ot2 > 0)
     accuracy = correct*100.0/Nt
     iter = iter + 1
     converge = (norm_grad < (tol * norm_grad_initial)) | (iter > maxiter)

http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/m-svm/m-svm.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/m-svm/m-svm.dml b/src/test/scripts/applications/m-svm/m-svm.dml
index 439002c..400627d 100644
--- a/src/test/scripts/applications/m-svm/m-svm.dml
+++ b/src/test/scripts/applications/m-svm/m-svm.dml
@@ -67,7 +67,7 @@ if(check_X == 0){
     debug_mat = matrix(-1, rows=max_iterations, cols=num_classes)
 
     parfor(iter_class in 1:num_classes){
-        Y_local = 2 * ppred(Y, iter_class, "==") - 1
+        Y_local = 2 * (Y == iter_class) - 1
         w_class = matrix(0, rows=num_features, cols=1)
         if (intercept == 1) {
             zero_matrix = matrix(0, rows=1, cols=1);
@@ -90,7 +90,7 @@ if(check_X == 0){
         while(continue1 == 1){
             tmp_Xw = Xw + step_sz*Xd
             out = 1 - Y_local * (tmp_Xw)
-            sv = ppred(out, 0, ">")
+            sv = (out > 0)
             out = out * sv
             g = wd + step_sz*dd - sum(out * Y_local * Xd)
             h = dd + sum(Xd * sv * Xd)
@@ -105,14 +105,14 @@ if(check_X == 0){
 
         Xw = Xw + step_sz*Xd
         out = 1 - Y_local * Xw
-        sv = ppred(out, 0, ">")
+        sv = (out > 0)
         out = sv * out
         obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
         g_new = t(X) %*% (out * Y_local) - lambda * w_class
 
         tmp = sum(s * g_old)
 
-        train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
+        train_acc = sum((Y_local*(X%*%w_class)) >= 0)/num_samples*100
         print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
         debug_mat[iter+1,iter_class] = obj
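As the L2SVM and m-svm hunks above show, these indicators usually act as multiplicative masks: sv = (out > 0); out = out * sv zeroes the non-violating examples before the hinge-loss sums. A small DML sketch of that idiom (the margin values below are hypothetical, for illustration only):

    out = matrix ("0.5 -1.0 2.0", rows = 3, cols = 1);   # margin terms 1 - y * (X %*% w)
    sv = (out > 0);                     # 0/1 indicator of margin violations
    out = out * sv;                     # keep only the violating rows
    obj_part = 0.5 * sum (out * out);   # hinge-loss term; here 0.5 * (0.25 + 4.0) = 2.125
    print ("hinge part: " + obj_part);

Keeping the masking elementwise is what lets these scripts avoid per-entry branching.
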
http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/m-svm/m-svm.pydml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/m-svm/m-svm.pydml b/src/test/scripts/applications/m-svm/m-svm.pydml index 8c01806..1543f0c 100644 --- a/src/test/scripts/applications/m-svm/m-svm.pydml +++ b/src/test/scripts/applications/m-svm/m-svm.pydml @@ -65,7 +65,7 @@ else: debug_mat = full(-1, rows=max_iterations, cols=num_classes) parfor(iter_class in 1:num_classes): - Y_local = 2 * ppred(Y, iter_class, "==") - 1 + Y_local = 2 * (Y == iter_class) - 1 w_class = full(0, rows=num_features, cols=1) if (intercept == 1): zero_matrix = full(0, rows=1, cols=1) @@ -86,7 +86,7 @@ else: while(continue1 == 1): tmp_Xw = Xw + step_sz*Xd out = 1 - Y_local * (tmp_Xw) - sv = ppred(out, 0, ">") + sv = (out > 0) out = out * sv g = wd + step_sz*dd - sum(out * Y_local * Xd) h = dd + sum(Xd * sv * Xd) @@ -99,14 +99,14 @@ else: Xw = Xw + step_sz*Xd out = 1 - Y_local * Xw - sv = ppred(out, 0, ">") + sv = (out > 0) out = sv * out obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class) g_new = dot(transpose(X), (out * Y_local)) - lambda * w_class tmp = sum(s * g_old) - train_acc = sum(ppred(Y_local*(dot(X, w_class)), 0, ">="))/num_samples*100 + train_acc = sum(Y_local*(dot(X, w_class)) >= 0)/num_samples*100 print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc) debug_mat[iter,iter_class-1] = obj http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/mdabivar/MDABivariateStats.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/mdabivar/MDABivariateStats.dml b/src/test/scripts/applications/mdabivar/MDABivariateStats.dml index 5bb980c..1a47440 100644 --- a/src/test/scripts/applications/mdabivar/MDABivariateStats.dml +++ b/src/test/scripts/applications/mdabivar/MDABivariateStats.dml @@ -222,7 +222,7 @@ bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double pval, Mat r = rowSums(F) c = colSums(F) E = (r %*% c)/W - E = ppred(E, 0, "==")*0.0001 + E + E = (E == 0)*0.0001 + E T = (F-E)^2/E chi_squared = sum(T) @@ -250,7 +250,7 @@ bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double pVal, Mat # category-wise (frequencies, means, variances) CFreqs1 = aggregate(target=Y, groups=A, fn="count") - present_domain_vals_mat = removeEmpty(target=diag(1-ppred(CFreqs1, 0, "==")), margin="rows") + present_domain_vals_mat = removeEmpty(target=diag(1-(CFreqs1 == 0)), margin="rows") CFreqs = present_domain_vals_mat %*% CFreqs1 CMeans = present_domain_vals_mat %*% aggregate(target=Y, groups=A, fn="mean") http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/mdabivar/MDABivariateStats.pydml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/mdabivar/MDABivariateStats.pydml b/src/test/scripts/applications/mdabivar/MDABivariateStats.pydml index fd3cda6..2fbc506 100644 --- a/src/test/scripts/applications/mdabivar/MDABivariateStats.pydml +++ b/src/test/scripts/applications/mdabivar/MDABivariateStats.pydml @@ -203,7 +203,7 @@ def bivar_cc(A:matrix[float], B:matrix[float]) -> (pval:float, contingencyTable: r = rowSums(F) c = colSums(F) E = (dot(r, c))/W - E = ppred(E, 0, "==")*0.0001 + E + E = (E == 0)*0.0001 + E T = (F-E)**2/E chi_squared = sum(T) # compute p-value @@ -228,7 +228,7 @@ def 
bivar_sc(Y:matrix[float], A:matrix[float]) -> (pVal:float, CFreqs:matrix[flo # category-wise (frequencies, means, variances) CFreqs1 = aggregate(target=Y, groups=A, fn="count") - present_domain_vals_mat = removeEmpty(target=diag(1-ppred(CFreqs1, 0, "==")), axis=0) + present_domain_vals_mat = removeEmpty(target=diag(1-(CFreqs1 == 0)), axis=0) CFreqs = dot(present_domain_vals_mat, CFreqs1) CMeans = dot(present_domain_vals_mat, aggregate(target=Y, groups=A, fn="mean")) http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.dml b/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.dml index 83ddbf7..60f481b 100644 --- a/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.dml +++ b/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.dml @@ -67,7 +67,7 @@ D_w_ones = cbind(D, ones) model = cbind(class_conditionals, class_prior) log_probs = D_w_ones %*% t(log(model)) pred = rowIndexMax(log_probs) -acc = sum(ppred(pred, C, "==")) / numRows * 100 +acc = sum(pred == C) / numRows * 100 acc_str = "Training Accuracy (%): " + acc print(acc_str) http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.pydml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.pydml b/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.pydml index 58a23c6..09fa8ec 100644 --- a/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.pydml +++ b/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.pydml @@ -68,7 +68,7 @@ log_model = log(model) transpose_log_model = log_model.transpose() log_probs = dot(D_w_ones, transpose_log_model) pred = rowIndexMax(log_probs) -acc = sum(ppred(pred, C, "==")) / numRows * 100 +acc = sum(pred == C) / numRows * 100 acc_str = "Training Accuracy (%): " + acc print(acc_str) http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm0.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm0.dml b/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm0.dml index 9c0ca9c..547176c 100644 --- a/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm0.dml +++ b/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm0.dml @@ -47,9 +47,9 @@ stats = matrix(0, rows=k, cols=1); #k-folds x 1-stats for( i in 1:k ) { #prepare train/test fold projections - vPxi = ppred( P, i, "==" ); # Select 1/k fraction of the rows + vPxi = (P == i); # Select 1/k fraction of the rows mPxi = (vPxi %*% ones); # for the i-th fold TEST set - #nvPxi = ppred( P, i, "!=" ); + #nvPxi = (P != i); #nmPxi = (nvPxi %*% ones); #note: inefficient for sparse data #create train/test folds @@ -113,7 +113,7 @@ scoreMultiClassSVM = function( Matrix[double] X, Matrix[double] y, Matrix[double predicted_y = rowIndexMax( scores); - correct_percentage = sum( ppred( predicted_y - y, 0, "==")) / Nt * 100; + correct_percentage = sum((predicted_y - y) == 0) / Nt * 100; out_correct_pct = correct_percentage; @@ -142,7 +142,7 @@ multiClassSVM = function (Matrix[double] X, Matrix[double] Y, Integer intercept, iter_class = 1 - Y_local = 2 * ppred( Y, iter_class, "==") - 1 
http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm0.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm0.dml b/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm0.dml
index 9c0ca9c..547176c 100644
--- a/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm0.dml
+++ b/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm0.dml
@@ -47,9 +47,9 @@ stats = matrix(0, rows=k, cols=1); #k-folds x 1-stats
 for( i in 1:k ) {
    #prepare train/test fold projections
-   vPxi = ppred( P, i, "==" );  # Select 1/k fraction of the rows
+   vPxi = (P == i);             # Select 1/k fraction of the rows
    mPxi = (vPxi %*% ones);      # for the i-th fold TEST set
-   #nvPxi = ppred( P, i, "!=" );
+   #nvPxi = (P != i);
    #nmPxi = (nvPxi %*% ones);   #note: inefficient for sparse data

    #create train/test folds
@@ -113,7 +113,7 @@ scoreMultiClassSVM = function( Matrix[double] X, Matrix[double] y, Matrix[double

    predicted_y = rowIndexMax( scores);

-   correct_percentage = sum( ppred( predicted_y - y, 0, "==")) / Nt * 100;
+   correct_percentage = sum((predicted_y - y) == 0) / Nt * 100;

    out_correct_pct = correct_percentage;

@@ -142,7 +142,7 @@ multiClassSVM = function (Matrix[double] X, Matrix[double] Y, Integer intercept,

   iter_class = 1

-  Y_local = 2 * ppred( Y, iter_class, "==") - 1
+  Y_local = 2 * (Y == iter_class) - 1
   w_class = matrix( 0, rows=num_features, cols=1 )

   if (intercept == 1) {
@@ -165,7 +165,7 @@ multiClassSVM = function (Matrix[double] X, Matrix[double] Y, Integer intercept,
     while(continue1 == 1){
      tmp_w = w_class + step_sz*s
      out = 1 - Y_local * (X %*% tmp_w)
-     sv = ppred(out, 0, ">")
+     sv = (out > 0)
      out = out * sv
      g = wd + step_sz*dd - sum(out * Y_local * Xd)
      h = dd + sum(Xd * sv * Xd)
@@ -179,14 +179,14 @@ multiClassSVM = function (Matrix[double] X, Matrix[double] Y, Integer intercept,

    w_class = w_class + step_sz*s
    out = 1 - Y_local * (X %*% w_class)
-   sv = ppred(out, 0, ">")
+   sv = (out > 0)
    out = sv * out
    obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
    g_new = t(X) %*% (out * Y_local) - lambda * w_class

    tmp = sum(s * g_old)

-   train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
+   train_acc = sum((Y_local*(X%*%w_class)) >= 0)/num_samples*100
    #print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)

    if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
@@ -206,7 +206,7 @@ multiClassSVM = function (Matrix[double] X, Matrix[double] Y, Integer intercept,

   iter_class = iter_class + 1
   while(iter_class <= num_classes){
-   Y_local = 2 * ppred(Y, iter_class, "==") - 1
+   Y_local = 2 * (Y == iter_class) - 1
    w_class = matrix(0, rows=ncol(X), cols=1)
    if (intercept == 1) {
     zero_matrix = matrix(0, rows=1, cols=1);
@@ -228,7 +228,7 @@ multiClassSVM = function (Matrix[double] X, Matrix[double] Y, Integer intercept,
     while(continue1 == 1){
      tmp_w = w_class + step_sz*s
      out = 1 - Y_local * (X %*% tmp_w)
-     sv = ppred(out, 0, ">")
+     sv = (out > 0)
      out = out * sv
      g = wd + step_sz*dd - sum(out * Y_local * Xd)
      h = dd + sum(Xd * sv * Xd)
@@ -242,14 +242,14 @@ multiClassSVM = function (Matrix[double] X, Matrix[double] Y, Integer intercept,

    w_class = w_class + step_sz*s
    out = 1 - Y_local * (X %*% w_class)
-   sv = ppred(out, 0, ">")
+   sv = (out > 0)
    out = sv * out
    obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
    g_new = t(X) %*% (out * Y_local) - lambda * w_class

    tmp = sum(s * g_old)

-   train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
+   train_acc = sum((Y_local*(X%*%w_class)) >= 0)/num_samples*100
    #print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)

    if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
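In the SVM inner loop above, sv = (out > 0) marks the current margin violators: rows where the hinge term 1 - y * (X %*% w) is still positive. Multiplying by the 0/1 indicator zeroes out the satisfied rows, exactly as ppred(out, 0, ">") did. A small sketch with illustrative values:

    out = matrix("0.5 -0.2 1.3", rows=3, cols=1);  # hinge terms, toy values
    sv  = (out > 0);                               # 0/1 indicator of active hinge terms
    out = out * sv;                                # keep only the positive terms
    obj = 0.5 * sum(out * out);                    # squared-hinge part of the objective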
http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm1.dml b/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm1.dml
index 1dc2a34..0ef4bf4 100644
--- a/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm1.dml
+++ b/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm1.dml
@@ -47,9 +47,9 @@ stats = matrix(0, rows=k, cols=1); #k-folds x 1-stats
 parfor( i in 1:k, par=4, mode=LOCAL, opt=NONE ) {
    #prepare train/test fold projections
-   vPxi = ppred( P, i, "==" );  # Select 1/k fraction of the rows
+   vPxi = (P == i);             # Select 1/k fraction of the rows
    mPxi = (vPxi %*% ones);      # for the i-th fold TEST set
-   #nvPxi = ppred( P, i, "!=" );
+   #nvPxi = (P != i);
    #nmPxi = (nvPxi %*% ones);   #note: inefficient for sparse data

    #create train/test folds
@@ -113,7 +113,7 @@ scoreMultiClassSVM = function( Matrix[double] X, Matrix[double] y, Matrix[double

    predicted_y = rowIndexMax( scores);

-   correct_percentage = sum( ppred( predicted_y - y, 0, "==")) / Nt * 100;
+   correct_percentage = sum((predicted_y - y) == 0) / Nt * 100;

    out_correct_pct = correct_percentage;

@@ -142,7 +142,7 @@ multiClassSVM = function (Matrix[double] X, Matrix[double] Y, Integer intercept,

   iter_class = 1

-  Y_local = 2 * ppred( Y, iter_class, "==") - 1
+  Y_local = 2 * (Y == iter_class) - 1
   w_class = matrix( 0, rows=num_features, cols=1 )

   if (intercept == 1) {
@@ -165,7 +165,7 @@ multiClassSVM = function (Matrix[double] X, Matrix[double] Y, Integer intercept,
     while(continue1 == 1){
      tmp_w = w_class + step_sz*s
      out = 1 - Y_local * (X %*% tmp_w)
-     sv = ppred(out, 0, ">")
+     sv = (out > 0)
      out = out * sv
      g = wd + step_sz*dd - sum(out * Y_local * Xd)
      h = dd + sum(Xd * sv * Xd)
@@ -179,14 +179,14 @@ multiClassSVM = function (Matrix[double] X, Matrix[double] Y, Integer intercept,

    w_class = w_class + step_sz*s
    out = 1 - Y_local * (X %*% w_class)
-   sv = ppred(out, 0, ">")
+   sv = (out > 0)
    out = sv * out
    obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
    g_new = t(X) %*% (out * Y_local) - lambda * w_class

    tmp = sum(s * g_old)

-   train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
+   train_acc = sum((Y_local*(X%*%w_class)) >= 0)/num_samples*100
    #print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)

    if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
@@ -206,7 +206,7 @@ multiClassSVM = function (Matrix[double] X, Matrix[double] Y, Integer intercept,

   iter_class = iter_class + 1
   while(iter_class <= num_classes){
-   Y_local = 2 * ppred(Y, iter_class, "==") - 1
+   Y_local = 2 * (Y == iter_class) - 1
    w_class = matrix(0, rows=ncol(X), cols=1)
    if (intercept == 1) {
     zero_matrix = matrix(0, rows=1, cols=1);
@@ -228,7 +228,7 @@ multiClassSVM = function (Matrix[double] X, Matrix[double] Y, Integer intercept,
     while(continue1 == 1){
      tmp_w = w_class + step_sz*s
      out = 1 - Y_local * (X %*% tmp_w)
-     sv = ppred(out, 0, ">")
+     sv = (out > 0)
      out = out * sv
      g = wd + step_sz*dd - sum(out * Y_local * Xd)
      h = dd + sum(Xd * sv * Xd)
@@ -242,14 +242,14 @@ multiClassSVM = function (Matrix[double] X, Matrix[double] Y, Integer intercept,

    w_class = w_class + step_sz*s
    out = 1 - Y_local * (X %*% w_class)
-   sv = ppred(out, 0, ">")
+   sv = (out > 0)
    out = sv * out
    obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
    g_new = t(X) %*% (out * Y_local) - lambda * w_class

    tmp = sum(s * g_old)

-   train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
+   train_acc = sum((Y_local*(X%*%w_class)) >= 0)/num_samples*100
    #print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)

    if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
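The recoding Y_local = 2 * (Y == iter_class) - 1 that recurs in these files is the usual one-vs-rest trick: the 0/1 equality indicator is stretched to -1/+1, so the class currently being trained becomes +1 and every other class -1. Toy illustration with made-up labels:

    Y = matrix("1 3 2 3", rows=4, cols=1);  # multiclass labels, illustrative
    iter_class = 3;
    Y_local = 2 * (Y == iter_class) - 1;    # yields -1, +1, -1, +1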
http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm4.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm4.dml b/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm4.dml
index b94f168..06bd9d5 100644
--- a/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm4.dml
+++ b/src/test/scripts/applications/parfor/parfor_cv_multiclasssvm4.dml
@@ -47,9 +47,9 @@ stats = matrix(0, rows=k, cols=1); #k-folds x 1-stats
 parfor( i in 1:k ) {
    #prepare train/test fold projections
-   vPxi = ppred( P, i, "==" );  # Select 1/k fraction of the rows
+   vPxi = (P == i);             # Select 1/k fraction of the rows
    mPxi = (vPxi %*% ones);      # for the i-th fold TEST set
-   #nvPxi = ppred( P, i, "!=" );
+   #nvPxi = (P != i);
    #nmPxi = (nvPxi %*% ones);   #note: inefficient for sparse data

    #create train/test folds
@@ -113,7 +113,7 @@ scoreMultiClassSVM = function( Matrix[double] X, Matrix[double] y, Matrix[double

    predicted_y = rowIndexMax( scores);

-   correct_percentage = sum( ppred( predicted_y - y, 0, "==")) / Nt * 100;
+   correct_percentage = sum((predicted_y - y) == 0) / Nt * 100;

    out_correct_pct = correct_percentage;

@@ -142,7 +142,7 @@ multiClassSVM = function (Matrix[double] X, Matrix[double] Y, Integer intercept,

   iter_class = 1

-  Y_local = 2 * ppred( Y, iter_class, "==") - 1
+  Y_local = 2 * (Y == iter_class) - 1
   w_class = matrix( 0, rows=num_features, cols=1 )

   if (intercept == 1) {
@@ -165,7 +165,7 @@ multiClassSVM = function (Matrix[double] X, Matrix[double] Y, Integer intercept,
     while(continue1 == 1){
      tmp_w = w_class + step_sz*s
      out = 1 - Y_local * (X %*% tmp_w)
-     sv = ppred(out, 0, ">")
+     sv = (out > 0)
      out = out * sv
      g = wd + step_sz*dd - sum(out * Y_local * Xd)
      h = dd + sum(Xd * sv * Xd)
@@ -179,14 +179,14 @@ multiClassSVM = function (Matrix[double] X, Matrix[double] Y, Integer intercept,

    w_class = w_class + step_sz*s
    out = 1 - Y_local * (X %*% w_class)
-   sv = ppred(out, 0, ">")
+   sv = (out > 0)
    out = sv * out
    obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
    g_new = t(X) %*% (out * Y_local) - lambda * w_class

    tmp = sum(s * g_old)

-   train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
+   train_acc = sum((Y_local*(X%*%w_class)) >= 0)/num_samples*100
    #print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)

    if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
@@ -206,7 +206,7 @@ multiClassSVM = function (Matrix[double] X, Matrix[double] Y, Integer intercept,

   iter_class = iter_class + 1
   while(iter_class <= num_classes){
-   Y_local = 2 * ppred(Y, iter_class, "==") - 1
+   Y_local = 2 * (Y == iter_class) - 1
    w_class = matrix(0, rows=ncol(X), cols=1)
    if (intercept == 1) {
     zero_matrix = matrix(0, rows=1, cols=1);
@@ -228,7 +228,7 @@ multiClassSVM = function (Matrix[double] X, Matrix[double] Y, Integer intercept,
     while(continue1 == 1){
      tmp_w = w_class + step_sz*s
      out = 1 - Y_local * (X %*% tmp_w)
-     sv = ppred(out, 0, ">")
+     sv = (out > 0)
      out = out * sv
      g = wd + step_sz*dd - sum(out * Y_local * Xd)
      h = dd + sum(Xd * sv * Xd)
@@ -242,14 +242,14 @@ multiClassSVM = function (Matrix[double] X, Matrix[double] Y, Integer intercept,

    w_class = w_class + step_sz*s
    out = 1 - Y_local * (X %*% w_class)
-   sv = ppred(out, 0, ">")
+   sv = (out > 0)
    out = sv * out
    obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
    g_new = t(X) %*% (out * Y_local) - lambda * w_class

    tmp = sum(s * g_old)

-   train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
+   train_acc = sum((Y_local*(X%*%w_class)) >= 0)/num_samples*100
    #print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)

    if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
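The train_acc line in all three SVM scripts counts sign agreement: with labels in {-1, +1}, a row is classified correctly exactly when Y_local and the margin X %*% w_class share a sign, i.e. when their product is >= 0 (ties counted as correct). An equivalent two-step form, reusing the script's variable names:

    margins   = Y_local * (X %*% w_class);             # > 0 where prediction and label agree
    train_acc = sum(margins >= 0) / num_samples * 100;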
http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/parfor/parfor_sample.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_sample.dml b/src/test/scripts/applications/parfor/parfor_sample.dml
index 98ac395..386e9b0 100644
--- a/src/test/scripts/applications/parfor/parfor_sample.dml
+++ b/src/test/scripts/applications/parfor/parfor_sample.dml
@@ -60,8 +60,8 @@ svUpBnd = cumsum(sv);

 # Construct sampling matrix SM, and apply to create samples
 parfor ( i in 1:nrow(sv)) {
-  T1 = ppred(R, as.scalar(svUpBnd[i,1]), "<=");
-  T2 = ppred(R, as.scalar(svLowBnd[i,1]), ">");
+  T1 = (R <= as.scalar(svUpBnd[i,1]));
+  T2 = (R > as.scalar(svLowBnd[i,1]));
   SM = T1 * T2;
   P = removeEmpty(target=diag(SM), margin="rows");
   iX = P %*% X;

http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/parfor/parfor_univariate0.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_univariate0.dml b/src/test/scripts/applications/parfor/parfor_univariate0.dml
index 78397cb..448c1c3 100644
--- a/src/test/scripts/applications/parfor/parfor_univariate0.dml
+++ b/src/test/scripts/applications/parfor/parfor_univariate0.dml
@@ -142,7 +142,7 @@ else {

        mode = rowIndexMax(t(cat_counts));
        mx = max(cat_counts)
-       modeArr = ppred(cat_counts, mx, "==")
+       modeArr = (cat_counts == mx)
        numModes = sum(modeArr);

        # place the computed statistics in output matrices

http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/parfor/parfor_univariate1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_univariate1.dml b/src/test/scripts/applications/parfor/parfor_univariate1.dml
index 64ee633..ad1e78f 100644
--- a/src/test/scripts/applications/parfor/parfor_univariate1.dml
+++ b/src/test/scripts/applications/parfor/parfor_univariate1.dml
@@ -142,7 +142,7 @@ else {

        mode = rowIndexMax(t(cat_counts));
        mx = max(cat_counts)
-       modeArr = ppred(cat_counts, mx, "==")
+       modeArr = (cat_counts == mx)
        numModes = sum(modeArr);

        # place the computed statistics in output matrices

http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/parfor/parfor_univariate4.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_univariate4.dml b/src/test/scripts/applications/parfor/parfor_univariate4.dml
index 465cb8f..62646ef 100644
--- a/src/test/scripts/applications/parfor/parfor_univariate4.dml
+++ b/src/test/scripts/applications/parfor/parfor_univariate4.dml
@@ -142,7 +142,7 @@ else {

        mode = rowIndexMax(t(cat_counts));
        mx = max(cat_counts)
-       modeArr = ppred(cat_counts, mx, "==")
+       modeArr = (cat_counts == mx)
        numModes = sum(modeArr);

        # place the computed statistics in output matrices
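In parfor_sample.dml the product T1 * T2 acts as a logical AND of two 0/1 indicators, selecting the rows whose uniform draw falls in the half-open interval (svLowBnd[i], svUpBnd[i]]; diag plus removeEmpty then turns that indicator into a row-selection matrix. A self-contained sketch with assumed bounds (lo and hi are illustrative, not from the script):

    R  = Rand(rows=6, cols=1, min=0.0, max=1.0);        # one uniform draw per row
    lo = 0.2;                                           # illustrative lower bound
    hi = 0.7;                                           # illustrative upper bound
    SM = (R <= hi) * (R > lo);                          # 1 iff lo < R <= hi
    P  = removeEmpty(target=diag(SM), margin="rows");   # selects the sampled rows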
http://git-wip-us.apache.org/repos/asf/systemml/blob/d30e1888/src/test/scripts/applications/validation/CV_LogisticRegression.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/validation/CV_LogisticRegression.dml b/src/test/scripts/applications/validation/CV_LogisticRegression.dml
index 191e6d1..6e765e3 100644
--- a/src/test/scripts/applications/validation/CV_LogisticRegression.dml
+++ b/src/test/scripts/applications/validation/CV_LogisticRegression.dml
@@ -60,9 +60,9 @@ stats = matrix(0, rows=k, cols=40); #k-folds x 40-stats
 parfor( i in 1:k ) {
    #prepare train/test fold projections
-   vPxi = ppred( P, i, "==" );
+   vPxi = (P == i);
    mPxi = (vPxi %*% ones);
-   #nvPxi = ppred( P, i, "!=" );
+   #nvPxi = (P != i);
    #nmPxi = (nvPxi %*% ones);   #note: inefficient for sparse data

    #create train/test folds
@@ -292,7 +292,7 @@ logisticRegression = function (Matrix[double] X, Matrix[double] y, Integer in_in
        }
        o2 = y * o
-       correct = sum(ppred(o2, 0, ">"))
+       correct = sum(o2 > 0)
        accuracy = correct*100.0/N
        iter = iter + 1
        #converge = (norm_grad < (tol * norm_grad_initial)) | (iter > maxiter)
@@ -352,7 +352,7 @@ scoreLogRegModel = function (Matrix[double] X_train, Matrix[double] y_train, Mat
    prob_train = 1.0 / (1.0 + exp (- linear_train));
    est_value_POS_train = value_TP * prob_train - cost_FP * (1.0 - prob_train);
    est_value_NEG_train = value_TN * (1.0 - prob_train) - cost_FN * prob_train;
-   y_train_pred = 2 * ppred (est_value_POS_train, est_value_NEG_train, ">") - 1;
+   y_train_pred = 2 * (est_value_POS_train > est_value_NEG_train) - 1;

    # Compute the estimated number of true/false positives/negatives

@@ -411,7 +411,7 @@ scoreLogRegModel = function (Matrix[double] X_train, Matrix[double] y_train, Mat
    prob_test = 1.0 / (1.0 + exp (- linear_test));
    est_value_POS_test = value_TP * prob_test - cost_FP * (1.0 - prob_test);
    est_value_NEG_test = value_TN * (1.0 - prob_test) - cost_FN * prob_test;
-   y_test_pred = 2 * ppred (est_value_POS_test, est_value_NEG_test, ">") - 1;
+   y_test_pred = 2 * (est_value_POS_test > est_value_NEG_test) - 1;

    # Compute the estimated number of true/false positives/negatives
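The scoring hunks keep the same +/-1 decision rule: (est_value_POS > est_value_NEG) is a 0/1 matrix, so 2 * (...) - 1 maps "predicting positive is worth more than predicting negative" to +1 and everything else to -1, just as the old ppred form did. A toy check with illustrative value estimates:

    est_POS = matrix("0.9 0.1", rows=2, cols=1);  # illustrative value estimates
    est_NEG = matrix("0.4 0.6", rows=2, cols=1);
    y_pred  = 2 * (est_POS > est_NEG) - 1;        # yields +1, -1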