Le 31/05/2017 à 17:30, Serguei Sokol a écrit :
More thorough reading revealed that I have overlooked this phrase in the line's doc: "left and right /thirds/ of the data" (emphasis is mine).
Oops. I have read the first ref returned by google and it happened to be tibco's doc, not the R's one. The layout is very similar hence my mistake. The latter does not mention "thirds" but ... Anyway, here is a new line's patch which still gives a result slightly different form MMline(). The slope is the same but not the intercept. What are the exact terms for intercept calculation that should be implemented? Serguei.
--- line.c.orig 2016-03-17 00:03:03.000000000 +0100 +++ line.c 2017-05-31 18:24:55.330030280 +0200 @@ -17,7 +17,7 @@ * https://www.R-project.org/Licenses/ */ -#include <R_ext/Utils.h> /* R_rsort() */ +#include <R_ext/Utils.h> /* R_rsort() */ #include <math.h> #include <Rinternals.h> @@ -25,8 +25,8 @@ /* Speed up by `inlining' these (as macros) [since R version 1.2] : */ #if 1 -#define il(n,x) (int)floor((n - 1) * x) -#define iu(n,x) (int) ceil((n - 1) * x) +#define il(n,x) (int)floor(((n) - 1) * (x)) +#define iu(n,x) (int) ceil(((n) - 1) * (x)) #else static int il(int n, double x) @@ -41,71 +41,68 @@ #endif static void line(double *x, double *y, /* input (x[i],y[i])s */ - double *z, double *w, /* work and output: resid. & fitted */ - /* all the above of length */ int n, - double coef[2]) + double *z, double *w, /* work and output: resid. & fitted */ + int *indx, /* to get thirds of points independently of repeated values */ + /* all the above of length */ int n, + double coef[2]) { int i, j, k; double xb, x1, x2, xt, yt, yb, tmp1, tmp2; double slope, yint; for(i = 0 ; i < n ; i++) { - z[i] = x[i]; - w[i] = y[i]; + z[i] = x[i]; + w[i] = y[i]; + indx[i] = i; } - R_rsort(z, n);/* z = ordered abscissae */ + rsort_with_index(z, indx, n);/* z = ordered abscissae */ - tmp1 = z[il(n, 1./6.)]; - tmp2 = z[iu(n, 1./6.)]; xb = 0.5*(tmp1+tmp2); - - tmp1 = z[il(n, 2./6.)]; - tmp2 = z[iu(n, 2./6.)]; x1 = 0.5*(tmp1+tmp2); - - tmp1 = z[il(n, 4./6.)]; - tmp2 = z[iu(n, 4./6.)]; x2 = 0.5*(tmp1+tmp2); - - tmp1 = z[il(n, 5./6.)]; - tmp2 = z[iu(n, 5./6.)]; xt = 0.5*(tmp1+tmp2); + k=(n+1)/3; + tmp1 = z[il(k, 0.5)]; + tmp2 = z[iu(k, 0.5)]; + /* xb := Median(first third of x) */ + xb = 0.5*(tmp1+tmp2); + + tmp1 = z[n-k+il(k, 0.5)]; + tmp2 = z[n-k+iu(k, 0.5)]; + /* xt := Median(last third of x) */ + xt = 0.5*(tmp1+tmp2); slope = 0.; for(j = 1 ; j <= 1 ; j++) { - /* yb := Median(y[i]; x[i] <= quantile(x, 1/3) */ - k = 0; - for(i = 0 ; i < n ; i++) - if(x[i] <= x1) - z[k++] = w[i]; - R_rsort(z, k); - yb = 0.5 * (z[il(k, 0.5)] + z[iu(k, 0.5)]); - - /* yt := Median(y[i]; x[i] >= quantile(x, 2/3) */ - k = 0; - for(i = 0 ; i < n ; i++) - if(x[i] >= x2) - z[k++] = w[i]; - R_rsort(z,k); - yt = 0.5 * (z[il(k, 0.5)] + z[iu(k, 0.5)]); - - slope += (yt - yb)/(xt - xb); - for(i = 0 ; i < n ; i++) { - z[i] = y[i] - slope*x[i]; - /* never used: w[i] = z[i]; */ - } - R_rsort(z,n); - yint = 0.5 * (z[il(n, 0.5)] + z[iu(n, 0.5)]); + /* yb := Median(y[i]; x[i] in first third of x) */ + for(i = 0 ; i < k ; i++) + z[i] = w[indx[i]]; + R_rsort(z, k); + yb = 0.5 * (z[il(k, 0.5)] + z[iu(k, 0.5)]); + + /* yt := Median(y[i]; x[i] in last third of x */ + for(i = 0 ; i < k ; i++) + z[i] = w[indx[n-k+i]]; + R_rsort(z,k); + yt = 0.5 * (z[il(k, 0.5)] + z[iu(k, 0.5)]); + + slope += (yt - yb)/(xt - xb); + for(i = 0 ; i < n ; i++) { + z[i] = y[i] - slope*x[i]; + /* never used: w[i] = z[i]; */ + } + R_rsort(z,n); + yint = 0.5 * (z[il(n, 0.5)] + z[iu(n, 0.5)]); } for( i = 0 ; i < n ; i++ ) { - w[i] = yint + slope*x[i]; - z[i] = y[i] - w[i]; + w[i] = yint + slope*x[i]; + z[i] = y[i] - w[i]; } coef[0] = yint; coef[1] = slope; } -void tukeyline0(double *x, double *y, double *z, double *w, int *n, - double *coef) +void tukeyline0(double *x, double *y, double *z, double *w, int *indx, int *n, + double *coef) { - line(x, y, z, w, *n, coef); + line(x, y, z, w, indx, *n, coef); } SEXP tukeyline(SEXP x, SEXP y, SEXP call) @@ -127,7 +124,8 @@ SET_VECTOR_ELT(ans, 2, res); SEXP fit = allocVector(REALSXP, n); SET_VECTOR_ELT(ans, 3, fit); - line(REAL(x), REAL(y), REAL(res), REAL(fit), n, REAL(coef)); + SEXP indx = allocVector(INTSXP, n); + line(REAL(x), REAL(y), REAL(res), REAL(fit), INTEGER(indx), n, REAL(coef)); UNPROTECT(1); return ans; }
______________________________________________ R-devel@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-devel