Le 31/05/2017 à 17:30, Serguei Sokol a écrit :

More thorough reading revealed that I have overlooked this phrase in the
line's doc: "left and right /thirds/ of the data" (emphasis is mine).
Oops. I have read the first ref returned by google and it happened to be
tibco's doc, not the R's one. The layout is very similar hence my mistake.
The latter does not mention "thirds" but ...
Anyway, here is a new line's patch which still gives a result slightly different
form MMline(). The slope is the same but not the intercept.
What are the exact terms for intercept calculation that should be implemented?

Serguei.
--- line.c.orig 2016-03-17 00:03:03.000000000 +0100
+++ line.c      2017-05-31 18:24:55.330030280 +0200
@@ -17,7 +17,7 @@
  *  https://www.R-project.org/Licenses/
  */
 
-#include <R_ext/Utils.h>       /* R_rsort() */
+#include <R_ext/Utils.h>    /* R_rsort() */
 #include <math.h>
 
 #include <Rinternals.h>
@@ -25,8 +25,8 @@
 
 /* Speed up by `inlining' these (as macros) [since R version 1.2] : */
 #if 1
-#define il(n,x)        (int)floor((n - 1) * x)
-#define iu(n,x)        (int) ceil((n - 1) * x)
+#define il(n,x) (int)floor(((n) - 1) * (x))
+#define iu(n,x) (int) ceil(((n) - 1) * (x))
 
 #else
 static int il(int n, double x)
@@ -41,71 +41,68 @@
 #endif
 
 static void line(double *x, double *y, /* input (x[i],y[i])s */
-                double *z, double *w, /* work and output: resid. & fitted */
-                /* all the above of length */ int n,
-                double coef[2])
+         double *z, double *w, /* work and output: resid. & fitted */
+         int *indx, /* to get thirds of points independently of repeated 
values */
+         /* all the above of length */ int n,
+         double coef[2])
 {
     int i, j, k;
     double xb, x1, x2, xt, yt, yb, tmp1, tmp2;
     double slope, yint;
 
     for(i = 0 ; i < n ; i++) {
-       z[i] = x[i];
-       w[i] = y[i];
+        z[i] = x[i];
+        w[i] = y[i];
+        indx[i] = i;
     }
-    R_rsort(z, n);/* z = ordered abscissae */
+    rsort_with_index(z, indx, n);/* z = ordered abscissae */
 
-    tmp1 = z[il(n, 1./6.)];
-    tmp2 = z[iu(n, 1./6.)];    xb = 0.5*(tmp1+tmp2);
-
-    tmp1 = z[il(n, 2./6.)];
-    tmp2 = z[iu(n, 2./6.)];    x1 = 0.5*(tmp1+tmp2);
-
-    tmp1 = z[il(n, 4./6.)];
-    tmp2 = z[iu(n, 4./6.)];    x2 = 0.5*(tmp1+tmp2);
-
-    tmp1 = z[il(n, 5./6.)];
-    tmp2 = z[iu(n, 5./6.)];    xt = 0.5*(tmp1+tmp2);
+    k=(n+1)/3;
+    tmp1 = z[il(k, 0.5)];
+    tmp2 = z[iu(k, 0.5)];
+    /* xb := Median(first third of x) */
+    xb = 0.5*(tmp1+tmp2);
+
+    tmp1 = z[n-k+il(k, 0.5)];
+    tmp2 = z[n-k+iu(k, 0.5)];
+    /* xt := Median(last third of x) */
+    xt = 0.5*(tmp1+tmp2);
 
     slope = 0.;
 
     for(j = 1 ; j <= 1 ; j++) {
-       /* yb := Median(y[i]; x[i] <= quantile(x, 1/3) */
-       k = 0;
-       for(i = 0 ; i < n ; i++)
-           if(x[i] <= x1)
-               z[k++] = w[i];
-       R_rsort(z, k);
-       yb = 0.5 * (z[il(k, 0.5)] + z[iu(k, 0.5)]);
-
-       /* yt := Median(y[i]; x[i] >= quantile(x, 2/3) */
-       k = 0;
-       for(i = 0 ; i < n ; i++)
-           if(x[i] >= x2)
-               z[k++] = w[i];
-       R_rsort(z,k);
-       yt = 0.5 * (z[il(k, 0.5)] + z[iu(k, 0.5)]);
-
-       slope += (yt - yb)/(xt - xb);
-       for(i = 0 ; i < n ; i++) {
-           z[i] = y[i] - slope*x[i];
-           /* never used: w[i] = z[i]; */
-       }
-       R_rsort(z,n);
-       yint = 0.5 * (z[il(n, 0.5)] + z[iu(n, 0.5)]);
+        /* yb := Median(y[i]; x[i] in first third of x) */
+        for(i = 0 ; i < k ; i++)
+            z[i] = w[indx[i]];
+        R_rsort(z, k);
+        yb = 0.5 * (z[il(k, 0.5)] + z[iu(k, 0.5)]);
+
+        /* yt := Median(y[i]; x[i] in last third of x */
+        for(i = 0 ; i < k ; i++)
+            z[i] = w[indx[n-k+i]];
+        R_rsort(z,k);
+        yt = 0.5 * (z[il(k, 0.5)] + z[iu(k, 0.5)]);
+
+        slope += (yt - yb)/(xt - xb);
+        for(i = 0 ; i < n ; i++) {
+            z[i] = y[i] - slope*x[i];
+            /* never used: w[i] = z[i]; */
+        }
+        R_rsort(z,n);
+        yint = 0.5 * (z[il(n, 0.5)] + z[iu(n, 0.5)]);
     }
     for( i = 0 ; i < n ; i++ ) {
-       w[i] = yint + slope*x[i];
-       z[i] = y[i] - w[i];
+        w[i] = yint + slope*x[i];
+        z[i] = y[i] - w[i];
     }
     coef[0] = yint;
     coef[1] = slope;
 }
 
-void tukeyline0(double *x, double *y, double *z, double *w, int *n,
-              double *coef)
+void tukeyline0(double *x, double *y, double *z, double *w, int *indx, int *n,
+           double *coef)
 {
-    line(x, y, z, w, *n, coef);
+    line(x, y, z, w, indx, *n, coef);
 }
 
 SEXP tukeyline(SEXP x, SEXP y, SEXP call)
@@ -127,7 +124,8 @@
     SET_VECTOR_ELT(ans, 2, res);
     SEXP fit = allocVector(REALSXP, n);
     SET_VECTOR_ELT(ans, 3, fit);
-    line(REAL(x), REAL(y), REAL(res), REAL(fit), n, REAL(coef));
+    SEXP indx = allocVector(INTSXP, n);
+    line(REAL(x), REAL(y), REAL(res), REAL(fit), INTEGER(indx), n, REAL(coef));
     UNPROTECT(1);
     return ans;
 }
______________________________________________
R-devel@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-devel

Reply via email to