Hi all, I have been trying to add the line:
h = n - p0 + 1; just after h = n / 2 + 1; (line 131) in the source code (the original paper mention this is possible for p0<n). with p0 declared as an int (actually i used the same declaration protocol as n everywhere in the code). The "new" source compiles, but when i give it reasonable values of p0, it runs unto an infinite loop. (where i leave the p0 variable in the source, but comment out my modified line everything works perfectly, again). I rewrote the qn code in R, by "translating" the source code. qn.R always gives the expected result (i.e. does not run unto an infinite loop). I'm a tiny bit puzzled. Below i attach the modified qn.c and qn.R sources: Thanks in advance for any hint /* * Copyright (C) 2005--2007 Martin Maechler, ETH Zurich * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* This is a merge of the C version of original files qn.f and sn.f, * translated by f2c (version 20010821). ==== ==== * and then by f2c-clean,v 1.9 2000/01/13 13:46:53 * and further clean-edited manually by Martin Maechler. * * Further added interface functions to be called via .C() from R or S-plus * Note that Peter Rousseeuw has explicitely given permission to * use his code under the GPL for the R project. */ /* Original comments by the authors of the Fortran original code, * (merged for Qn & Sn in one file by M.M.): This file contains fortran functions for two new robust estimators of scale denoted as Qn and Sn, decribed in Rousseeuw and Croux (1993). These estimators have a high breakdown point and a bounded influence function. The implementation given here is very fast (running in O(n logn) time) and needs little storage space. Rousseeuw, P.J. and Croux, C. (1993) Alternatives to the Median Absolute Deviation", Journal of the American Statistical Association, Vol. 88, 1273-1283. For both estimators, implementations in the pascal language can be obtained from the original authors. This software may be used and copied freely for scientific and/or non-commercial purposes, provided reference is made to the abovementioned paper. Note by MM: We have explicit permission from P.Rousseeuw to licence it under the GNU Public Licence. See also ../inst/Copyrights */ #include <inttypes.h> /* ^^^^^^^^^^ is supposedly more common and standard than * #include <stdint.h> * or #include <sys/types.h> */ /* --> int64_t ; if people don't have the above, they can forget about it.. */ /* #include "int64.h" */ #include <R.h> #include <Rmath.h> /* -> <math.h> and much more */ /* Interface routines to be called via .C() : */ #include "robustbase.h" /* ----------------- Further Declarations ------------------------------ */ /* sn0() and qn0() --- but also mc_C() in ./mc.c * ----- ---- ------ use pull(a,n,k): finds the k-th order statistic of an array a[] of length n (preserving a[]) */ /* whimed_i(a,iw,n): finds the weighted high median of an array a[] of length n, with positive int weights iw[] (using auxiliary arrays acand[], a_srt[] & iw_cand[] all of length n). */ /* qn0() uses (and for C API:) */ /* Main routines for C API */ double qn(double *x, int n, int p0, int finite_corrn); /* these have no extra factors (no consistency factor & finite_corr): */ double qn0(double *x, int n, int p0); /* ----------- Implementations -----------------------------------*/ void Qn0(double *x, Sint *n, Sint *p0, double *res) { *res = qn0(x, (int)*n, (int)*p0); } double qn0(double *x, int n, int p0) { /*-------------------------------------------------------------------- Efficient algorithm for the scale estimator: Q*_n = { |x_i - x_j|; i<j }_(k) [= Qn without scaling ] i.e. the k-th order statistic of the |x_i - x_j| Parameters of the function Qn : x : double array containing the observations n : number of observations (n >=2) */ double *y = (double *)R_alloc(n, sizeof(double)); double *work = (double *)R_alloc(n, sizeof(double)); double *a_srt = (double *)R_alloc(n, sizeof(double)); double *a_cand = (double *)R_alloc(n, sizeof(double)); int *left = (int *)R_alloc(n, sizeof(int)); int *right = (int *)R_alloc(n, sizeof(int)); int *p = (int *)R_alloc(n, sizeof(int)); int *q = (int *)R_alloc(n, sizeof(int)); int *weight = (int *)R_alloc(n, sizeof(int)); double trial = R_NaReal;/* -Wall */ Rboolean found; int h, i, j,jj,jh; /* Following should be `long long int' : they can be of order n^2 */ int64_t k, knew, nl,nr, sump,sumq; h = n / 2 + 1; //h = n - p0 + 1; k = (int64_t)h * (h - 1) / 2; for (i = 0; i < n; ++i) { y[i] = x[i]; left [i] = n - i + 1; right[i] = (i <= h) ? n : n - (i - h); /* the n - (i-h) is from the paper; original code had `n' */ } R_qsort(y, 1, n); /* y := sort(x) */ nl = (int64_t)n * (n + 1) / 2; nr = (int64_t)n * n; knew = k + nl;/* = k + (n+1 \over 2) */ found = FALSE; #ifdef DEBUG_qn REprintf("qn0(): h,k= %2d,%2d; nl,nr= %d,%d\n", h,k, nl,nr); #endif /* L200: */ while(!found && nr - nl > n) { j = 0; /* Truncation to float : try to make sure that the same values are got later (guard bits !) */ for (i = 1; i < n; ++i) { if (left[i] <= right[i]) { weight[j] = right[i] - left[i] + 1; jh = left[i] + weight[j] / 2; work[j] = (float)(y[i] - y[n - jh]); ++j; } } trial = whimed_i(work, weight, j, a_cand, a_srt, /*iw_cand*/ p); #ifdef DEBUG_qn REprintf(" ..!found: whimed("); # ifdef DEBUG_long REprintf("wrk=c("); for(i=0; i < j; i++) REprintf("%s%g", (i>0)? ", " : "", work[i]); REprintf("),\n wgt=c("); for(i=0; i < j; i++) REprintf("%s%d", (i>0)? ", " : "", weight[i]); REprintf("), j= %3d) -> trial= %7g\n", j, trial); # else REprintf("j=%3d) -> trial= %g:", j, trial); # endif #endif j = 0; for (i = n - 1; i >= 0; --i) { while (j < n && ((float)(y[i] - y[n - j - 1])) < trial) ++j; p[i] = j; } #ifdef DEBUG_qn REprintf(" f_1: j=%2d", j); #endif j = n + 1; for (i = 0; i < n; ++i) { while ((float)(y[i] - y[n - j + 1]) > trial) --j; q[i] = j; } sump = 0; sumq = 0; for (i = 0; i < n; ++i) { sump += p[i]; sumq += q[i] - 1; } #ifdef DEBUG_qn REprintf(" f_2 -> j=%2d, sump|q= %lld,%lld", j, sump,sumq); #endif if (knew <= sump) { for (i = 0; i < n; ++i) right[i] = p[i]; nr = sump; #ifdef DEBUG_qn REprintf("knew <= sump =: nr , new right[]\n"); #endif } else if (knew > sumq) { for (i = 0; i < n; ++i) left[i] = q[i]; nl = sumq; #ifdef DEBUG_qn REprintf("knew > sumq =: nl , new left[]\n"); #endif } else { /* sump < knew <= sumq */ found = TRUE; #ifdef DEBUG_qn REprintf("sump < knew <= sumq ---> FOUND\n"); #endif } } /* while */ if (found) return trial; else { #ifdef DEBUG_qn REprintf(".. not fnd -> new work[]"); #endif j = 0; for (i = 1; i < n; ++i) { for (jj = left[i]; jj <= right[i]; ++jj) { work[j] = y[i] - y[n - jj]; j++; }/* j will be = sum_{i=2}^n (right[i] - left[i] + 1)_{+} */ } #ifdef DEBUG_qn REprintf(" of length %d; knew-nl=%d\n", j, knew-nl); #endif /* return pull(work, j - 1, knew - nl) : */ knew -= (nl + 1); /* -1: 0-indexing */ rPsort(work, j, knew); return(work[knew]); } } /* qn0 */ double qn(double *x, int n, int p0, int finite_corr) { /* Efficient algorithm for the scale estimator: Qn = dn * 2.2219 * {|x_i-x_j|; i<j}_(k) */ double r, dn = 1./*-Wall*/; r = 2.2219 * qn0(x, n, p0); /* asymptotic consistency for sigma^2 */ /* === */ if (finite_corr) { if (n <= 9) { if (n == 2) dn = .399; else if (n == 3) dn = .994; else if (n == 4) dn = .512; else if (n == 5) dn = .844; else if (n == 6) dn = .611; else if (n == 7) dn = .857; else if (n == 8) dn = .669; else if (n == 9) dn = .872; } else { if (n % 2 == 1) dn = n / (n + 1.4); else /* (n % 2 == 0) */ dn = n / (n + 3.8); } return dn * r; } else return r; } /* qn */ /* pull(): auxiliary routine for Qn and Sn * ====== ======== --------------------- */ double pull(double *a_in, int n, int k) { /* Finds the k-th order statistic of an array a[] of length n * -------------------- */ int j; double *a, ax; char* vmax = vmaxget(); a = (double *)R_alloc(n, sizeof(double)); /* Copy a[] and use copy since it will be re-shuffled: */ for (j = 0; j < n; j++) a[j] = a_in[j]; k--; /* 0-indexing */ rPsort(a, n, k); ax = a[k]; vmaxset(vmax); return ax; } /* pull */ /* Local variables section * Local variables: * mode: c * kept-old-versions: 12 * kept-new-versions: 20 * End: */ double whimed_i(double *a, int *w, int n, double* a_cand, double *a_srt, int* w_cand) { /* Algorithm to compute the weighted high median in O(n) time. The whimed is defined as the smallest a[j] such that the sum of the weights of all a[i] <= a[j] is strictly greater than half of the total weight. Arguments: a: double array containing the observations n: number of observations w: array of (int/double) weights of the observations. */ int n2, i, kcand; /* sum of weights: `int' do overflow when n ~>= 1e5 */ double wleft, wmid, wright, w_tot, wrest; double trial; w_tot = 0; for (i = 0; i < n; ++i) w_tot += w[i]; wrest = 0; /* REPEAT : */ do { for (i = 0; i < n; ++i) a_srt[i] = a[i]; n2 = n/2;/* =^= n/2 +1 with 0-indexing */ rPsort(a_srt, n, n2); trial = a_srt[n2]; wleft = 0; wmid = 0; wright= 0; for (i = 0; i < n; ++i) { if (a[i] < trial) wleft += w[i]; else if (a[i] > trial) wright += w[i]; else wmid += w[i]; } /* wleft = sum_{i; a[i] < trial} w[i] * wmid = sum_{i; a[i] == trial} w[i] at least one 'i' since trial is one a[]! * wright= sum_{i; a[i] > trial} w[i] */ kcand = 0; if (2 * (wrest + wleft) > w_tot) { for (i = 0; i < n; ++i) { if (a[i] < trial) { a_cand[kcand] = a[i]; w_cand[kcand] = w[i]; ++kcand; } } } else if (2 * (wrest + wleft + wmid) <= w_tot) { for (i = 0; i < n; ++i) { if (a[i] > trial) { a_cand[kcand] = a[i]; w_cand[kcand] = w[i]; ++kcand; } } wrest += wleft + wmid; } else { return trial; /*==========*/ } n = kcand; for (i = 0; i < n; ++i) { a[i] = a_cand[i]; w[i] = w_cand[i]; } } while(1); } /* _WHIMED_ */ ############################################################# mqn<-function(x,p0){ n<-length(x) h<-as.integer((n/2)+1) h<-as.integer(n-p0) k<-h*(h-1)/2 y<-left<-right<-weight<-work<-p<-q<-rep(NA,n) pulla<-function(a_in,n,k){ a<-rep(NA,n) for(j in 0:(n-1)) a[j+1]<-a_in[j+1] ax<-sort(a)[k] return(ax) } y<-sort(x) for(i in 1:n){ left[i]<-n-i+2 right[i]<-ifelse(i<=h,n,n-(i-h)) } nl<-n*(n+1)/2 nr<-n*n knew<-as.integer(k+nl) found<-F while(found==F & (nr-nl>n)){ j<-1 for(i in 2:n){ if(left[i]<=right[i]){ weight[j]=as.integer(right[i]-left[i]+1) jh<-as.integer(left[i]+weight[j]/2); work[j]=y[i]-y[as.integer(n-jh+1)] j<-j+1 } } trial<-weightedMedian(x=work[1:(j-1)],w=weight[1:(j-1)]) j<-0 for(i in n:1){ while((j<(n-1)) & ((y[i]-y[n-j])<trial)) j<-j+1 p[i]<-j } j<-n+1 for(i in 1:n){ while((y[i]-y[n-j+2])>trial) j<-j-1 q[i]<-j } sump<-sumq<-0 for(i in 1:n){ sump<-sump+p[i] sumq<-sumq+q[i]-1 } if(knew<=sump){ for(i in 1:n) right[i]<-p[i] nr<-sump } if(knew>sumq){ for(i in 1:n) left[i]<-q[i] nl<-sumq } if(knew>sump & knew<=sumq) found<-T } if(found==T) ret<-trial if(found==F){ j<-1 for(i in 2:n){ if(left[i]<=right[i]){ for(jj in left[i]:right[i]){ work[j]=y[i]-y[n-jj+1] j<-j+1 } } } ret<-pulla(a_in=work,n=j-1,k=knew-nl) } ifelse(n%%2==0,ret*2.2219*n/(n+3.8),ret*2.2219*n/(n+1.4)) } -- View this message in context: http://r.789695.n4.nabble.com/C-source-code-question-Robustbase-edition-tp3465905p3465905.html Sent from the R help mailing list archive at Nabble.com. ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.