Bug and Fix for Inverse Chi-Squared Method

Janes, Robert (Columbus) Thu, 09 Jan 2020 05:38:10 -0800

Hello,

I ran into a pretty serious issue with the cdf_chisq_Pinv method.  Thank 
goodness for open-source because I was able to come up with a simple fix.


In case Outlook considers a makefile sort of an executable I've copied it here 
rather than attach it.  You may have to add tabs back into it.
sample: testing
./testing

testing: obj/testing.o obj/imsl_c.o
gfortran -o testing obj/imsl_c.o obj/testing.o \
  -L/home/rjanes/gsl/gsl-2.6/.libs -l:libgsl.a -lgslcblas

obj/%.o: %.f
gfortran -c -g -o obj/$*.o $*.f

obj/%.o: %.c
gcc -c -g -o obj/$*.o $*.c

The problem is that for large degrees of freedom the method will abort with an 
error, "inverse failed to converge".  In looking at the code and running test 
cases I found that the limit of 32 iterations is too small.  I found the method 
failed many times once degrees of freedom got above 1000 or so.  I increased 
the iteration limit to 50 and did not find a single failure for a huge range of 
degrees of freedom.  I found successful iterations in this range were taking 
28, 29 and 31 iterations.  So hitting 32 maybe isn't much of a surprise.  But 
it suggests that maybe this isn't too efficient an iteration, at least for 
large degrees of freedom.  But I'll leave that to you experts.

Sorry about the Fortran code.  I'm porting a large application from Intel 
Fortran / IMSL to gfortran / gsl.  Rather than change the base application 
code, I just implemented the IMSL calls with gsl.

Of the attached files, testing.f is the test Fortran program.  imsl.c is the 
implementation of the IMSL routine with gsl.  And gammainv.c is the updated gsl 
code.  The only change was 32 -> 50.

Let me know of you need anything else from me.  Thank you.


Rob Janes

SGS Transportation

Senior Software Engineer

SGS – CMX

2860 N. National Road Suite A

US – 47201 – Columbus, Indiana

Phone: +1 - 812 - 378 - 7966

Fax: +1 812 - 378 - 3393

Email: [email protected]



Information in this email and any attachments is confidential and intended 
solely for the use of the individual(s) to whom it is addressed or otherwise 
directed. Please note that any views or opinions presented in this email are 
solely those of the author and do not necessarily represent those of the 
Company. Finally, the recipient should check this email and any attachments for 
the presence of viruses. The Company accepts no liability for any damage caused 
by any virus transmitted by this email. All SGS services are rendered in 
accordance with the applicable SGS conditions of service available on request 
and accessible at https://www.sgs.com/en/terms-and-conditions

/* cdf/gammainv.c
 * 
 * Copyright (C) 2003, 2007 Brian Gough
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or (at
 * your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <config.h>
#include <math.h>
#include <gsl/gsl_cdf.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_sf_gamma.h>

#include <stdio.h>

double
gsl_cdf_gamma_Pinv (const double P, const double a, const double b)
{
  double x;

  if (P == 1.0)
    {
      return GSL_POSINF;
    }
  else if (P == 0.0)
    {
      return 0.0;
    }

  /* Consider, small, large and intermediate cases separately.  The
     boundaries at 0.05 and 0.95 have not been optimised, but seem ok
     for an initial approximation.

     BJG: These approximations aren't really valid, the relevant
     criterion is P*gamma(a+1) < 1. Need to rework these routines and
     use a single bisection style solver for all the inverse
     functions.
  */

  if (P < 0.05)
    {
      double x0 = exp ((gsl_sf_lngamma (a) + log (P)) / a);
      x = x0;
    }
  else if (P > 0.95)
    {
      double x0 = -log1p (-P) + gsl_sf_lngamma (a);
      x = x0;
    }
  else
    {
      double xg = gsl_cdf_ugaussian_Pinv (P);
      double x0 = (xg < -0.5*sqrt (a)) ? a : sqrt (a) * xg + a;
      x = x0;
    }

  /* Use Lagrange's interpolation for E(x)/phi(x0) to work backwards
     to an improved value of x (Abramowitz & Stegun, 3.6.6) 

     where E(x)=P-integ(phi(u),u,x0,x) and phi(u) is the pdf.
   */

  {
    double lambda, dP, phi;
    unsigned int n = 0;

  start:
    dP = P - gsl_cdf_gamma_P (x, a, 1.0);
    phi = gsl_ran_gamma_pdf (x, a, 1.0);
// printf("%12.5f, %12.5f, %12.5f, %12.5f\n", x, a, dP, phi);

    if (dP == 0.0 || n++ > 50)
      goto end;

    lambda = dP / GSL_MAX (2 * fabs (dP / x), phi);

    {
      double step0 = lambda;
      double step1 = -((a - 1) / x - 1) * lambda * lambda / 4.0;

      double step = step0;
      if (fabs (step1) < 0.5 * fabs (step0))
        step += step1;

      if (x + step > 0)
        x += step;
      else
        {
          x /= 2.0;
        }

      if (fabs (step0) > 1e-10 * x || fabs(step0 * phi) > 1e-10 * P)
        goto start;
    }

  end:
    if (fabs(dP) > GSL_SQRT_DBL_EPSILON * P)
      {
        GSL_ERROR_VAL("inverse failed to converge", GSL_EFAILED, GSL_NAN);
      }
    
    return b * x;
  }
}

double
gsl_cdf_gamma_Qinv (const double Q, const double a, const double b)
{
  double x;

  if (Q == 1.0)
    {
      return 0.0;
    }
  else if (Q == 0.0)
    {
      return GSL_POSINF;
    }

  /* Consider, small, large and intermediate cases separately.  The
     boundaries at 0.05 and 0.95 have not been optimised, but seem ok
     for an initial approximation. */

  if (Q < 0.05)
    {
      double x0 = -log (Q) + gsl_sf_lngamma (a);
      x = x0;
    }
  else if (Q > 0.95)
    {
      double x0 = exp ((gsl_sf_lngamma (a) + log1p (-Q)) / a);
      x = x0;
    }
  else
    {
      double xg = gsl_cdf_ugaussian_Qinv (Q);
      double x0 = (xg < -0.5*sqrt (a)) ? a : sqrt (a) * xg + a;
      x = x0;
    }

  /* Use Lagrange's interpolation for E(x)/phi(x0) to work backwards
     to an improved value of x (Abramowitz & Stegun, 3.6.6) 

     where E(x)=P-integ(phi(u),u,x0,x) and phi(u) is the pdf.
   */

  {
    double lambda, dQ, phi;
    unsigned int n = 0;

  start:
    dQ = Q - gsl_cdf_gamma_Q (x, a, 1.0);
    phi = gsl_ran_gamma_pdf (x, a, 1.0);

    if (dQ == 0.0 || n++ > 32)
      goto end;

    lambda = -dQ / GSL_MAX (2 * fabs (dQ / x), phi);

    {
      double step0 = lambda;
      double step1 = -((a - 1) / x - 1) * lambda * lambda / 4.0;

      double step = step0;
      if (fabs (step1) < 0.5 * fabs (step0))
        step += step1;

      if (x + step > 0)
        x += step;
      else
        {
          x /= 2.0;
        }

      if (fabs (step0) > 1e-10 * x)
        goto start;
    }

  }

end:
  return b * x;
}

#include "gsl/gsl_cdf.h"

/* real function  chiin(p, d) */
float chiin_(float * x, float * a) {
   double result, d, ad;
   float s;
   d = *x;
   ad = *a;
   result = gsl_cdf_chisq_Pinv(d, ad);
   s = result;
   return s;
}

C     PREDICTOR TESTING OF ABORT
      p = .975
      do 10 i = 3, 12000
C        if (i .eq. 2320) go to 10
C        write (*, *) i
         df = i
         xx = chiin(p, df)
   10 continue
      end

Bug and Fix for Inverse Chi-Squared Method

Reply via email to