question about the trust region in the gsl library.

Aleja Carmento Tue, 02 May 2023 04:30:40 -0700

 Dear Sir/Madam,



I am currently working on a gsl least squares method. In this specific case
, i have it set to the levenberg-marquardt with geodesic acceleration. i
used the same example from the gsl website and added a few changes to it
like so :


#include <stdlib.h>#include <stdio.h>#include
<gsl/gsl_vector.h>#include <gsl/gsl_matrix.h>#include
<gsl/gsl_blas.h>#include <gsl/gsl_multifit_nlinear.h>#include
<gsl/gsl_rng.h>#include <gsl/gsl_errno.h>#include <gsl/gsl_randist.h>


#define max_iterations 100


struct data
{
    double* t;
    double* y;
    size_t n;
};

static double scaled_norm(const gsl_vector* D, const gsl_vector* a)
{
    const size_t n = a->size;
    double e2 = 0.0;
    size_t i;

    for (i = 0; i < n; ++i) {
        double Di = gsl_vector_get(D, i);
        double ai = gsl_vector_get(a, i);
        double u = Di * ai;

        e2 += u * u;
    }

    return sqrt(e2);
}
/* model function: a * exp( -1/2 * [ (t - b) / c ]^2 )
*/doublegaussian(const double a, const double b, const double c, const
double t)
{
    const double z = (t - b) / c;
    return (a * exp(-0.5 * z * z));
}
intfunc_f(const gsl_vector* x, void* params, gsl_vector* f)
{
    struct data* d = (struct data*)params;
    double a = gsl_vector_get(x, 0);
    double b = gsl_vector_get(x, 1);
    double c = gsl_vector_get(x, 2);
    size_t i;

    for (i = 0; i < d->n; ++i)
    {
        double ti = d->t[i];
        double yi = d->y[i];
        double y = gaussian(a, b, c, ti);

        gsl_vector_set(f, i, yi - y);
    }

    return GSL_SUCCESS;
}
intfunc_df(const gsl_vector* x, void* params, gsl_matrix* J)
{
    struct data* d = (struct data*)params;
    double a = gsl_vector_get(x, 0);
    double b = gsl_vector_get(x, 1);
    double c = gsl_vector_get(x, 2);
    size_t i;

    for (i = 0; i < d->n; ++i)
    {
        double ti = d->t[i];
        double zi = (ti - b) / c;
        double ei = exp(-0.5 * zi * zi);

        gsl_matrix_set(J, i, 0, -ei);
        gsl_matrix_set(J, i, 1, -(a / c) * ei * zi);
        gsl_matrix_set(J, i, 2, -(a / c) * ei * zi * zi);
    }

    return GSL_SUCCESS;
}
intfunc_fvv(const gsl_vector* x, const gsl_vector* v,
    void* params, gsl_vector* fvv)
{
    struct data* d = (struct data*)params;
    double a = gsl_vector_get(x, 0);
    double b = gsl_vector_get(x, 1);
    double c = gsl_vector_get(x, 2);
    double va = gsl_vector_get(v, 0);
    double vb = gsl_vector_get(v, 1);
    double vc = gsl_vector_get(v, 2);
    size_t i;

    for (i = 0; i < d->n; ++i)
    {
        double ti = d->t[i];
        double zi = (ti - b) / c;
        double ei = exp(-0.5 * zi * zi);
        double Dab = -zi * ei / c;
        double Dac = -zi * zi * ei / c;
        double Dbb = a * ei / (c * c) * (1.0 - zi * zi);
        double Dbc = a * zi * ei / (c * c) * (2.0 - zi * zi);
        double Dcc = a * zi * zi * ei / (c * c) * (3.0 - zi * zi);
        double sum;

        sum = 2.0 * va * vb * Dab +
            2.0 * va * vc * Dac +
            vb * vb * Dbb +
            2.0 * vb * vc * Dbc +
            vc * vc * Dcc;

        gsl_vector_set(fvv, i, sum);
    }

    return GSL_SUCCESS;
}
void callback(const size_t iter, void* params,
    const gsl_multifit_nlinear_workspace* w)
{
    gsl_vector* f = gsl_multifit_nlinear_residual(w);
    gsl_vector* x = gsl_multifit_nlinear_position(w);
    double avratio = gsl_multifit_nlinear_avratio(w);
    double rcond;


    (void)params; /* not used */


    gsl_multifit_nlinear_trust_state* state =
(gsl_multifit_nlinear_trust_state*)w->state;
    /* compute reciprocal condition number of J(x) */
    gsl_multifit_nlinear_rcond(&rcond, w);

    fprintf(stderr, "iter %2zu: a = %.4f, b = %.4f, c = %.4f, |a|/|v|
= %.4f cond(J) = %8.4f, |f(x)| = %.4f\n",
        iter,
        gsl_vector_get(x, 0),
        gsl_vector_get(x, 1),
        gsl_vector_get(x, 2),
        avratio,
        1.0 / rcond,
        gsl_blas_dnrm2(f));
}
voidsolve_system(gsl_vector* x, gsl_multifit_nlinear_fdf* fdf,
    gsl_multifit_nlinear_parameters* params)
{
    const gsl_multifit_nlinear_type* T = gsl_multifit_nlinear_trust;
    const size_t max_iter = 200;
    const double xtol = 1.0e-8;
    const double gtol = 1.0e-8;
    const double ftol = 1.0e-8;

    const size_t n = fdf->n;
    const size_t p = fdf->p;

    size_t iter = 0;
    int status, info;

    gsl_multifit_nlinear_workspace* work =
gsl_multifit_nlinear_alloc(T, params, n, p);



    gsl_vector* f = gsl_multifit_nlinear_residual(work);
    gsl_vector* y = gsl_multifit_nlinear_position(work);


    double chisq0, chisq, rcond;


    double trust_radius_list[max_iterations];
    double step_size_list[max_iterations];

    /* initialize solver */
    gsl_multifit_nlinear_init(x, fdf, work);

    /* store initial cost */
    gsl_blas_ddot(f, f, &chisq0);

    /* iterate until convergence */
    if (callback) callback(iter, NULL, work);

    /*initialize scale and trust region*/
    gsl_vector* diag = gsl_vector_alloc(p);
    work->params.scale->init(work->J, diag);

    double scale = scaled_norm(diag, work->x);
    double trust_region_radius = 0.3 * GSL_MAX(1.0, scale);


    trust_radius_list[0] = trust_region_radius; // initialize  the
trust region radius
    step_size_list[0] = gsl_blas_dnrm2(work->dx);




    do
    {
        status = gsl_multifit_nlinear_iterate(work);


        /*update the trust region information*/
        work->params.scale->update(work->J, diag);


        scale = scaled_norm(diag, work->dx);
        trust_region_radius = scale;
        trust_radius_list[iter + 1] = trust_region_radius;
        step_size_list[iter + 1] = gsl_blas_dnrm2(work->dx);

        if (status == GSL_ENOPROG && iter == 0)
        {
            info = status;
        }

        ++iter;

        if (callback) callback(iter,NULL, work);

        /* test for convergence */
        status = gsl_multifit_nlinear_test(xtol, gtol, ftol, &info, work);

    } while (status == GSL_CONTINUE && iter < max_iter);


    if (status == GSL_ETOLF || status == GSL_ETOLX || status == GSL_ETOLG)
    {
        info = status;
        status = GSL_SUCCESS;
    }

    /* check if max iterations reached */
    if (iter >= max_iter && status != GSL_SUCCESS)
        status = GSL_EMAXITER;

    int i;
    for (i = 0; i < iter + 1; i++) {


        fprintf(stdout, "trust-radius: %g ", trust_radius_list[i]);


    }

    /* store final cost */
    gsl_blas_ddot(f, f, &chisq);

    /* store cond(J(x)) */
    gsl_multifit_nlinear_rcond(&rcond, work);

    gsl_vector_memcpy(x, y);

    /* print summary */

    fprintf(stderr, "NITER         = %zu\n", gsl_multifit_nlinear_niter(work));
    fprintf(stderr, "NFEV          = %zu\n", fdf->nevalf);
    fprintf(stderr, "NJEV          = %zu\n", fdf->nevaldf);
    fprintf(stderr, "NAEV          = %zu\n", fdf->nevalfvv);
    fprintf(stderr, "initial cost  = %.12e\n", chisq0);
    fprintf(stderr, "final cost    = %.12e\n", chisq);
    fprintf(stderr, "final x       = (%.12e, %.12e, %12e)\n",
        gsl_vector_get(x, 0), gsl_vector_get(x, 1), gsl_vector_get(x, 2));
    fprintf(stderr, "final cond(J) = %.12e\n", 1.0 / rcond);

    gsl_multifit_nlinear_free(work);
}
intmain(void)
{
    const size_t n = 300;  /* number of data points to fit */
    const size_t p = 3;    /* number of model parameters */
    const double a = 5.0;  /* amplitude */
    const double b = 0.4;  /* center */
    const double c = 0.15; /* width */
    const gsl_rng_type* T = gsl_rng_default;
    gsl_vector* f = gsl_vector_alloc(n);
    gsl_vector* x = gsl_vector_alloc(p);
    gsl_multifit_nlinear_fdf fdf;
    gsl_multifit_nlinear_parameters fdf_params =
        gsl_multifit_nlinear_default_parameters();
    struct data fit_data;
    gsl_rng* r;
    size_t i;

    gsl_rng_env_setup();
    r = gsl_rng_alloc(T);

    fit_data.t = (double *)malloc(n * sizeof(double));
    fit_data.y = (double *)malloc(n * sizeof(double));
    fit_data.n = n;

    /* generate synthetic data with noise */
    for (i = 0; i < n; ++i)
    {
        double t = (double)i / (double)n;
        double y0 = gaussian(a, b, c, t);
        double dy = gsl_ran_gaussian(r, 0.1 * y0);

        fit_data.t[i] = t;
        fit_data.y[i] = y0 + dy;
    }

    /* define function to be minimized */
    fdf.f = func_f;
    fdf.df = func_df;
    fdf.fvv = func_fvv;
    fdf.n = n;
    fdf.p = p;
    fdf.params = &fit_data;

    /* starting point */
    gsl_vector_set(x, 0, 1.0);
    gsl_vector_set(x, 1, 0.0);
    gsl_vector_set(x, 2, 1.0);

    fdf_params.trs = gsl_multifit_nlinear_trs_lmaccel;
    solve_system(x, &fdf, &fdf_params);

    /* print data and model */
    {
        double A = gsl_vector_get(x, 0);
        double B = gsl_vector_get(x, 1);
        double C = gsl_vector_get(x, 2);

        for (i = 0; i < n; ++i)
        {
            double ti = fit_data.t[i];
            double yi = fit_data.y[i];
            double fi = gaussian(A, B, C, ti);

            printf("%f %f %f\n", ti, yi, fi);
        }
    }

    gsl_vector_free(f);
    gsl_vector_free(x);
    gsl_rng_free(r);

    return 0;
}




This code initializes the trust radius and updates it and stores it in a
list. I started with the same initialization method as trust.c which a
source file in the gsl library used to calculate the trust region
information.


 work->params.scale->init(work->J, diag);
 double scale = scaled_norm(diag, work->x);
 double trust_region_radius = 0.3 * GSL_MAX(1.0, scale);



but for the update , i used the step instead:

work->params.scale->update(work->J, diag);
scale = scaled_norm(diag, work->dx);
trust_region_radius = scale;

i did not do factor up and down to make the trust region bigger or smaller
since i assumed that that was already done if i multiply the diagonal of
the scaling matrix with the step.

I tested the values by debugging both from the source file and from my
method and they were different.

the factor up and factor down parameters are accessed like so :

work->params.factor_up;
work->params.factor_down;

and those increase or decrease the trust region.

is the reason why my code isn't giving similar results because i didnt use
factor_up and factor_down. even though they should already be taken into
account by the step?

Keep in mind that my trust region calculation is done the other way around
, meaning that the trust region was already calculated internally but i am
trying to recalculate it so i can plot it.


i also realized that the predicted reduction is calculated differently for
every method.


I would really appreciate your help with this.

thank you so much,

Sincerely,

Aleja Carmento.

question about the trust region in the gsl library.

Reply via email to