On 09/06/12 19:54, Martin Sebor wrote:
I'm not sure how easily we can do that. Almost all of locale
is initialized lazily. Some of the layers might depend on the
facets being initialized lazily as well. This was a deliberate
design choice. One of the constraints was to avoid dynamic
initialization or allocation at startup. [...]

There would be a performance degradation. IMHO, it would be minor and would 
simplify the code considerably.

I have collected some numbers over the weekend. Using the test program I posted 
earlier, with minor tweaks, I timed a number of approaches. In each test I 
obtained the grouping string object and compared the result of its c_str method 
against a known string with strcmp, in a tight loop, in multiple threads. The 
results seem to favor a non-caching implementation:

Times:

1. With current code, caching, no locking, safe because we initialize the 
grouping member outside the thread function:

real    0m45.414s
user    1m3.147s
sys     9m40.410s

2. With caching of grouping value and DCII, using two additional mutex member 
vars in std::numpunct:

real    0m34.360s
user    0m52.313s
sys     8m2.001s

3. With caching and DCII, using an additional mutex member in std::numpunct and 
an atomic exchange of the flag:

real    0m34.073s
user    0m52.028s
sys     7m57.889s

4. Without caching of grouping values; grouping() always delegates to 
do_grouping():

real    0m5.668s
user    1m11.389s
sys     0m3.952s

Thanks.

Liviu

The test program:

$ cat t.cpp
#include <iostream>
#include <locale>

#include <cstdio>
#include <cstdlib>
#include <cstring>

#include <pthread.h>
#include <unistd.h>

#define MAX_THREADS    16
#define MAX_LOOPS      10000000

// Start gate: the worker threads spin while this is true and main() clears
// it to let them proceed.
static volatile bool hold = true;

// Facet type exercised by the test.
typedef std::numpunct<char> Numpunct;

extern "C" {

static void*
f (void* pv)
{
    Numpunct const& fac = *reinterpret_cast< Numpunct* > (pv);
while (hold) ;

    for (int i = 0; i < MAX_LOOPS; ++i) {
        const std::string grouping = fac.grouping ();
        if (strcmp (grouping.c_str (), "\003\003")) {
            abort ();
        }
    }

    return 0;
}

}

// Test driver.
//
// Loads the locale named by the first command-line argument, starts
// MAX_THREADS threads that all run f() on that locale's Numpunct facet,
// clears `hold` to release them, and joins them.
//
// Returns 0 after every thread has been joined and EXIT_FAILURE when no
// locale name was supplied; exits with status -1 if a thread cannot be
// created.
int
main (int argc, char** argv)
{
    // Without an argument argv [1] is a null pointer, which must not be
    // handed to the std::locale (const char*) constructor.
    if (argc < 2) {
        std::fprintf (stderr, "usage: %s <locale-name>\n",
                      argc > 0 && argv [0] ? argv [0] : "t");
        return EXIT_FAILURE;
    }

    std::locale const loc = std::locale (argv [1]);
    Numpunct const& fac = std::use_facet<Numpunct > (loc);

    // Calls grouping() once before any thread exists, so the unsynchronized
    // caching variant has already filled its cache when the threads start.
    fac.grouping (); // Only for testing the current revision!

    // pthread_t is an opaque type: it is neither initialized from an
    // integer nor tested as one. Every element is written by a successful
    // pthread_create() below (a failure exits), so all of them are joinable.
    pthread_t tid [MAX_THREADS];

    for (int i = 0; i < MAX_THREADS; ++i) {
        if (pthread_create (tid + i, 0, f, const_cast<Numpunct*> (&fac)))
            exit (-1);
    }

    // Presumably gives the threads time to start and reach their spin loop
    // before the gate is opened.
    sleep (1);
    hold = false;

    for (int i = 0; i < MAX_THREADS; ++i)
        pthread_join (tid [i], 0);

    return 0;
}

The relevant facet code:

[...]

private:

    int         _C_flags;           // bitmap of "cached data valid" flags
    string      _C_grouping;        // cached results of virtual members
    string_type _C_truename;
    string_type _C_falsename;
    char_type   _C_decimal_point;
    char_type   _C_thousands_sep;

    _RW::__rw_mutex _C_mutex1;
    _RW::__rw_mutex _C_mutex2;
};

[...]

// Returns the grouping string of this facet.
//
// Four alternative implementations are kept side by side for benchmarking;
// the one in use is selected by editing the #if/#elif conditions:
//
//   #if       (enabled)  caches the result of do_grouping() in _C_grouping
//                        and records that in _C_flags, with no locking
//   1st #elif            same caching, guarded by _C_mutex1, with the flag
//                        stored through an atomic exchange
//   2nd #elif            same caching, guarded by _C_mutex1, with the flag
//                        stored under a second guard on _C_mutex2
//   #else                no caching: every call forwards to do_grouping()
//
// In all three caching variants the first test of _C_flags is made without
// holding any guard.
template <class _CharT>
inline string numpunct<_CharT>::grouping () const
{
#if 1

    // Unsynchronized caching.
    if (!(_C_flags & _RW::__rw_gr)) {

        // the cache members are modified from a const member function
        numpunct* const __self = _RWSTD_CONST_CAST (numpunct*, this);

        // [try to] get the grouping first (may throw)
        // then set a flag to avoid future initializations
        __self->_C_grouping  = do_grouping ();
        __self->_C_flags    |= _RW::__rw_gr;
    }

    return _C_grouping;

#elif 0

    // Caching with one mutex and an atomic exchange of the flag.
    if (!(_C_flags & _RW::__rw_gr)) {

        numpunct* const __self = _RWSTD_CONST_CAST (numpunct*, this);

        // NOTE(review): presumably holds _C_mutex1 until the end of the
        // enclosing scope -- confirm against the macro definition
        _RWSTD_MT_GUARD (__self->_C_mutex1);

        // test again now that the guard is held
        if (!(_C_flags & _RW::__rw_gr)) {

            // [try to] get the grouping first (may throw)
            // then set a flag to avoid future initializations
            __self->_C_grouping  = do_grouping ();

            // Atomic exchange has acquire and release semantics on
            // x86 and x86_64. Can still be re-ordered by the compiler.
            //
            // The new flag word is only computed here, not stored: the
            // exchange below must be the sole write to _C_flags, otherwise
            // a plain store would set the flag before the exchange runs.
            int tmp = _C_flags | _RW::__rw_gr;
            _RW::__rw_atomic_exchange (__self->_C_flags, tmp, true);
        }
    }

    return _C_grouping;

#elif 0

    // Caching with two mutexes.
    if (!(_C_flags & _RW::__rw_gr)) {

        numpunct* const __self = _RWSTD_CONST_CAST (numpunct*, this);

        // NOTE(review): presumably holds _C_mutex1 until the end of the
        // enclosing scope -- confirm against the macro definition
        _RWSTD_MT_GUARD (__self->_C_mutex1);

        // test again now that the guard is held
        if (!(_C_flags & _RW::__rw_gr)) {

            // [try to] get the grouping first (may throw)
            // then set a flag to avoid future initializations
            __self->_C_grouping  = do_grouping ();

            // Forces the compiler to preserve the order and introduces
            // barriers.
            _RWSTD_MT_GUARD (__self->_C_mutex2);
            __self->_C_flags |= _RW::__rw_gr;
        }
    }

    return _C_grouping;

#else

    // No caching: always ask the virtual member.
    return do_grouping ();

#endif   // implementation selector
}


Reply via email to