On 09/26/12 20:12, Liviu Nicoara wrote:
I have created STDCXX-1071 and linked to STDCXX-1056. [...]
I am open to all questions, the more the better. Most of my opinions have been
expressed earlier, but please ask if you want to know more.
I am attaching here the proposed (4.3.x) patch and the timing results (after
re-verifying the correctness of the timing program and the results). The 4.2.x
patch, the 4.3.x patch, the test program and the results file are also attached
to the incident.
Thanks,
Liviu
Index: include/loc/_numpunct.h
===================================================================
--- include/loc/_numpunct.h (revision 1388733)
+++ include/loc/_numpunct.h (working copy)
@@ -61,7 +61,7 @@ struct numpunct: _RW::__rw_facet
string_type;
_EXPLICIT numpunct (_RWSTD_SIZE_T __ref = 0)
- : _RW::__rw_facet (__ref), _C_flags (0) { }
+ : _RW::__rw_facet (__ref) { }
virtual ~numpunct () _RWSTD_ATTRIBUTE_NOTHROW;
@@ -109,15 +109,6 @@ protected:
virtual string_type do_falsename () const {
return _RW::__rw_get_punct (this, _RW::__rw_fn, char_type ());
}
-
-private:
-
- int _C_flags; // bitmap of "cached data valid" flags
- string _C_grouping; // cached results of virtual members
- string_type _C_truename;
- string_type _C_falsename;
- char_type _C_decimal_point;
- char_type _C_thousands_sep;
};
@@ -139,17 +130,7 @@ template <class _CharT>
inline _TYPENAME numpunct<_CharT>::char_type
numpunct<_CharT>::decimal_point () const
{
- if (!(_C_flags & _RW::__rw_dp)) {
-
- numpunct* const __self = _RWSTD_CONST_CAST (numpunct*, this);
-
- // [try to] get the decimal point first (may throw)
- // then set a flag to avoid future initializations
- __self->_C_decimal_point = do_decimal_point ();
- __self->_C_flags |= _RW::__rw_dp;
- }
-
- return _C_decimal_point;
+ return do_decimal_point ();
}
@@ -157,34 +138,14 @@ template <class _CharT>
inline _TYPENAME numpunct<_CharT>::char_type
numpunct<_CharT>::thousands_sep () const
{
- if (!(_C_flags & _RW::__rw_ts)) {
-
- numpunct* const __self = _RWSTD_CONST_CAST (numpunct*, this);
-
- // [try to] get the thousands_sep first (may throw)
- // then set a flag to avoid future initializations
- __self->_C_thousands_sep = do_thousands_sep ();
- __self->_C_flags |= _RW::__rw_ts;
- }
-
- return _C_thousands_sep;
+ return do_thousands_sep ();
}
template <class _CharT>
inline string numpunct<_CharT>::grouping () const
{
- if (!(_C_flags & _RW::__rw_gr)) {
-
- numpunct* const __self = _RWSTD_CONST_CAST (numpunct*, this);
-
- // [try to] get the grouping first (may throw)
- // then set a flag to avoid future initializations
- __self->_C_grouping = do_grouping ();
- __self->_C_flags |= _RW::__rw_gr;
- }
-
- return _C_grouping;
+ return do_grouping ();
}
@@ -192,17 +153,7 @@ template <class _CharT>
inline _TYPENAME numpunct<_CharT>::string_type
numpunct<_CharT>::truename () const
{
- if (!(_C_flags & _RW::__rw_tn)) {
-
- numpunct* const __self = _RWSTD_CONST_CAST (numpunct*, this);
-
- // [try to] get the true name first (may throw)
- // then set a flag to avoid future initializations
- __self->_C_truename = do_truename ();
- __self->_C_flags |= _RW::__rw_tn;
- }
-
- return _C_truename;
+ return do_truename ();
}
@@ -210,17 +161,7 @@ template <class _CharT>
inline _TYPENAME numpunct<_CharT>::string_type
numpunct<_CharT>::falsename () const
{
- if (!(_C_flags & _RW::__rw_fn)) {
-
- numpunct* const __self = _RWSTD_CONST_CAST (numpunct*, this);
-
- // [try to] get the false name first (may throw)
- // then set a flag to avoid future initializations
- __self->_C_falsename = do_falsename ();
- __self->_C_flags |= _RW::__rw_fn;
- }
-
- return _C_falsename;
+ return do_falsename ();
}
// #endif _RWSTD_NO_EXT_NUMPUNCT_PRIMARY
-*- mode: org -*-
* Machines:
** iMac, Intel, 4 cores:
$ uname -a; gcc -v
Darwin imax 11.4.0 Darwin Kernel Version 11.4.0: Mon Apr 9 19:32:15 PDT 2012;
root:xnu-1699.26.8~1/RELEASE_X86_64 x86_64
gcc version 4.7.1 (GCC)
** Linux Slackware, AMD, 16 cores:
$ uname -a; gcc -v
Linux behemoth 2.6.37.6 #3 SMP Sat Apr 9 22:49:32 CDT 2011 x86_64 AMD
Opteron(tm) Processor 6134 AuthenticAMD GNU/Linux
gcc version 4.5.2 (GCC)
* Method
** Library
Apply the patch. Build an optimized library (I used 12S in all runs). Build the
library, rwtest, and locale database:
$ nice make -Clib
$ nice make -Cbin locales
$ nice make -Crwtest
Export the necessary environment variable if running against the STDCXX
locale database; otherwise, unset it:
$ export RWSTD_LOCALE_ROOT=/path/to/.../nls
** Test program
Place the multi-threaded program source file, t.cpp, in
<srcdir>/tests/localization and run make in the builddir:
$ cd <builddir>/tests; nice make t
** Run the test
The simplest run of the program, with a single locale name argument, uses
16 threads and 10 million iterations. This is adjustable with command
line arguments:
$ ./t en_US.UTF-8 4 2000
runs with 4 threads and 2000 iterations.
* Results
** iMac results
*** Current implementation, system locale database
$ for t in 16 8 4 2 1; do time ./t en_US.UTF-8 $t 50000000; done
16, 50000000
real 0m33.104s
user 2m10.449s
sys 0m0.112s
8, 50000000
real 0m16.542s
user 1m4.338s
sys 0m0.068s
4, 50000000
real 0m8.252s
user 0m31.018s
sys 0m0.040s
2, 50000000
real 0m3.818s
user 0m7.619s
sys 0m0.005s
1, 50000000
real 0m1.057s
user 0m1.055s
sys 0m0.001s
*** Non-caching implementation, system locale database
$ for t in 16 8 4 2 1; do time ./t en_US.UTF-8 $t 50000000; done
16, 50000000
real 0m24.898s
user 1m37.334s
sys 0m0.630s
8, 50000000
real 0m11.637s
user 0m45.625s
sys 0m0.202s
4, 50000000
real 0m5.273s
user 0m20.678s
sys 0m0.021s
2, 50000000
real 0m4.797s
user 0m9.573s
sys 0m0.002s
1, 50000000
real 0m4.772s
user 0m4.770s
sys 0m0.002s
*** Current implementation, STDCXX locale database
$ for t in 16 8 4 2 1; do time ./t en_US.UTF-8 $t 50000000; done
16, 50000000
real 0m33.701s
user 2m12.132s
sys 0m0.132s
8, 50000000
real 0m16.854s
user 1m6.015s
sys 0m0.070s
4, 50000000
real 0m8.424s
user 0m33.142s
sys 0m0.029s
2, 50000000
real 0m4.217s
user 0m8.411s
sys 0m0.004s
1, 50000000
real 0m1.061s
user 0m1.059s
sys 0m0.001s
*** Non-caching implementation, STDCXX locale database
$ for t in 16 8 4 2 1; do time ./t en_US.UTF-8 $t 50000000; done
16, 50000000
real 0m22.630s
user 1m28.019s
sys 0m0.674s
8, 50000000
real 0m11.032s
user 0m43.181s
sys 0m0.179s
4, 50000000
real 0m5.812s
user 0m22.502s
sys 0m0.022s
2, 50000000
real 0m4.801s
user 0m9.593s
sys 0m0.003s
1, 50000000
real 0m4.758s
user 0m4.755s
sys 0m0.002s
** Linux results
*** Current implementation, system locale database
$ for t in 16 8 4 2 1; do time ./t en_US.utf8 $t 50000000; done
16, 50000000
real 2m30.836s
user 2m52.162s
sys 34m17.302s
8, 50000000
real 1m47.536s
user 2m8.017s
sys 11m3.016s
4, 50000000
real 4m29.681s
user 6m10.412s
sys 10m0.532s
2, 50000000
real 0m17.843s
user 0m23.615s
sys 0m9.642s
1, 50000000
real 0m3.342s
user 0m3.338s
sys 0m0.003s
*** Non-caching implementation, system locale database
$ for t in 16 8 4 2 1; do time ./t en_US.utf8 $t 50000000; done
16, 50000000
real 0m13.573s
user 3m12.970s
sys 0m12.893s
8, 50000000
real 0m7.924s
user 1m2.516s
sys 0m0.047s
4, 50000000
real 0m8.051s
user 0m31.175s
sys 0m0.004s
2, 50000000
real 0m8.070s
user 0m15.789s
sys 0m0.002s
1, 50000000
real 0m7.610s
user 0m7.605s
sys 0m0.003s
*** Current implementation, STDCXX locale database
$ for t in 16 8 4 2 1; do time ./t en_US.UTF-8 $t 50000000; done
16, 50000000
real 2m35.381s
user 2m54.088s
sys 34m25.967s
8, 50000000
real 1m51.603s
user 2m14.352s
sys 11m16.013s
4, 50000000
real 4m38.439s
user 6m26.472s
sys 10m13.408s
2, 50000000
real 0m13.011s
user 0m14.130s
sys 0m9.338s
1, 50000000
real 0m3.342s
user 0m3.338s
sys 0m0.002s
*** Non-caching implementation, STDCXX locale database
$ for t in 16 8 4 2 1; do time ./t en_US.UTF-8 $t 50000000; done
16, 50000000
real 0m13.674s
user 3m15.103s
sys 0m14.144s
8, 50000000
real 0m8.038s
user 1m2.721s
sys 0m0.005s
4, 50000000
real 0m7.961s
user 0m31.211s
sys 0m0.003s
2, 50000000
real 0m7.899s
user 0m15.627s
sys 0m0.003s
1, 50000000
real 0m7.836s
user 0m7.830s
sys 0m0.004s