Below is an attempt to add automatic tuning of HGCD2_DIV2_METHOD. On my laptop, method 1 (using div1 in the common case) is 9% faster than the current default method 2 (bitwise division).
I wonder if it's really useful to support all variants in speed, or if that could be deleted, to simplify a bit? Regards, /Niels diff -r a615debc4e93 tune/Makefile.am --- a/tune/Makefile.am Fri Jan 24 04:38:57 2020 +0100 +++ b/tune/Makefile.am Fri Jan 24 05:54:15 2020 +0100 @@ -58,7 +58,7 @@ gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c \ hgcd_lehmer.c hgcd_appr_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c \ jacbase1.c jacbase2.c jacbase3.c jacbase4.c \ - hgcd2-1.c hgcd2-2.c hgcd2-3.c hgcd2-4.c hgcd2-5.c \ + $(MPN_HGCD2_SRCS) \ mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c \ noop.c powm_mod.c powm_redc.c pre_divrem_1.c \ set_strb.c set_strs.c set_strp.c time.c @@ -151,12 +151,26 @@ mulmid.c mulmid_n.c toom42_mulmid.c sqrlo.c sqrlo_basecase.c \ nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c \ toom22_mul.c toom2_sqr.c toom33_mul.c toom3_sqr.c toom44_mul.c toom4_sqr.c +MPN_HGCD2_SRCS = \ + hgcd2_1_1.c hgcd2_2_1.c hgcd2_3_1.c hgcd2_4_1.c hgcd2_5_1.c \ + hgcd2_1_2.c hgcd2_2_2.c hgcd2_3_2.c hgcd2_4_2.c hgcd2_5_2.c $(TUNE_MPN_SRCS_BASIC): for i in $(TUNE_MPN_SRCS_BASIC); do \ echo "#define TUNE_PROGRAM_BUILD 1" >$$i; \ echo "#include \"mpn/generic/$$i\"" >>$$i; \ done +$(MPN_HGCD2_SRCS): + for i in 1 2 3 4 5; do for j in 1 2 ; do \ + echo '#include "gmp-impl.h"' > hgcd2_$${i}_$${j}.c; \ + echo "#undef HGCD2_DIV1_METHOD" >> hgcd2_$${i}_$${j}.c; \ + echo "#undef HGCD2_DIV2_METHOD" >> hgcd2_$${i}_$${j}.c; \ + echo "#define HGCD2_DIV1_METHOD $$i" >> hgcd2_$${i}_$${j}.c; \ + echo "#define HGCD2_DIV2_METHOD $$j" >> hgcd2_$${i}_$${j}.c; \ + echo "#define __gmpn_hgcd2 mpn_hgcd2_$${i}_$${j}" >> hgcd2_$${i}_$${j}.c; \ + echo "#define __gmpn_hgcd_mul_matrix1_vector unused_$${i}_$${j}" >> hgcd2_$${i}_$${j}.c; \ + echo '#include "mpn/generic/hgcd2.c"' >> hgcd2_$${i}_$${j}.c; \ + done; done divrem_1.c: echo "#define TUNE_PROGRAM_BUILD 1" >divrem_1.c diff -r a615debc4e93 tune/common.c --- a/tune/common.c Fri Jan 24 04:38:57 2020 +0100 +++ b/tune/common.c Fri Jan 24 05:54:15 2020 +0100 @@ -1639,29 +1639,54 @@ SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2); } double -speed_mpn_hgcd2_1 (struct speed_params *s) +speed_mpn_hgcd2_1_1 (struct speed_params *s) { - SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_1); + SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_1_1); } double -speed_mpn_hgcd2_2 (struct speed_params *s) +speed_mpn_hgcd2_2_1 (struct speed_params *s) +{ + SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_2_1); +} +double +speed_mpn_hgcd2_3_1 (struct speed_params *s) { - SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_2); + SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_3_1); +} +double +speed_mpn_hgcd2_4_1 (struct speed_params *s) +{ + SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_4_1); } double -speed_mpn_hgcd2_3 (struct speed_params *s) +speed_mpn_hgcd2_5_1 (struct speed_params *s) { - SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_3); + SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_5_1); +} +double +speed_mpn_hgcd2_1_2 (struct speed_params *s) +{ + SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_1_2); } double -speed_mpn_hgcd2_4 (struct speed_params *s) +speed_mpn_hgcd2_2_2 (struct speed_params *s) { - SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_4); + SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_2_2); } double -speed_mpn_hgcd2_5 (struct speed_params *s) +speed_mpn_hgcd2_3_2 (struct speed_params *s) +{ + SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_3_2); +} +double +speed_mpn_hgcd2_4_2 (struct speed_params *s) { - SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_5); + SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_4_2); +} +double +speed_mpn_hgcd2_5_2 (struct speed_params *s) +{ + SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_5_2); } double diff -r a615debc4e93 tune/hgcd2-1.c --- a/tune/hgcd2-1.c Fri Jan 24 04:38:57 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -/* mpn/generic/hgcd2.c method 1. - -Copyright 2019 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - -#include "gmp-impl.h" - -#undef HGCD2_DIV1_METHOD -#define HGCD2_DIV1_METHOD 1 -#define __gmpn_hgcd2 mpn_hgcd2_1 -/* Not used, but renamed to not get duplicate definitions */ -#define __gmpn_hgcd_mul_matrix1_vector mpn_hgcd_mul_matrix1_vector_1 - -#include "mpn/generic/hgcd2.c" diff -r a615debc4e93 tune/hgcd2-2.c --- a/tune/hgcd2-2.c Fri Jan 24 04:38:57 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -/* mpn/generic/hgcd2.c method 2. - -Copyright 2019 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - -#include "gmp-impl.h" - -#undef HGCD2_DIV1_METHOD -#define HGCD2_DIV1_METHOD 2 -#define __gmpn_hgcd2 mpn_hgcd2_2 -/* Not used, but renamed to not get duplicate definitions */ -#define __gmpn_hgcd_mul_matrix1_vector mpn_hgcd_mul_matrix1_vector_2 - -#include "mpn/generic/hgcd2.c" diff -r a615debc4e93 tune/hgcd2-3.c --- a/tune/hgcd2-3.c Fri Jan 24 04:38:57 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -/* mpn/generic/hgcd2.c method 3. - -Copyright 2019 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - -#include "gmp-impl.h" - -#undef HGCD2_DIV1_METHOD -#define HGCD2_DIV1_METHOD 3 -#define __gmpn_hgcd2 mpn_hgcd2_3 -/* Not used, but renamed to not get duplicate definitions */ -#define __gmpn_hgcd_mul_matrix1_vector mpn_hgcd_mul_matrix1_vector_3 - -#include "mpn/generic/hgcd2.c" diff -r a615debc4e93 tune/hgcd2-4.c --- a/tune/hgcd2-4.c Fri Jan 24 04:38:57 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -/* mpn/generic/hgcd2.c method 4. - -Copyright 2019 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - -#include "gmp-impl.h" - -#undef HGCD2_DIV1_METHOD -#define HGCD2_DIV1_METHOD 4 -#define __gmpn_hgcd2 mpn_hgcd2_4 -/* Not used, but renamed to not get duplicate definitions */ -#define __gmpn_hgcd_mul_matrix1_vector mpn_hgcd_mul_matrix1_vector_4 - -#include "mpn/generic/hgcd2.c" diff -r a615debc4e93 tune/hgcd2-5.c --- a/tune/hgcd2-5.c Fri Jan 24 04:38:57 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -/* mpn/generic/hgcd2.c method 5. - -Copyright 2019 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - -#include "gmp-impl.h" - -#undef HGCD2_DIV1_METHOD -#define HGCD2_DIV1_METHOD 5 -#define __gmpn_hgcd2 mpn_hgcd2_5 -/* Not used, but renamed to not get duplicate definitions */ -#define __gmpn_hgcd_mul_matrix1_vector mpn_hgcd_mul_matrix1_vector_5 - -#include "mpn/generic/hgcd2.c" diff -r a615debc4e93 tune/speed.c --- a/tune/speed.c Fri Jan 24 04:38:57 2020 +0100 +++ b/tune/speed.c Fri Jan 24 05:54:15 2020 +0100 @@ -286,11 +286,16 @@ { "mpn_matrix22_mul", speed_mpn_matrix22_mul }, { "mpn_hgcd2", speed_mpn_hgcd2, FLAG_NODATA }, - { "mpn_hgcd2_1", speed_mpn_hgcd2_1, FLAG_NODATA }, - { "mpn_hgcd2_2", speed_mpn_hgcd2_2, FLAG_NODATA }, - { "mpn_hgcd2_3", speed_mpn_hgcd2_3, FLAG_NODATA }, - { "mpn_hgcd2_4", speed_mpn_hgcd2_4, FLAG_NODATA }, - { "mpn_hgcd2_5", speed_mpn_hgcd2_5, FLAG_NODATA }, + { "mpn_hgcd2_1_1", speed_mpn_hgcd2_1_1, FLAG_NODATA }, + { "mpn_hgcd2_2_1", speed_mpn_hgcd2_2_1, FLAG_NODATA }, + { "mpn_hgcd2_3_1", speed_mpn_hgcd2_3_1, FLAG_NODATA }, + { "mpn_hgcd2_4_1", speed_mpn_hgcd2_4_1, FLAG_NODATA }, + { "mpn_hgcd2_5_1", speed_mpn_hgcd2_5_1, FLAG_NODATA }, + { "mpn_hgcd2_1_2", speed_mpn_hgcd2_1_2, FLAG_NODATA }, + { "mpn_hgcd2_2_2", speed_mpn_hgcd2_2_2, FLAG_NODATA }, + { "mpn_hgcd2_3_2", speed_mpn_hgcd2_3_2, FLAG_NODATA }, + { "mpn_hgcd2_4_2", speed_mpn_hgcd2_4_2, FLAG_NODATA }, + { "mpn_hgcd2_5_2", speed_mpn_hgcd2_5_2, FLAG_NODATA }, { "mpn_hgcd", speed_mpn_hgcd }, { "mpn_hgcd_lehmer", speed_mpn_hgcd_lehmer }, { "mpn_hgcd_appr", speed_mpn_hgcd_appr }, diff -r a615debc4e93 tune/speed.h --- a/tune/speed.h Fri Jan 24 04:38:57 2020 +0100 +++ b/tune/speed.h Fri Jan 24 05:54:15 2020 +0100 @@ -216,11 +216,16 @@ double speed_mpn_fib2_ui (struct speed_params *); double speed_mpn_matrix22_mul (struct speed_params *); double speed_mpn_hgcd2 (struct speed_params *); -double speed_mpn_hgcd2_1 (struct speed_params *); -double speed_mpn_hgcd2_2 (struct speed_params *); -double speed_mpn_hgcd2_3 (struct speed_params *); -double speed_mpn_hgcd2_4 (struct speed_params *); -double speed_mpn_hgcd2_5 (struct speed_params *); +double speed_mpn_hgcd2_1_1 (struct speed_params *); +double speed_mpn_hgcd2_2_1 (struct speed_params *); +double speed_mpn_hgcd2_3_1 (struct speed_params *); +double speed_mpn_hgcd2_4_1 (struct speed_params *); +double speed_mpn_hgcd2_5_1 (struct speed_params *); +double speed_mpn_hgcd2_1_2 (struct speed_params *); +double speed_mpn_hgcd2_2_2 (struct speed_params *); +double speed_mpn_hgcd2_3_2 (struct speed_params *); +double speed_mpn_hgcd2_4_2 (struct speed_params *); +double speed_mpn_hgcd2_5_2 (struct speed_params *); double speed_mpn_hgcd (struct speed_params *); double speed_mpn_hgcd_lehmer (struct speed_params *); double speed_mpn_hgcd_appr (struct speed_params *); @@ -490,11 +495,16 @@ int mpn_jacobi_base_3 (mp_limb_t, mp_limb_t, int); int mpn_jacobi_base_4 (mp_limb_t, mp_limb_t, int); -int mpn_hgcd2_1 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*); -int mpn_hgcd2_2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*); -int mpn_hgcd2_3 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*); -int mpn_hgcd2_4 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*); -int mpn_hgcd2_5 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*); +int mpn_hgcd2_1_1 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*); +int mpn_hgcd2_2_1 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*); +int mpn_hgcd2_3_1 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*); +int mpn_hgcd2_4_1 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*); +int mpn_hgcd2_5_1 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*); +int mpn_hgcd2_1_2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*); +int mpn_hgcd2_2_2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*); +int mpn_hgcd2_3_2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*); +int mpn_hgcd2_4_2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*); +int mpn_hgcd2_5_2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*); mp_limb_t mpn_mod_1_div (mp_srcptr, mp_size_t, mp_limb_t); mp_limb_t mpn_mod_1_inv (mp_srcptr, mp_size_t, mp_limb_t); diff -r a615debc4e93 tune/tuneup.c --- a/tune/tuneup.c Fri Jan 24 04:38:57 2020 +0100 +++ b/tune/tuneup.c Fri Jan 24 05:54:15 2020 +0100 @@ -1962,25 +1962,57 @@ tune_hgcd2 (void) { static struct param_t param; - hgcd2_func_t *f[5] = - { mpn_hgcd2_1, - mpn_hgcd2_2, - mpn_hgcd2_3, - mpn_hgcd2_4, - mpn_hgcd2_5 }; - speed_function_t speed_f[5] = - { speed_mpn_hgcd2_1, - speed_mpn_hgcd2_2, - speed_mpn_hgcd2_3, - speed_mpn_hgcd2_4, - speed_mpn_hgcd2_5 }; - int best; - + hgcd2_func_t *f[2][5] = + { + { + mpn_hgcd2_1_1, + mpn_hgcd2_2_1, + mpn_hgcd2_3_1, + mpn_hgcd2_4_1, + mpn_hgcd2_5_1, + }, + { + mpn_hgcd2_1_2, + mpn_hgcd2_2_2, + mpn_hgcd2_3_2, + mpn_hgcd2_4_2, + mpn_hgcd2_5_2, + }, + }; + speed_function_t speed_f[2][5] = + { + { + speed_mpn_hgcd2_1_1, + speed_mpn_hgcd2_2_1, + speed_mpn_hgcd2_3_1, + speed_mpn_hgcd2_4_1, + speed_mpn_hgcd2_5_1 + }, + { + speed_mpn_hgcd2_1_2, + speed_mpn_hgcd2_2_2, + speed_mpn_hgcd2_3_2, + speed_mpn_hgcd2_4_2, + speed_mpn_hgcd2_5_2 + } + }; + int best1, best2; + speed_function_t speed_method2[2]; + + /* Select div1 method, always using div2 method 2. */ s.size = 1; - best = one_method (5, speed_f, "mpn_hgcd2", "HGCD2_DIV1_METHOD", ¶m); + best1 = one_method (5, speed_f[1], "mpn_hgcd2 div1", "HGCD2_DIV1_METHOD", + ¶m); + + /* Select div2 method */ + speed_method2[0] = speed_f[0][best1]; + speed_method2[1] = speed_f[1][best1]; + + best2 = one_method (2, speed_method2, "mpn_hgcd2 div2", "HGCD2_DIV2_METHOD", + ¶m); /* Use selected function when tuning hgcd and gcd */ - hgcd2_func = f[best]; + hgcd2_func = f[best2][best1]; } void -- Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677. Internet email is subject to wholesale government surveillance. _______________________________________________ gmp-devel mailing list gmp-devel@gmplib.org https://gmplib.org/mailman/listinfo/gmp-devel