I've been running speed some tests with the Moller patches, and it looks to me like they work just fine for larger numbers, but at smaller limb counts they are slower than the original code. I've attached my test code so that you can see what I'm doing.
I suspect that I need to add some tuning to figure out what the threshold values are for flipping to the half-gcd based scheme, but otherwise the patches seem to be sound. for the random numbers in the test code here's what I see: MPIR svn version 1487 (pre-patch): 17,900 cycles MPIR svn version 1503 (current): 16,000 cycles --jason Jason Worth Martin Asst. Professor of Mathematics http://www.math.jmu.edu/~martin --~--~---------~--~----~------------~-------~--~----~ You received this message because you are subscribed to the Google Groups "mpir-devel" group. To post to this group, send email to [email protected] To unsubscribe from this group, send email to [email protected] For more options, visit this group at http://groups.google.com/group/mpir-devel?hl=en -~----------~----~----~----~------~----~------~--~---
Makefile
Description: Binary data
#include "gmp.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
char *a_str =
"190872345987238923456987123459827349823489234892412908343190872345987238923456987123459827349823489234892412908343190872345987238923456987123459827349823489234892412908343190872345987238923456987123459827349823489234892412908343190872345987238923456987123459827349823489234892412908343190872345987238923456987123459827349823489234892412908343190872345987238923456987123459827349823489234892412908343190872345987238923456987123459827349823489234892412908343190872345987238923456987123459827349823489234892412908343190872345987238923456987123459827349823489234892412908343190872345987238923456987123459827349823489234892412908343";
char *b_str =
"398734589700062398475092345982344494973459720202003034535398734589700062398475092345982344494973459720202003034535398734589700062398475092345982344494973459720202003034535398734589700062398475092345982344494973459720202003034535398734589700062398475092345982344494973459720202003034535398734589700062398475092345982344494973459720202003034535398734589700062398475092345982344494973459720202003034535398734589700062398475092345982344494973459720202003034535398734589700062398475092345982344494973459720202003034535398734589700062398475092345982344494973459720202003034535398734589700062398475092345982344494973459720202003034535";
/*
* These functions are defined in the associate
* assembly file.
*/
uint32_t read_time_stamp_counter_low();
uint32_t read_time_stamp_counter_high();
uint64_t read_time_stamp_counter()
{
uint64_t counter;
uint32_t counter_low;
uint32_t counter_high;
counter_low = read_time_stamp_counter_low();
counter_high = read_time_stamp_counter_high();
counter = ((uint64_t)counter_high) << 32;
counter |= ((uint64_t)counter_low) & 0x00000000ffffffffLL;
return(counter);
}
/*
* Returns the number of clock cycles required to compute the gcd.
*/
double speed_test(mpz_t a, mpz_t b, int iterations)
{
int i;
double output;
mpz_t tmp;
uint64_t cpu_clock;
mpz_init(tmp);
cpu_clock = read_time_stamp_counter();
for (i=0;i<iterations;i++)
{
mpz_gcd(tmp,a,b);
}
cpu_clock = read_time_stamp_counter() - cpu_clock;
output = ((double)cpu_clock)/((double)iterations);
return(output);
}
main()
{
mpz_t a;
mpz_t b;
double result;
mpz_init(a);
mpz_init(b);
mpz_set_str(a,a_str,10);
mpz_set_str(b,b_str,10);
result = speed_test(a,b,100);
printf("%.2f cycles\n");
}
read_time_stamp_counter.yasm
Description: Binary data
