Le mardi 22 août 2006 12:55, Mc Osten a écrit : > In fact Python here is faster. Suppose it has a really optimized set > class...
Maybe I'm missing something but the posted c++codes are not equivalent IMO to what python is doing. I discarded the "slow" version, and tried to get the equivalent in c++ of : """ #!/usr/bin/env python size = 1000000 def f(): a = [] for i in range(size): a.append('What do you know') a.append('so long...') a.append('chicken crosses road') a.append('fool') b = set(a) for s in b: print s import time from time import clock f_start = clock() f() f_end = clock() print "Elapsed: %f seconds" % (f_end - f_start) """ I came at first with the following, which is still slower than the python version : """ void print_occurence_of_unique_strings(){ vector<string> a; const string& s1 = "What do you know?" ; const string& s2 = "so long..." ; const string& s3 = "chicken crosses road"; const string& s4 = "fool" ; for (long int i=0; i<SIZE ; ++i){ a.push_back(s1); a.push_back(s2); a.push_back(s3); a.push_back(s4); } set<string> b(a.begin(), a.end()); copy(b.begin(), b.end(), ostream_iterator<string>(cout, "\n")); } """ Here, all strings, while passed by reference to the vector, are copied one by one. Then, I tried this, it just overcome the performance of python code, but not in the proportion I expected : """ void print_occurence_of_unique_strings_compare_by_adresses(){ vector<string*> a; string s1 = "What do you know?"; string s2 = "so long..."; string s3 = "chicken crosses road"; string s4 = "fool"; for (long int i=0; i<SIZE ; ++i){ a.push_back(&s1); a.push_back(&s2); a.push_back(&s3); a.push_back(&s4); } set<string> b; for (vector<string*>::iterator it=a.begin(); it!=a.end(); it++) b.insert(**it); copy(b.begin(), b.end(), ostream_iterator<string>(cout, "\n")); } """ The problem here, is that the strings in the set are compared by value, which is not optimal, and I guess python compare them by adress ("s*n is s*n" has the same complexity than "s*n == s*n" in CPython, right ?). so, finally, the code in c++, about ten times faster than the equivalent in python, must be : """ void print_occurence_of_unique_strings_compared_by_address(){ cout << "print_occurence_of_unique_strings_compared_by_address" << endl; vector<string*> a; string s1 = "What do you know?"; string s2 = "so long..."; string s3 = "chicken crosses road"; string s4 = "fool"; for (long int i=0; i<SIZE ; ++i){ a.push_back(&s1); a.push_back(&s2); a.push_back(&s3); a.push_back(&s4); } set<string*> b(a.begin(), a.end()); set<string> c; // well ordered set (b is ordered by address) for (set<string*>::iterator it=b.begin(); it!=b.end(); it++) c.insert(**it); copy(c.begin(), c.end(), ostream_iterator<string>(cout, "\n")); } """ the result on my box is : [EMAIL PROTECTED] mar aoû 22 22:24:23:~$ g++ -O3 -o testcpp testcpp.cpp [EMAIL PROTECTED] mar aoû 22 22:24:29:~$ ./testcpp print_occurence_of_strings What do you know? chicken crosses road fool so long... print_occurence_of_unique_strings What do you know? chicken crosses road fool so long... print_occurence_of_unique_strings_compared_by_address What do you know? chicken crosses road fool so long... strings : 1.89 unique strings : 0.87 compared by address : 0.18 [EMAIL PROTECTED] mar aoû 22 22:24:38:~$ python2.4 testpython.py so long... What do you know fool chicken crosses road Elapsed: 1.680000 seconds [EMAIL PROTECTED] mar aoû 22 22:24:51:~$ g++ -v Using built-in specs. Target: i486-linux-gnu Configured with: ../src/configure -v --enable-languages=c,c++,java,fortran,objc,obj-c++,ada,treelang --prefix=/usr --enable-shared --with-system-zlib --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --enable-nls --program-suffix=-4.1 --enable-__cxa_atexit --enable-clocale=gnu --enable-libstdcxx-debug --enable-java-awt=gtk --enable-gtk-cairo --with-java-home=/usr/lib/jvm/java-1.4.2-gcj-4.1-1.4.2.0/jre --enable-mpfr --with-tune=i686 --enable-checking=release i486-linux-gnu Thread model: posix gcc version 4.1.2 20060613 (prerelease) (Debian 4.1.1-5) I've joined the full c++ file as an attachment. -- _____________ Maric Michaud _____________ Aristote - www.aristote.info 3 place des tapis 69004 Lyon Tel: +33 426 880 097
#include <iostream> #include <ostream> #include <iterator> #include <string> #include <vector> #include <set> #include <algorithm> #include <ctime> using namespace std; #define SIZE 1000000 void print_occurence_of_strings(){ cout << "print_occurence_of_strings" << endl; vector<string> a; const string& s1 = "What do you know?"; const string& s2 = "so long..."; const string& s3 = "chicken crosses road"; const string& s4 = "fool"; for (long int i=0; i<SIZE ; ++i){ a.push_back(s1); a.push_back(s2); a.push_back(s3); a.push_back(s4); } set<string> b(a.begin(), a.end()); copy(b.begin(), b.end(), ostream_iterator<string>(cout, "\n")); } void print_occurence_of_unique_strings(){ cout << "print_occurence_of_unique_strings" << endl; vector<string*> a; string s1 = "What do you know?"; string s2 = "so long..."; string s3 = "chicken crosses road"; string s4 = "fool"; for (long int i=0; i<SIZE ; ++i){ a.push_back(&s1); a.push_back(&s2); a.push_back(&s3); a.push_back(&s4); } set<string> b; for (vector<string*>::iterator it=a.begin(); it!=a.end(); it++) b.insert(**it); copy(b.begin(), b.end(), ostream_iterator<string>(cout, "\n")); } void print_occurence_of_unique_strings_compared_by_address(){ cout << "print_occurence_of_unique_strings_compared_by_address" << endl; vector<string*> a; string s1 = "What do you know?"; string s2 = "so long..."; string s3 = "chicken crosses road"; string s4 = "fool"; for (long int i=0; i<SIZE ; ++i){ a.push_back(&s1); a.push_back(&s2); a.push_back(&s3); a.push_back(&s4); } set<string*> b(a.begin(), a.end()); set<string> c; // well ordered set (b is ordered by address) for (set<string*>::iterator it=b.begin(); it!=b.end(); it++) c.insert(**it); copy(c.begin(), c.end(), ostream_iterator<string>(cout, "\n")); } int main(){ clock_t f_start1, f_end1, f_start2, f_end2, f_start3, f_end3; f_start1 = clock(); print_occurence_of_strings(); f_end1 = clock(); f_start2 = clock(); print_occurence_of_unique_strings(); f_end2 = clock(); f_start3 = clock(); print_occurence_of_unique_strings_compared_by_address(); f_end3 = clock(); cout << "strings : " << (f_end1 - f_start1) / double(CLOCKS_PER_SEC) << endl; cout << "unique strings : " << (f_end2 - f_start2) / double(CLOCKS_PER_SEC) << endl; cout << "compared by address : " << (f_end3 - f_start3) / double(CLOCKS_PER_SEC) << endl; }
-- http://mail.python.org/mailman/listinfo/python-list