bearophile wrote:
>> This is the middle parts (the two loops) of Yzlm.objectReputationUpdate
>> compiled with ldc, this is one of the two main hot spots of the program, the
>> you can compared this asm with the asm of the same part from the C++ version:
>
> To find why the C++ code is faster you can show me the equivalent C++ code
> that I can compile myself, or you can compile your C++ code and show me the
> asm of the two functions that are hot spots.
I took a little time last night and wrote up a C++ version that is much
closer in design to the D code (as well as smaller, simpler, less hell
to read...). It has libboost as a dependency for random number
generation and for the BOOST_FOREACH macro.
Following your observation with D, it looks like the C++ also benefits
from using references in the FOREACH loops for elements larger than
size_t -- this shaves about 20s off running time on my machine. Either
way, like its uglier predecessor it's quite a bit faster than DMD or
LDC. Unfortunately I can't get llvm-g++ running on my machine, or I
would compare performance there too.
Hope this makes an interesting point of comparison. :-)
Best wishes,
-- Joe
#include <iostream>
#include <vector>
#include <boost/foreach.hpp>
#include <boost/random.hpp>
using namespace std;
using namespace boost;
namespace fric
{
class Rating {
private:
size_t _user;
size_t _object;
double _value;
public:
Rating() {}
Rating(size_t u,
size_t o,
double v)
{
_user = u;
_object = o;
_value = v;
}
const size_t &user() { return _user; }
const size_t &object() { return _object; }
const double &value() { return _value; }
};
class ReputationAlgorithm {
virtual size_t reputation(vector<double> &reputationUser,
vector<double> &reputationObject,
vector<Rating> &ratings)
{
return 0;
}
virtual size_t addRating(vector<double> &reputationUser,
vector<double> &reputationObject,
vector<Rating> &ratings,
Rating r)
{
return 0;
}
};
namespace algorithm
{
class AvgWeighted : ReputationAlgorithm {
protected:
vector<double> weightSum;
public:
virtual size_t reputation(vector<double> &reputationUser,
vector<double> &reputationObject,
vector<Rating> &ratings)
{
weightSum.assign(reputationObject.size(),0.0);
reputationObject.assign(reputationObject.size(),0.0);
BOOST_FOREACH(Rating &r, ratings) {
reputationObject.at(r.object()) += reputationUser.at(r.user()) * r.value();
weightSum.at(r.object()) += reputationUser.at(r.user());
}
for(size_t o=0;o!=reputationObject.size();++o)
reputationObject.at(o) /= (weightSum.at(o)>0) ? weightSum.at(o) : 1;
return 0;
}
};
class AvgArithmetic : AvgWeighted {
public:
virtual size_t reputation(vector<double> &reputationUser,
vector<double> &reputationObject,
vector<Rating> &ratings)
{
reputationUser.assign(reputationUser.size(),1.0);
return AvgWeighted::reputation(reputationUser,reputationObject,ratings);
}
};
class Yzlm : AvgWeighted {
private:
double _beta;
double _convergence;
double _errorMin;
protected:
vector<double> oldReputationObject;
vector<size_t> userLinks;
double objectReputationUpdate(vector<double> &reputationUser,
vector<double> &reputationObject,
vector<Rating> &ratings)
{
double diff = 0.0;
oldReputationObject.assign(reputationObject.begin(), reputationObject.end());
AvgWeighted::reputation(reputationUser, reputationObject, ratings);
for(size_t o=0;o!=reputationObject.size();++o) {
double aux = reputationObject.at(o) - oldReputationObject.at(o);
diff += aux*aux;
}
return sqrt(diff);
}
void userReputationUpdate(vector<double> &reputationUser,
vector<double> &reputationObject,
vector<Rating> &ratings)
{
reputationUser.assign(reputationUser.size(),0.0);
BOOST_FOREACH(Rating &r, ratings) {
double aux = r.value() - reputationObject.at(r.object());
reputationUser.at(r.user()) += aux*aux;
}
for(size_t u=0;u!=reputationUser.size();++u) {
if(userLinks.at(u)!=0)
reputationUser.at(u) = pow( (reputationUser.at(u)/userLinks.at(u)) + errorMin(), -beta() );
}
}
public:
void init(double b,
double c,
double e)
{
_beta = b;
_convergence = c;
_errorMin = e;
}
const double &beta() { return _beta; }
const double &convergence() { return _convergence; }
const double &errorMin() { return _errorMin; }
virtual size_t reputation(vector<double> &reputationUser,
vector<double> &reputationObject,
vector<Rating> &ratings)
{
double diff;
size_t iterations = 0;
userLinks.assign(reputationUser.size(),0);
oldReputationObject.resize(reputationObject.size());
BOOST_FOREACH(Rating &r, ratings)
userLinks.at(r.user())++;
do {
userReputationUpdate(reputationUser,reputationObject,ratings);
diff = objectReputationUpdate(reputationUser,reputationObject,ratings);
++iterations;
} while(diff > convergence());
cout << "Exited in " << iterations << " iterations with diff = " << diff << endl;
return iterations;
}
Yzlm(double b,
double c,
double e)
{
init(b,c,e);
}
};
}
}
using namespace fric;
int main()
{
vector<double> reputationUser, reputationObject;
vector<double> userError, objectQuality;
vector<Rating> ratings;
mt19937 rng;
algorithm::AvgArithmetic aa;
algorithm::Yzlm yzlm(0.8,1e-12,1e-36);
reputationUser.resize(100);
reputationObject.resize(1);
rng.seed(1001);
for(size_t u=0;u!=reputationUser.size();++u)
ratings.push_back(Rating(u,0,u+1));
aa.reputation(reputationUser,reputationObject,ratings);
BOOST_FOREACH(double r, reputationObject)
cout << r << endl;
objectQuality.resize(1000);
userError.resize(1000);
reputationObject.resize(objectQuality.size());
reputationUser.resize(userError.size());
ratings.reserve(objectQuality.size() * userError.size());
size_t iterTotal = 0;
for(size_t i=0;i!=100;++i) {
BOOST_FOREACH(double &Q, objectQuality)
Q = uniform_real<double> (0.0, 10.0) (rng);
BOOST_FOREACH(double &err, userError)
err = uniform_real<double> (0.0, 1.0) (rng);
ratings.resize(0);
for(size_t o=0;o!=objectQuality.size();++o) {
for(size_t u=0;u!=userError.size();++u) {
ratings.push_back( Rating(u, o, uniform_real<double> (objectQuality.at(o)-userError.at(u), objectQuality.at(o)+userError.at(u)) (rng) ) );
}
}
cout << "We now have " << ratings.size() << " ratings, with capacity for " << ratings.capacity() << endl;
aa.reputation(reputationUser, reputationObject, ratings);
iterTotal += yzlm.reputation(reputationUser, reputationObject, ratings);
double deltaQ = 0.0;
for(size_t o=0;o!=objectQuality.size();++o) {
double aux = reputationObject.at(o) - objectQuality.at(o);
deltaQ += aux*aux;
}
deltaQ = sqrt(deltaQ/objectQuality.size());
cout << "[" << i << "] Error in quality estimate: " << deltaQ << endl;
}
cout << endl;
cout << "Total iterations: " << iterTotal << endl;
return 0;
}