Code speed [was: Re: Memory leak with dynamic array]

Joseph Wakeling Tue, 13 Apr 2010 16:15:31 -0700

Don wrote:
> The D code compiles directly to a memset and memcpy, which are
> intrinsics. There's no way C++ could beat it.


OK.  I'm just bemused about why -- having solved the memory leak -- my D
code is running significantly slower than equivalent C++ code.

For now I'm just playing with D by doing a rewrite of some C++ code I
did for a project a while back -- to see if I can write a neater version
of the code in the new language, and learn something about D in the
process.  I may have fallen into the trap you described earlier, of just
'translating' C++ style code directly into D, but I don't think that's
the issue here.

There are a couple of algorithmic inefficiencies in there with
deliberate reason, but removing them for testing purposes doesn't
improve matters.  The array append is slow, but it's not the major part
of the running time of the program.

For the sake of clarity, I've attached the complete code.  The really
important part to look at is the file yzlm.d in the opCall() function
and class functions objectReputationUpdate() and userReputationUpdate().
This is a reputation-generating process which takes a set of ratings by
users of objects, and uses them to iteratively recalculate user
reputation and re-aggregate the ratings taking this into account.

I don't see anything particularly bad about the way these algorithms are
written but maybe they are imperfectly D-ish ... :-)

Best wishes,

    -- Joe

module dregs.algorithms.yzlm;

import std.math;
import std.stdio;
public import dregs.core;

class Yzlm : ReputationAlgorithm
{
	double beta;
	double convergenceRequirement;
	double errorMin;
//	uint[] userLinks; // commented out because for now it
	                  // has a constant value for all users
	double[] weightSum;
	double[] oldReputationObject;
	
	double objectReputationUpdate(ref Rating[] ratings,
	                              ref double[] reputationUser,
	                              ref double[] reputationObject)
	{
		double diff = 0;
		double[] temp = oldReputationObject;

		// Original version had:
		//
		//    oldReputationObject[] = reputationObject[]
		//
		// This version is an attempt to save effort
		// by just switching round the memory the two
		// arrays are pointing at -- not sure if it
		// actually does what I'm expecting it to.
		// Doesn't seem to improve speed. :-(
		oldReputationObject = reputationObject;
		reputationObject = temp;
		reputationObject[] = 0;
		
		weightSum[] = 0;
		
		foreach(Rating r; ratings) {
			reputationObject[r.object] += reputationUser[r.user] * r.value;
			weightSum[r.object] += reputationUser[r.user];
		}
		
		foreach(uint object, ref double r; reputationObject) {
			r /= (weightSum[object]>0) ? weightSum[object] : 1;
			diff += pow( (r - oldReputationObject[object]), 2);
		}
		
		return sqrt(diff);
	} 
	
	void userReputationUpdate(ref Rating[] ratings,
	                          ref double[] reputationUser,
	                          ref double[] reputationObject)
	{
		reputationUser[] = 0;
		
		foreach(Rating r; ratings)
			reputationUser[r.user] += pow( (r.value - reputationObject[r.object]), 2);
		
		foreach(uint user, ref double r; reputationUser) {
			//if(userLinks[user]>0)
				r = pow( (r/reputationObject.length/*userLinks[user]*/) + errorMin, -beta);
		}
	}
	
	void opCall(ref Rating[] ratings,
	            ref double[] reputationUser,
	            ref double[] reputationObject)
	{
	//	userLinks.length = reputationUser.length;
	//	userLinks[] = 0;
		
		weightSum.length = reputationObject.length;
		oldReputationObject.length = reputationObject.length;
		
	//	foreach(Rating r; ratings)
	//		userLinks[r.user]++;
		
		double diff;
		uint iterations=0;
		do {
			userReputationUpdate(ratings,
			                     reputationUser,
			                     reputationObject);
			diff = objectReputationUpdate(ratings,
			                              reputationUser,
			                              reputationObject);
			++iterations;
		} while(diff > convergenceRequirement);
		writefln("Exited in %u iterations with diff = %g < %g",iterations,diff,convergenceRequirement);
	}
	
	this() {}
	
	this(double b,
	     double c,
	     double e)
	{
		beta = b;
		convergenceRequirement = c;
		errorMin = e;
		assert(beta>=0);
		assert(convergenceRequirement>0);
		assert(errorMin>=0);
	}
	
	this(ref Rating[] ratings,
	     ref double[] reputationUser,
	     ref double[] reputationObject,
	     double b,
	     double c,
	     double e)
	{
		this(b,c,e);
		
		opCall(ratings,
		       reputationUser,
		       reputationObject);
	}
}

module dregs.algorithms.avg;

public import dregs.core;

class AvgWeighted : ReputationAlgorithm
{
	double[] weightSum;

	void opCall(ref Rating[] ratings,
	            ref double[] reputationUser,
	            ref double[] reputationObject)
	{
		weightSum.length = reputationObject.length;
		weightSum[] = 0;
		
		reputationObject[] = 0;
		
		foreach(Rating r; ratings) {
			reputationObject[r.object] += reputationUser[r.user]*r.value;
			weightSum[r.object] += reputationUser[r.user];
		}
		
		foreach(uint o, ref double r; reputationObject)
			r /= weightSum[o];
	}
	
	this() {}
	
	this(ref Rating[] ratings,
	     ref double[] reputationUser,
	     ref double[] reputationObject)
	{
		opCall(ratings,reputationUser,reputationObject);
	}	
}

class AvgArithmetic : AvgWeighted
{
	void opCall(ref Rating[] ratings,
	            ref double[] reputationUser,
	            ref double[] reputationObject)
	{
		reputationUser[] = 1;
		super.opCall(ratings,reputationUser,reputationObject);
	}
	
	this() {}
	
	this(ref Rating[] ratings,
	     ref double[] reputationUser,
	     ref double[] reputationObject)
	{
		opCall(ratings,reputationUser,reputationObject);
	}	
}

module dregs.core;

struct Rating
{
	uint user;
	uint object;
	double value;
}

class ReputationAlgorithm
{
	this() {}
	this(ref Rating[] ratings,
	     ref double[] reputationUser,
	     ref double[] reputationObject) {}
}

import std.array;
import std.math;
import std.random;
import std.stdio;
import dregs.algorithms.avg;
import dregs.algorithms.yzlm;

void main()
{
	Rating[] ratings;
	double[] reputationObject, reputationUser;
	double[] objectQuality, userError;
	Mt19937 rng;
	auto aa = new AvgArithmetic;
	auto yzlm = new Yzlm(0.8,1e-12,1e-36);
	
	rng.seed(1001);
	
	reputationObject.length = 1000;
	reputationUser.length = 1000;
	
	objectQuality.length = reputationObject.length;
	userError.length = reputationUser.length;
	
	ratings.reserve(reputationObject.length * reputationUser.length);
	
	foreach(uint i;0..100) {	
		foreach(ref double Q; objectQuality)
			Q = uniform(0.0L, 10.0L, rng);
	
		foreach(ref double sigma2; userError)
			sigma2 = uniform(0.0L, 1.0L, rng);
		
		ratings.length = 0;
		assumeSafeAppend(ratings);
		
		auto ratingsAppend = appender(&ratings);
		
		foreach(uint object, double Q; objectQuality) {
			foreach(uint user, double sigma2; userError) {	
				double v = uniform( (Q-sigma2), (Q+sigma2), rng );
				Rating r = { user, object, v };
				ratingsAppend.put(r);
			}
		}
		
		writefln("We now have %u ratings.",ratings.length);
	
		aa(ratings,reputationUser,reputationObject);
		yzlm(ratings,reputationUser,reputationObject);

		double deltaQ = 0;
		foreach(uint object, double r; reputationObject)
			deltaQ += pow( (r-objectQuality[object]), 2);
		deltaQ = sqrt(deltaQ/reputationObject.length);
		writefln("[%u] Error in quality estimate: %g",i,deltaQ);
	}
}

Code speed [was: Re: Memory leak with dynamic array]

Reply via email to