On 11/28/2010 12:44 PM, bearophile wrote:
Robert Jacques:

I've spent some time having fun this afternoon optimizing array-equals
using vectorization techniques. I found that vectorizing using ulongs
worked best on my system except with the shortest strings, where a simple
Duff's device edged it out. If you'd like to try it out on your data set:

Thank you for your work :-)
A version with your function, D version #8:


// D version #8
import std.file: read;
import std.c.stdio: printf;
import std.exception: assumeUnique;

bool arrayComp(bool useBitCompare=true, T)
               (const T[] a, const T[] b) pure nothrow {
     if (a.length != b.length)
         return false;

     static if (useBitCompare) {
         auto pab = cast(ubyte*)a.ptr;
         auto pbb = cast(ubyte*)b.ptr;
         if (pab is pbb)
             return true;

         auto byte_length = a.length * T.sizeof;
         auto pa_end = cast(ulong*)(pab + byte_length);

         final switch (byte_length % ulong.sizeof) { // cases fall through on purpose
             case 7: if (*pab++ != *pbb++) return false;
             case 6: if (*pab++ != *pbb++) return false;
             case 5: if (*pab++ != *pbb++) return false;
             case 4: if (*pab++ != *pbb++) return false;
             case 3: if (*pab++ != *pbb++) return false;
             case 2: if (*pab++ != *pbb++) return false;
             case 1: if (*pab++ != *pbb++) return false;
             case 0:
         }

         auto pa = cast(ulong*)pab;
         auto pb = cast(ulong*)pbb;

         while (pa < pa_end) {
             if (*pa++ != *pb++)
                 return false;
         }
     } else { // default to a short Duff's device
         auto pa = a.ptr;
         auto pb = b.ptr;
         if (pa == pb)
             return true;
         if (a.length == 0) // guard: the device below always runs at least once
             return true;
         auto n = (a.length + 3) / 4;

         // Duff's device: the switch jumps into the middle of the loop
         final switch (a.length % 4) {
             case 0: do { if (*pa++ != *pb++) return false;
             case 3:      if (*pa++ != *pb++) return false;
             case 2:      if (*pa++ != *pb++) return false;
             case 1:      if (*pa++ != *pb++) return false;
                        } while (--n > 0);
         }
     }

     return true;
}
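
(By default arrayComp takes the ulong bit-compare branch; to exercise the
Duff's-device branch instead, instantiate it explicitly, e.g.
arrayComp!false(codon, "TAG").)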

int test(string data) {
     int count;
     foreach (i; 0 .. data.length - 3) {
         auto codon = data[i .. i + 3];
         if (arrayComp(codon, "TAG") || arrayComp(codon, "TGA") || arrayComp(codon, "TAA"))
             count++;
     }
     return count;
}

void main() {
     char[] data0 = cast(char[])read("data.txt");
     int n = 300;
     char[] data = new char[data0.length * n];
     for (size_t pos; pos < data.length; pos += data0.length)
         data[pos .. pos+data0.length] = data0;
     string sdata = assumeUnique(data);

     printf("%d\n", test(sdata));
}


Timings, dmd compiler, best of 4, seconds:
   D #1: 5.72
   D #4: 1.84
   D #5: 1.73
   Psy:  1.59
   D #8: 1.51
   D #7: 0.56 (like #6 without length comparisons)
   D #2: 0.55
   D #6: 0.47
   D #3: 0.34


Your function can't be inlined because it's big, so this code isn't faster than
the inlined code the compiler generates for a comparison like:
(codon.length == 3 && codon[0] == 'T' && codon[1] == 'A' && codon[2] == 'G')
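
For reference, a test() written in that fully inlined style looks roughly like
this (a sketch only; the exact source of version #3 isn't shown in this thread):

int test(string data) {
    int count;
    foreach (i; 0 .. data.length - 3) {
        auto codon = data[i .. i + 3];
        // codon.length is always 3 here, so only the character tests remain
        if (codon[0] == 'T' &&
            ((codon[1] == 'A' && (codon[2] == 'G' || codon[2] == 'A')) ||
             (codon[1] == 'G' && codon[2] == 'A')))
            count++;
    }
    return count;
}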

Bye,
bearophile

I don't have your data set, but on random data this came within a factor of 2 of your #3, without any fiddly code.

import std.algorithm: find;
import std.array: empty, popFront;

int test(string data) {
    int count;
    while (true) {
        // find with several needles returns a tuple; [0] is the rest of the
        // haystack, starting at the first match (empty if nothing matched)
        data = data.find("TAG", "TGA", "TAA")[0];

        if (data.empty) return count;

        count += 1;
        data.popFront; // step one char forward so overlapping starts are counted
    }
}

Also, this one is far easier to generalize for strings of different lengths, and such :-)
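
For example, a generalized counter might look like this (a sketch; countAny and
its signature are my own, not from the thread):

import std.algorithm: find;
import std.array: empty, popFront;

// Count the start positions in data where any of the given needles
// (possibly of different lengths) matches.
int countAny(Needles...)(string data, Needles needles)
    if (Needles.length >= 2) // find returns a tuple only for 2+ needles
{
    int count;
    while (true) {
        data = find(data, needles)[0];
        if (data.empty) return count;
        count++;
        data.popFront;
    }
}

Usage: countAny(sdata, "TAG", "TGA", "TAA"), or with mixed-length needles,
countAny(sdata, "TAGA", "TG").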
