On Thu, 14 Oct 2010 03:47:12 +0400, Andrei Alexandrescu
<seewebsiteforem...@erdani.org> wrote:

On 10/13/2010 06:23 PM, Denis Koroskin wrote:
On Thu, 14 Oct 2010 03:06:30 +0400, Andrei Alexandrescu
<seewebsiteforem...@erdani.org> wrote:
GNU offers two specialized routines:
http://www.gnu.org/s/libc/manual/html_node/Line-Input.html. They are many
times more efficient than anything that can be done in client code
using the stdio API. I'm thinking along those lines.


I can easily implement a similar interface on top of chunked reads:
ubyte[] readLine(ubyte[] lineBuffer); or bool readLine(ref ubyte[]
lineBuffer);

You can't.

I've quickly looked through an implementation, too, and it still
fills a buffer first, and then copies characters byte by byte to the
output string (calling realloc when needed) until a delimiter is found.
It is exactly as efficient as an external implementation.

Except you don't have an interface to copy byte by byte. Oops...

It does the same
amount of copying and memory allocations. "Many times more efficient" is
just an overestimation.

It's not. I measured because it was important in an application I was working on. It's shocking how some seemingly minor changes can make a big difference in throughput.

BTW, did you see my message about std.concurrency?

Yes, but I'll need to leave the bulk of it to Sean. Thanks.


Andrei

Okay. Now give me your best and tell me mine is slower (sorry for the lack
of comments):


/// Size in bytes (16 KiB) of the fixed read buffer declared inside ByLine.
enum BUFFER_SIZE = 16 * 1024;

import core.stdc.stdio;
import core.stdc.string;
import core.memory;

/**
 * Minimal wrapper around a C stdio FILE* that exposes chunked reads.
 *
 * The file is opened in mode "r" (text mode), exactly as before; on platforms
 * that distinguish text and binary streams the C runtime may translate line
 * endings before the bytes reach read().
 */
class InputStream
{
         /**
          * Opens fileName for reading.
          *
          * Params:
          *   fileName = zero-terminated path of the file to open.
          *
          * Throws: Exception if fileName is null or the file cannot be opened,
          *         instead of leaving a null FILE* behind for read() to trip on.
          */
         this(const char* fileName)
         {
                 if (fileName is null) {
                         throw new Exception("InputStream: file name is null");
                 }

                 f = fopen(fileName, "r");
                 if (f is null) {
                         throw new Exception(("InputStream: cannot open " ~
                                 fileName[0 .. strlen(fileName)]).idup);
                 }
         }

         /// Closes the file if close() was not called explicitly.
         ~this()
         {
                 close();
         }

         /**
          * Reads up to buffer.length bytes into buffer.
          *
          * Returns: the number of bytes actually stored, as reported by fread;
          *          0 when the stream is closed, at end of file, or on error.
          */
         size_t read(ubyte[] buffer)
         {
                 if (f is null) {
                         return 0;
                 }
                 return .fread(buffer.ptr, 1, buffer.length, f);
         }

         /// Closes the underlying FILE*. Safe to call more than once.
         void close()
         {
                 if (f !is null) {
                         fclose(f);
                         f = null;
                 }
         }

         /// Underlying C stream; null once the stream has been closed.
         FILE* f;
}

/**
 * Splits the bytes produced by an InputStream into delimiter-terminated lines.
 *
 * Data is pulled from the stream in chunks of up to BUFFER_SIZE bytes and
 * scanned with memchr. The read position is kept as offsets into `buffer`
 * rather than as pointers, so the struct holds no pointers into itself and
 * stays valid when it is copied or moved.
 */
struct ByLine
{
         /**
          * Params:
          *   inputStream = source of bytes; its read() is called whenever the
          *                 internal buffer has been consumed.
          *   delim       = byte that terminates a line.
          */
         this(InputStream inputStream, char delim = '\n')
         {
                 this.inputStream = inputStream;
                 this.delim = delim;
         }

         /// Replaces the buffered data with the next chunk from the stream.
         private void refill()
         {
                 pos = 0;
                 // A default-initialized ByLine has no stream: treat as empty input.
                 limit = inputStream is null ? 0 : inputStream.read(buffer[]);
         }

         /**
          * Reads the next line, reusing the caller's buffer when it is large
          * enough.
          *
          * Params:
          *   line = scratch buffer whose storage is reused for the result. When
          *          it is too small it is grown with GC.realloc, so it must be
          *          null/empty or memory that GC.realloc accepts.
          *          NOTE(review): whether every `new ubyte[n]` pointer satisfies
          *          that depends on the runtime's array layout - confirm.
          *
          * Returns: a slice holding the line including its trailing delimiter
          *          (the last line of the input may lack one), or null once the
          *          input is exhausted. An empty slice is never returned, so a
          *          file ending in the delimiter does not yield a phantom line.
          */
         ubyte[] readLine(ubyte[] line)
         {
                 if (eof) {
                         return null;
                 }

                 ubyte* lineStart = line.ptr;
                 size_t capacity = line.length;
                 size_t used = 0;

                 while (true) {
                         const(ubyte)* chunk = buffer.ptr + pos;
                         immutable size_t avail = limit - pos;
                         auto hit = cast(const(ubyte)*)memchr(chunk, delim, avail);

                         // Copy through the delimiter when found, otherwise
                         // everything that is still buffered.
                         immutable size_t count = hit is null
                                 ? avail
                                 : cast(size_t)(hit - chunk) + 1;

                         if (count > capacity - used) {
                                 // Track the new capacity so later checks compare
                                 // against the current allocation, not the old one.
                                 capacity = used + count;
                                 lineStart = cast(ubyte*)GC.realloc(lineStart, capacity);
                         }
                         if (count != 0) {
                                 memcpy(lineStart + used, chunk, count);
                                 used += count;
                                 pos += count;
                         }

                         if (hit !is null) {
                                 return lineStart[0 .. used];
                         }

                         refill();
                         if (limit == 0) {
                                 // End of input: hand back a final unterminated
                                 // line if there is one, otherwise signal the end.
                                 eof = true;
                                 return used != 0 ? lineStart[0 .. used] : null;
                         }
                 }
         }

         InputStream inputStream;
         private size_t pos;    // offset of the next unread byte in buffer
         private size_t limit;  // number of valid bytes currently in buffer
         private bool eof;      // set once the stream has reported no more data
         ubyte[BUFFER_SIZE] buffer;
         int delim;
}

/**
 * Counts the lines and bytes of "very-large-file.txt" using ByLine and prints
 * both totals to stdout.
 *
 * Returns: 0 on success, 1 if the file could not be opened.
 */
int main()
{
         InputStream inputStream = new InputStream("very-large-file.txt");
         if (inputStream.f is null) {
                 fprintf(stderr, "cannot open very-large-file.txt\n");
                 return 1;
         }
         // Release the file handle on every exit path; clearing the field keeps
         // any later close of the same stream from closing it twice.
         scope (exit) {
                 if (inputStream.f !is null) {
                         fclose(inputStream.f);
                         inputStream.f = null;
                 }
         }

         ubyte[] line = new ubyte[128];
         ByLine byLine = ByLine(inputStream);

         // 64-bit counters: an int would overflow on inputs larger than 2 GiB.
         ulong numLines = 0;
         ulong numChars = 0;
         while (true) {
                 // Feed the previous result back in so its storage is reused.
                 line = byLine.readLine(line);
                 if (line.ptr is null) {
                         break;
                 }

                 numChars += line.length;
                 numLines++;
         }

         printf("numLines: %llu\n", numLines);
         printf("numChars: %llu\n", numChars);

         return 0;
}

Reply via email to