On 03/31/2012 11:53 AM, Ali Çehreli wrote:

> The solution is to use ranges when pulling Unicode characters out of
> strings. std.stdin does not provide this yet, but it will eventually
> happen (so I've heard :)).

Here is a Unicode character range, which is unfortunately pretty inefficient because it relies on an exception that is thrown from isValidDchar! :p

import std.stdio;
import std.utf;
import std.array;

struct UnicodeRange
{
    File file;
    char[4] codes;
    bool ready;

    this(File file)
    {
        this.file = file;
        this.ready = false;
    }

    bool empty() const @property
    {
        return file.eof();
    }

    dchar front() const @property
    {
        if (!ready) {
            // Sorry, no 'mutable' in D! :p
            UnicodeRange * mutable_this = cast(UnicodeRange*)&this;
            mutable_this.readNext();
        }
        return codes.front;
    }

    void popFront()
    {
        codes = codes.init;
        ready = false;
    }

    void readNext()
    {
        foreach (ref code; codes) {
            file.readf("%s", &code);

            if (file.eof()) {
                codes[] = '\0';
                ready = false;
                break;
            }

            // Expensive way of determining "ready"!
            try {
                if (isValidDchar(codes.front)) {
                    ready = true;
                    break;
                }

            } catch (Exception) {
                // not ready
            }
        }
    }
}

UnicodeRange byUnicode(File file = stdin)
{
    return UnicodeRange(file);
}

void main()
{
    foreach(c; byUnicode()) {
        writeln(c);
    }
}

Ali

Reply via email to