As a little fun thing to do I implemented it for you.

It won't allocate, which should make it a good fit for you.
With a bit of work you could make Result keep its own buffers for the result instead of slicing the input array, which would allow the source to be an input range itself.

I made this up on dpaste and single quotes were not playing nicely there. So you'll see "\r"[0] as a workaround.

/**
 * One four-line FASTQ record: `@id`, sequence letters, a `+` separator line
 * and the quality string.
 *
 * All three fields are slices of the text handed to `parse`; nothing is
 * copied, so they stay valid only as long as that text does.
 */
struct FastQRecord {
        const(char)[] sequenceId;
        const(char)[] sequenceLetters;
        const(char)[] quality;

        /**
         * Lazily splits `from` into FASTQ records.
         *
         * Params:
         *   from = text containing zero or more four-line records; both
         *          "\n" and "\r\n" line endings are accepted and the final
         *          line does not need a terminating newline.
         *
         * Returns: an input range (`empty` / `front` / `popFront`) of
         *          `FastQRecord` values whose fields slice `from`.
         */
        static auto parse(const(char)[] from) {
                // `static`: the range never touches parse's stack frame, so it
                // does not need a hidden context pointer.
                static struct Result {
                        private {
                                const(char)[] source; // text not consumed yet
                                FastQRecord value;    // record reported by front
                                bool isEmpty;
                        }

                        this(const(char)[] source) {
                                this.source = source;
                                popFront; // prime the first record
                        }

                        @property {
                                FastQRecord front() {
                                        return value;
                                }

                                bool empty() {
                                        return isEmpty;
                                }
                        }

                        /// Parses the next record into `value`, or marks the
                        /// range empty when only blank input is left.
                        void popFront() {
                                skipBlanks();

                                // Covers null, "" and whitespace-only input, so no
                                // phantom default record is ever produced.
                                if (source.length == 0) {
                                        source = null;
                                        isEmpty = true;
                                        return;
                                }

                                assert(source[0] == '@', "FASTQ record must start with '@'");

                                // skipBlanks guarantees the header line holds at least
                                // one character, so dropping the '@' cannot go out of range.
                                value.sequenceId = takeLine()[1 .. $];

                                assert(source.length, "FASTQ record truncated after header line");
                                value.sequenceLetters = takeLine();

                                assert(source.length, "FASTQ record truncated after sequence line");
                                takeLine(); // '+' separator line (optionally repeats the id); ignored

                                value.quality = takeLine();
                        }

                        /// Drops leading characters in the range 0 .. ' '
                        /// (control characters and spaces) from `source`.
                        private void skipBlanks() {
                                size_t i = 0;
                                while (i < source.length && source[i] <= ' ')
                                        ++i;
                                source = source[i .. $];
                        }

                        /// Removes one line from the front of `source` and returns
                        /// it without its "\n" / "\r\n" terminator. The end of the
                        /// input also terminates a line.
                        private const(char)[] takeLine() {
                                import std.string : indexOf;

                                const(char)[] line;
                                immutable ptrdiff_t newline = source.indexOf('\n');

                                if (newline < 0) {
                                        // Last line of the input, no trailing newline.
                                        line = source;
                                        source = null;
                                } else {
                                        line = source[0 .. newline];
                                        source = source[newline + 1 .. $];
                                }

                                // Length check first: an empty line has no last character.
                                if (line.length && line[$ - 1] == '\r')
                                        line = line[0 .. $ - 1];

                                return line;
                        }
                }

                return Result(from);
        }
}

/// Entry point with an empty body; it performs no work.
void main()
{
}

// Parses a three-record sample, prints each record and checks the count.
unittest {
        // One ordinary double-quoted literal. D has no triple-quoted strings:
        // `"""…"""` is three adjacent literals and relies on implicit string
        // concatenation, which current compilers reject. The [1 .. $] slice
        // drops the newline that follows the opening quote.
        string input = "
@seq1
TTATTTTAAT
+
?+BBB/DHH@
@seq2
GACCCTTTGCA
+
?+BHB/DIH@
@SEQ_ID
GATTTGGGGTTCAAAGCAGTATCGATCAAATAGTAAATCCATTTGTTCAACTCACAGTTT
+
!''*((((***+))%%%++)(%%%%).1***-+*''))**55CCF>>>>>>CCCCCCC65          
"[1 .. $];

        size_t count;
        foreach(record; FastQRecord.parse(input)) {
                import std.stdio;
                writeln(record);
                ++count;
        }

        // The sample above contains exactly three records.
        assert(count == 3);
}

---
This email has been checked for viruses by Avast antivirus software.
https://www.avast.com/antivirus

Reply via email to