Here's the latest with naming convention (hopefully) followed. I've implemented my own squeeze() function and used sizeof in the memmove calls.
How can I specify wide strings for the literals? Thanks, Ben module main; import std.algorithm; import std.array; import std.c.string; import std.string; import std.stdio; template regex(CharT) { struct BasicStringToken { bool negated = false; CharT[] charset; enum size_t MAX_CHARS = CharT.max + 1; enum size_t START_CHAR = cast(CharT) 0x80 < 0 ? 0x80 : 0; this(const bool negated_, ref CharT[] charset_) { negated = negated_; charset = charset_; } void removeDuplicates() { charset.sort; squeeze(charset); } void normalise() { if (charset.length == MAX_CHARS) { negated = !negated; charset.clear(); } else if (charset.length > MAX_CHARS / 2) { negate(); } } void negate() { CharT curr_char = START_CHAR; CharT[] temp; CharT *ptr = cast(CharT *) 0; CharT *curr = charset.ptr; CharT *end = curr + charset.length; size_t i = 0; negated = !negated; temp.length = MAX_CHARS - charset.length; ptr = temp.ptr; while (curr < end) { while (*curr > curr_char) { *ptr = curr_char; ++ptr; ++curr_char; ++i; } ++curr_char; ++curr; ++i; } for (; i < MAX_CHARS; ++i) { *ptr = curr_char; ++ptr; ++curr_char; } charset = temp; } bool empty() { return charset.length == 0 && !negated; } bool any() { return charset.length == 0 && negated; } void clear() { negated = false; charset.length = 0; } void intersect(ref BasicStringToken rhs, ref BasicStringToken overlap) { if ((any() && rhs.any()) || (negated == rhs.negated && !any() && !rhs.any())) { intersectSameTypes(rhs, overlap); } else { intersectDiffTypes(rhs, overlap); } } private: void intersectSameTypes(ref BasicStringToken rhs, ref BasicStringToken overlap) { if (any()) { clear(); overlap.negated = true; rhs.clear(); } else { CharT *iter = charset.ptr; CharT *end = iter + charset.length; CharT *rhs_iter = rhs.charset.ptr; CharT *rhs_end = rhs_iter + rhs.charset.length; overlap.negated = negated; while (iter != end && rhs_iter != rhs_end) { if (*iter < *rhs_iter) { ++iter; } else if (*iter > *rhs_iter) { ++rhs_iter; } else { overlap.charset ~= *iter; memmove(iter, iter + 1, (charset.ptr + charset.length - iter) * CharT.sizeof); --end; charset.length -= 1; memmove(rhs_iter, rhs_iter + 1, (rhs.charset.ptr + rhs.charset.length - rhs_iter) * CharT.sizeof); --rhs_end; rhs.charset.length -= 1; } } if (negated) { // duplicates already merged // src, dest merge(charset, overlap.charset); // duplicates already merged // src, dest merge(rhs.charset, overlap.charset); negated = false; rhs.negated = false; swap(charset, rhs.charset); normalise(); overlap.normalise(); rhs.normalise(); } else if (!overlap.charset.length == 0) { normalise(); overlap.normalise(); rhs.normalise(); } } } void intersectDiffTypes(ref BasicStringToken rhs, ref BasicStringToken overlap) { if (any()) { intersectAny(rhs, overlap); } else if (negated) { intersectNegated(rhs, overlap); } else // negated == false { intersectCharset(rhs, overlap); } } void intersectAny(ref BasicStringToken rhs, ref BasicStringToken overlap) { if (rhs.negated) { rhs.intersectNegated(this, overlap); } else // rhs.negated == false { rhs.intersectCharset(this, overlap); } } void intersectNegated(ref BasicStringToken rhs, ref BasicStringToken overlap) { if (rhs.any()) { overlap.negated = true; overlap.charset = charset; rhs.negated = false; rhs.charset = charset; clear(); } else // rhs.negated == false { rhs.intersectCharset(this, overlap); } } void intersectCharset(ref BasicStringToken rhs, ref BasicStringToken overlap) { if (rhs.any()) { overlap.charset = charset; rhs.negated = true; rhs.charset = charset; clear(); } else // rhs.negated == true { CharT *iter = charset.ptr; CharT *end = iter + charset.length; CharT *rhs_iter = rhs.charset.ptr; CharT *rhs_end = rhs_iter + rhs.charset.length; while (iter != end && rhs_iter != rhs_end) { if (*iter < *rhs_iter) { overlap.charset ~= *iter; rhs.charset.length += 1; rhs_iter = rhs.charset.ptr; rhs_end = rhs_iter + rhs.charset.length; memmove(rhs_iter + 1, rhs_iter, (rhs.charset.length - (rhs_end - rhs_iter - 1)) * CharT.sizeof); ++rhs_iter; memmove(iter, iter + 1, (charset.ptr + charset.length - iter) * CharT.sizeof); charset.length -= 1; --end; } else if (*iter > *rhs_iter) { ++rhs_iter; } else { ++iter; ++rhs_iter; } } if (iter != end) { CharT[] temp; temp.length = end - iter; memmove(temp.ptr, iter, temp.length * CharT.sizeof); // nothing bigger in rhs than iter // src, dest merge(temp, overlap.charset); memmove(iter, iter + 1, (charset.ptr + charset.length - iter) * CharT.sizeof); charset.length -= 1; } if (!overlap.charset.empty()) { merge(overlap.charset, rhs.charset); // possible duplicates, so check for any and erase. squeeze(rhs.charset); normalise(); overlap.normalise(); rhs.normalise(); } } } void squeeze(ref CharT[] str) { if (str.length > 1) { CharT *write = str.ptr; CharT *end = write + str.length; CharT *read = write + 1; while (read != end) { while (read != end && *read == *write) { ++read; } if (read == end) break; ++write; if (read > write) { *write = *read; } ++read; } str.length = write + 1 - str.ptr; } } void merge(ref CharT[] src, ref CharT[] dest) { CharT[] temp; CharT *ptr; CharT *iter = src.ptr; CharT *end = iter + src.length; CharT *dest_iter = dest.ptr; CharT *dest_end = dest_iter + dest.length; temp.length = src.length + dest.length; ptr = temp.ptr; while (iter != end && dest_iter != dest_end) { if (*iter < *dest_iter) { *ptr++ = *iter++; } else { *ptr++ = *dest_iter++; } } while (iter != end) { *ptr++ = *iter++; } while (dest_iter != dest_end) { *ptr++ = *dest_iter++; } dest = temp; } }; } int main(char[][]argv) { regex!(char).BasicStringToken lhs; regex!(char).BasicStringToken rhs; regex!(char).BasicStringToken intersect; lhs.charset = "aaabbc".dup; lhs.negated = true; lhs.removeDuplicates(); rhs.charset = "bccddd".dup; rhs.negated = true; rhs.removeDuplicates(); writeln(lhs.charset, '(', lhs.negated, ") intersect ", rhs.charset, '(', rhs.negated, ") ="); lhs.intersect(rhs, intersect); writeln(lhs.charset, '(', lhs.negated, "), ", rhs.charset, '(', rhs.negated, "), ", intersect.charset, '(', intersect.negated, ')'); return 0; }