
#include "xstring.h"

/** Sentinel position value of xstring; taken directly from ustring::npos. */
const xstring::size_type xstring::npos =  ustring::npos;

/**
 * Size of the underlying storage string.
 * @return whatever _string.size() reports (storage units; see length()
 *         for the traits-based count)
 */
xstring::size_type xstring::size() const {
	const size_type stored = _string.size();
	return stored;
}

/**
 * Length of the string as computed by xstring_traits::length() over the
 * null-terminated buffer (presumably a count of UTF-8 characters -- confirm
 * against xstring_traits).
 */
xstring::size_type xstring::length() const {
	XMLChar* const text = (XMLChar*)_string.c_str();
	return xstring_traits::length(text);
}

/** @return the max_size() of the underlying string */
xstring::size_type xstring::max_size() const {
	const size_type limit = _string.max_size();
	return limit;
}

/** @return the capacity() of the underlying string */
xstring::size_type xstring::capacity() const {
	const size_type room = _string.capacity();
	return room;
}

/** @return true when the underlying string holds nothing */
bool xstring::empty() const {
	const bool nothing_stored = _string.empty();
	return nothing_stored;
}

/**
 * Unchecked element access.
 * @param n index into the underlying string (passed straight to its operator[])
 * @return a reference to the stored element, reinterpreted as xstring::reference
 */
xstring::reference xstring::operator[](xstring::size_type n) {
	return reinterpret_cast<xstring::reference>(this->_string[n]);
}

/**
 * Unchecked read-only element access.
 * @param n index into the underlying string (passed straight to its operator[])
 * @return a const reference to the stored element
 */
xstring::const_reference xstring::operator[](xstring::size_type n) const {
	return reinterpret_cast<xstring::const_reference>(this->_string[n]);
}

/** @return the underlying string's c_str() buffer, viewed as const XMLChar* */
const XMLChar* xstring::c_str() const {
	const XMLChar* const text = reinterpret_cast<const XMLChar*>(_string.c_str());
	return text;
}

/** @return the underlying string's data() buffer, viewed as const XMLChar* */
const XMLChar* xstring::data() const {
	const XMLChar* const raw = reinterpret_cast<const XMLChar*>(_string.data());
	return raw;
}

/** UTF8 OFFSET FUNCTIONS
 * The utf8_byte_offset() functions return npos if the offset is npos or
 * out of range. The caller should decide whether npos is a valid argument
 * that simply marks the whole string, or whether it is not allowed (e.g.
 * for start positions). In the latter case std::out_of_range should be
 * thrown, but usually ustring will do that for us.
 */
 
/**
 * Convert a character offset into a byte offset for a null-terminated string.
 * @param str the null-terminated source string; must not be NULL
 * @param offset the number of characters to advance
 * @return the byte offset of that character, or npos when offset is npos or
 *         the terminator is reached before offset characters were skipped
 * @throws std::invalid_argument when str is NULL
 */
static xstring::size_type utf8_byte_offset(const unsigned char* str, xstring::size_type offset) {
	if (!str)
		throw std::invalid_argument("utf8_byte_offset: str");

	if (offset == xstring::npos)
		return xstring::npos;

	const unsigned char* cursor = str;
	while (offset != 0) {
		/* ran out of string before consuming the requested characters */
		if (*cursor == '\0')
			return xstring::npos;
		/* step over one whole character, however many bytes it occupies */
		cursor += xstring_traits::utf8_size((XMLChar*)cursor);
		--offset;
	}
	return (cursor - str);
}

/**
 * Convert a character offset into a byte offset, bounded by a byte length
 * instead of a null terminator.
 * @param str the source string; must not be NULL
 * @param offset the number of characters to advance
 * @param maxlen the maximum length, in bytes, to traverse
 * @return the byte offset, or npos when offset is npos or the end of the
 *         buffer is reached while characters remain to be skipped
 * @throws std::invalid_argument when str is NULL
 */
static xstring::size_type utf8_byte_offset(const unsigned char* str, xstring::size_type offset, xstring::size_type maxlen) {
	if (!str)
		throw std::invalid_argument("utf8_byte_offset: str");

	if (offset == xstring::npos)
		return xstring::npos;

	const unsigned char* const limit = str + maxlen;
	const unsigned char* cursor = str;

	while (offset != 0) {
		/* the end-of-buffer test comes first so nothing at/after limit is decoded */
		if (cursor >= limit)
			return xstring::npos;
		cursor += xstring_traits::utf8_size((XMLChar*)cursor);
		--offset;
	}
	return (cursor - str);
}

/**
 * Convert a character offset into a byte offset within a ustring; traversal
 * is bounded by str.size().
 * @param str the source string
 * @param offset the offset, in characters, to advance
 * @return the corresponding byte offset, or npos (see the bounded overload)
 */
inline xstring::size_type utf8_byte_offset(const ustring& str, xstring::size_type offset) {
  const xstring::size_type byte_limit = str.size();
  return utf8_byte_offset(str.data(), offset, byte_limit);
}



/**
 * Calculate the number of characters the given byte offset represents.
 * @param str the source string; n bytes from str are decoded, so the caller
 *            must guarantee they are readable (no bounds check is done here)
 * @param n the byte offset to convert; npos is passed through unchanged
 * @return the number of characters whose encoding starts before byte n,
 *         or npos when n is npos
 */
static xstring::size_type utf8_char_offset(const unsigned char*str,xstring::size_type n) {
	if (n == xstring::npos)
		return xstring::npos;

	const unsigned char* p = str;
	xstring::size_type ret = 0;
	/* compare in size_type: a plain `unsigned` would truncate the distance
	 * on platforms where size_type is wider than unsigned int */
	while (static_cast<xstring::size_type>(p - str) < n) {
		++ret;
		p += xstring_traits::utf8_size( (const XMLChar*) p);
	}
	return ret;
}

/**
 * Translates a (start character, character count) pair into the
 * (start byte, byte count) pair the underlying string operations expect.
 *   i - byte index of character ci, or npos when ci is out of range
 *   n - number of bytes covered by cn characters starting at i, or npos
 *       when i is npos, cn is npos, or cn runs past the end of the string
 */

struct utf8_bounds
{
  xstring::size_type i;
  xstring::size_type n;

  utf8_bounds(const ustring& str, xstring::size_type ci, xstring::size_type cn)
  : i(utf8_byte_offset(str, ci)),
    /* i is declared (and therefore initialized) before n, so it is safe to read here */
    n(i == xstring::npos
        ? xstring::npos
        : utf8_byte_offset(str.data() + i, cn, str.size() - i))
  {
  }
};


/** CONSTRUCTORS */

/** Default constructor: starts out with an empty underlying string. */
xstring::xstring()
	: _string()
{
}

/**
 * Generalization of the copy constructor: copies a substring of s.
 * @param s the source string to copy
 * @param pos the starting position to copy, in characters
 * @param n the maximum number of characters to copy
 */
xstring::xstring(const xstring& s, size_type pos, size_type n) : _string() {
	/* convert the character range into a byte range of s */
	const utf8_bounds range(s._string, pos, n);
	_string.assign(s._string, range.i, range.n);
}

/**
 * Construct a string from a null-terminated character array.
 * @param s the null-terminated character string (utf-8 encoded), must not be NULL
 */
xstring::xstring(const XMLChar* s)
	: _string(s)
{
}

/**
 * Construct a string from the leading characters of a character array.
 * @param s the character array, encoded in utf-8
 * @param n the length to copy (in characters); converted to a byte length
 *          with utf8_byte_offset() before copying
 */
xstring::xstring(const XMLChar* s, size_type n) : _string() {
	const xstring::size_type byte_count = utf8_byte_offset(s, n);
	_string.assign(s, byte_count);
}

/**
 * Create a string with n copies of the character c points to.
 * @param n the number of copies of the character
 * @param c a pointer to the (possibly multi-byte) character to copy; its
 *          byte length is taken from xstring_traits::utf8_size()
 */
xstring::xstring(size_type n, XMLChar* c) : _string() {
	const xstring::size_type x = xstring_traits::utf8_size(c);
	_string.reserve(x * n + 1);
	/* append exactly n copies; the former `while (--n)` produced n-1 copies
	 * and wrapped around when n was 0 */
	for (size_type k = 0; k < n; ++k)
		_string.append(c, x);
}

/**
 * Create a string holding n copies of the single-unit character c.
 * @param n the number of copies
 * @param c the character to repeat
 */
xstring::xstring(size_type n, XMLChar c)
	: _string(n, static_cast<char>(c))
{
}

/**
 * Construct from a null-terminated wide string; the conversion is done by
 * assign(const wchar_t*).
 * @param s the null-terminated wide string
 */
xstring::xstring(const wchar_t *s)
	: _string()
{
	this->assign(s);
}

/**
 * Construct from a std::wstring; the conversion is done by
 * assign(const wchar_t*) on s.c_str().
 * @param s the wide string to convert
 */
xstring::xstring(const std::wstring& s)
	: _string()
{
	const wchar_t* const wide = s.c_str();
	this->assign(wide);
}


/** OPERATORS */
/** Copy assignment: takes over the contents of s. */
xstring& xstring::operator=(const xstring& s) {
	_string.assign(s._string);
	return *this;
}


/** Assign a null-terminated wide string; see assign(const wchar_t*). */
xstring& xstring::operator=(const wchar_t *s) {
	this->assign(s);
	return *this;
}

/**
 * Assign a std::wstring by handing s.c_str() to assign(const wchar_t*).
 * NOTE(review): this function was previously annotated as assuming a
 * little endian structure; nothing in this body depends on byte order --
 * confirm whether that note is still relevant.
 */
xstring& xstring::operator=(const std::wstring& s) {
	const wchar_t* const wide = s.c_str();
	this->assign(wide);
	return *this;
}

/** Assign a null-terminated character array (encoded in utf-8) to this string. */
xstring& xstring::operator=(const XMLChar* s) {
	_string.assign(s);
	return *this;
}

/** Replace the contents with the single character c. */
xstring& xstring::operator=(XMLChar c) {
	_string.assign(1, c);
	return *this;
}

/**
 * Reserve storage in the underlying string.
 * @param n the number of BYTES (not characters) to reserve
 */
void xstring::reserve(size_t n) {
	const size_t wanted_bytes = n;
	_string.reserve(wanted_bytes);
}

/** Exchange the contents of *this and s by swapping the underlying strings. */
void xstring::swap(xstring& s) {
	s._string.swap(_string);
}

/** INSERT METHODS */

/**
 * Insert one copy of x before the iterator pos.
 * @return an iterator at the position where x was inserted
 */
xstring::iterator xstring::insert(xstring::iterator pos, const value_type& x) {
	/* remember the position as an index; the result is rebuilt from begin() afterwards */
	const size_type at = pos.base() - _string.begin();
	_string.insert(at, 1, x);
	return iterator(_string.begin() + at);
}

/**
 * Insert n copies of x before the iterator pos.
 * @return an iterator at the position where the first copy was inserted
 */
xstring::iterator xstring::insert(xstring::iterator pos, size_type n, const value_type& x) {
	/* remember the position as an index; the result is rebuilt from begin() afterwards */
	const size_type at = pos.base() - _string.begin();
	_string.insert(at, n, x);
	return iterator(_string.begin() + at);
}


/**
 * Insert the whole of s before character position pos.
 * @param pos the insertion point, in characters
 * @param s the string to insert
 */
xstring& xstring::insert(size_type pos, const xstring& s) {
	const size_type byte_pos = utf8_byte_offset(_string, pos);
	_string.insert(byte_pos, s._string);
	return *this;
}

/**
 * Inserts a substring of s before pos.
 * @param pos the insertion point in *this, in characters
 * @param s the string to take the substring from
 * @param pos1 the first character of s to insert
 * @param pos2 the number of characters of s to insert (used as a count,
 *             despite the name)
 */
xstring& xstring::insert(size_type pos, const xstring& s, size_type pos1, size_type pos2) {
	const utf8_bounds bounds (s._string,pos1,pos2);
	/* the source string must be passed explicitly; without it the call
	 * resolves to insert(pos, count, char) and inserts garbage bytes */
	_string.insert(utf8_byte_offset(_string,pos), s._string, bounds.i, bounds.n);
	return *this;
}

/**
 * Insert the null-terminated array s before character position pos.
 * @param pos the insertion point, in characters
 * @param s the null-terminated characters to insert
 */
xstring& xstring::insert(size_type pos, const XMLChar* s) {
	const size_type byte_pos = utf8_byte_offset(_string, pos);
	_string.insert(byte_pos, s);
	return *this;
}

/**
 * Inserts the first n characters of s before pos.
 * @param pos the insertion point in *this, in characters
 * @param s the utf-8 encoded source characters
 * @param n the number of characters of s to insert
 */
xstring& xstring::insert(size_type pos, const XMLChar*s, size_type n) {
	/* the byte length of the inserted text depends only on n; the insertion
	 * point pos belongs to *this and must not be added to it */
	_string.insert(utf8_byte_offset(_string,pos), s, utf8_byte_offset(s, n));
	return *this;
}

/**
 * Insert n copies of the single-unit character c before character position pos.
 */
xstring& xstring::insert(size_type pos, size_type n, XMLChar c) {
	const size_type byte_pos = utf8_byte_offset(_string, pos);
	_string.insert(byte_pos, n, static_cast<char>(c));
	return *this;
}

/**
 * Insert n copies of c before character position pos.
 * @todo:  need to check if c is a utf8 char and if not encode it to utf8
 */
xstring& xstring::insert(size_type pos, size_type n, char c) {
	const size_type byte_pos = utf8_byte_offset(_string, pos);
	_string.insert(byte_pos, n, c);
	return *this;
}

/** Append the whole of s to the end of *this. */
xstring& xstring::append(const xstring& s) {
	const ustring& tail = s._string;
	_string.append(tail);
	return *this;
}

/**
 * Append a substring of s to *this.
 * @param s the source string
 * @param pos the first character of s to append
 * @param n the number of characters to append
 */
xstring& xstring::append(const xstring&s, size_type pos, size_type n) {
	/* translate the character range into a byte range of s */
	const utf8_bounds range(s._string, pos, n);
	_string.append(s._string, range.i, range.n);
	return *this;
}

/** Append the null-terminated character array s to *this. */
xstring& xstring::append(const XMLChar* s) {
	_string.append(s);
	return *this;
}

/**
 * Append the first n characters of s to *this.
 * @param s the utf-8 encoded source characters
 * @param n the number of characters (not bytes) to append
 */
xstring& xstring::append(const XMLChar* s, size_type n) {
	const size_type byte_count = utf8_byte_offset(s, n);
	_string.append(s, byte_count);
	return *this;
}

/** Append n copies of the single-unit character c to *this. */
xstring& xstring::append(size_type n, XMLChar c) {
	_string.append(n, static_cast<char>(c));
	return *this;
}

/** Append n copies of the plain char c to *this (no utf-8 encoding is applied). */
xstring& xstring::append(size_type n, char c) {
	const char fill = c;
	_string.append(n, fill);
	return *this;
}

/** Append the single character c to the end of *this. */
void xstring::push_back(XMLChar c) {
	_string.push_back(c);
}

/**
 * Erase the element at iterator p.
 * @return an iterator wrapping whatever the underlying erase() returns
 */
xstring::iterator xstring::erase(iterator p) {
	iterator following(_string.erase(p.base()));
	return following;
}

/**
 * Erase the range [first, last).
 * @return an iterator wrapping whatever the underlying erase() returns
 */
xstring::iterator xstring::erase(iterator first, iterator last) {
	iterator following(_string.erase(first.base(), last.base()));
	return following;
}

/**
 * Erases a range of characters.
 * @param pos the first character to erase
 * @param n the number of characters to erase; npos erases to the end
 */
xstring& xstring::erase(size_type pos , size_type n) {
	/* ustring::erase() expects (byte index, byte count); utf8_bounds yields
	 * exactly that pair, whereas the byte offset of pos+n is an absolute
	 * position and erased too much whenever pos > 0 */
	const utf8_bounds bounds(_string, pos, n);
	_string.erase(bounds.i, bounds.n);
	return *this;
}

/**
 * Resize the underlying string.
 * @param n the new size, handed to the underlying string unchanged (no
 *          character-to-byte conversion is done here)
 * @param c the fill value used when the string grows
 */
void xstring::resize(size_type n, value_type c) {
	const char fill = static_cast<char>(c);
	_string.resize(n, fill);
}

/** Remove every element, leaving the string empty. */
void xstring::clear() {
	_string.clear();
}

/** Synonym for operator=: replaces the contents with a copy of s. */
xstring& xstring::assign(const xstring& s) {
	const ustring& source = s._string;
	_string.assign(source);
	return *this;
}

/**
 * Replace the contents with a substring of s.
 * @param s the source string
 * @param pos the first character of s to take
 * @param n the number of characters to take
 */
xstring& xstring::assign(const xstring& s, size_type pos, size_type n) {
	/* translate the character range into a byte range of s */
	const utf8_bounds range(s._string, pos, n);
	_string.assign(s._string, range.i, range.n);
	return *this;
}

/**
 * Replace the contents with the first n characters of s.
 * @param s the utf-8 encoded source characters
 * @param n the number of characters (not bytes) to take
 */
xstring& xstring::assign(const XMLChar* s, size_type n) {
	const size_type byte_count = utf8_byte_offset(s, n);
	_string.assign(s, byte_count);
	return *this;
}

/** Replace the contents with the null-terminated character array s. */
xstring& xstring::assign(const XMLChar*s) {
	_string.assign(s);
	return *this;
}

/** Erases the existing characters and replaces them by n copies of c. */
xstring& xstring::assign(size_type n, XMLChar c) {
	const char fill = static_cast<char>(c);
	_string.assign(n, fill);
	return *this;
}

/**
 * Replace the contents with the UTF-8 encoding of a null-terminated wide
 * string. UTF-16 surrogate pairs in the input are combined into a single
 * code point before encoding.
 * @param s the null-terminated wide string; it is dereferenced
 *          unconditionally, so it must not be NULL
 * @return *this
 * @throws encoding_error when a high surrogate is not followed by a low
 *         surrogate; the string then holds whatever was converted so far
 */
xstring& xstring::assign(const wchar_t *s) {
	unsigned int c, d;
	int bits;
	clear();
	/* double the length should be enough to avoid many resizes */
	if (*s)
		_string.reserve(wcslen(s)*2);

	const wchar_t* i = s;

	while (*i) {
		c = *i++;
		/* high surrogate: compare the full value so that code points above
		 * U+FFFF held in a wide wchar_t (e.g. U+1D800) are not mistaken
		 * for surrogates */
		if (c >= 0xD800 && c <= 0xDBFF) {
			d = *i++;
			if (d >= 0xDC00 && d <= 0xDFFF) {
				c &= 0x03FF;
				c <<= 10;
				c |= d & 0x03FF;
				c += 0x10000;
			} else
				throw encoding_error("invalid surrogate");
		}

		/* assertion: c is a single UCS-4 value.
		 * Emit the lead byte; `bits` is the shift of the first continuation
		 * byte (negative means there are none). */
		if (c < 0x80) {
			_string.append(1,c);
			bits = -6;
		} else if (c < 0x800) {
			/* two-byte form carries 11 bits, i.e. up to U+07FF (not U+7FFF) */
			_string.append(1,((c >> 6) & 0x1F) | 0xC0);
			bits = 0;
		} else if (c < 0x10000) {
			_string.append(1, ((c >> 12) & 0x0F) | 0xE0);
			bits = 6;
		} else {
			_string.append(1,((c >> 18) & 0x07) | 0xF0);
			bits = 12;
		}
		/* continuation bytes: six payload bits each, most significant first */
		for (; bits >= 0; bits -= 6) {
			_string.append(1, (( c >> bits) & 0x3F) | 0x80);
		}
	}
	return *this;
}


/**
 * Replace a substring of *this with the string s.
 * @param pos the first character to replace
 * @param n the number of characters to replace
 * @param s the replacement text
 */
xstring& xstring::replace(size_type pos, size_type n, const xstring& s) {
	const utf8_bounds target(_string, pos, n);
	_string.replace(target.i, target.n, s._string);
	return *this;
}

/**
 * Replace a substring of *this with a substring of s.
 * @param pos the first character of *this to replace
 * @param n the number of characters of *this to replace
 * @param s the string the replacement is taken from
 * @param pos1 the first character of s to use
 * @param n1 the number of characters of s to use
 */
xstring& xstring::replace(size_type pos, size_type n, const xstring& s, size_type pos1, size_type n1) {
	const utf8_bounds target(_string, pos, n);
	const utf8_bounds source(s._string, pos1, n1);
	_string.replace(target.i, target.n, s._string, source.i, source.n);
	return *this;
}

/**
 * Replace a substring of *this with the first n1 characters of s.
 * @param pos the first character to replace
 * @param n the number of characters to replace
 * @param s the utf-8 encoded replacement characters
 * @param n1 the number of characters of s to use
 */
xstring& xstring::replace(size_type pos, size_type n, const XMLChar* s, size_type n1) {
	const utf8_bounds target(_string, pos, n);
	const size_type source_bytes = utf8_byte_offset(s, n1);
	_string.replace(target.i, target.n, s, source_bytes);
	return *this;
}

/**
 * Replace a substring of *this with a null-terminated character array
 * (utf-8 encoded).
 * @param pos the first character to replace
 * @param n the number of characters to replace
 * @param s the null-terminated replacement
 */
xstring& xstring::replace(size_type pos, size_type n, const XMLChar* s) {
	const utf8_bounds target(_string, pos, n);
	_string.replace(target.i, target.n, s);
	return *this;
}

/**
 * Replace a substring of *this with n1 copies of c.
 * @param pos the first character to replace
 * @param n the number of characters to replace
 * @param n1 how many copies of c to put in their place
 * @param c the single-unit character to repeat
 */
xstring& xstring::replace(size_type pos, size_type n, size_type n1, XMLChar c) {
	const utf8_bounds target(_string, pos, n);
	_string.replace(target.i, target.n, n1, static_cast<char>(c));
	return *this;
}

/** Replace the iterator range [first, last) with the contents of s. */
xstring& xstring::replace(iterator first, iterator last, const xstring& s) {
	const ustring& replacement = s._string;
	_string.replace(first.base(), last.base(), replacement);
	return *this;
}

/**
 * Replace the iterator range [first, last) with the first n characters of s.
 * @param s the utf-8 encoded replacement characters
 * @param n the number of characters (not bytes) of s to use
 */
xstring& xstring::replace(iterator first, iterator last, const pointer s, size_type n) {
	/* byte length covered by the first n characters of s */
	const size_type source_bytes = utf8_byte_offset(s, n);
	_string.replace(first.base(), last.base(), s, source_bytes);
	return *this;
}

/** Replace the iterator range [first, last) with the null-terminated array s. */
xstring& xstring::replace(iterator first, iterator last, const pointer s) {
	_string.replace(first.base(),
	                last.base(),
	                s);
	return *this;
}

/** Replace the iterator range [first, last) with n copies of c. */
xstring& xstring::replace(iterator first, iterator last, size_type n, value_type c) {
	_string.replace(first.base(),
	                last.base(),
	                n,
	                c);
	return *this;
}

/**
 * Copies a substring of *this to a buffer.
 * @param buf the destination buffer; must be large enough for the bytes of
 *            the requested characters (no terminator is written here)
 * @param n the number of characters to copy
 * @param pos the first character to copy
 * @return the value returned by the underlying copy() (a byte count)
 */
xstring::size_type xstring::copy(XMLChar* buf, size_type n, size_type pos ) const {
	const utf8_bounds bounds(_string,pos,n);
	/* bounds.n is already a byte COUNT measured from bounds.i (or npos for
	 * "to the end"); subtracting bounds.i from it was wrong */
	return _string.copy(buf, bounds.n, bounds.i);
}

/**
 * Searches for s as a substring of *this, beginning at character pos.
 * @return the character position of the match, or npos when there is none
 */
xstring::size_type xstring::find(const xstring& s, size_type pos ) {
	const size_type start_byte = utf8_byte_offset(_string, pos);
	const size_type hit_byte = _string.find(s._string, start_byte);
	/* convert the byte position of the match back into a character position */
	return utf8_char_offset(_string.data(), hit_byte);
}

/**
 * Searches for the first n characters of s as a substring of *this,
 * beginning at character pos of *this.
 * @param s the utf-8 encoded needle
 * @param pos the character position in *this where the search starts
 * @param n the number of characters of s that make up the needle
 * @return the character position of the match, or npos when there is none
 */
xstring::size_type xstring::find(const XMLChar* s, size_type pos, size_type n) const {
	/* pos refers to *this and n to s; they must be converted independently
	 * (previously both were applied to s) */
	const size_type start_byte = utf8_byte_offset(_string, pos);
	const size_type needle_bytes = utf8_byte_offset(s, n);
	const size_type tmp = _string.find(s, start_byte, needle_bytes);
	return utf8_char_offset(_string.data(),tmp);
}

/**
 * Searches for a null-terminated character array as a substring of *this,
 * beginning at character pos of *this.
 * @return the character position of the match, or npos when there is none
 */
xstring::size_type xstring::find(const XMLChar* s, size_type pos ) const {
	const size_type start_byte = utf8_byte_offset(_string, pos);
	const size_type hit_byte = _string.find(s, start_byte);
	return utf8_char_offset(_string.data(), hit_byte);
}

/**
 * Searches for the single-unit character c, beginning at character position pos.
 * @return the character position of the match, or npos when there is none
 */
xstring::size_type xstring::find(XMLChar c, size_type pos ) const {
	const size_type start_byte = utf8_byte_offset(_string, pos);
	const size_type hit_byte = _string.find(static_cast<char>(c), start_byte);
	return utf8_char_offset(_string.data(), hit_byte);
}

/**
 * Searches backward for s as a substring of *this, beginning at character
 * position min(pos,size()).
 * @return the character position of the match, or npos when there is none
 */
xstring::size_type xstring::rfind(const xstring& s, size_type pos) const {
	const size_type start_byte = utf8_byte_offset(_string, pos);
	const size_type hit_byte = _string.rfind(s._string, start_byte);
	return utf8_char_offset(_string.data(), hit_byte);
}

/**
 * Searches backward for the first n characters of s as a substring of
 * *this, beginning at character position min(pos,size()).
 * @param s the utf-8 encoded needle
 * @param pos the character position in *this where the backward search starts
 * @param n the number of characters of s that make up the needle
 * @return the character position of the match, or npos when there is none
 */
xstring::size_type xstring::rfind(const XMLChar* s, size_type pos, size_type n) const {
	/* pos refers to *this and n to s; they must be converted independently
	 * (previously both were applied to s) */
	const size_type start_byte = utf8_byte_offset(_string, pos);
	const size_type needle_bytes = utf8_byte_offset(s, n);
	const size_type tmp = _string.rfind(s, start_byte, needle_bytes);
	return utf8_char_offset(_string.data(),tmp);
}

/**
 * Searches backward for a null-terminated character array as a substring of
 * *this, beginning at character min(pos,size()).
 * @return the character position of the match, or npos when there is none
 */
xstring::size_type xstring::rfind(const XMLChar* s, size_type pos ) const {
	const size_type start_byte = utf8_byte_offset(_string, pos);
	const size_type hit_byte = _string.rfind(s, start_byte);
	return utf8_char_offset(_string.data(), hit_byte);
}

/**
 * Searches backward for the single-unit character c, starting from
 * character position pos.
 * @return the character position of the match, or npos when there is none
 */
xstring::size_type xstring::rfind(XMLChar c, size_type pos ) const {
	const size_type start_byte = utf8_byte_offset(_string, pos);
	const size_type hit_byte = _string.rfind(c, start_byte);
	return utf8_char_offset(_string.data(), hit_byte);
}

/**
 * Extract a substring.
 * @param pos the first character of the substring
 * @param n the number of characters to take
 * @return a new xstring built from the null-terminated copy of that range
 */
xstring xstring::substr(size_type pos, size_type n ) const {
	const utf8_bounds range(_string, pos, n);
	const ustring piece = _string.substr(range.i, range.n);
	return xstring((const XMLChar*)piece.c_str());
}

/** Compare *this with s; the result is that of the underlying compare(). */
int xstring::compare(const xstring& s) const {
	const int order = _string.compare(s._string);
	return order;
}

/**
 * Compare a substring of *this with s.
 * @param pos the first character of *this to compare
 * @param n the number of characters of *this to compare
 * @param s the string to compare against
 */
int xstring::compare(size_type pos, size_type n, const xstring& s) const {
	const utf8_bounds mine(_string, pos, n);
	const int order = _string.compare(mine.i, mine.n, s._string);
	return order;
}

/**
 * Compare a substring of *this with a substring of s.
 * @param pos the first character of *this to compare
 * @param n the number of characters of *this to compare
 * @param s the string to compare against
 * @param pos1 the first character of s to compare
 * @param n1 the number of characters of s to compare
 * @return the result of the underlying compare()
 */
int xstring::compare(size_type pos, size_type n, const xstring& s, size_type pos1, size_type n1) const {
	const utf8_bounds bounds1(_string,pos,n);
	/* pos1/n1 index into s, so the byte range must be computed on s._string
	 * (it was previously computed on *this) */
	const utf8_bounds bounds2(s._string,pos1,n1);
	return _string.compare(bounds1.i, bounds1.n, s._string, bounds2.i, bounds2.n);
}

/** Compare *this with the null-terminated array s. */
int xstring::compare(const pointer s) const {
	const int order = _string.compare(s);
	return order;
}

/**
 * Compare a substring of *this with the first len characters of s.
 * @param pos the first character of *this to compare
 * @param n the number of characters of *this to compare
 * @param s the utf-8 encoded characters to compare against
 * @param len the number of characters of s to compare
 */
int xstring::compare(size_type pos, size_type n, const pointer s, size_type len) const {
	const utf8_bounds mine(_string, pos, n);
	const size_type other_bytes = utf8_byte_offset(s, len);
	return _string.compare(mine.i, mine.n, s, other_bytes);
}

/** ITERATORS */
/** return an iterator wrapping the start of the underlying string */
xstring::iterator xstring::begin() {
	iterator first(_string.begin());
	return first;
}

/** return an iterator wrapping the end of the underlying string */
xstring::iterator xstring::end() {
	iterator past_last(_string.end());
	return past_last;
}

/** return a const iterator wrapping the start of the underlying string */
xstring::const_iterator xstring::begin() const {
	const_iterator first(_string.begin());
	return first;
}

/** return a const iterator wrapping the end of the underlying string */
xstring::const_iterator xstring::end() const {
	const_iterator past_last(_string.end());
	return past_last;
}

/**
 * Return a wide-string (UTF-16 style) version of this string.
 *
 * Each UTF-8 sequence in the underlying string is decoded into a code
 * point; code points below 0x10000 become one wchar_t, code points below
 * 0x110000 become a surrogate pair.
 *
 * @return the converted string. Conversion stops silently (returning what
 *         was produced so far) when a decoded value is 0x110000 or larger.
 * @throws encoding_error when a continuation byte (0x80..0xBF) or a byte
 *         of 0xF8 and above appears where a lead byte is expected
 *
 * NOTE(review): the result is built from wchar_t values, so its byte order
 * is whatever the platform uses; surrogate pairs are emitted regardless of
 * the width of wchar_t -- confirm this is intended where wchar_t is 32 bits.
 * NOTE(review): the lead-byte comparisons assume *i yields values in
 * 0..255 (i.e. ustring stores unsigned bytes) -- confirm against ustring.
 */
std::wstring xstring::w_str() const {
	unsigned int c,d;
	int trailing;

	std::wstring ret;
	/* length() + 1 is only a reservation hint, not a hard limit */
	ret.reserve(length()+1);

	ustring::const_iterator i = _string.begin();
	while (i != _string.end()) {
		/* classify the lead byte: initial payload bits go into c, the
		 * number of continuation bytes expected goes into trailing */
		d = *i++;
		if (d < 0x80) {
			c = d;
			trailing = 0;
		} else if (d < 0xC0) {
			/* continuation byte where a lead byte was expected */
			throw encoding_error("trailing byte in leading position");
		} else if (d < 0xE0) {
			c = d & 0x1f; 
			trailing = 1;
		} else if (d < 0xF0) {
			c = d & 0x0F; 
			trailing = 2;
		} else if (d < 0xF8) {
			c = d & 0x07; 
			trailing = 3;
		} else 
			throw encoding_error("invalid UTF16");

		/* fold in the continuation bytes, six bits each. The loop gives up
		 * early at the end of the string or at a byte that is not of the
		 * form 10xxxxxx; note that such a byte has already been consumed by
		 * the i++ in the condition, and that the partially assembled value
		 * of c is still emitted below. */
		for (; trailing; trailing--) {
			if (i == _string.end() || (((d = *i++) & 0xC0) != 0x80))
				break;
			c <<= 6;
			c |= d & 0x3F;
		}

		/** assertion:  c is a single UCS-4 value */
		if (c < 0x10000) {
			ret.append(1,c);
		} else if (c < 0x110000) {
			/* split into a high (0xD800..) and low (0xDC00..) surrogate */
			c -= 0x10000;
			ret.append(1,0xD800 | (c >> 10));
			ret.append(1,0xDC00 | (c & 0x03FF));
		} else
			/* value outside the range handled above: stop converting */
			break;
	}
	return ret;
}













