//parse_utf8.h

#ifndef PARSE_UTF8_H_INCLUDED
#define PARSE_UTF8_H_INCLUDED

//integer type used for the values handed back by parse_utf8::get_output()
//and for the parser's internal working state
typedef int unichar;

//UTF-8 parser driven one input value at a time: each call to proc_input()
//returns a return_status saying what happened to that input and whether a
//result is now available through get_output().
//proc_input(), strerror() and the constructor are only declared here; their
//definitions live outside this header.
class parse_utf8
{
public:
	//outcome of a single proc_input() call
	enum return_status
	{
		//input accepted, no output yet
		INPUT_OK=0,
		
		//input accepted, call get_output for result
		SEQ_COMPLETE,
		
		//input ignored, no output
		INVALID_UTF8_BYTE,
		
		//input ignored, working sequence dropped
		UNEXPECTED_UTF8_CONTINUING_CHARACTER,
		
		//input accepted, call get_output for result despite warning
		OVERCODED_UTF8_SEQUENCE,
		
		//working sequence dropped, new sequence complete, call get_output
		TRUNCATED_UTF8_SEQUENCE_1,
		
		//working sequence dropped, new sequence begun, no output yet
		TRUNCATED_UTF8_SEQUENCE_2,
		
		//working sequence dropped, invalid byte ignored
		TRUNCATED_UTF8_SEQUENCE_3,
		
		//working sequence dropped
		TRUNCATED_UTF8_SEQUENCE_4
	};

	//send -1 as EOF, otherwise inchar must be >=0 and <=253
	return_status proc_input( int inchar );

	//true when s is one of the statuses documented above as "call get_output":
	//SEQ_COMPLETE, OVERCODED_UTF8_SEQUENCE and TRUNCATED_UTF8_SEQUENCE_1.
	//OVERCODED_UTF8_SEQUENCE is included because its result is available
	//"despite warning"; callers that must reject such input should test for
	//that status (or use is_error) before consuming the result.
	//static: depends only on s, so it can be called with or without an object
	static bool has_output( return_status s )
	{
		return s==SEQ_COMPLETE
			|| s==OVERCODED_UTF8_SEQUENCE
			|| s==TRUNCATED_UTF8_SEQUENCE_1;
	}

	//true for every status other than INPUT_OK and SEQ_COMPLETE; note that a
	//status can be an error and still carry output (see has_output)
	//static: depends only on s, so it can be called with or without an object
	static bool is_error( return_status s )
	{
		return s!=INPUT_OK && s!=SEQ_COMPLETE;
	}

	//returns the stored result; meaningful after proc_input() returned a
	//status for which has_output() is true
	unichar get_output() const { return m_last_result; }

	//returns a text description for the given status
	//NOTE(review): ownership/lifetime of the returned string is not visible
	//from this header - confirm against the definition
	const char* strerror( return_status );

	parse_utf8();
private:

	//value returned by get_output()
	unichar m_last_result;
	//presumably the partially assembled value of the sequence in progress -
	//confirm against proc_input()
	unichar m_input_so_far;
	//presumably bookkeeping for the expected length of the sequence in
	//progress - confirm against proc_input()
	unichar m_detect_length;
};

#endif

