
#include <stdio.h>
#include "parse_utf8.h"

// Encode one code point as a UTF-8 byte sequence (original 1..6 byte scheme).
//
//   value  code point to encode
//   buf    destination; must have room for up to 6 bytes (no terminator
//          is written)
//
// Returns the number of bytes stored in buf, or 0 when value is larger
// than 0x7FFFFFFF and therefore has no encoding (buf is left untouched).
size_t ucs4toutf8( unichar value, unsigned char *buf )
{
	// Values up to 0x7F are emitted unchanged as a single byte.
	if( value <= 0x7F )
	{
		buf[0] = (unsigned char)value;
		return 1;
	}

	// One row per multi-byte form: the largest value it can hold, the mask
	// for the payload bits kept in the lead byte, and the lead byte's tag
	// bits. Row index + 2 is the sequence length in bytes.
	struct utf8_form
	{
		int           limit;
		unsigned char lead_mask;
		unsigned char lead_tag;
	};
	static const utf8_form forms[] =
	{
		{ 0x000007FF, 0x1F, 0xC0 },
		{ 0x0000FFFF, 0x0F, 0xE0 },
		{ 0x001FFFFF, 0x07, 0xF0 },
		{ 0x03FFFFFF, 0x03, 0xF8 },
		{ 0x7FFFFFFF, 0x01, 0xFC }
	};
	const size_t form_count = sizeof(forms) / sizeof(forms[0]);

	for( size_t form = 0; form < form_count; ++form )
	{
		if( value > forms[form].limit )
			continue;

		const size_t length = form + 2;

		// Fill continuation bytes from the last one backwards, consuming
		// six payload bits of value per byte.
		for( size_t pos = length - 1; pos > 0; --pos )
		{
			buf[pos] = (unsigned char)((value & 0x3F) | 0x80);
			value >>= 6;
		}

		// Whatever bits remain go into the lead byte under its tag.
		buf[0] = (unsigned char)((value & forms[form].lead_mask) | forms[form].lead_tag);
		return length;
	}

	return 0;
}
// utf8scrub: read bytes from stdin, run them through a parse_utf8 parser,
// report parser errors on stderr and write every code point the parser
// yields back to stdout, re-encoded by ucs4toutf8().
//
// Always returns 0; parse errors are reported but do not change the exit
// status.
int main()
{
	parse_utf8 parser;

	// Loop until EOF only. The read must not be used as the loop condition:
	// fgetc() returns 0 for a NUL byte, which is ordinary input and must not
	// end processing early.
	for(;;)
	{
		const int inchar = fgetc(stdin);

		// EOF is handed to the parser as well before the loop exits
		// (presumably so it can flag a truncated trailing sequence --
		// confirm against parse_utf8).
		parse_utf8::return_status st = parser.proc_input(inchar);

		if(parser.is_error(st))
		{
			fprintf(stderr, "utf8scrub: (char 0x%02x) %s\n", (unsigned)inchar,parser.strerror(st));
		}

		if(parser.has_output(st) || st == parse_utf8::OVERCODED_UTF8_SEQUENCE )
		{
			// ucs4toutf8() stores at most 6 bytes and returns how many.
			// Write exactly that many with fwrite(): a string-based write
			// would stop at a NUL byte and silently drop an encoded U+0000.
			unsigned char buf[6];
			const size_t len = ucs4toutf8(parser.get_output(), buf);
			if(len > 0)
				fwrite(buf, 1, len, stdout);
		}

		if(inchar == EOF)
			break;
	}

	return 0;
}
