Hi, We just managed to add chinese search ability to udmsearch, below are the changes to the parsetext function and an additional function in charset.c (sorry I cant diff them as we messed around with other bits - eg. adding more debugging to help us) It could probably be tidied up a bit, but my first language is not C :) The basic principle is that it parses the text string twice, first time for chinese (and does not change the string) - this is done by looking for characters with values > 128, followed by anything except a space. Then adding that to the database, being carefull to escape \, " and ' then doing the standard english/other language check.. The search interface also needs some minor changes to do escaping if chinese characters are searched. (have yet to do that) regards alan ------------------indexer.c----------------- static int ParseText(UDM_INDEXER * Indexer,UDM_SERVER * Server, char *content,int weight,int check_stopwords){ char *s, *lt, *ss, *lc, *ssa; char sb[4] ; if(weight&&content){ ss=UdmChiGetWord(content, &lc, Indexer->local_charset); while(ss){ ssa =ss +1; if ((strncmp(ssa , "'",1)==0) || (strncmp(ssa ,"\"",1)==0) || (strncmp(ssa ,"\\",1)==0)) { sprintf(sb,"%c\\%c\0", *ss, *ssa); } else { sprintf(sb,"%c%c\0", *ss, *ssa); } AddWord(Indexer, Server, sb, weight, check_stopwords); ss=UdmChiGetWord(NULL, &lc, Indexer->local_charset); } s=UdmGetWord(content, <, Indexer->local_charset); while(s){ AddWord(Indexer, Server, s, weight, check_stopwords); s=UdmGetWord(NULL, <, Indexer->local_charset); } UdmLog(Indexer->handle,UDM_LOG_DEBUG,"CHITEST"); } return(0); } --------------charset.c--------------------------------- char * UdmChiGetWord(char *s, char **last, int charset) { unsigned char *tok=NULL; int fl, i,matchednonchar,ischinese; char *ss; if (s == NULL && (s = *last) == NULL) return NULL; /* We find beginning of the word */ fl = 1; while(fl) { if( *s==0 ) { *last=s; return NULL ; } /* find the first character that is not in the 'charset range' */ ss = s+1; if( (*s <(char)0) && (*ss != (char)32)) { tok = s; fl = 0; } s++; } s++; *last = s; return tok; } -- Technical Director Linux Center (HK) Ltd. www.hklc.com ______________ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED]