>>Finally found a function that seems to clean most of the MS Word control characters and other crap out that was causing me problems. Using two filters on the body text seems to be taking care of my problems now.
// This will clean only about 1% of the crap, maybe not even that...
// Here is a function that will clean up more, and I'm still improving it ;-)

/**
 * Cleans HTML that was pasted from Microsoft Word.
 *
 * Runs a fixed sequence of regular-expression replacements over the markup:
 * strips Office-specific elements (<o:p>, namespaced tags, <?xml ...>),
 * mso-* and a handful of other inline style declarations, class/style/lang
 * attributes, FONT/DIV tags and attribute-less SPAN wrappers, normalises
 * heading/P/BR tags to their bare form, converts bullet paragraphs to <LI>,
 * tidies table cells and finally drops empty elements.
 *
 * The order of the steps matters: later patterns rely on the attribute
 * stripping done by earlier ones (e.g. they look for a bare "<P>").
 *
 * @param {string} html  Markup pasted from Word. null/undefined is treated
 *                       as an empty string.
 * @return {string}      The cleaned markup.
 */
function cleanWord(html) // cleans pasted text from Word
{
    if (html === null || html === undefined) {
        return '';
    }
    html = String(html);

    // Applies one replacement repeatedly until the text stops changing.
    // Only used with replacements that strictly shorten the text, so it
    // always terminates. Replaces the original "run it three times" idiom.
    function replaceUntilStable(text, pattern, replacement) {
        var previous;
        do {
            previous = text;
            text = text.replace(pattern, replacement);
        } while (text !== previous);
        return text;
    }

    // Remove Word's <o:p> paragraph markers (empty first, then with content).
    html = html.replace(/<o:p>\s*<\/o:p>/g, "");
    html = html.replace(/<o:p>.*?<\/o:p>/g, "");

    // Remove mso-xxx styles.
    html = html.replace(/\s*mso-[^:]+:[^;"]+;?/gi, "");

    // Remove margin styles and other Word-generated declarations.
    html = html.replace(/\s*MARGIN: 0cm 0cm 0pt\s*;/gi, "");
    html = html.replace(/\s*MARGIN: 0cm 0cm 0pt\s*"/gi, "\"");
    html = html.replace(/\s*TEXT-INDENT: 0cm\s*;/gi, "");
    html = html.replace(/\s*TEXT-INDENT: 0cm\s*"/gi, "\"");
    html = html.replace(/\s*TEXT-ALIGN: [^\s;]+;?"/gi, "\"");
    html = html.replace(/\s*PAGE-BREAK-BEFORE: [^\s;]+;?"/gi, "\"");
    html = html.replace(/\s*FONT-VARIANT: [^\s;]+;?"/gi, "\"");
    html = html.replace(/\s*tab-stops:[^;"]*;?/gi, "");
    html = html.replace(/\s*tab-stops:[^"]*/gi, "");
    html = html.replace(/\s*FONT-FAMILY:[^;"]*;?/gi, "");

    // Remove Class attributes.
    html = html.replace(/<(\w[^>]*)\s*class=([^ |>]*)([^>]*)/gi, "<$1$3");

    // Remove styles.
    html = html.replace(/<(\w[^>]*)style="([^\"]*)"([^>]*)/gi, "<$1$3");

    // Remove empty styles.
    html = html.replace(/\s*style="\s*"/gi, '');

    // A SPAN holding only a single blank collapses to that blank.
    // NOTE(review): the archived source shows a literal space here and
    // "( | )" further down; that degenerate alternation looks like "&nbsp;"
    // flattened by the list archive's HTML rendering. The patterns below
    // therefore accept "&nbsp;" and U+00A0 in addition to a plain space
    // (plain-space behaviour is unchanged). Confirm against the original.
    html = html.replace(/<SPAN[^>]*>\s*(&nbsp;|\u00A0| )\s*<\/SPAN>/gi, '$1');
    html = html.replace(/<SPAN[^>]*>\s*<\/SPAN>/gi, '');

    // Remove Lang attributes.
    html = html.replace(/<(\w[^>]*) lang=([^ |>]*)([^>]*)/gi, "<$1$3");

    // Unwrap attribute-less SPANs; repeated so nested SPANs are unwrapped too.
    html = replaceUntilStable(html, /<SPAN\s*>([\s\S]*?)<\/SPAN>/gi, '$1');

    // Remove all font tags (a single global pass removes every occurrence).
    html = html.replace(/<\/?FONT[^>]*>/gi, '');

    html = html.replace(/<\/?DIV([^>]*)>/gi, '');

    // Remove XML elements and declarations.
    html = html.replace(/<\\?\?xml[^>]*>/gi, "");

    // Remove tags with XML namespace declarations: <o:p></o:p>
    html = html.replace(/<\/?\w+:[^>]*>/gi, "");

    // Drop empty headings, then strip attributes from H1..H6, P and BR.
    html = html.replace(/<H\d>\s*<\/H\d>/gi, '');
    html = html.replace(/<H([1-6])[^>]*>/gi, '<H$1>');
    // \b keeps <PRE>, <PARAM ...> etc. from being rewritten to <P>.
    html = html.replace(/<P\b[^>]*>/gi, '<P>');
    html = html.replace(/<BR\b[^>]*>/gi, '<BR>');

    html = html.replace(/<P>\s*(<P>)+<\/P>/gi, '<P>');
    html = html.replace(/<\/P>\s*(<\/P>)+<\/P>/gi, '</P>');

    // Inline formatting wrapped around a single blank collapses to the blank.
    html = html.replace(/<(U|I|STRIKE)>(&nbsp;|\u00A0| )<\/\1>/g, '$2');

    // Remove HTML comments.
    html = html.replace(/<!--[\s\S]*?-->/gi, '');

    // Transform bullet lists: a paragraph starting with a bullet glyph
    // (U+00B7 middle dot, U+00A7 section sign, or '-') becomes a list item.
    html = html.replace(
        /<P>\u00B7<SPAN>(?:&nbsp;|\u00A0| )*<\/SPAN>([\s\S]*?)<\/P>/gi,
        "<LI>$1</LI>");
    // The original replacement used "$2" here, which is the whitespace group
    // of this three-group pattern, so the item text was discarded. Only the
    // item text is captured now.
    html = html.replace(
        /(?:<BR>|<P>)[\u00A7\u00B7-](?:&nbsp;|\u00A0| )*([\s\S]*?)<\/P>/gi,
        "<LI>$1</LI>");

    // Remove spaces at the beginning.
    html = html.replace(/^(?:&nbsp;|\s)*/, '');

    // Replace <P align=center>...</P> with <BR><CENTER>...</CENTER> because
    // the alignment is overridden by higher style declarations (justify etc.).
    // NOTE(review): by this point every <P ...> has already been reduced to a
    // bare <P>, so this looks unreachable for real markup. Left in place and
    // in its original position; confirm whether it should run earlier.
    html = html.replace(/<P\s*align=center>([\s\S]*?)<\/P>/gi, '<BR><CENTER>$1</CENTER>');

    // Remove useless </CENTER><CENTER>.
    html = html.replace(/<\/CENTER>(\s*<BR>\s*)<CENTER>/gi, '$1');

    // Remove useless <BR> at the start of a <TD>.
    html = html.replace(/(<TD[^>]*>)\s*<BR>\s*/gi, '$1');

    // Replace <CENTER>...</CENTER> inside of TDs with an align attribute.
    html = html.replace(/(<TD[^>]*)>\s*<CENTER>([\s\S]*?)<\/CENTER>\s*<\/TD>/gi, '$1 align=center>$2</TD>');

    // Remove paragraphs inside TD.
    html = html.replace(/(<TD[^>]*>)\s*<P[^>]*>([\s\S]*?)\s*<\/P>\s*([\s\S]*?<\/TD>)/gi, '$1$2$3');

    // Remove empty tags; repeated because removing an inner empty element
    // can leave its parent empty.
    html = replaceUntilStable(html, /<([^\s>]+)[^>]*>\s*<\/\1>/g, '');

    // Start every <P> and <BR> on its own line. The original replacement
    // omitted the matched character, silently deleting whatever preceded the
    // tag (e.g. turning "</P><P>" into "</P<P>"). Inserting a newline is the
    // presumed intent.
    html = html.replace(/([^\n\r])<P>/gi, '$1\n<P>');
    html = html.replace(/([^\n\r])<BR>/gi, '$1\n<BR>');

    return html;
}

// --
// _______________________________________
// REUSE CODE! Use custom tags; See http://www.contentbox.com/claude/customtags/tagstore.cfm
// (Please send any spam to this address: [EMAIL PROTECTED]) Thanks.
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|
// Create Web Applications With ColdFusion MX7 & Flex 2. Build powerful, scalable RIAs.
Free Trial http://www.adobe.com/products/coldfusion/flex2/?sdid=RVJS Archive: http://www.houseoffusion.com/groups/CF-Talk/message.cfm/messageid:277080 Subscription: http://www.houseoffusion.com/groups/CF-Talk/subscribe.cfm Unsubscribe: http://www.houseoffusion.com/cf_lists/unsubscribe.cfm?user=11502.10531.4