patch: export/import ignored words list with document

WJCarpenter Thu, 7 Sep 2000 13:25:13 -0500 (CDT)

This patch (against 090100 sources) implements a new feature of saving
a document's spellcheck ignored word list with the document when saved
in the *.abw format.  The corresponding read back is also implemented.
After this patch, the "ignore all" word list acts like a persistent,
per-document custom dictionary.  

I have added two preference variables, SpellCheckIgnoredWordsSave and
SpellCheckIgnoredWordsLoad, but they don't actually control the
feature at this point (both halves are always on).  The implementation
delay is because preference values aren't available in the
import/export code, and it will take me a while to root through the
class hierarchy to find a simple way to get at them.  (One of the
downsides of a certain style of OO programming that is otherwise
mostly A Good Thing.)

Ignored words are saved in a bit of XML that looks like this:

        <ignoredwords>
          <iw>sumthing</iw>
          <iw>utherthing</iw>
        </ignoredwords>

The <ignoredwords> tag is at the same level as <section> and <style>.

All changes are in cross-platform (XP) code and were tested on Linux.  I am
not sure whether this works with the alternative Gnome XML parser, but my
guess is that it does.
-- 
[EMAIL PROTECTED] (WJCarpenter)    PGP 0x91865119
38 95 1B 69 C9 C6 3D 25    73 46 32 04 69 D6 ED F3

       Buy my house in Woodinville (near Seattle):
                 <http://www.johnlscott.com/57554>

diff -ru abi-090100-ORIG/src/wp/ap/xp/ap_Prefs_SchemeIds.h 
abi-090100/src/wp/ap/xp/ap_Prefs_SchemeIds.h
--- abi-090100-ORIG/src/wp/ap/xp/ap_Prefs_SchemeIds.h   Tue Jul 25 14:10:51 2000
+++ abi-090100/src/wp/ap/xp/ap_Prefs_SchemeIds.h        Tue Sep  5 15:58:39 2000
@@ -55,6 +55,12 @@
 #define AP_PREF_KEY_SpellCheckInternet                         "SpellCheckInternet"   
         /* enable spell checking internet names {0,1} */
 #define AP_PREF_DEFAULT_SpellCheckInternet                     "1"
 
+#define AP_PREF_KEY_SpellCheckIgnoredWordsSave         "SpellCheckIgnoredWordsSave"   
+ /* save ignored words list with doc */
+#define AP_PREF_DEFAULT_SpellCheckIgnoredWordsSave     "1"
+
+#define AP_PREF_KEY_SpellCheckIgnoredWordsLoad         "SpellCheckIgnoredWordsLoad"   
+ /* load ignored words list with doc */
+#define AP_PREF_DEFAULT_SpellCheckIgnoredWordsLoad     "1"
+
 #define AP_PREF_KEY_OptionsTabNumber                           "OptionsTabNumber"     
                 /* the page number of the currently shown page in the */
 #define AP_PREF_DEFAULT_OptionsTabNumber                       "0"                    
                                 /* options dialog */
 
diff -ru abi-090100-ORIG/src/wp/impexp/xp/ie_exp_AbiWord_1.cpp 
abi-090100/src/wp/impexp/xp/ie_exp_AbiWord_1.cpp
--- abi-090100-ORIG/src/wp/impexp/xp/ie_exp_AbiWord_1.cpp       Wed Jul 26 21:15:31 
2000
+++ abi-090100/src/wp/impexp/xp/ie_exp_AbiWord_1.cpp    Tue Sep  5 21:30:13 2000
@@ -116,6 +116,7 @@
                                                                 UT_Bool 
bNewLineAfter, PT_AttrPropIndex api);
        void                            _outputData(const UT_UCSChar * p, UT_uint32 
length);
        void                            _handleStyles(void);
+       void                            _handleIgnoredWords(void);
        void                            _handleDataItems(void);
        
        PD_Document *           m_pDocument;
@@ -455,6 +456,7 @@
        
        
        _handleStyles();
+       _handleIgnoredWords();
 }
 
 s_AbiWord_1_Listener::~s_AbiWord_1_Listener()
@@ -620,6 +622,62 @@
 
        if (bWroteOpenStyleSection)
                m_pie->write("</styles>\n");
+
+       return;
+}
+
+// Write the document's spell-check ignored words as an <ignoredwords>
+// section holding one <iw> element per word.  The section is omitted
+// entirely when the document reports no ignored words.
+void s_AbiWord_1_Listener::_handleIgnoredWords(void)
+{
+       // The save preference cannot be read from the exporter yet, so default
+       // to saving.  The flag must be initialized here: nothing else assigns
+       // it until the prefs lookup below is enabled.
+       UT_Bool saveIgnores = UT_TRUE;
+       //pPrefs->getPrefsValueBool((XML_Char *)AP_PREF_KEY_SpellCheckIgnoredWordsSave, &saveIgnores);
+       if (!saveIgnores) return;  // don't bother
+       UT_Bool bWroteOpenIgnoredWordsSection = UT_FALSE;
+
+       const UT_UCSChar *word;
+       for (UT_uint32 i = 0; m_pDocument->enumIgnores(i, &word); i++)
+       {
+               if (!bWroteOpenIgnoredWordsSection)
+               {
+                       m_pie->write("<ignoredwords>\n");
+                       bWroteOpenIgnoredWordsSection = UT_TRUE;
+               }
+               m_pie->write("<iw>");
+               for (UT_uint32 udex=0; word[udex]; ++udex)
+               {
+                       UT_UCSChar ch = word[udex];
+                       // Escape XML metacharacters; anything below space or >= 128
+                       // is written as a hexadecimal character reference.
+                       switch (ch)
+                       {
+                       case '&':   m_pie->write("&amp;");  break;
+                       case '<':   m_pie->write("&lt;");  break;
+                       case '>':   m_pie->write("&gt;");  break;
+                       case '"':   m_pie->write("&quot;");  break;
+                       default:
+                               char utb[100];
+                               if (ch < ' ' || ch >= 128)
+                               {
+                                       sprintf(utb, "&#x%x;", ch);
+                               }
+                               else
+                               {
+                                       utb[0] = (char)ch;
+                                       utb[1] = 0;
+                               }
+                               m_pie->write(utb);
+                       }
+               }
+               m_pie->write("</iw>\n");
+       }
+
+       if (bWroteOpenIgnoredWordsSection)
+               m_pie->write("</ignoredwords>\n");
 
        return;
 }
diff -ru abi-090100-ORIG/src/wp/impexp/xp/ie_imp_AbiWord_1.cpp 
abi-090100/src/wp/impexp/xp/ie_imp_AbiWord_1.cpp
--- abi-090100-ORIG/src/wp/impexp/xp/ie_imp_AbiWord_1.cpp       Wed Jul 26 21:15:31 
2000
+++ abi-090100/src/wp/impexp/xp/ie_imp_AbiWord_1.cpp    Tue Sep  5 14:37:19 2000
@@ -259,6 +259,8 @@
 #define TT_PAGEBREAK   11              // a forced page-break <pbr>
 #define TT_STYLESECTION        12              // a style section <styles>
 #define TT_STYLE               13              // a style <s> within a style section
+#define TT_IGNOREDWORDS 14             // an ignored words section <ignoredwords>
+#define TT_IGNOREDWORD  15      // a word <iw> within an ignored words section
 
 struct _TokenTable
 {
@@ -297,6 +299,8 @@
        {       "pbr",                  TT_PAGEBREAK    },
        {       "styles",               TT_STYLESECTION },
        {       "s",                    TT_STYLE                },
+       {       "ignoredwords", TT_IGNOREDWORDS },
+       {       "iw",                   TT_IGNOREDWORD  },
        {       "*",                    TT_OTHER                }};     // must be last
 
 #define TokenTableSize ((sizeof(s_Tokens)/sizeof(s_Tokens[0])))
@@ -456,6 +460,16 @@
                X_CheckError(m_pDocument->appendStyle(atts));
                return;
                
+       case TT_IGNOREDWORDS:
+               X_VerifyParseState(_PS_Doc);
+               m_parseState = _PS_IgnoredWordsSec;
+               return;
+
+       case TT_IGNOREDWORD:
+               X_VerifyParseState(_PS_IgnoredWordsSec);
+               m_parseState = _PS_IgnoredWordsItem;
+               return;
+                       
        case TT_OTHER:
        default:
                UT_DEBUGMSG(("Unknown tag [%s]\n",name));
@@ -564,6 +578,16 @@
                m_parseState = _PS_StyleSec;
                return;
                
+       case TT_IGNOREDWORDS:
+               X_VerifyParseState(_PS_IgnoredWordsSec);
+               m_parseState = _PS_Doc;
+               return;
+
+       case TT_IGNOREDWORD:
+               X_VerifyParseState(_PS_IgnoredWordsItem);
+               m_parseState = _PS_IgnoredWordsSec;
+               return;
+
        case TT_OTHER:
        default:
                UT_DEBUGMSG(("Unknown end tag [%s]\n",name));
@@ -591,6 +615,7 @@
                }
                
        case _PS_Block:
+       case _PS_IgnoredWordsItem:
                {
                        UT_ASSERT(sizeof(XML_Char) == sizeof(UT_Byte));
                        UT_ASSERT(sizeof(XML_Char) != sizeof(UT_UCSChar));
@@ -600,19 +625,17 @@
                        //    [] convert CRLF to SP.
                        //    [] convert CR to SP.
                        //    [] convert LF to SP.
+                       // ignored words processing doesn't care about the 
+                       // white-space stuff, but it does no harm
 
                        UT_Byte * ss = (UT_Byte *)s;
-                       UT_UCSChar buf[1024];
+                       UT_UCSChar _buf[1024], *buf = _buf;
+                       // len is an upper bound on the length of the decoded stuff
+                       if (len > 1000) buf = new UT_UCSChar[len+1];
                        int bufLen = 0;
 
                        for (int k=0; k<len; k++)
                        {
-                               if (bufLen == NrElements(buf))          // pump it out 
in chunks
-                               {
-                                       
X_CheckError(m_pDocument->appendSpan(buf,bufLen));
-                                       bufLen = 0;
-                               }
-
                                if ((ss[k] < 0x80) && (m_lenCharDataSeen > 0))
                                {
                                        // is it us-ascii and we are in a UTF-8
@@ -674,10 +697,25 @@
                                }
                        }
 
-                       // flush out the last piece of a buffer
+                       // flush out the buffer
 
                        if (bufLen > 0)
-                               X_CheckError(m_pDocument->appendSpan(buf,bufLen));
+                       {
+                               switch (m_parseState)
+                               {
+                               case _PS_Block:
+                                       X_CheckError(m_pDocument->appendSpan(buf,bufLen));
+                                       break;
+                               case _PS_IgnoredWordsItem:
+                                       X_CheckError(m_pDocument->appendIgnore(buf,bufLen));
+                                       break;
+                               default:
+                                       UT_ASSERT(UT_SHOULD_NOT_HAPPEN);
+                                       break;
+                               }
+                       }
+                       // a heap buffer here came from new[], so it needs delete[]
+                       if (buf != _buf) delete [] buf;
                        return;
                }
 
diff -ru abi-090100-ORIG/src/wp/impexp/xp/ie_imp_AbiWord_1.h 
abi-090100/src/wp/impexp/xp/ie_imp_AbiWord_1.h
--- abi-090100-ORIG/src/wp/impexp/xp/ie_imp_AbiWord_1.h Wed Jul 26 21:15:31 2000
+++ abi-090100/src/wp/impexp/xp/ie_imp_AbiWord_1.h      Tue Sep  5 14:15:24 2000
@@ -82,7 +82,9 @@
                               _PS_DataSec,
                               _PS_DataItem,
                               _PS_StyleSec,
-                              _PS_Style
+                              _PS_Style,
+                              _PS_IgnoredWordsSec,
+                              _PS_IgnoredWordsItem
     } ParseState;
 
     UT_Error                   m_error;

patch: export/import ignored words list with document

Reply via email to