This patch (against 090800 sources) implements saving a document's
spellcheck ignored word list with the document when saved in the *.abw
format.  The corresponding read back is also implemented.  After this
patch, the "ignore all" word list acts like a persistent, per-document
custom dictionary.  This patch includes the changes from my earlier,
not-yet-committed patch on the same subject from a few days ago.  In
other words, if someone decides to commit the attached patch, my
earlier patch can be discarded.  If someone would rather see it a
different way, I could cook that up.

I have added two preference variables, SpellCheckIgnoredWordsSave and
SpellCheckIgnoredWordsLoad, and this time around they actually control
the feature (sorry, no GUI yet).  As a side effect of the
implementation to use those preferences, PD_Document and AP_Convert
objects now have an XAP_App pointer (and a getApp()).  This means that
lots of new places (including the import/export code) can now access
the user preferences.  Go hog wild.

Ignored words are saved in a bit of XML that looks like this:

        <ignoredwords>
          <iw>sumthing</iw>
          <iw>utherthing</iw>
        </ignoredwords>

The <ignoredwords> tag is at the same level as <section> and <style>.

Most changes are XP, tested on Linux.  I'm still not sure if this
works with the alternative Gnome XML parser, but my guess is that it
does.  A small part of the changes is platform-specific, but things
should work if the code meets my modest assumptions about class
hierarchies and member functions.  The Unix stuff works for sure.
-- 
[EMAIL PROTECTED] (WJCarpenter)    PGP 0x91865119
38 95 1B 69 C9 C6 3D 25    73 46 32 04 69 D6 ED F3

       Buy my house in Woodinville (near Seattle):
                 <http://www.johnlscott.com/57554>

diff -ru abi-090800-ORIG/src/text/ptbl/xp/pd_Document.cpp 
abi-090800/src/text/ptbl/xp/pd_Document.cpp
--- abi-090800-ORIG/src/text/ptbl/xp/pd_Document.cpp    Thu Jul  6 01:55:57 2000
+++ abi-090800/src/text/ptbl/xp/pd_Document.cpp Sat Sep  9 22:28:55 2000
@@ -50,9 +50,10 @@
 //////////////////////////////////////////////////////////////////
 //////////////////////////////////////////////////////////////////
 
-PD_Document::PD_Document()
+PD_Document::PD_Document(XAP_App *pApp)
        : AD_Document(), m_hashDataItems(11)
 {
+       m_pApp = pApp;
        m_pPieceTable = NULL;
 
        // perhaps this should be a magic "unknown" or "NULL" value,
diff -ru abi-090800-ORIG/src/text/ptbl/xp/pd_Document.h 
abi-090800/src/text/ptbl/xp/pd_Document.h
--- abi-090800-ORIG/src/text/ptbl/xp/pd_Document.h      Wed Jul 26 21:15:26 2000
+++ abi-090800/src/text/ptbl/xp/pd_Document.h   Sat Sep  9 22:29:43 2000
@@ -44,6 +44,7 @@
 class pf_Frag_Strux;
 class PX_ChangeRecord;
 class PD_Style;
+class XAP_App;
 
 #ifdef PT_TEST
 #include "ut_test.h"
@@ -61,7 +62,7 @@
 class PD_Document : public AD_Document
 {
 public:
-       PD_Document();
+       PD_Document(XAP_App *pApp);
 
        virtual UT_Error                readFromFile(const char * szFilename, int 
ieft);
        virtual UT_Error                newDocument(void);
@@ -168,6 +169,7 @@
 
        const char *                            getFileName() { return m_szFilename; }
        UT_uint32                               getLastType() { return 
m_lastSavedAsType; }
+       XAP_App * getApp() { return m_pApp; }
        
 #ifdef PT_TEST
        void                                    __dump(FILE * fp) const;
@@ -185,6 +187,7 @@
        UT_AlphaHashTable                       m_hashDataItems;
 
        IEFileType                              m_lastSavedAsType;
+       XAP_App *                               m_pApp;
 };
 
 
diff -ru abi-090800-ORIG/src/wp/ap/beos/ap_BeOSApp.cpp 
abi-090800/src/wp/ap/beos/ap_BeOSApp.cpp
--- abi-090800-ORIG/src/wp/ap/beos/ap_BeOSApp.cpp       Wed Aug 16 13:35:41 2000
+++ abi-090800/src/wp/ap/beos/ap_BeOSApp.cpp    Sun Sep 10 10:47:54 2000
@@ -666,7 +666,7 @@
 #if CONVERT
                        if (to)
                        {
-                               AP_Convert * conv = new AP_Convert();
+                               AP_Convert * conv = new AP_Convert(getApp());
                                conv->setVerbose(verbose);
                                conv->convertTo(m_pArgs->m_argv[k], to);
                                delete conv;
diff -ru abi-090800-ORIG/src/wp/ap/beos/ap_BeOSFrame.cpp 
abi-090800/src/wp/ap/beos/ap_BeOSFrame.cpp
--- abi-090800-ORIG/src/wp/ap/beos/ap_BeOSFrame.cpp     Sat Aug 19 16:35:39 2000
+++ abi-090800/src/wp/ap/beos/ap_BeOSFrame.cpp  Sun Sep 10 10:27:56 2000
@@ -416,7 +416,7 @@
        // load a document into the current frame.
        // if no filename, create a new document.
 
-       AD_Document * pNewDoc = new PD_Document();
+       AD_Document * pNewDoc = new PD_Document(getApp());
        UT_ASSERT(pNewDoc);
        
        if (!szFilename || !*szFilename)
diff -ru abi-090800-ORIG/src/wp/ap/qnx/ap_QNXFrame.cpp 
abi-090800/src/wp/ap/qnx/ap_QNXFrame.cpp
--- abi-090800-ORIG/src/wp/ap/qnx/ap_QNXFrame.cpp       Tue Aug 15 09:20:27 2000
+++ abi-090800/src/wp/ap/qnx/ap_QNXFrame.cpp    Sun Sep 10 10:28:08 2000
@@ -463,7 +463,7 @@
        // load a document into the current frame.
        // if no filename, create a new document.
 
-       AD_Document * pNewDoc = new PD_Document();
+       AD_Document * pNewDoc = new PD_Document(getApp());
        UT_ASSERT(pNewDoc);
        
        if (!szFilename || !*szFilename)
diff -ru abi-090800-ORIG/src/wp/ap/unix/ap_UnixApp.cpp 
abi-090800/src/wp/ap/unix/ap_UnixApp.cpp
--- abi-090800-ORIG/src/wp/ap/unix/ap_UnixApp.cpp       Sun Aug 27 18:45:41 2000
+++ abi-090800/src/wp/ap/unix/ap_UnixApp.cpp    Sun Sep 10 10:47:07 2000
@@ -1011,7 +1011,7 @@
                        // [filename]
                        if (to) 
                        {
-                               AP_Convert * conv = new AP_Convert();
+                               AP_Convert * conv = new AP_Convert(getApp());
                                conv->setVerbose(verbose);
                                conv->convertTo(m_pArgs->m_argv[k], to);
                                delete conv;
diff -ru abi-090800-ORIG/src/wp/ap/unix/ap_UnixFrame.cpp 
abi-090800/src/wp/ap/unix/ap_UnixFrame.cpp
--- abi-090800-ORIG/src/wp/ap/unix/ap_UnixFrame.cpp     Thu Jul 27 20:24:11 2000
+++ abi-090800/src/wp/ap/unix/ap_UnixFrame.cpp  Sun Sep 10 10:28:20 2000
@@ -425,7 +425,7 @@
        // load a document into the current frame.
        // if no filename, create a new document.
 
-       AD_Document * pNewDoc = new PD_Document();
+       AD_Document * pNewDoc = new PD_Document(getApp());
        UT_ASSERT(pNewDoc);
        
        if (!szFilename || !*szFilename)
diff -ru abi-090800-ORIG/src/wp/ap/unix/gnome/ap_UnixGnomeApp.cpp 
abi-090800/src/wp/ap/unix/gnome/ap_UnixGnomeApp.cpp
--- abi-090800-ORIG/src/wp/ap/unix/gnome/ap_UnixGnomeApp.cpp    Sun Aug 27 19:41:01 
2000
+++ abi-090800/src/wp/ap/unix/gnome/ap_UnixGnomeApp.cpp Sun Sep 10 10:48:08 2000
@@ -277,7 +277,7 @@
        }
        
        if (to) {
-               AP_Convert * conv = new AP_Convert();
+               AP_Convert * conv = new AP_Convert(getApp());
                conv->setVerbose(verbose);
 
                while ((file = poptGetArg (poptcon)) != NULL) {
diff -ru abi-090800-ORIG/src/wp/ap/win/ap_Win32App.cpp 
abi-090800/src/wp/ap/win/ap_Win32App.cpp
--- abi-090800-ORIG/src/wp/ap/win/ap_Win32App.cpp       Mon Jul 10 14:56:16 2000
+++ abi-090800/src/wp/ap/win/ap_Win32App.cpp    Sun Sep 10 10:48:18 2000
@@ -867,7 +867,7 @@
                        // [filename]
                        if (to) 
                        {
-                               AP_Convert * conv = new AP_Convert();
+                               AP_Convert * conv = new AP_Convert(getApp());
                                conv->setVerbose(verbose);
                                conv->convertTo(m_pArgs->m_argv[k], to);
                                delete conv;
diff -ru abi-090800-ORIG/src/wp/ap/win/ap_Win32Frame.cpp 
abi-090800/src/wp/ap/win/ap_Win32Frame.cpp
--- abi-090800-ORIG/src/wp/ap/win/ap_Win32Frame.cpp     Tue Sep  5 02:01:32 2000
+++ abi-090800/src/wp/ap/win/ap_Win32Frame.cpp  Sun Sep 10 10:28:42 2000
@@ -1112,7 +1112,7 @@
        // load a document into the current frame.
        // if no filename, create a new document.
 
-       AD_Document * pNewDoc = new PD_Document();
+       AD_Document * pNewDoc = new PD_Document(getApp());
        UT_ASSERT(pNewDoc);
        
        if (!szFilename || !*szFilename)
diff -ru abi-090800-ORIG/src/wp/ap/xp/ap_Convert.cpp 
abi-090800/src/wp/ap/xp/ap_Convert.cpp
--- abi-090800-ORIG/src/wp/ap/xp/ap_Convert.cpp Wed Mar  8 14:35:15 2000
+++ abi-090800/src/wp/ap/xp/ap_Convert.cpp      Sun Sep 10 10:44:46 2000
@@ -24,11 +24,14 @@
 #include "ie_exp.h"
 #include "ut_types.h"
 
+class XAP_App;
+
 //////////////////////////////////////////////////////////////////
 
-AP_Convert::AP_Convert(void)
+AP_Convert::AP_Convert(XAP_App *pApp)
 {
     m_iVerbose = 1;
+       m_pApp = pApp;
 }
 
 AP_Convert::~AP_Convert(void)
@@ -42,7 +45,7 @@
                                                        const char * szTargetFilename,
                                                        IEFileType targetFormat)
 {
-       PD_Document * pNewDoc = new PD_Document();
+       PD_Document * pNewDoc = new PD_Document(getApp());
        UT_Error error;
        UT_ASSERT(pNewDoc);
 
diff -ru abi-090800-ORIG/src/wp/ap/xp/ap_Convert.h abi-090800/src/wp/ap/xp/ap_Convert.h
--- abi-090800-ORIG/src/wp/ap/xp/ap_Convert.h   Wed Mar  8 14:35:15 2000
+++ abi-090800/src/wp/ap/xp/ap_Convert.h        Sun Sep 10 10:45:16 2000
@@ -28,7 +28,7 @@
 class AP_Convert
 {
  public:
-       AP_Convert(void);
+       AP_Convert(XAP_App *pApp);
        ~AP_Convert(void);
 
     void convertTo(const char * szSourceFilename,
@@ -44,9 +44,11 @@
                                   const char * szTargetFormat);
 
        void setVerbose(int level);
+       XAP_App* getApp() { return m_pApp; }
 
  private:
        int m_iVerbose;
+       XAP_App *m_pApp;
 };
 
 #endif /* AP_CONVERT_H */
diff -ru abi-090800-ORIG/src/wp/ap/xp/ap_Prefs_SchemeIds.h 
abi-090800/src/wp/ap/xp/ap_Prefs_SchemeIds.h
--- abi-090800-ORIG/src/wp/ap/xp/ap_Prefs_SchemeIds.h   Tue Jul 25 14:10:51 2000
+++ abi-090800/src/wp/ap/xp/ap_Prefs_SchemeIds.h        Sun Sep 10 11:14:47 2000
@@ -55,6 +55,12 @@
 #define AP_PREF_KEY_SpellCheckInternet                         "SpellCheckInternet"   
         /* enable spell checking internet names {0,1} */
 #define AP_PREF_DEFAULT_SpellCheckInternet                     "1"
 
+#define AP_PREF_KEY_SpellCheckIgnoredWordsSave         "SpellCheckIgnoredWordsSave"   
+ /* save ignored words list with doc */
+#define AP_PREF_DEFAULT_SpellCheckIgnoredWordsSave     "1"
+
+#define AP_PREF_KEY_SpellCheckIgnoredWordsLoad         "SpellCheckIgnoredWordsLoad"   
+ /* load ignored words list with doc */
+#define AP_PREF_DEFAULT_SpellCheckIgnoredWordsLoad     "1"
+
 #define AP_PREF_KEY_OptionsTabNumber                           "OptionsTabNumber"     
                 /* the page number of the currently shown page in the */
 #define AP_PREF_DEFAULT_OptionsTabNumber                       "0"                    
                                 /* options dialog */
 
@@ -122,6 +128,8 @@
 dcl(SpellCheckCaps)
 dcl(SpellCheckNumbers)
 dcl(SpellCheckInternet)
+dcl(SpellCheckIgnoredWordsSave)
+dcl(SpellCheckIgnoredWordsLoad)
 dcl(OptionsTabNumber)
 dcl(RulerUnits)
 dcl(RulerVisible)
diff -ru abi-090800-ORIG/src/wp/impexp/xp/ie_exp_AbiWord_1.cpp 
abi-090800/src/wp/impexp/xp/ie_exp_AbiWord_1.cpp
--- abi-090800-ORIG/src/wp/impexp/xp/ie_exp_AbiWord_1.cpp       Wed Jul 26 21:15:31 
2000
+++ abi-090800/src/wp/impexp/xp/ie_exp_AbiWord_1.cpp    Sun Sep 10 11:36:46 2000
@@ -21,6 +21,7 @@
 #include "ut_types.h"
 #include "ut_bytebuf.h"
 #include "ut_base64.h"
+#include "ut_debugmsg.h"
 #include "pt_Types.h"
 #include "ie_exp_AbiWord_1.h"
 #include "pd_Document.h"
@@ -30,6 +31,7 @@
 #include "px_CR_Span.h"
 #include "px_CR_Strux.h"
 #include "xap_App.h"
+#include "ap_Prefs.h"
 #include "pd_Style.h"
 
 /*****************************************************************/
@@ -116,6 +118,7 @@
                                                                 UT_Bool 
bNewLineAfter, PT_AttrPropIndex api);
        void                            _outputData(const UT_UCSChar * p, UT_uint32 
length);
        void                            _handleStyles(void);
+       void                            _handleIgnoredWords(void);
        void                            _handleDataItems(void);
        
        PD_Document *           m_pDocument;
@@ -455,6 +458,7 @@
        
        
        _handleStyles();
+       _handleIgnoredWords();
 }
 
 s_AbiWord_1_Listener::~s_AbiWord_1_Listener()
@@ -620,6 +624,61 @@
 
        if (bWroteOpenStyleSection)
                m_pie->write("</styles>\n");
+
+       return;
+}
+
+void s_AbiWord_1_Listener::_handleIgnoredWords(void)
+{
+       // Write the document's ignored-words list as an <ignoredwords> section
+       // of <iw> items, unless SpellCheckIgnoredWordsSave is switched off.
+       UT_ASSERT(m_pDocument);
+       XAP_App *pApp = m_pDocument->getApp();
+       UT_ASSERT(pApp);
+       XAP_Prefs *pPrefs = pApp->getPrefs();
+       UT_ASSERT(pPrefs);
+
+       // Start from the built-in default ("1") so the flag is never read
+       // uninitialized should the preference lookup leave it untouched.
+       UT_Bool saveIgnores = UT_TRUE;
+       pPrefs->getPrefsValueBool((XML_Char *)AP_PREF_KEY_SpellCheckIgnoredWordsSave, &saveIgnores);
+       UT_DEBUGMSG(("Ignored words list %s being saved with document\n", saveIgnores?"is":"is not"));
+       if (!saveIgnores) return;  // don't bother
+       UT_Bool bWroteOpenIgnoredWordsSection = UT_FALSE;
+       const UT_UCSChar *word;
+       for (UT_uint32 i = 0; m_pDocument->enumIgnores(i, &word); i++)
+       {
+               if (!bWroteOpenIgnoredWordsSection)  // open the section on the first word only
+               {
+                       m_pie->write("<ignoredwords>\n");
+                       bWroteOpenIgnoredWordsSection = UT_TRUE;
+               }
+               m_pie->write("<iw>");
+               for (UT_uint32 udex=0; word[udex]; ++udex)
+               {
+                       UT_UCSChar ch = word[udex];
+                       switch (ch)
+                       {
+                       case '&':   m_pie->write("&amp;");  break;
+                       case '<':   m_pie->write("&lt;");  break;
+                       case '>':   m_pie->write("&gt;");  break;
+                       case '"':   m_pie->write("&quot;");  break;
+                       default:
+                               char utb[100];
+                               if (ch < ' ' || ch >= 128)
+                                       sprintf(utb, "&#x%x;", ch);  // control/non-ASCII: hex character reference
+                               else
+                               {
+                                       utb[0] = (char)ch;
+                                       utb[1] = 0;
+                               }
+                               m_pie->write(utb);
+                       }
+               }
+               m_pie->write("</iw>\n");
+       }
+       if (bWroteOpenIgnoredWordsSection)
+               m_pie->write("</ignoredwords>\n");
 
        return;
 }
diff -ru abi-090800-ORIG/src/wp/impexp/xp/ie_imp_AbiWord_1.cpp 
abi-090800/src/wp/impexp/xp/ie_imp_AbiWord_1.cpp
--- abi-090800-ORIG/src/wp/impexp/xp/ie_imp_AbiWord_1.cpp       Wed Jul 26 21:15:31 
2000
+++ abi-090800/src/wp/impexp/xp/ie_imp_AbiWord_1.cpp    Sun Sep 10 11:49:28 2000
@@ -33,6 +33,8 @@
 #include "ie_types.h"
 #include "pd_Document.h"
 #include "ut_bytebuf.h"
+#include "xap_Prefs.h"
+#include "ap_Prefs.h"
 
 /*****************************************************************
 ******************************************************************
@@ -259,6 +261,8 @@
 #define TT_PAGEBREAK   11              // a forced page-break <pbr>
 #define TT_STYLESECTION        12              // a style section <styles>
 #define TT_STYLE               13              // a style <s> within a style section
+#define TT_IGNOREDWORDS 14             // an ignored words section <ignoredwords>
+#define TT_IGNOREDWORD  15      // a word <iw> within an ignored words section
 
 struct _TokenTable
 {
@@ -297,6 +301,8 @@
        {       "pbr",                  TT_PAGEBREAK    },
        {       "styles",               TT_STYLESECTION },
        {       "s",                    TT_STYLE                },
+       {       "ignoredwords", TT_IGNOREDWORDS },
+       {       "iw",                   TT_IGNOREDWORD  },
        {       "*",                    TT_OTHER                }};     // must be last
 
 #define TokenTableSize ((sizeof(s_Tokens)/sizeof(s_Tokens[0])))
@@ -339,6 +345,12 @@
        xxx_UT_DEBUGMSG(("startElement: %s\n", name));
 
        X_EatIfAlreadyError();                          // xml parser keeps running 
until buffer consumed
+
+       UT_ASSERT(m_pDocument);
+       XAP_App *pApp = m_pDocument->getApp();
+       UT_ASSERT(pApp);
+       XAP_Prefs *pPrefs = pApp->getPrefs();
+       UT_ASSERT(pPrefs);
        
        UT_uint32 tokenIndex = s_mapNameToToken(name);
        switch (s_Tokens[tokenIndex].m_type)
@@ -456,6 +468,23 @@
                X_CheckError(m_pDocument->appendStyle(atts));
                return;
                
+       case TT_IGNOREDWORDS:
+               X_VerifyParseState(_PS_Doc);
+               // This caches the preference value.  Our assumption is that the 
+ignored words
+               // list is small with respect to the document size, but nothing forces 
+that.
+               // The scheme is to parse the ignored words list as usual, but if we 
+don't want
+               // it loaded from the file, it just isn't added to the in-memory 
+ignored words
+               // list.  The cached preference value keeps us from looking it up for 
+each word.
+               pPrefs->getPrefsValueBool((XML_Char 
+*)AP_PREF_KEY_SpellCheckIgnoredWordsLoad, &m_bLoadIgnoredWords);
+
+               m_parseState = _PS_IgnoredWordsSec;
+               return;
+
+       case TT_IGNOREDWORD:
+               X_VerifyParseState(_PS_IgnoredWordsSec);
+               m_parseState = _PS_IgnoredWordsItem;
+               return;
+                       
        case TT_OTHER:
        default:
                UT_DEBUGMSG(("Unknown tag [%s]\n",name));
@@ -564,6 +593,16 @@
                m_parseState = _PS_StyleSec;
                return;
                
+       case TT_IGNOREDWORDS:
+               X_VerifyParseState(_PS_IgnoredWordsSec);
+               m_parseState = _PS_Doc;
+               return;
+
+       case TT_IGNOREDWORD:
+               X_VerifyParseState(_PS_IgnoredWordsItem);
+               m_parseState = _PS_IgnoredWordsSec;
+               return;
+
        case TT_OTHER:
        default:
                UT_DEBUGMSG(("Unknown end tag [%s]\n",name));
@@ -591,6 +630,7 @@
                }
                
        case _PS_Block:
+       case _PS_IgnoredWordsItem:
                {
                        UT_ASSERT(sizeof(XML_Char) == sizeof(UT_Byte));
                        UT_ASSERT(sizeof(XML_Char) != sizeof(UT_UCSChar));
@@ -600,19 +640,17 @@
                        //    [] convert CRLF to SP.
                        //    [] convert CR to SP.
                        //    [] convert LF to SP.
+                       // ignored words processing doesn't care about the 
+                       // white-space stuff, but it does no harm
 
                        UT_Byte * ss = (UT_Byte *)s;
-                       UT_UCSChar buf[1024];
+                       UT_UCSChar _buf[1024], *buf = _buf;
+                       // len is an upper bound on the length of the decoded stuff
+                       if (len > 1000) buf = new UT_UCSChar[len+1];
                        int bufLen = 0;
 
                        for (int k=0; k<len; k++)
                        {
-                               if (bufLen == NrElements(buf))          // pump it out 
in chunks
-                               {
-                                       
X_CheckError(m_pDocument->appendSpan(buf,bufLen));
-                                       bufLen = 0;
-                               }
-
                                if ((ss[k] < 0x80) && (m_lenCharDataSeen > 0))
                                {
                                        // is it us-ascii and we are in a UTF-8
@@ -674,10 +712,24 @@
                                }
                        }
 
-                       // flush out the last piece of a buffer
+                       // flush out the buffer
 
                        if (bufLen > 0)
-                               X_CheckError(m_pDocument->appendSpan(buf,bufLen));
+                       {
+                               switch (m_parseState)
+                               {
+                               case _PS_Block:
+                                       
+X_CheckError(m_pDocument->appendSpan(buf,bufLen));
+                                       break;
+                               case _PS_IgnoredWordsItem:
+                                       if (m_bLoadIgnoredWords) 
+X_CheckError(m_pDocument->appendIgnore(buf,bufLen));
+                                       break;
+                               default:
+                                       UT_ASSERT(UT_SHOULD_NOT_HAPPEN);
+                                       break;
+                               }
+                       }
+                       if (buf != _buf) delete buf;
                        return;
                }
 
diff -ru abi-090800-ORIG/src/wp/impexp/xp/ie_imp_AbiWord_1.h 
abi-090800/src/wp/impexp/xp/ie_imp_AbiWord_1.h
--- abi-090800-ORIG/src/wp/impexp/xp/ie_imp_AbiWord_1.h Wed Jul 26 21:15:31 2000
+++ abi-090800/src/wp/impexp/xp/ie_imp_AbiWord_1.h      Sun Sep 10 11:41:42 2000
@@ -82,11 +82,14 @@
                               _PS_DataSec,
                               _PS_DataItem,
                               _PS_StyleSec,
-                              _PS_Style
+                              _PS_Style,
+                              _PS_IgnoredWordsSec,
+                              _PS_IgnoredWordsItem
     } ParseState;
 
     UT_Error                   m_error;
     ParseState                 m_parseState;
+       UT_Bool                         m_bLoadIgnoredWords;
     XML_Char                   m_charDataSeen[4];
     UT_uint32                  m_lenCharDataSeen;
     UT_uint32                  m_lenCharDataExpected;

Reply via email to