Hi,

Now that CVS is back up -- or at least communicating at more than 50 bytes
a second -- I finally got a patch made. The code added allows wv to import
an MS Word 2 document, and output it in legible form. It appears (based
on what I have seen, half a dozen docs from all over the world) that
Word 2 docs have a much simpler structure, with a linear text layout
and no OLE.

Of course nothing is perfect, and this outputs the text only in text form,
without "properties" or even basic formatting such as paragraphs (<p> or
\par). Doing that requires getting the FIB right (does anybody know the 
FIB layout for Word 2?) and understanding what is happening on lines
188-399 of decode_simple.c (not very legible to an amateur :-( )

Any help or info would be appreciated.

As this patch adds functionality that was completely lacking earlier, it 
should IMHO be applied after (necessary) regression testing. I don't
think there are very many Word 2 users any more, but still this may be
good to have.

-- 
Martin Vermeer [EMAIL PROTECTED]
:wq

Index: clx.c
===================================================================
RCS file: /cvsroot/wv/clx.c,v
retrieving revision 1.17
diff -u -r1.17 clx.c
--- clx.c       2000/08/28 04:41:53     1.17
+++ clx.c       2000/09/02 19:40:13
@@ -103,7 +103,7 @@
                        
wvGetPCD_PLCF(&clx->pcd,&clx->pos,&clx->nopcd,wvStream_tell(fd),lcb,fd);
                        j+=lcb;
 
-                       if (ver == WORD7)
+                       if (ver <= WORD7) /* MV 28.8.2000 Appears to be valid */
                                {
 #if 0
                                /* DANGER !!, this is a completely mad attempt to 
differenciate 
@@ -115,18 +115,6 @@
                                /* I think that this is the correct reason for this 
behaviour */
                                if (fExtChar == 0)
 #endif
-                                       for (i=0;i<clx->nopcd;i++)
-                                               {
-                                               clx->pcd[i].fc *= 2;
-                                               clx->pcd[i].fc |= 0x40000000UL;
-                                               }
-                               }
-                       if (ver == WORD6)
-                               {
-                               /* Copy the above ;-) MV 27.8.2000 
-                               Note: worth trying for WORD2 etc. also (?)
-                               */
-                               if (fExtChar == 0)
                                        for (i=0;i<clx->nopcd;i++)
                                                {
                                                clx->pcd[i].fc *= 2;
Index: decode_simple.c
===================================================================
RCS file: /cvsroot/wv/decode_simple.c,v
retrieving revision 1.48
diff -u -r1.48 decode_simple.c
--- decode_simple.c     2000/06/06 00:55:16     1.48
+++ decode_simple.c     2000/09/02 19:40:15
@@ -167,6 +167,20 @@
        beginning at fib.fcMin up to (but not including) fib.fcMac.
        */
 
+       if ( (ver == WORD2) && !ps->fib.fComplex)
+               {
+               wvHandleDocument(ps,DOCBEGIN);
+               wvStream_goto(ps->mainfd,ps->fib.fcMin);
+               for (i = ps->fib.fcMin; i < ps->fib.fcMac; i++)
+                       {
+                       eachchar = wvGetChar(ps->mainfd, 1);
+                       wvOutputTextChar(eachchar, 1, ps, &achp);
+                       /* Formatting still lacking. How? This is just a start. */
+                       }
+               return;
+               }
+
+
 #ifdef DEBUG
        if ( ps->fib.fcMac != wvGetEndFCPiece(ps->clx.nopcd-1,&ps->clx) )
                wvTrace(("fcMac is not the same as the piecetable %x 
%x!\n",ps->fib.fcMac,wvGetEndFCPiece(ps->clx.nopcd-1,&ps->clx)));
Index: fib.c
===================================================================
RCS file: /cvsroot/wv/fib.c,v
retrieving revision 1.25
diff -u -r1.25 fib.c
--- fib.c       2000/06/06 00:55:16     1.25
+++ fib.c       2000/09/02 19:40:17
@@ -275,6 +275,14 @@
        item->wIdent = read_16ubit(fd);
        item->nFib = read_16ubit(fd);
 
+       if ( (wvQuerySupported(item,NULL) == WORD2) )
+               {
+               wvInitFIB(item);
+               wvStream_offset(fd,-4);
+               wvGetFIB2(item,fd);
+               return;
+               }
+
        if ( (wvQuerySupported(item,NULL) == WORD5) || (wvQuerySupported(item,NULL) == 
WORD6) || (wvQuerySupported(item,NULL) == WORD7) )
                {
                wvInitFIB(item);
@@ -618,6 +626,126 @@
     return(ret);
     }
 
+void wvGetFIB2(FIB *item,wvStream *fd)
+       {
+       U16 temp16;
+       U8 temp8;
+
+       item->wIdent = read_16ubit(fd);
+       item->nFib = read_16ubit(fd);
+
+       item->nProduct = read_16ubit(fd);
+       item->lid = read_16ubit(fd);
+       wvTrace(("lid is %x\n",item->lid));
+       item->pnNext = (S16)read_16ubit(fd);
+       temp16 = read_16ubit(fd);
+
+       item->fDot = (temp16 & 0x0001);
+       item->fGlsy = (temp16 & 0x0002) >> 1;
+       item->fComplex = (temp16 & 0x0004) >> 2;
+       item->fHasPic = (temp16 & 0x0008) >> 3;
+       item->cQuickSaves = (temp16 & 0x00F0) >> 4;
+       item->fEncrypted = (temp16 & 0x0100) >> 8;
+       item->fWhichTblStm = 0;         /* word 6 files only have one table stream*/
+       item->fReadOnlyRecommended = (temp16 & 0x0400) >> 10;
+       item->fWriteReservation = (temp16 & 0x0800) >> 11;
+       item->fExtChar = (temp16 & 0x1000) >> 12;
+       wvTrace(("fExtChar is %d\n",item->fExtChar));
+       item->fLoadOverride = 0;
+       item->fFarEast = 0;
+       item->fCrypto = 0;
+       item->nFibBack = read_16ubit(fd);
+       wvTrace(("nFibBack is %d\n",item->nFibBack));
+
+       item->lKey = read_32ubit(fd);
+       item->envr = read_8ubit(fd);
+       temp8 = read_8ubit(fd);
+       item->fMac = 0;
+       item->fEmptySpecial = 0;
+       item->fLoadOverridePage = 0;
+       item->fFutureSavedUndo = 0;
+       item->fWord97Saved = 0;
+       item->fSpare0 = 0;
+       item->chse = read_16ubit(fd);
+       item->chsTables = read_16ubit(fd);
+       item->fcMin = read_32ubit(fd);  /* These appear correct MV 29.8.2000 */
+       item->fcMac = read_32ubit(fd);
+       wvTrace(("fc from %o to %o", item->fcMin, item->fcMac));
+
+       item->csw = 14;
+       item->wMagicCreated = 0xCA0;    /*this is the unique id of the creater, so its 
+me :-)*/
+       /* Dunno what more there is here... if nFib = 45 (?) Try anyway. */
+
+       item->cbMac = read_32ubit(fd);
+
+       read_16ubit(fd);
+       read_16ubit(fd);
+       read_16ubit(fd);
+       read_16ubit(fd);
+       read_16ubit(fd);
+       read_16ubit(fd);
+       read_16ubit(fd);
+       read_16ubit(fd);
+
+       item->ccpText = read_32ubit(fd);
+       wvTrace(("length %d == %d", item->fcMac - item->fcMin, item->ccpText));
+       item->ccpFtn = (S32)read_32ubit(fd);
+       item->ccpHdr = (S32)read_32ubit(fd);
+       item->ccpMcr = (S32)read_32ubit(fd);
+       item->ccpAtn = (S32)read_32ubit(fd);
+       item->ccpEdn = (S32)read_32ubit(fd);
+       item->ccpTxbx = (S32)read_32ubit(fd);
+       item->ccpHdrTxbx = (S32)read_32ubit(fd);
+
+       read_32ubit(fd);
+
+       item->fcStshfOrig = (S32)read_32ubit(fd);
+       item->lcbStshfOrig = read_32ubit(fd);
+       item->fcStshf = (S32)read_32ubit(fd);
+       item->lcbStshf = read_32ubit(fd);
+       item->fcPlcffndRef = (S32)read_32ubit(fd);
+       item->lcbPlcffndRef = read_32ubit(fd);
+       item->fcPlcffndTxt = (S32)read_32ubit(fd);
+       item->lcbPlcffndTxt = read_32ubit(fd);
+       item->fcPlcfandRef = (S32)read_32ubit(fd);
+       item->lcbPlcfandRef = read_32ubit(fd);
+       item->fcPlcfandTxt = (S32)read_32ubit(fd);
+       item->lcbPlcfandTxt = read_32ubit(fd);
+       item->fcPlcfsed = (S32)read_32ubit(fd);
+       item->lcbPlcfsed = read_32ubit(fd);
+       item->fcPlcpad = (S32)read_32ubit(fd);
+       item->lcbPlcpad = read_32ubit(fd);
+       item->fcPlcfphe = (S32)read_32ubit(fd);
+       item->lcbPlcfphe = read_32ubit(fd);
+       item->fcSttbfglsy = (S32)read_32ubit(fd);
+       item->lcbSttbfglsy = read_32ubit(fd);
+       item->fcPlcfglsy = (S32)read_32ubit(fd);
+       item->lcbPlcfglsy = read_32ubit(fd);
+       item->fcPlcfhdd = (S32)read_32ubit(fd);
+       item->lcbPlcfhdd = read_32ubit(fd);
+       item->fcPlcfbteChpx = (S32)read_32ubit(fd);
+       item->lcbPlcfbteChpx = read_32ubit(fd);
+       item->fcPlcfbtePapx = (S32)read_32ubit(fd);
+       item->lcbPlcfbtePapx = read_32ubit(fd);
+       item->fcPlcfsea = (S32)read_32ubit(fd);
+       item->lcbPlcfsea = read_32ubit(fd);
+       item->fcSttbfffn = (S32)read_32ubit(fd);
+       item->lcbSttbfffn = read_32ubit(fd);
+       item->fcPlcffldMom = (S32)read_32ubit(fd);
+       item->lcbPlcffldMom = read_32ubit(fd);
+       item->fcPlcffldHdr = (S32)read_32ubit(fd);
+       item->lcbPlcffldHdr = read_32ubit(fd);
+       item->fcPlcffldFtn = (S32)read_32ubit(fd);
+       item->lcbPlcffldFtn = read_32ubit(fd);
+       item->fcPlcffldAtn = (S32)read_32ubit(fd);
+       item->lcbPlcffldAtn = read_32ubit(fd);
+       item->fcPlcffldMcr = (S32)read_32ubit(fd);
+       item->lcbPlcffldMcr = read_32ubit(fd);
+       item->fcSttbfbkmk = (S32)read_32ubit(fd);
+
+       /* item->lcbSttbfbkmk = read_32ubit(fd); This one is too much. */
+
+       }
 
 void wvGetFIB6(FIB *item,wvStream *fd)
        {
Index: wv.h
===================================================================
RCS file: /cvsroot/wv/wv.h,v
retrieving revision 1.66
diff -u -r1.66 wv.h
--- wv.h        2000/08/02 23:27:12     1.66
+++ wv.h        2000/09/02 19:40:38
@@ -468,6 +468,7 @@
        } FIB;
 
 void wvGetFIB(FIB *item,wvStream *fd);
+void wvGetFIB2(FIB *item,wvStream *fd);
 void wvGetFIB6(FIB *item,wvStream *fd);
 void wvInitFIB(FIB *item);
 
Index: wvHtml.c
===================================================================
RCS file: /cvsroot/wv/wvHtml.c,v
retrieving revision 1.45
diff -u -r1.45 wvHtml.c
--- wvHtml.c    2000/08/28 04:41:53     1.45
+++ wvHtml.c    2000/09/02 19:40:40
@@ -251,7 +251,7 @@
        ps.filename = argv[optind];
        ps.dir = dir;
 
-       if (ret & 0x8000)
+       if (ret & 0x8000)  /* Password protected? */
                {
                if ( (ret & 0x7fff) == WORD8)
                        {
Index: wvparse.c
===================================================================
RCS file: /cvsroot/wv/wvparse.c,v
retrieving revision 1.26
diff -u -r1.26 wvparse.c
--- wvparse.c   2000/06/06 00:55:16     1.26
+++ wvparse.c   2000/09/02 19:40:40
@@ -62,7 +62,8 @@
        if ( (ret&0x7fff) != WORD8 )
                ps->data = ps->mainfd;
 
-       if ( (ret != WORD8) && (ret != WORD7) && (ret!= WORD6) )
+       if ( (ret != WORD8) && (ret != WORD7) && (ret!= WORD6) && (ret!= WORD2) ) 
+                       /* WORD2 test */
                {
                /* return the errors and the encrypted files*/
                if (!(ret & 0x8000))
@@ -119,7 +120,8 @@
                {
                wvError(("Theres a good chance that this is a word 2 doc of nFib 
%d\n",read_16ubit(*mainfd)));
                wvStream_rewind(*mainfd);
-               return(-1);
+//             return(-1);
+               return(0);
                }
        else if (0x37fe == magic)
                {
Index: oledecod/oledecod.c
===================================================================
RCS file: /cvsroot/wv/oledecod/oledecod.c,v
retrieving revision 1.8
diff -u -r1.8 oledecod.c
--- oledecod/oledecod.c 2000/03/30 09:01:43     1.8
+++ oledecod/oledecod.c 2000/09/02 19:40:44
@@ -94,13 +94,14 @@
   root_list = sbd_list = NULL;
 
   /* open input file */
-  test (input != NULL, 4, ends ());
+  test (input != NULL, 4, ends ()); /* cannot be opened */
 
   /* fast check type of file */
   verbose ("fast testing type of file");
   test ((c = getc (input)) != EOF, 5, ends ());
   test (ungetc (c, input) != EOF, 5, ends ());
-  test ( (c < 32 || c > 126) , 8, ends ());
+  test ( (c < 32 || c > 126) , 8, ends ());  /* We have a legible character, not good 
+*/
+  test (c != 0xdb, 2, ends ());  /* probably non-ole Word 2 file */
   test (c == 0xd0, 9, ends ());
 
   /* read header block */

Reply via email to