src/docrecord.py | 27 +++++++++++++++++++++++++-- src/docstream.py | 23 +++++++++++++++++++---- test/doc/bookmark.doc |binary test/doc/bookmark.rtf | 7 +++++++ test/doc/test.py | 11 +++++++++++ 5 files changed, 62 insertions(+), 6 deletions(-)
New commits: commit 8f7af80bf491794f9b1781157cceb25783227741 Author: Miklos Vajna <vmik...@suse.cz> Date: Sat Jan 19 14:25:33 2013 +0100 doc: dump bookmarks diff --git a/src/docrecord.py b/src/docrecord.py index f277196..36b29c3 100644 --- a/src/docrecord.py +++ b/src/docrecord.py @@ -202,11 +202,12 @@ class PlcFld(DOCDirStream, PLC): class PlcfBkl(DOCDirStream, PLC): """The Plcfbkl structure is a PLC that contains only CPs and no additional data.""" - def __init__(self, mainStream, offset, size): + def __init__(self, mainStream, offset, size, start): DOCDirStream.__init__(self, mainStream.doc.getDirectoryStreamByName("1Table").bytes, mainStream = mainStream) PLC.__init__(self, size, 0) # 0 is defined by 2.8.12 self.pos = offset self.size = size + self.start = start def dump(self): print '<plcfBkl type="PlcfBkl" offset="%d" size="%d bytes">' % (self.pos, self.size) @@ -216,7 +217,7 @@ class PlcfBkl(DOCDirStream, PLC): # aCp end = offset + self.getuInt32(pos = pos) print '<aCP index="%d" bookmarkEnd="%d">' % (i, end) - start = self.mainStream.plcfAtnBkf.aCP[i] + start = self.start.aCP[i] print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveText(start, end)) pos += 4 print '</aCP>' @@ -2609,4 +2610,26 @@ class SttbSavedBy(DOCDirStream): assert self.pos == self.mainStream.fcSttbSavedBy + self.size print '</sttbSavedBy>' +class SttbfBkmk(DOCDirStream): + """The SttbfBkmk structure is an STTB structure whose strings specify the names of bookmarks.""" + def __init__(self, mainStream): + DOCDirStream.__init__(self, mainStream.doc.getDirectoryStreamByName("1Table").bytes) + self.pos = mainStream.fcSttbfBkmk + self.size = mainStream.lcbSttbfBkmk + self.mainStream = mainStream + + def dump(self): + print '<sttbfBkmk type="SttbfBkmk" offset="%d" size="%d bytes">' % (self.pos, self.size) + self.printAndSet("fExtended", self.readuInt16()) + self.printAndSet("cData", self.readuInt16()) + self.printAndSet("cbExtra", self.readuInt16()) + for i in range(self.cData): + cchData = self.readuInt16() + print '<cchData index="%d" offset="%d" size="%d bytes">' % (i, self.pos, cchData) + print '<string value="%s"/>' % globals.encodeName(self.bytes[self.pos:self.pos+2*cchData].decode('utf-16'), lowOnly = True) + self.pos += 2*cchData + print '</cchData>' + assert self.pos == self.mainStream.fcSttbfBkmk + self.size + print '</sttbfBkmk>' + # vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab: diff --git a/src/docstream.py b/src/docstream.py index 7633ccc..d9003aa 100644 --- a/src/docstream.py +++ b/src/docstream.py @@ -253,11 +253,11 @@ class WordDocumentStream(DOCDirStream): ["fcPlcfFldMcr"], ["lcbPlcfFldMcr"], ["fcSttbfBkmk"], - ["lcbSttbfBkmk"], + ["lcbSttbfBkmk", self.handleLcbSttbfBkmk], ["fcPlcfBkf"], - ["lcbPlcfBkf"], + ["lcbPlcfBkf", self.handleLcbPlcfBkf], ["fcPlcfBkl"], - ["lcbPlcfBkl"], + ["lcbPlcfBkl", self.handleLcbPlcfBkl], ["fcCmds"], ["lcbCmds", self.handleLcbCmds], ["fcUnused1"], @@ -489,7 +489,19 @@ class WordDocumentStream(DOCDirStream): def handleLcbPlcfAtnBkl(self): offset = self.fcPlcfAtnBkl size = self.lcbPlcfAtnBkl - plcfBkl = docrecord.PlcfBkl(self, offset, size) + plcfBkl = docrecord.PlcfBkl(self, offset, size, start = self.plcfAtnBkf) + plcfBkl.dump() + + def handleLcbPlcfBkf(self): + offset = self.fcPlcfBkf + size = self.lcbPlcfBkf + self.plcfBkf = docrecord.PlcfBkf(self, offset, size) + self.plcfBkf.dump() + + def handleLcbPlcfBkl(self): + offset = self.fcPlcfBkl + size = self.lcbPlcfBkl + plcfBkl = docrecord.PlcfBkl(self, offset, size, start = self.plcfBkf) plcfBkl.dump() def handleLcbPlcfSed(self): @@ -537,6 +549,9 @@ class WordDocumentStream(DOCDirStream): def handleLcbSttbListNames(self): docrecord.SttbListNames(self).dump() + def handleLcbSttbfBkmk(self): + docrecord.SttbfBkmk(self).dump() + def dumpFibRgFcLcb97(self, name): print '<%s type="FibRgFcLcb97" size="744 bytes">' % name self.__dumpFibRgFcLcb97() diff --git a/test/doc/bookmark.doc b/test/doc/bookmark.doc new file mode 100755 index 0000000..6d32ff6 Binary files /dev/null and b/test/doc/bookmark.doc differ diff --git a/test/doc/bookmark.rtf b/test/doc/bookmark.rtf new file mode 100644 index 0000000..05b3d42 --- /dev/null +++ b/test/doc/bookmark.rtf @@ -0,0 +1,7 @@ +{\rtf1 +{\*\bkmkstart firstword} +Hello +{\*\bkmkend firstword} + world! +\par +} diff --git a/test/doc/test.py b/test/doc/test.py index 6936f1a..d02f680 100755 --- a/test/doc/test.py +++ b/test/doc/test.py @@ -118,6 +118,17 @@ class Test(unittest.TestCase): self.assertEqual(1, len(authors)) self.assertEqual('vmiklos', authors[0].attrib['value']) + def test_bookmark(self): + self.dump('bookmark') + + bookmarkStarts = self.root.findall('stream[@name="WordDocument"]/fib/fibRgFcLcbBlob/lcbPlcfBkf/plcfBkf/aCP') + bookmarkEnds = self.root.findall('stream[@name="WordDocument"]/fib/fibRgFcLcbBlob/lcbPlcfBkl/plcfBkl/aCP') + self.assertEqual(1, len(bookmarkStarts)) + self.assertEqual(1, len(bookmarkEnds)) + + # The bookmark covers Hello + self.assertEqual('Hello', bookmarkEnds[0].findall('transformed')[0].attrib['value']) + def test_zoom(self): self.dump('zoom') dopBase = self.root.findall('stream[@name="WordDocument"]/fib/fibRgFcLcbBlob/lcbDop/dop/dop2007/dop2003/dop2002/dop2000/dop97/dop95/dopBase')[0] _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits