[Libreoffice-commits] .: src/docrecord.py src/docstream.py test/doc
src/docrecord.py | 27 +-- src/docstream.py | 23 +++ test/doc/bookmark.doc |binary test/doc/bookmark.rtf |7 +++ test/doc/test.py | 11 +++ 5 files changed, 62 insertions(+), 6 deletions(-) New commits: commit 8f7af80bf491794f9b1781157cceb25783227741 Author: Miklos Vajna Date: Sat Jan 19 14:25:33 2013 +0100 doc: dump bookmarks diff --git a/src/docrecord.py b/src/docrecord.py index f277196..36b29c3 100644 --- a/src/docrecord.py +++ b/src/docrecord.py @@ -202,11 +202,12 @@ class PlcFld(DOCDirStream, PLC): class PlcfBkl(DOCDirStream, PLC): """The Plcfbkl structure is a PLC that contains only CPs and no additional data.""" -def __init__(self, mainStream, offset, size): +def __init__(self, mainStream, offset, size, start): DOCDirStream.__init__(self, mainStream.doc.getDirectoryStreamByName("1Table").bytes, mainStream = mainStream) PLC.__init__(self, size, 0) # 0 is defined by 2.8.12 self.pos = offset self.size = size +self.start = start def dump(self): print '' % (self.pos, self.size) @@ -216,7 +217,7 @@ class PlcfBkl(DOCDirStream, PLC): # aCp end = offset + self.getuInt32(pos = pos) print '' % (i, end) -start = self.mainStream.plcfAtnBkf.aCP[i] +start = self.start.aCP[i] print '' % self.quoteAttr(self.mainStream.retrieveText(start, end)) pos += 4 print '' @@ -2609,4 +2610,26 @@ class SttbSavedBy(DOCDirStream): assert self.pos == self.mainStream.fcSttbSavedBy + self.size print '' +class SttbfBkmk(DOCDirStream): +"""The SttbfBkmk structure is an STTB structure whose strings specify the names of bookmarks.""" +def __init__(self, mainStream): +DOCDirStream.__init__(self, mainStream.doc.getDirectoryStreamByName("1Table").bytes) +self.pos = mainStream.fcSttbfBkmk +self.size = mainStream.lcbSttbfBkmk +self.mainStream = mainStream + +def dump(self): +print '' % (self.pos, self.size) +self.printAndSet("fExtended", self.readuInt16()) +self.printAndSet("cData", self.readuInt16()) +self.printAndSet("cbExtra", self.readuInt16()) +for i in range(self.cData): +cchData = self.readuInt16() +print '' % (i, self.pos, cchData) +print '' % globals.encodeName(self.bytes[self.pos:self.pos+2*cchData].decode('utf-16'), lowOnly = True) +self.pos += 2*cchData +print '' +assert self.pos == self.mainStream.fcSttbfBkmk + self.size +print '' + # vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab: diff --git a/src/docstream.py b/src/docstream.py index 7633ccc..d9003aa 100644 --- a/src/docstream.py +++ b/src/docstream.py @@ -253,11 +253,11 @@ class WordDocumentStream(DOCDirStream): ["fcPlcfFldMcr"], ["lcbPlcfFldMcr"], ["fcSttbfBkmk"], -["lcbSttbfBkmk"], +["lcbSttbfBkmk", self.handleLcbSttbfBkmk], ["fcPlcfBkf"], -["lcbPlcfBkf"], +["lcbPlcfBkf", self.handleLcbPlcfBkf], ["fcPlcfBkl"], -["lcbPlcfBkl"], +["lcbPlcfBkl", self.handleLcbPlcfBkl], ["fcCmds"], ["lcbCmds", self.handleLcbCmds], ["fcUnused1"], @@ -489,7 +489,19 @@ class WordDocumentStream(DOCDirStream): def handleLcbPlcfAtnBkl(self): offset = self.fcPlcfAtnBkl size = self.lcbPlcfAtnBkl -plcfBkl = docrecord.PlcfBkl(self, offset, size) +plcfBkl = docrecord.PlcfBkl(self, offset, size, start = self.plcfAtnBkf) +plcfBkl.dump() + +def handleLcbPlcfBkf(self): +offset = self.fcPlcfBkf +size = self.lcbPlcfBkf +self.plcfBkf = docrecord.PlcfBkf(self, offset, size) +self.plcfBkf.dump() + +def handleLcbPlcfBkl(self): +offset = self.fcPlcfBkl +size = self.lcbPlcfBkl +plcfBkl = docrecord.PlcfBkl(self, offset, size, start = self.plcfBkf) plcfBkl.dump() def handleLcbPlcfSed(self): @@ -537,6 +549,9 @@ class WordDocumentStream(DOCDirStream): def handleLcbSttbListNames(self): docrecord.SttbListNames(self).dump() +def handleLcbSttbfBkmk(self): +docrecord.SttbfBkmk(self).dump() + def dumpFibRgFcLcb97(self, name): print '<%s type="FibRgFcLcb97" size="744 bytes">' % name self.__dumpFibRgFcLcb97() diff --git a/test/doc/bookmark.doc b/test/doc/bookmark.doc new file mode 100755 index 000..6d32ff6 Binary files /dev/null and b/test/doc/bookmark.doc differ diff --git a/test/doc/bookmark.rtf b/test/doc/bookmark.rtf new file mode 100644 index 000..05b3d42 --- /dev/null +++ b/test/doc/bookmark.rtf @@ -0,0 +1,7 @@ +{\rtf1 +{\*\bkmkstart firstword} +Hello +{\*\bkmkend firstword} + world! +\par +} diff -
[Libreoffice-commits] .: src/docrecord.py src/docstream.py test/doc
src/docrecord.py | 67 +- src/docstream.py |5 +++- test/doc/list.doc |binary test/doc/list.rtf | 49 +++ test/doc/test.py |6 5 files changed, 120 insertions(+), 7 deletions(-) New commits: commit abd802bff3247df6fb1c87ed41fce45ba01a9a95 Author: Miklos Vajna Date: Fri Jan 11 12:19:28 2013 +0100 dump PlfLfo diff --git a/src/docrecord.py b/src/docrecord.py index 6e69156..ef87891 100644 --- a/src/docrecord.py +++ b/src/docrecord.py @@ -2393,13 +2393,14 @@ class PlcfGram(DOCDirStream, PLC): class LSTF(DOCDirStream): """The LSTF structure contains formatting properties that apply to an entire list.""" -def __init__(self, plfLst): +def __init__(self, plfLst, index): DOCDirStream.__init__(self, plfLst.bytes) self.pos = plfLst.pos self.size = 28 +self.index = index def dump(self): -print '' % (self.pos, self.size) +print '' % (self.index, self.pos, self.size) self.printAndSet("lsid", self.readInt32()) self.printAndSet("tplc", self.readInt32()) for i in range(9): @@ -2445,12 +2446,13 @@ class LVLF(DOCDirStream): class LVL(DOCDirStream): """The LVL structure contains formatting information about a specific level in a list.""" -def __init__(self, plfLst): +def __init__(self, plfLst, index): DOCDirStream.__init__(self, plfLst.bytes) self.pos = plfLst.pos +self.index = index def dump(self): -print '' % self.pos +print '' % (self.index, self.pos) lvlf = LVLF(self) lvlf.dump() self.pos = lvlf.pos @@ -2489,7 +2491,7 @@ class PlfLst(DOCDirStream): self.printAndSet("cLst", self.readInt16()) cLvl = 0 for i in range(self.cLst): -rgLstf = LSTF(self) +rgLstf = LSTF(self, i) rgLstf.dump() if rgLstf.fSimpleList: cLvl += 1 @@ -2497,9 +2499,62 @@ class PlfLst(DOCDirStream): cLvl += 9 self.pos = rgLstf.pos for i in range(cLvl): -lvl = LVL(self) +lvl = LVL(self, i) lvl.dump() self.pos = lvl.pos print '' +class LFO(DOCDirStream): +"""The LFO structure specifies the LSTF element that corresponds to a list that contains a paragraph.""" +def __init__(self, plfLfo): +DOCDirStream.__init__(self, plfLfo.bytes) +self.pos = plfLfo.pos + +def dump(self): +print '' % self.pos +self.printAndSet("lsid", self.readInt32()) +self.printAndSet("unused1", self.readuInt32()) +self.printAndSet("unused2", self.readuInt32()) +self.printAndSet("clfolvl", self.readuInt8()) +self.printAndSet("ibstFltAutoNum", self.readuInt8()) +self.printAndSet("grfhic", self.readuInt8()) # TODO dump grfhic +self.printAndSet("unused3", self.readuInt8()) +print '' + +class LFOData(DOCDirStream): +"""The LFOData structure contains the Main Document CP of the corresponding LFO.""" +def __init__(self, plfLfo, lfo): +DOCDirStream.__init__(self, plfLfo.bytes) +self.pos = plfLfo.pos +self.lfo = lfo + +def dump(self): +print '' % self.pos +self.printAndSet("cp", self.readuInt32()) +if self.lfo.clfolvl > 0: +print '' +print '' + +class PlfLfo(DOCDirStream): +"""The PlfLfo structure contains the list format override data for the document.""" +def __init__(self, mainStream): +DOCDirStream.__init__(self, mainStream.doc.getDirectoryStreamByName("1Table").bytes, mainStream = mainStream) +self.pos = mainStream.fcPlfLfo +self.size = mainStream.lcbPlfLfo + +def dump(self): +print '' % (self.pos, self.size) +self.printAndSet("lfoMac", self.readInt32()) +lfos = [] +for i in range(self.lfoMac): +lfo = LFO(self) +lfos.append(lfo) +lfo.dump() +self.pos = lfo.pos +for i in range(self.lfoMac): +lfoData = LFOData(self, lfos[i]) +lfoData.dump() +self.pos = lfoData.pos +print '' + # vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab: diff --git a/src/docstream.py b/src/docstream.py index ad46e1a..648fe81 100644 --- a/src/docstream.py +++ b/src/docstream.py @@ -352,7 +352,7 @@ class WordDocumentStream(DOCDirStream): ["fcPlfLst"], ["lcbPlfLst", self.handleLcbPlfLst], ["fcPlfLfo"], -["lcbPlfLfo"], +["lcbPlfLfo", self.handleLcbPlfLfo], ["fcPlcfTxbxBkd"], ["lcbPlcfTxbxBkd"], ["fcPlcfTxbxHdrBkd"], @@ -521,6 +521,9 @@ class WordDocumentStream(DOCDirStream): def handleLcbPlfLst(self): docrecord.PlfLst(self).dump() +def handleLcbP
[Libreoffice-commits] .: src/docrecord.py src/docstream.py test/doc
src/docrecord.py | 31 ++- src/docstream.py |4 ++-- test/doc/sections.doc |binary test/doc/test.py |7 +++ 4 files changed, 23 insertions(+), 19 deletions(-) New commits: commit f922c24a1c0e7bf2f7e330503eb37f2d99f39b75 Author: Miklos Vajna Date: Wed Jan 9 17:54:07 2013 +0100 PlcfSed: fix dumping section text diff --git a/src/docrecord.py b/src/docrecord.py index 9e30066..cae0153 100644 --- a/src/docrecord.py +++ b/src/docrecord.py @@ -30,24 +30,21 @@ class FcCompressed(DOCDirStream): self.printAndSet("r1", self.r1) print '' -def getTransformedValue(self, start, end, logical = True): +def getTransformedValue(self, start, end, logicalPositions = True, logicalLength = True): +offset = self.fc if self.fCompressed: offset = self.fc/2 -if logical: -fro = offset -to = offset+end-start -else: -fro = start -to = end +if logicalPositions: +fro = offset + start +to = offset + end +else: +fro = start +to = end +if self.fCompressed: return globals.encodeName(self.mainStream.bytes[fro:to]) else: -if logical: -offset = self.fc -fro = offset -to = offset + (end - start) * 2 -else: -fro = start -to = end +if logicalLength: +to += (to - fro) return globals.encodeName(self.mainStream.bytes[fro:to].decode('utf-16'), lowOnly = True) class Pcd(DOCDirStream): @@ -301,7 +298,7 @@ class PlcfSed(DOCDirStream, PLC): self.size = size def dump(self): -print '' % (self.pos, self.size) +print '' % (self.pos, self.size) offset = self.mainStream.fcMin pos = self.pos for i in range(self.getElements()): @@ -315,9 +312,9 @@ class PlcfSed(DOCDirStream, PLC): aSed = Sed(self, self.getOffset(self.pos, i)) aSed.dump() -print '' % self.quoteAttr(self.mainStream.retrieveText(start, end, logical = True)) +print '' % self.quoteAttr(self.mainStream.retrieveText(offset + start, offset + end, logicalLength = True)) print '' -print '' +print '' class Tcg(DOCDirStream): """The Tcg structure specifies command-related customizations.""" diff --git a/src/docstream.py b/src/docstream.py index a0aba5e..6ec39f5 100644 --- a/src/docstream.py +++ b/src/docstream.py @@ -629,9 +629,9 @@ class WordDocumentStream(DOCDirStream): index = i return index -def retrieveText(self, start, end, logical = False): +def retrieveText(self, start, end, logicalLength = False): plcPcd = self.clx.pcdt.plcPcd idx = self.__findText(plcPcd, start) -return plcPcd.aPcd[idx].fc.getTransformedValue(start, end, logical = logical) +return plcPcd.aPcd[idx].fc.getTransformedValue(start, end, logicalPositions = False, logicalLength = logicalLength) # vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab: diff --git a/test/doc/sections.doc b/test/doc/sections.doc new file mode 100644 index 000..9e26fb4 Binary files /dev/null and b/test/doc/sections.doc differ diff --git a/test/doc/test.py b/test/doc/test.py index c2b955c..0088e41 100755 --- a/test/doc/test.py +++ b/test/doc/test.py @@ -146,6 +146,13 @@ class Test(unittest.TestCase): result = self.root.findall('stream[@name="WordDocument"]/fib/fibRgFcLcbBlob/lcbPlcfFldMom/plcFld/aCP[@index="2"]/transformed') self.assertEqual('1', result[0].attrib['value']) +def test_sections(self): +self.dump('sections') + +sections = self.root.findall('stream[@name="WordDocument"]/fib/fibRgFcLcbBlob/lcbPlcfSed/plcfSed/') +self.assertEqual('Before section 1.\\x0D\\x0D\\x0C', sections[0].findall('transformed')[0].attrib['value']) +self.assertEqual("\\x0DThe next one isn't a balanced one:\\x0D\\x0D\\x0C", sections[2].findall('transformed')[0].attrib['value']) + if __name__ == '__main__': unittest.main() ___ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits
[Libreoffice-commits] .: src/docrecord.py src/docstream.py test/doc
src/docrecord.py | 61 + src/docstream.py |5 +++- test/doc/field.doc |binary test/doc/field.rtf |8 ++ test/doc/test.py |9 +++ 5 files changed, 82 insertions(+), 1 deletion(-) New commits: commit 134ed95ebe314473ad094f0ee0cf1dbad49e Author: Miklos Vajna Date: Wed Jan 9 11:38:36 2013 +0100 dump PlcFld diff --git a/src/docrecord.py b/src/docrecord.py index 0aa271d..9e30066 100644 --- a/src/docrecord.py +++ b/src/docrecord.py @@ -142,6 +142,67 @@ class PlcfBkf(DOCDirStream, PLC): print '' print '' +class Fldch(DOCDirStream): +"""The fldch structure determines the type of the field character.""" +def __init__(self, parent): +DOCDirStream.__init__(self, parent.bytes) +self.pos = parent.pos +self.parent = parent + +def dump(self): +print '' % self.pos +buf = self.readuInt8() +self.printAndSet("ch", buf & 0x1f) # 1..5th bits +self.printAndSet("reserved", (buf & 0xe0) >> 5) # 6..8th bits +print '' +self.parent.pos = self.pos + +class Fld(DOCDirStream): +"""The Fld structure specifies a field character.""" +def __init__(self, parent, offset): +DOCDirStream.__init__(self, parent.bytes) +self.pos = offset + +def dump(self): +print '' % self.pos +self.fldch = Fldch(self) +self.fldch.dump() +self.printAndSet("grffld", self.readuInt8()) # TODO parse flt and grffldEnd +print '' + +class PlcFld(DOCDirStream, PLC): +"""The Plcfld structure specifies the location of fields in the document.""" +def __init__(self, mainStream): +DOCDirStream.__init__(self, mainStream.doc.getDirectoryStreamByName("1Table").bytes, mainStream = mainStream) +PLC.__init__(self, mainStream.lcbPlcfFldMom, 2) # 2 is defined by 2.8.25 +self.pos = mainStream.fcPlcfFldMom +self.size = mainStream.lcbPlcfFldMom + +def dump(self): +print '' % (self.pos, self.size) +offset = self.mainStream.fcMin # 2.8.25: CPs relative to the start of that document part. +pos = self.pos +aFlds = [] +for i in range(self.getElements()): +# aCp +value = self.getuInt32(pos = pos) +print '' % (i, value) +pos += 4 + +# aFld +aFld = Fld(self, self.getOffset(self.pos, i)) +aFld.dump() + +# This is a separator and the previous was a start: display the field instructions. +if aFld.fldch.ch == 0x14 and aFlds[-1][1].fldch.ch == 0x13: +print '' % self.quoteAttr(self.mainStream.retrieveText(offset + aFlds[-1][0] + 1, offset + value)) +# This is an end and the previous was a separator: display the field result. +elif aFld.fldch.ch == 0x15 and aFlds[-1][1].fldch.ch == 0x14: +print '' % self.quoteAttr(self.mainStream.retrieveText(offset + aFlds[-1][0] + 1, offset + value)) +aFlds.append((value, aFld)) +print '' +print '' + class PlcfBkl(DOCDirStream, PLC): """The Plcfbkl structure is a PLC that contains only CPs and no additional data.""" def __init__(self, mainStream, offset, size): diff --git a/src/docstream.py b/src/docstream.py index 167fb9c..a0aba5e 100644 --- a/src/docstream.py +++ b/src/docstream.py @@ -236,7 +236,7 @@ class WordDocumentStream(DOCDirStream): ["fcSttbfFfn"], ["lcbSttbfFfn", self.handleLcbSttbfFfn], ["fcPlcfFldMom"], -["lcbPlcfFldMom"], +["lcbPlcfFldMom", self.handleLcbPlcfFldMom], ["fcPlcfFldHdr"], ["lcbPlcfFldHdr"], ["fcPlcfFldFtn"], @@ -509,6 +509,9 @@ class WordDocumentStream(DOCDirStream): def handleLcbDggInfo(self): docrecord.OfficeArtContent(self).dump() +def handleLcbPlcfFldMom(self): +docrecord.PlcFld(self).dump() + def dumpFibRgFcLcb97(self, name): print '<%s type="FibRgFcLcb97" size="744 bytes">' % name self.__dumpFibRgFcLcb97() diff --git a/test/doc/field.doc b/test/doc/field.doc new file mode 100644 index 000..247f024 Binary files /dev/null and b/test/doc/field.doc differ diff --git a/test/doc/field.rtf b/test/doc/field.rtf new file mode 100644 index 000..48e39d1 --- /dev/null +++ b/test/doc/field.rtf @@ -0,0 +1,8 @@ +{\rtf1 +Page number: +{\field +{\*\fldinst PAGE } +{\fldrslt 1} +} +\par +} diff --git a/test/doc/test.py b/test/doc/test.py index a97d0f5..c2b955c 100755 --- a/test/doc/test.py +++ b/test/doc/test.py @@ -137,6 +137,15 @@ class Test(unittest.TestCase): # This first caused unhandled exceptions, then later invalid XML output. self.dump('escape') +def test_field(self): +self.dump('field') + +instruction = self.root.findall('stream[@name="WordDocument"]/fib/fibRgFcLcbBlob/lcbPlcf
[Libreoffice-commits] .: src/docrecord.py src/docstream.py test/doc
src/docrecord.py |6 -- src/docstream.py |4 ++-- test/doc/test.py |6 ++ 3 files changed, 8 insertions(+), 8 deletions(-) New commits: commit e2b8fcc9380b83e36742d66093f0aa9c888e012b Author: Miklos Vajna Date: Wed Nov 28 22:24:40 2012 +0100 PlcfBkl: dump the text of the commented text range, not just the start/end positions diff --git a/src/docrecord.py b/src/docrecord.py index adf0de5..30e169f 100644 --- a/src/docrecord.py +++ b/src/docrecord.py @@ -130,6 +130,7 @@ class PlcfBkf(DOCDirStream, PLC): PLC.__init__(self, size, 4) # 4 is defined by 2.8.10 self.pos = offset self.size = size +self.aCP = [] def dump(self): print '' % (self.pos, self.size) @@ -138,8 +139,8 @@ class PlcfBkf(DOCDirStream, PLC): for i in range(self.getElements()): # aCp start = offset + self.getuInt32(pos = pos) +self.aCP.append(start) print '' % (i, start) -print '' % FcCompressed.getFCTransformedValue(self.mainStream.bytes, start, start + 1) pos += 4 # aFBKF @@ -164,7 +165,8 @@ class PlcfBkl(DOCDirStream, PLC): # aCp end = offset + self.getuInt32(pos = pos) print '' % (i, end) -print '' % FcCompressed.getFCTransformedValue(self.mainStream.bytes, end, end + 1) +start = self.mainStream.plcfAtnBkf.aCP[i] +print '' % FcCompressed.getFCTransformedValue(self.mainStream.bytes, start, end) pos += 4 print '' print '' diff --git a/src/docstream.py b/src/docstream.py index dfc2d28..93b2a2a 100644 --- a/src/docstream.py +++ b/src/docstream.py @@ -457,8 +457,8 @@ class WordDocumentStream(DOCDirStream): def handleLcbPlcfAtnBkf(self): offset = self.fcPlcfAtnBkf size = self.lcbPlcfAtnBkf -plcfBkf = docrecord.PlcfBkf(self, offset, size) -plcfBkf.dump() +self.plcfAtnBkf = docrecord.PlcfBkf(self, offset, size) +self.plcfAtnBkf.dump() def handleLcbPlcfAtnBkl(self): offset = self.fcPlcfAtnBkl diff --git a/test/doc/test.py b/test/doc/test.py index a0356d0..546c141 100755 --- a/test/doc/test.py +++ b/test/doc/test.py @@ -109,10 +109,8 @@ class Test(unittest.TestCase): commentEnds = self.root.findall('stream[@name="WordDocument"]/fib/fibRgFcLcbBlob/lcbPlcfAtnBkl/plcfBkl/aCP') # The first comment covers Hello\x05, the second covers This\x05. -self.assertEqual('H', commentStarts[0].findall('transformed')[0].attrib['value']) -self.assertEqual('\\x05', commentEnds[0].findall('transformed')[0].attrib['value']) -self.assertEqual('T', commentStarts[1].findall('transformed')[0].attrib['value']) -self.assertEqual('\\x05', commentEnds[1].findall('transformed')[0].attrib['value']) +self.assertEqual('Hello', commentEnds[0].findall('transformed')[0].attrib['value']) +self.assertEqual('This', commentEnds[1].findall('transformed')[0].attrib['value']) if __name__ == '__main__': unittest.main() ___ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits