 src/docrecord.py   |   61 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/docstream.py   |    5 +++-
 test/doc/field.doc |binary
 test/doc/field.rtf |    8 ++++++
 test/doc/test.py   |    9 +++++++
 5 files changed, 82 insertions(+), 1 deletion(-)
New commits:
commit 134ed95ebe314473ad0333394f0ee0cf1dbad49e
Author: Miklos Vajna <vmik...@suse.cz>
Date:   Wed Jan 9 11:38:36 2013 +0100

    dump PlcFld

diff --git a/src/docrecord.py b/src/docrecord.py
index 0aa271d..9e30066 100644
--- a/src/docrecord.py
+++ b/src/docrecord.py
@@ -142,6 +142,67 @@ class PlcfBkf(DOCDirStream, PLC):
             print '</aCP>'
         print '</plcfBkf>'
 
+class Fldch(DOCDirStream):
+    """The fldch structure determines the type of the field character."""
+    def __init__(self, parent):
+        DOCDirStream.__init__(self, parent.bytes)
+        self.pos = parent.pos
+        self.parent = parent
+
+    def dump(self):
+        print '<fldch type="fldch" offset="%d" size="1 byte">' % self.pos
+        buf = self.readuInt8()
+        self.printAndSet("ch", buf & 0x1f) # 1..5th bits
+        self.printAndSet("reserved", (buf & 0xe0) >> 5) # 6..8th bits
+        print '</fldch>'
+        self.parent.pos = self.pos
+
+class Fld(DOCDirStream):
+    """The Fld structure specifies a field character."""
+    def __init__(self, parent, offset):
+        DOCDirStream.__init__(self, parent.bytes)
+        self.pos = offset
+
+    def dump(self):
+        print '<fld type="FLD" offset="%d" size="2 bytes">' % self.pos
+        self.fldch = Fldch(self)
+        self.fldch.dump()
+        self.printAndSet("grffld", self.readuInt8()) # TODO parse flt and grffldEnd
+        print '</fld>'
+
+class PlcFld(DOCDirStream, PLC):
+    """The Plcfld structure specifies the location of fields in the document."""
+    def __init__(self, mainStream):
+        DOCDirStream.__init__(self, mainStream.doc.getDirectoryStreamByName("1Table").bytes, mainStream = mainStream)
+        PLC.__init__(self, mainStream.lcbPlcfFldMom, 2) # 2 is defined by 2.8.25
+        self.pos = mainStream.fcPlcfFldMom
+        self.size = mainStream.lcbPlcfFldMom
+
+    def dump(self):
+        print '<plcFld type="PlcFld" offset="%d" size="%d bytes">' % (self.pos, self.size)
+        offset = self.mainStream.fcMin # 2.8.25: CPs relative to the start of that document part.
+        pos = self.pos
+        aFlds = []
+        for i in range(self.getElements()):
+            # aCp
+            value = self.getuInt32(pos = pos)
+            print '<aCP index="%d" value="%d">' % (i, value)
+            pos += 4
+
+            # aFld
+            aFld = Fld(self, self.getOffset(self.pos, i))
+            aFld.dump()
+
+            # This is a separator and the previous (if any; aFlds is empty on the first entry) was a start: display the field instructions.
+            if aFlds and aFld.fldch.ch == 0x14 and aFlds[-1][1].fldch.ch == 0x13:
+                print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveText(offset + aFlds[-1][0] + 1, offset + value))
+            # This is an end and the previous (if any; aFlds is empty on the first entry) was a separator: display the field result.
+            elif aFlds and aFld.fldch.ch == 0x15 and aFlds[-1][1].fldch.ch == 0x14:
+                print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveText(offset + aFlds[-1][0] + 1, offset + value))
+            aFlds.append((value, aFld))
+            print '</aCP>'
+        print '</plcFld>'
+
 class PlcfBkl(DOCDirStream, PLC):
     """The Plcfbkl structure is a PLC that contains only CPs and no additional data."""
     def __init__(self, mainStream, offset, size):
diff --git a/src/docstream.py b/src/docstream.py
index 167fb9c..a0aba5e 100644
--- a/src/docstream.py
+++ b/src/docstream.py
@@ -236,7 +236,7 @@ class WordDocumentStream(DOCDirStream):
             ["fcSttbfFfn"],
             ["lcbSttbfFfn", self.handleLcbSttbfFfn],
             ["fcPlcfFldMom"],
-            ["lcbPlcfFldMom"],
+            ["lcbPlcfFldMom", self.handleLcbPlcfFldMom],
             ["fcPlcfFldHdr"],
             ["lcbPlcfFldHdr"],
             ["fcPlcfFldFtn"],
@@ -509,6 +509,9 @@ class WordDocumentStream(DOCDirStream):
     def handleLcbDggInfo(self):
         docrecord.OfficeArtContent(self).dump()
 
+    def handleLcbPlcfFldMom(self):
+        docrecord.PlcFld(self).dump()
+
     def dumpFibRgFcLcb97(self, name):
         print '<%s type="FibRgFcLcb97" size="744 bytes">' % name
         self.__dumpFibRgFcLcb97()
diff --git a/test/doc/field.doc b/test/doc/field.doc
new file mode 100644
index 0000000..247f024
Binary files /dev/null and b/test/doc/field.doc differ
diff --git a/test/doc/field.rtf b/test/doc/field.rtf
new file mode 100644
index 0000000..48e39d1
--- /dev/null
+++ b/test/doc/field.rtf
@@ -0,0 +1,8 @@
+{\rtf1
+Page number:
+{\field
+{\*\fldinst PAGE }
+{\fldrslt 1}
+}
+\par
+}
diff --git a/test/doc/test.py b/test/doc/test.py
index a97d0f5..c2b955c 100755
--- a/test/doc/test.py
+++ b/test/doc/test.py
@@ -137,6 +137,15 @@ class Test(unittest.TestCase):
         # This first caused unhandled exceptions, then later invalid XML output.
         self.dump('escape')
 
+    def test_field(self):
+        self.dump('field')
+
+        instruction = self.root.findall('stream[@name="WordDocument"]/fib/fibRgFcLcbBlob/lcbPlcfFldMom/plcFld/aCP[@index="1"]/transformed')
+        self.assertEqual(' PAGE ', instruction[0].attrib['value'])
+
+        result = self.root.findall('stream[@name="WordDocument"]/fib/fibRgFcLcbBlob/lcbPlcfFldMom/plcFld/aCP[@index="2"]/transformed')
+        self.assertEqual('1', result[0].attrib['value'])
+
 if __name__ == '__main__':
     unittest.main()
 
_______________________________________________
Libreoffice-commits mailing list
libreoffice-comm...@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits