 src/docrecord.py      |   31 ++++++++++++++-----------------
 src/docstream.py      |    4 ++--
 test/doc/sections.doc |binary
 test/doc/test.py      |    7 +++++++
 4 files changed, 23 insertions(+), 19 deletions(-)
New commits: commit f922c24a1c0e7bf2f7e330503eb37f2d99f39b75 Author: Miklos Vajna <vmik...@suse.cz> Date: Wed Jan 9 17:54:07 2013 +0100 PlcfSed: fix dumping section text diff --git a/src/docrecord.py b/src/docrecord.py index 9e30066..cae0153 100644 --- a/src/docrecord.py +++ b/src/docrecord.py @@ -30,24 +30,21 @@ class FcCompressed(DOCDirStream): self.printAndSet("r1", self.r1) print '</fcCompressed>' - def getTransformedValue(self, start, end, logical = True): + def getTransformedValue(self, start, end, logicalPositions = True, logicalLength = True): + offset = self.fc if self.fCompressed: offset = self.fc/2 - if logical: - fro = offset - to = offset+end-start - else: - fro = start - to = end + if logicalPositions: + fro = offset + start + to = offset + end + else: + fro = start + to = end + if self.fCompressed: return globals.encodeName(self.mainStream.bytes[fro:to]) else: - if logical: - offset = self.fc - fro = offset - to = offset + (end - start) * 2 - else: - fro = start - to = end + if logicalLength: + to += (to - fro) return globals.encodeName(self.mainStream.bytes[fro:to].decode('utf-16'), lowOnly = True) class Pcd(DOCDirStream): @@ -301,7 +298,7 @@ class PlcfSed(DOCDirStream, PLC): self.size = size def dump(self): - print '<plcSed type="PlcSed" offset="%d" size="%d bytes">' % (self.pos, self.size) + print '<plcfSed type="PlcfSed" offset="%d" size="%d bytes">' % (self.pos, self.size) offset = self.mainStream.fcMin pos = self.pos for i in range(self.getElements()): @@ -315,9 +312,9 @@ class PlcfSed(DOCDirStream, PLC): aSed = Sed(self, self.getOffset(self.pos, i)) aSed.dump() - print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveText(start, end, logical = True)) + print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveText(offset + start, offset + end, logicalLength = True)) print '</aCP>' - print '</plcSed>' + print '</plcfSed>' class Tcg(DOCDirStream): """The Tcg structure specifies command-related 
customizations.""" diff --git a/src/docstream.py b/src/docstream.py index a0aba5e..6ec39f5 100644 --- a/src/docstream.py +++ b/src/docstream.py @@ -629,9 +629,9 @@ class WordDocumentStream(DOCDirStream): index = i return index - def retrieveText(self, start, end, logical = False): + def retrieveText(self, start, end, logicalLength = False): plcPcd = self.clx.pcdt.plcPcd idx = self.__findText(plcPcd, start) - return plcPcd.aPcd[idx].fc.getTransformedValue(start, end, logical = logical) + return plcPcd.aPcd[idx].fc.getTransformedValue(start, end, logicalPositions = False, logicalLength = logicalLength) # vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab: diff --git a/test/doc/sections.doc b/test/doc/sections.doc new file mode 100644 index 0000000..9e26fb4 Binary files /dev/null and b/test/doc/sections.doc differ diff --git a/test/doc/test.py b/test/doc/test.py index c2b955c..0088e41 100755 --- a/test/doc/test.py +++ b/test/doc/test.py @@ -146,6 +146,13 @@ class Test(unittest.TestCase): result = self.root.findall('stream[@name="WordDocument"]/fib/fibRgFcLcbBlob/lcbPlcfFldMom/plcFld/aCP[@index="2"]/transformed') self.assertEqual('1', result[0].attrib['value']) + def test_sections(self): + self.dump('sections') + + sections = self.root.findall('stream[@name="WordDocument"]/fib/fibRgFcLcbBlob/lcbPlcfSed/plcfSed/') + self.assertEqual('Before section 1.\\x0D\\x0D\\x0C', sections[0].findall('transformed')[0].attrib['value']) + self.assertEqual("\\x0DThe next one isn't a balanced one:\\x0D\\x0D\\x0C", sections[2].findall('transformed')[0].attrib['value']) + if __name__ == '__main__': unittest.main() _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits