Makefile | 1 msodumper/docdirstream.py | 2 msodumper/vsdstream.py | 286 ++++++++++++++++++++++++++++++++++++++++++++++ vsd-dump.py | 44 +++++++ 4 files changed, 332 insertions(+), 1 deletion(-)
New commits: commit e13cb64ab62501dbcd0dec98f042f46dd702bf56 Author: Miklos Vajna <vmik...@collabora.co.uk> Date: Tue Nov 25 19:27:36 2014 +0100 Add initial SummaryInformation dumper diff --git a/Makefile b/Makefile index 1aa9120..47b42c9 100644 --- a/Makefile +++ b/Makefile @@ -2,3 +2,4 @@ check: cd test/doc && ./test.py pep8 --ignore=E501 doc-dump.py msodumper/doc{dirstream,record,sprm,stream}.py test/doc/test.py pep8 --ignore=E501 emf-dump.py msodumper/{emf,wmf}record.py + pep8 --ignore=E501 vsd-dump.py msodumper/vsdstream.py diff --git a/msodumper/vsdstream.py b/msodumper/vsdstream.py new file mode 100644 index 0000000..36c279a --- /dev/null +++ b/msodumper/vsdstream.py @@ -0,0 +1,286 @@ +#!/usr/bin/env python2 +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +import ole +import ctypes +import struct +from docdirstream import DOCDirStream +import docrecord +import globals +import sys +import os +import bisect + + +class VSDFile: + """Represents the whole visio file - feed will all bytes.""" + def __init__(self, chars, params): + self.chars = chars + self.size = len(self.chars) + self.params = params + self.error = None + + self.init() + + def init(self): + self.header = ole.Header(self.chars, self.params) + self.pos = self.header.parse() + + def __getDirectoryObj(self): + obj = self.header.getDirectory() + obj.parseDirEntries() + return obj + + def getDirectoryNames(self): + return self.__getDirectoryObj().getDirectoryNames() + + def getDirectoryStreamByName(self, name): + obj = self.__getDirectoryObj() + bytes = obj.getRawStreamByName(name) + return self.getStreamFromBytes(name, bytes) + + def getStreamFromBytes(self, name, bytes): + if name == "\x05SummaryInformation": + return SummaryInformationStream(bytes, self.params, doc=self) + else: + return DOCDirStream(bytes, self.params, name, doc=self) + + def getName(self): + return "native" + + +class GsfVSDFile(VSDFile): + """Same as VSDFile, but uses gsf to read the OLE streams.""" + def __init__(self, chars, params, gsf): + self.gsf = gsf + VSDFile.__init__(self, chars, params) + + def disableStderr(self): + nil = os.open(os.devnull, os.O_WRONLY) + self.savedStderr = os.dup(2) + os.dup2(nil, 2) + + def enableStderr(self): + os.dup2(self.savedStderr, 2) + + def init(self): + self.streams = {} + self.gsf.gsf_init() + gsfInput = self.gsf.gsf_input_memory_new(self.chars, len(self.chars), False) + self.disableStderr() + gsfInfile = self.gsf.gsf_infile_msole_new(gsfInput, None) + self.enableStderr() + if not gsfInfile: + self.error = "gsf_infile_msole_new() failed" + return + for i in range(self.gsf.gsf_infile_num_children(gsfInfile)): + child = self.gsf.gsf_infile_child_by_index(gsfInfile, i) + childName = ctypes.string_at(self.gsf.gsf_infile_name_by_index(gsfInfile, i)) + childSize = self.gsf.gsf_input_size(child) + childData = "" + while True: + bufSize = 1024 + pos = self.gsf.gsf_input_tell(child) + if pos == childSize: + break + elif pos + bufSize > childSize: + bufSize = childSize - pos + childData += ctypes.string_at(self.gsf.gsf_input_read(child, bufSize, None), bufSize) + self.streams[childName] = childData + self.gsf.gsf_shutdown() + + def getDirectoryNames(self): + return self.streams.keys() + + def getDirectoryStreamByName(self, name): + return self.getStreamFromBytes(name, self.streams[name]) + + def getName(self): + return "gsf" + + +def createVSDFile(chars, params): + hasGsf = True + try: + gsf = ctypes.cdll.LoadLibrary('libgsf-1.so') + gsf.gsf_input_read.restype = ctypes.c_void_p + except: + hasGsf = False + + if hasGsf: + return GsfVSDFile(chars, params, gsf) + else: + return VSDFile(chars, params) + + +class SummaryInformationStream(DOCDirStream): + def __init__(self, bytes, params, doc): + DOCDirStream.__init__(self, bytes, params, "\x05SummaryInformation", doc=doc) + + def dump(self): + print '<stream name="\\x05SummaryInformation" size="%d">' % self.size + PropertySetStream(self).dump() + print '</stream>' + + +class PropertySetStream(DOCDirStream): + def __init__(self, parent): + DOCDirStream.__init__(self, parent.bytes) + self.parent = parent + + def dump(self): + print '<propertySetStream type="PropertySetStream" offset="%s">' % self.pos + self.printAndSet("ByteOrder", self.readuInt16()) + self.printAndSet("Version", self.readuInt16()) + self.printAndSet("SystemIdentifier", self.readuInt32()) + self.printAndSet("CLSID0", self.readuInt32()) + self.printAndSet("CLSID1", self.readuInt32()) + self.printAndSet("CLSID2", self.readuInt32()) + self.printAndSet("CLSID3", self.readuInt32()) + self.printAndSet("NumPropertySets", self.readuInt32()) + self.printAndSet("FMTID00", self.readuInt32()) + self.printAndSet("FMTID01", self.readuInt32()) + self.printAndSet("FMTID02", self.readuInt32()) + self.printAndSet("FMTID03", self.readuInt32()) + self.printAndSet("Offset0", self.readuInt32()) + if self.NumPropertySets == 0x00000002: + print '<todo what="PropertySetStream::dump: handle NumPropertySets == 0x00000002"/>' + PropertySet(self).dump() + print '</propertySetStream>' + + +class PropertySet(DOCDirStream): + def __init__(self, parent): + DOCDirStream.__init__(self, parent.bytes) + self.parent = parent + self.pos = parent.Offset0 + + def dump(self): + self.posOrig = self.pos + print '<propertySet type="PropertySet" offset="%s">' % self.pos + self.printAndSet("Size", self.readuInt32()) + self.printAndSet("NumProperties", self.readuInt32()) + self.idsAndOffsets = {} + for i in range(self.NumProperties): + self.idsAndOffsets[i] = PropertyIdentifierAndOffset(self, i) + self.idsAndOffsets[i].dump() + for i in range(self.NumProperties): + TypedPropertyValue(self, i).dump() + print '</propertySet>' + +PropertyIdentifier = { + 0x00000001: "CODEPAGE_PROPERTY_IDENTIFIER", + 0x00000002: "PIDSI_TITLE", + 0x00000003: "PIDSI_SUBJECT", + 0x00000004: "PIDSI_AUTHOR", + 0x00000005: "PIDSI_KEYWORDS", + 0x00000006: "PIDSI_COMMENTS", + 0x00000007: "PIDSI_TEMPLATE", + 0x00000008: "PIDSI_LASTAUTHOR", + 0x00000009: "PIDSI_REVNUMBER", + 0x0000000A: "PIDSI_EDITTIME", + 0x0000000B: "PIDSI_LASTPRINTED", + 0x0000000C: "PIDSI_CREATE_DTM", + 0x0000000D: "PIDSI_LASTSAVE_DTM", + 0x0000000E: "PIDSI_PAGECOUNT", + 0x0000000F: "PIDSI_WORDCOUNT", + 0x00000010: "PIDSI_CHARCOUNT", + 0x00000011: "PIDSI_THUMBNAIL", + 0x00000012: "PIDSI_APPNAME", + 0x00000013: "PIDSI_DOC_SECURITY", +} + + +class PropertyIdentifierAndOffset(DOCDirStream): + def __init__(self, parent, index): + DOCDirStream.__init__(self, parent.bytes) + self.parent = parent + self.index = index + self.pos = parent.pos + + def dump(self): + print '<propertyIdentifierAndOffset%s type="PropertyIdentifierAndOffset" offset="%s">' % (self.index, self.pos) + self.printAndSet("PropertyIdentifier", self.readuInt32(), dict=PropertyIdentifier) + self.printAndSet("Offset", self.readuInt32()) + print '</propertyIdentifierAndOffset%s>' % self.index + self.parent.pos = self.pos + +PropertyType = { + 0x0000: "VT_EMPTY", + 0x0001: "VT_NULL", + 0x0002: "VT_I2", + 0x0003: "VT_I4", + 0x0004: "VT_R4", + 0x0005: "VT_R8", + 0x0006: "VT_CY", + 0x0007: "VT_DATE", + 0x0008: "VT_BSTR", + 0x000A: "VT_ERROR", + 0x000B: "VT_BOOL", + 0x000E: "VT_DECIMAL", + 0x0010: "VT_I1", + 0x0011: "VT_UI1", + 0x0012: "VT_UI2", + 0x0013: "VT_UI4", + 0x0014: "VT_I8", + 0x0015: "VT_UI8", + 0x0016: "VT_INT", + 0x0017: "VT_UINT", + 0x001E: "VT_LPSTR", + 0x001F: "VT_LPWSTR", + 0x0040: "VT_FILETIME", + 0x0041: "VT_BLOB", + 0x0042: "VT_STREAM", + 0x0043: "VT_STORAGE", + 0x0044: "VT_STREAMED_Object", + 0x0045: "VT_STORED_Object", + 0x0046: "VT_BLOB_Object", + 0x0047: "VT_CF", + 0x0048: "VT_CLSID", + 0x0049: "VT_VERSIONED_STREAM", +} + + +class TypedPropertyValue(DOCDirStream): + def __init__(self, parent, index): + DOCDirStream.__init__(self, parent.bytes) + self.parent = parent + self.index = index + self.pos = parent.posOrig + parent.idsAndOffsets[index].Offset + + def dump(self): + print '<typedPropertyValue%s type="TypedPropertyValue" offset="%s">' % (self.index, self.pos) + self.printAndSet("Type", self.readuInt16(), dict=PropertyType) + self.printAndSet("Padding", self.readuInt16()) + if self.Type == 0x0002: # VT_I2 + self.printAndSet("Value", self.readInt16()) + elif self.Type == 0x001E: # VT_LPSTR + CodePageString(self, "Value").dump() + else: + print '<todo what="TypedPropertyValue::dump: unhandled Type %s"/>' % hex(self.Type) + print '</typedPropertyValue%s>' % self.index + + +class CodePageString(DOCDirStream): + def __init__(self, parent, name): + DOCDirStream.__init__(self, parent.bytes) + self.pos = parent.pos + self.name = name + + def dump(self): + print '<%s type="CodePageString">' % self.name + self.printAndSet("Size", self.readuInt32()) + bytes = [] + for i in range(self.Size): + c = self.readuInt8() + if c == 0: + break + bytes.append(c) + print '<Characters value="%s"/>' % "".join(map(lambda c: chr(c), bytes)) + print '</%s>' % self.name + +# vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab: diff --git a/vsd-dump.py b/vsd-dump.py new file mode 100755 index 0000000..9d56f8f --- /dev/null +++ b/vsd-dump.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python2 +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +import sys +sys = reload(sys) +sys.setdefaultencoding("utf-8") + +from msodumper import globals, vsdstream + + +class VSDDumper: + def __init__(self, filepath, params): + self.filepath = filepath + self.params = params + + def dump(self): + file = open(self.filepath, 'rb') + strm = vsdstream.createVSDFile(file.read(), self.params) + file.close() + dirnames = strm.getDirectoryNames() + print '<?xml version="1.0"?>\n<streams ole-type="%s">' % strm.getName() + if strm.error: + print '<error what="%s"/>' % strm.error + for dirname in dirnames: + if len(dirname) == 0 or dirname in ['Root Entry']: + continue + strm.getDirectoryStreamByName(dirname).dump() + print '</streams>' + + +def main(args): + exname, args = args[0], args[1:] + params = globals.Params() + dumper = VSDDumper(args[0], params) + dumper.dump() + +if __name__ == '__main__': + main(sys.argv) + +# vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab: commit 50a4c3e2478bf80a544e00164d0dfdda687587b8 Author: Miklos Vajna <vmik...@collabora.co.uk> Date: Tue Nov 25 19:26:16 2014 +0100 test for object identity should be 'is not' diff --git a/msodumper/docdirstream.py b/msodumper/docdirstream.py index c9aa3d5..88a91d6 100644 --- a/msodumper/docdirstream.py +++ b/msodumper/docdirstream.py @@ -138,7 +138,7 @@ class DOCDirStream: count = 0 pos = self.pos while True: - if (not limit is None) and count == limit: + if (limit is not None) and count == limit: break i = self.getuInt8(pos=pos) pos += 1 _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits