bin/get-bugzilla-attachments-by-mimetype | 102 ++++++++++++++++++------------- 1 file changed, 62 insertions(+), 40 deletions(-)
New commits: commit d4fd1c0c38179967166f64342d216e46d7f4d6d3 Author: Michael Stahl <mst...@redhat.com> Date: Mon Nov 11 13:44:36 2013 +0100 get-bugzilla-attachments-by-mimetype: more launchpad fixes - look at more interesting packages on launchpad, not just libreoffice. - the searchTasks method by default does not return closed tasks, and there does not appear to be a documented wild card search, so stupidly enumerate all possible statuses. Change-Id: I51691506874722a1d8eea4755513edf50164cf9d diff --git a/bin/get-bugzilla-attachments-by-mimetype b/bin/get-bugzilla-attachments-by-mimetype index 05a24e7..6655ea8 100755 --- a/bin/get-bugzilla-attachments-by-mimetype +++ b/bin/get-bugzilla-attachments-by-mimetype @@ -182,40 +182,41 @@ def get_launchpad_bugs(prefix): ubuntu = launchpad.distributions["ubuntu"] #since searching bugs having attachments with specific mimetypes is not available in launchpad API - #we're iterating over all bugs of the libreoffice source package - libo = ubuntu.getSourcePackage(name="libreoffice") - libobugs = libo.searchTasks() - - for bugtask in libobugs: - bug = bugtask.bug - id = str(bug.id) - print("parsing " + id + " status: " + bugtask.status + " title: " + bug.title[:50]) - attachmentid = 0 - for attachment in bug.attachments: - attachmentid += 1 - handle = attachment.data.open() - if not handle.content_type in mimetypes: - #print "skipping" - continue - - suffix = mimetypes[handle.content_type] - if not os.path.isdir(suffix): - try: - os.mkdir(suffix) - except: - pass - - download = suffix + '/' + prefix + id + '-' + str(attachmentid) + '.' 
+ suffix - - if os.path.isfile(download): - print("assuming " + id + " is up to date") - break + #we're iterating over all bugs of the most interesting source packages + for pkg in ["libreoffice", "openoffice.org", "abiword", "gnumeric", "koffice", "calligra"]: + srcpkg = ubuntu.getSourcePackage(name=pkg) + pkgbugs = srcpkg.searchTasks(status=["New", "Fix Committed", "Invalid", "Won't Fix", "Confirmed", "Triaged", "In Progress", "Incomplete", "Incomplete (with response)", "Incomplete (without response)", "Fix Released", "Opinion", "Expired"]) + + for bugtask in pkgbugs: + bug = bugtask.bug + id = str(bug.id) + print("parsing " + id + " status: " + bugtask.status + " title: " + bug.title[:50]) + attachmentid = 0 + for attachment in bug.attachments: + attachmentid += 1 + handle = attachment.data.open() + if not handle.content_type in mimetypes: + #print "skipping" + continue + + suffix = mimetypes[handle.content_type] + if not os.path.isdir(suffix): + try: + os.mkdir(suffix) + except: + pass + + download = suffix + '/' + prefix + id + '-' + str(attachmentid) + '.' 
+ suffix + + if os.path.isfile(download): + print("assuming " + id + " is up to date") + break - print('mimetype is ' + handle.content_type + ' downloading as ' + download) + print('mimetype is ' + handle.content_type + ' downloading as ' + download) - f = open(download, "w") - f.write(handle.read()) - f.close() + f = open(download, "w") + f.write(handle.read()) + f.close() freedesktop = 'http://bugs.freedesktop.org/buglist.cgi' abisource = 'http://bugzilla.abisource.com/buglist.cgi' #added for abiword commit bad960e65f4d00315ea7c12cc00b84b26680eb9d Author: Michael Stahl <mst...@redhat.com> Date: Mon Nov 11 12:45:40 2013 +0100 get-bugzilla-attachments-by-mimetype: better test for existing file Change-Id: I208a74d11945986d0712970999dbd33c03efe488 diff --git a/bin/get-bugzilla-attachments-by-mimetype b/bin/get-bugzilla-attachments-by-mimetype index 4e588af..05a24e7 100755 --- a/bin/get-bugzilla-attachments-by-mimetype +++ b/bin/get-bugzilla-attachments-by-mimetype @@ -98,6 +98,11 @@ def get_novell_bug_via_xml(url, mimetype, prefix, suffix): attachmentid += 1 + download = suffix + '/' + prefix + id + '-' + str(attachmentid) + '.' + suffix + if os.path.isfile(download): + print("assuming " + download + " is up to date") + continue + realAttachmentId = match.group(1) handle = urlopen_retry(novellattach + realAttachmentId) if not handle: @@ -115,7 +120,6 @@ def get_novell_bug_via_xml(url, mimetype, prefix, suffix): print("skipping") continue - download = suffix + '/' + prefix + id + '-' + str(attachmentid) + '.' + suffix print('downloading as ' + download) f = open(download, 'wb') f.write(handle.read()) commit 3e9d164a06d60e756dffad4dd18795796348e97e Author: Michael Stahl <mst...@redhat.com> Date: Sun Nov 10 19:58:58 2013 +0100 get-bugzilla-attachments-by-mimetype: add some missing mime types ... which are officially registered on https://www.iana.org/assignments/media-types/application ... plus some more non-standard ones for FreeHand, Keynote, ClarisWorks. 
For Apple Keynote there are 2 different ones that appear to be widely used. Change-Id: I26d4a85733a744188cc87a78fdba0d9d3f44da96 diff --git a/bin/get-bugzilla-attachments-by-mimetype b/bin/get-bugzilla-attachments-by-mimetype index 15864cf..4e588af 100755 --- a/bin/get-bugzilla-attachments-by-mimetype +++ b/bin/get-bugzilla-attachments-by-mimetype @@ -271,11 +271,21 @@ mimetypes = { 'application/msword': 'doc', 'application/vnd.ms-powerpoint': 'ppt', 'application/vnd.ms-excel': 'xls', + 'application/vnd.ms-excel.sheet.binary.macroEnabled.12': 'xlsb', + 'application/vnd.ms-excel.sheet.macroEnabled.12': 'xlsm', + 'application/vnd.ms-excel.template.macroEnabled.12': 'xltm', + 'application/vnd.ms-powerpoint.presentation.macroEnabled.12': 'pptm', + 'application/vnd.ms-powerpoint.slide.macroEnabled.12': 'sldm', + 'application/vnd.ms-powerpoint.slideshow.macroEnabled.12': 'ppsm', + 'application/vnd.ms-powerpoint.template.macroEnabled.12': 'potm', + 'application/vnd.ms-word.document.macroEnabled.12': 'docm', + 'application/vnd.ms-word.template.macroEnabled.12': 'dotm', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx', 'application/vnd.openxmlformats-officedocument.spreadsheetml.template': 'xltx', 'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'pptx', 'application/vnd.openxmlformats-officedocument.presentationml.template': 'ppotx', 'application/vnd.openxmlformats-officedocument.presentationml.slideshow': 'ppsx', + 'application/vnd.openxmlformats-officedocument.presentationml.slide': 'sldx', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx', 'application/vnd.openxmlformats-officedocument.wordprocessingml.template': 'dotx', 'application/vnd.visio': 'vsd', @@ -287,6 +297,7 @@ mimetypes = { 'text/html': 'html', 'application/docbook+xml': 'docbook', # misc + 'text/csv': 'csv', 'text/spreadsheet': 'slk', 'application/vnd.corel-draw': 'cdr', 'application/vnd.lotus-wordpro': 'lwp', @@ -294,6 
+305,10 @@ mimetypes = { 'application/vnd.wordperfect': 'wpd', 'application/wordperfect5.1': 'wpd', 'application/vnd.ms-works': 'wps', + 'application/clarisworks' : 'cwk', + 'application/macwriteii' : 'mw', + 'application/vnd.apple.keynote': 'key', + 'application/x-iwork-keynote-sffkey': 'key', 'application/x-hwp': 'hwp', 'application/x-aportisdoc': 'pdb', 'application/x-pocket-word': 'psw', @@ -314,6 +329,7 @@ mimetypes = { 'application/vnd.stardivision.writer': 'sdw5', 'application/vnd.stardivision.writer-global': 'sgl5', # relatively uncommon image mimetypes + 'image/x-freehand': 'fh', 'image/cgm': 'cgm', 'image/tiff': 'tiff', 'image/vnd.dxf': 'dxf', commit fdb747ff8c4653d3e94192693f1080398ae20339 Author: Michael Stahl <mst...@redhat.com> Date: Sun Nov 10 19:17:17 2013 +0100 get-bugzilla-attachments-by-mimetype: more Python 3 in exception handler ... and also fix the print functions that shouldn't output a newline. Change-Id: Ifd866cb33b3ef9a2e83625ed03d5cb836c1ba56b diff --git a/bin/get-bugzilla-attachments-by-mimetype b/bin/get-bugzilla-attachments-by-mimetype index e3fb177..15864cf 100755 --- a/bin/get-bugzilla-attachments-by-mimetype +++ b/bin/get-bugzilla-attachments-by-mimetype @@ -18,6 +18,7 @@ # #where X is the n'th attachment of that type in the bug +from __future__ import print_function import feedparser import base64 import re @@ -40,7 +41,7 @@ def urlopen_retry(url): try: return urlopen(url) except IOError as e: - print("caught IOError: " + e) + print("caught IOError: " + str(e)) if maxretries == i: raise print("retrying...") @@ -51,17 +52,17 @@ def get_from_bug_url_via_xml(url, mimetype, prefix, suffix): if os.path.isfile(suffix + '/' + prefix + id + '-1.' 
+ suffix): print("assuming " + id + " is up to date") else: - print("parsing", id) + print("parsing " + id) sock = urlopen_retry(url+"&ctype=xml") dom = minidom.parse(sock) sock.close() attachmentid=0 for attachment in dom.getElementsByTagName('attachment'): attachmentid += 1 - print(" mimetype is") + print(" mimetype is", end=' ') for node in attachment.childNodes: if node.nodeName == 'type': - print(node.firstChild.nodeValue) + print(node.firstChild.nodeValue, end=' ') if node.firstChild.nodeValue.lower() != mimetype.lower(): print('skipping') break @@ -102,14 +103,14 @@ def get_novell_bug_via_xml(url, mimetype, prefix, suffix): if not handle: print("attachment %s is not accessible" % realAttachmentId) continue - print(" mimetype is") + print(" mimetype is", end=' ') info = handle.info() if info.get_content_type: remoteMime = info.get_content_type() else: remoteMime = info.gettype() - print(remoteMime) + print(remoteMime, end=' ') if remoteMime != mimetype: print("skipping") continue @@ -161,7 +162,7 @@ def get_through_rss_query_url(url, mimetype, prefix, suffix): except KeyboardInterrupt: raise # Ctrl+C should work except: - print(entry['id'] + " failed: " + sys.exc_info()[0]) + print(entry['id'] + " failed: " + str(sys.exc_info()[0])) pass def get_through_rss_query(queryurl, mimetype, prefix, suffix): _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits