mahfiaz has proposed merging lp:~mahfiaz/openlp/bug-933706 into lp:openlp. Requested reviews: OpenLP Core (openlp-core)
For more details, see: https://code.launchpad.net/~mahfiaz/openlp/bug-933706/+merge/111162 Adds SundayPlus importer with its own strip_rtf. -- https://code.launchpad.net/~mahfiaz/openlp/bug-933706/+merge/111162 Your team OpenLP Core is requested to review the proposed merge of lp:~mahfiaz/openlp/bug-933706 into lp:openlp.
# -*- coding: utf-8 -*-
# vim: autoindent shiftwidth=4 expandtab textwidth=80 tabstop=4 softtabstop=4

###############################################################################
# OpenLP - Open Source Lyrics Projection                                      #
# --------------------------------------------------------------------------- #
# Copyright (c) 2008-2012 Raoul Snyman                                        #
# Portions copyright (c) 2008-2012 Tim Bentley, Gerald Britton, Jonathan      #
# Corwin, Michael Gorven, Scott Guerrieri, Matthias Hub, Meinert Jordan,      #
# Armin Köhler, Joshua Miller, Stevan Pettit, Andreas Preikschat, Mattias     #
# Põldaru, Christian Richter, Philip Ridout, Simon Scudder, Jeffrey Smith,    #
# Maikel Stuivenberg, Martin Thompson, Jon Tibble, Frode Woldsund             #
# --------------------------------------------------------------------------- #
# This program is free software; you can redistribute it and/or modify it     #
# under the terms of the GNU General Public License as published by the Free  #
# Software Foundation; version 2 of the License.                              #
#                                                                             #
# This program is distributed in the hope that it will be useful, but WITHOUT #
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or       #
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for    #
# more details.                                                               #
#                                                                             #
# You should have received a copy of the GNU General Public License along     #
# with this program; if not, write to the Free Software Foundation, Inc., 59  #
# Temple Place, Suite 330, Boston, MA 02111-1307 USA                          #
###############################################################################

# File: openlp/plugins/songs/lib/sundayplusimport.py
#
# NOTE: the merge proposal that adds this file also registers the importer in
# openlp/plugins/songs/lib/importer.py:
#   * adds ``from sundayplusimport import SundayPlusImport`` after the
#     SongShowPlusImport import;
#   * inserts ``SundayPlus = 14`` into the SongFormat constants, renumbering
#     WordsOfWorship to 15, ZionWorx to 16 and the commented-out CSV to 17;
#   * adds this entry before the WordsOfWorship one:
#         SundayPlus: {
#             u'class': SundayPlusImport,
#             u'name': u'SundayPlus',
#             u'prefix': u'sundayPlus',
#             u'filter': u'%s (*.ptf)' % translate(
#                 'SongsPlugin.ImportWizardForm', 'SundayPlus Song Files')
#         },
#   * lists ``SongFormat.SundayPlus`` between ``SongFormat.SongsOfFellowship``
#     and ``SongFormat.WordsOfWorship``.

import logging
import re

from os.path import split

from openlp.plugins.songs.lib import VerseType, retrieve_windows_encoding
from openlp.plugins.songs.lib.songimport import SongImport

log = logging.getLogger(__name__)

class SundayPlusImport(SongImport):
    """
    Import Sunday Plus songs

    The format examples can be found attached to bug report at
    <http://support.openlp.org/issues/395>

    A song file is a bracketed list of ``#name: value`` attributes. Song
    level attributes carry the title, author and copyright; ``CELL...``
    attributes hold a nested list with the verse marker, hotkey and the RTF
    encoded verse text.
    """
    # Maps the Sunday Plus "Hotkey" attribute to the verse tag handed to
    # addVerse().
    hotkey_to_verse_type = {
        u'1': u'v1',
        u'2': u'v2',
        u'3': u'v3',
        u'4': u'v4',
        u'5': u'v5',
        u'6': u'v6',
        u'7': u'v7',
        u'8': u'v8',
        u'9': u'v9',
        u'C': u'c',
        u'+': u'b',
        u'Z': u'o'}

    # RTF tokenizer. Groups: control word, its numeric argument, \'xx hex
    # byte, control symbol, brace, plain character.
    # Thanks to Markus Jarderot (MizardX) for this code, used by permission
    # <http://stackoverflow.com/questions/188545/regular-expression-for-
    # extracting-text-from-an-rtf-string>
    _rtf_pattern = re.compile(
        r"\\([a-z]{1,32})(-?\d{1,10})?[ ]?|\\'"
        r"([0-9a-f]{2})|\\([^a-z])|([{}])|[\r\n]+|(.)", re.I)

    # Control words which specify a "destination" and we can ignore it.
    _rtf_destinations = frozenset((
        'aftncn', 'aftnsep', 'aftnsepc', 'annotation', 'atnauthor',
        'atndate', 'atnicn', 'atnid', 'atnparent', 'atnref', 'atntime',
        'atrfend', 'atrfstart', 'author', 'background', 'bkmkend',
        'bkmkstart', 'blipuid', 'buptim', 'category', 'colorschememapping',
        'colortbl', 'comment', 'company', 'creatim', 'datafield',
        'datastore', 'defchp', 'defpap', 'do', 'doccomm', 'docvar',
        'dptxbxtext', 'ebcend', 'ebcstart', 'factoidname', 'falt', 'fchars',
        'ffdeftext', 'ffentrymcr', 'ffexitmcr', 'ffformat', 'ffhelptext',
        'ffl', 'ffname', 'ffstattext', 'field', 'file', 'filetbl',
        'fldinst', 'fldrslt', 'fldtype', 'fname', 'fontemb', 'fontfile',
        'footer', 'footerf', 'footerl', 'footerr', 'footnote',
        'formfield', 'ftncn', 'ftnsep', 'ftnsepc', 'g', 'generator',
        'gridtbl', 'header', 'headerf', 'headerl', 'headerr', 'hl', 'hlfr',
        'hlinkbase', 'hlloc', 'hlsrc', 'hsv', 'htmltag', 'info', 'keycode',
        'keywords', 'latentstyles', 'lchars', 'levelnumbers', 'leveltext',
        'lfolevel', 'linkval', 'list', 'listlevel', 'listname',
        'listoverride', 'listoverridetable', 'listpicture', 'liststylename',
        'listtable', 'listtext', 'lsdlockedexcept', 'macc', 'maccPr',
        'mailmerge', 'maln', 'malnScr', 'manager', 'margPr', 'mbar',
        'mbarPr', 'mbaseJc', 'mbegChr', 'mborderBox', 'mborderBoxPr',
        'mbox', 'mboxPr', 'mchr', 'mcount', 'mctrlPr', 'md', 'mdeg',
        'mdegHide', 'mden', 'mdiff', 'mdPr', 'me', 'mendChr', 'meqArr',
        'meqArrPr', 'mf', 'mfName', 'mfPr', 'mfunc', 'mfuncPr', 'mgroupChr',
        'mgroupChrPr', 'mgrow', 'mhideBot', 'mhideLeft', 'mhideRight',
        'mhideTop', 'mhtmltag', 'mlim', 'mlimloc', 'mlimlow', 'mlimlowPr',
        'mlimupp', 'mlimuppPr', 'mm', 'mmaddfieldname', 'mmath',
        'mmathPict', 'mmathPr', 'mmaxdist', 'mmc', 'mmcJc', 'mmconnectstr',
        'mmconnectstrdata', 'mmcPr', 'mmcs', 'mmdatasource',
        'mmheadersource', 'mmmailsubject', 'mmodso', 'mmodsofilter',
        'mmodsofldmpdata', 'mmodsomappedname', 'mmodsoname',
        'mmodsorecipdata', 'mmodsosort', 'mmodsosrc', 'mmodsotable',
        'mmodsoudl', 'mmodsoudldata', 'mmodsouniquetag', 'mmPr', 'mmquery',
        'mmr', 'mnary', 'mnaryPr', 'mnoBreak', 'mnum', 'mobjDist', 'moMath',
        'moMathPara', 'moMathParaPr', 'mopEmu', 'mphant', 'mphantPr',
        'mplcHide', 'mpos', 'mr', 'mrad', 'mradPr', 'mrPr', 'msepChr',
        'mshow', 'mshp', 'msPre', 'msPrePr', 'msSub', 'msSubPr', 'msSubSup',
        'msSubSupPr', 'msSup', 'msSupPr', 'mstrikeBLTR', 'mstrikeH',
        'mstrikeTLBR', 'mstrikeV', 'msub', 'msubHide', 'msup', 'msupHide',
        'mtransp', 'mtype', 'mvertJc', 'mvfmf', 'mvfml', 'mvtof', 'mvtol',
        'mzeroAsc', 'mzeroDesc', 'mzeroWid', 'nesttableprops', 'nextfile',
        'nonesttables', 'objalias', 'objclass', 'objdata', 'object',
        'objname', 'objsect', 'objtime', 'oldcprops', 'oldpprops',
        'oldsprops', 'oldtprops', 'oleclsid', 'operator', 'panose',
        'password', 'passwordhash', 'pgp', 'pgptbl', 'picprop', 'pict',
        'pn', 'pnseclvl', 'pntext', 'pntxta', 'pntxtb', 'printim',
        'private', 'propname', 'protend', 'protstart', 'protusertbl', 'pxe',
        'result', 'revtbl', 'revtim', 'rsidtbl', 'rxe', 'shp', 'shpgrp',
        'shpinst', 'shppict', 'shprslt', 'shptxt', 'sn', 'sp', 'staticval',
        'stylesheet', 'subject', 'sv', 'svb', 'tc', 'template', 'themedata',
        'title', 'txe', 'ud', 'upr', 'userprops', 'wgrffmtfilter',
        'windowcaption', 'writereservation', 'writereservhash', 'xe',
        'xform', 'xmlattrname', 'xmlattrvalue', 'xmlclose', 'xmlname',
        'xmlnstbl', 'xmlopen'))

    # Translation of some special characters.
    _rtf_specialchars = {
        u'par': u'\n',
        u'sect': u'\n\n',
        u'page': u'\n\n',
        u'line': u'\n',
        u'tab': u'\t',
        u'emdash': u'\u2014',
        u'endash': u'\u2013',
        u'emspace': u'\u2003',
        u'enspace': u'\u2002',
        u'qmspace': u'\u2005',
        u'bullet': u'\u2022',
        u'lquote': u'\u2018',
        u'rquote': u'\u2019',
        u'ldblquote': u'\u201C',
        u'rdblquote': u'\u201D'}

    # RTF charset / code page control words mapped to Python codec names.
    _rtf_charset_mapping = {
        # Thai encoding
        'fcharset222': u'cp874',
        'ansicpg874': u'cp874',
        # Central+East European encoding
        'fcharset238': u'cp1250',
        'ansicpg1250': u'cp1250',
        # Cyrillic encoding
        'fcharset204': u'cp1251',
        'ansicpg1251': u'cp1251',
        # West European encoding
        'fcharset0': u'cp1252',
        'ansicpg1252': u'cp1252',
        # Greek encoding
        'fcharset161': u'cp1253',
        'ansicpg1253': u'cp1253',
        # Turkish encoding
        'fcharset162': u'cp1254',
        'ansicpg1254': u'cp1254',
        # Hebrew encoding
        'fcharset177': u'cp1255',
        'ansicpg1255': u'cp1255',
        # Arabic encoding
        'fcharset178': u'cp1256',
        'ansicpg1256': u'cp1256',
        # Baltic encoding
        'fcharset186': u'cp1257',
        'ansicpg1257': u'cp1257',
        # Vietnamese encoding
        'fcharset163': u'cp1258',
        'ansicpg1258': u'cp1258'}

    def __init__(self, manager, **kwargs):
        """
        Initialise the class.
        """
        SongImport.__init__(self, manager, **kwargs)

    def doImport(self):
        """
        Import every file listed in ``self.importSource``.

        Stops early when ``self.stopImportFlag`` is set. The text encoding
        starts out as ASCII and is only changed when decoding fails.
        """
        self.importWizard.progressBar.setMaximum(len(self.importSource))
        self.encoding = 'us-ascii'
        for filename in self.importSource:
            if self.stopImportFlag:
                return
            song_file = open(filename)
            try:
                self.doImportFile(song_file)
            finally:
                # Close the handle even when parsing raises.
                song_file.close()

    def doImportFile(self, file):
        """
        Process the Sunday Plus file object.

        ``file``
            An open file object; its ``name`` is used for error reporting
            and as the fallback source of the song title.
        """
        self.setDefaults()
        if not self.parse(file.read()):
            self.logError(file.name)
            return
        if self.title == '':
            self.title = self.title_from_filename(file.name)
        if not self.finish():
            self.logError(file.name)

    def parse(self, data, cell=False):
        """
        Parse one bracketed attribute list.

        ``data``
            The raw list text, expected to start with ``[`` and end with
            ``]`` (surrounding whitespace is ignored).

        ``cell``
            ``False`` for the song level list; for a nested ``CELL...`` list
            the remainder of the attribute name is passed instead.

        Returns ``True`` when the list was walked, ``False`` when it is
        malformed.
        """
        data = data.strip()
        # Both delimiters are required; an empty string is malformed too.
        if not data or data[0] != '[' or data[-1] != ']':
            self.logError(u'File is malformed')
            return False
        length = len(data)
        i = 1
        verse_type = VerseType.Tags[VerseType.Verse]
        while i < length:
            if data[i] == '#':
                end = data.find(':', i + 1)
                if end == -1:
                    # Attribute name without a value: nothing left to read.
                    break
                name = data[i + 1:end]
                i = end + 1
                while i < length and data[i] == ' ':
                    i += 1
                if i >= length:
                    break
                value, end = self._read_value(data, i)
                # ``is False`` keeps an empty cell name in cell mode.
                if cell is False:
                    self._apply_song_attribute(name, value)
                else:
                    verse_type = self._apply_cell_attribute(name, value,
                        verse_type)
                if end == -1:
                    # The value ran to the end of the data.
                    break
                i = end + 1
            i += 1
        return True

    def _read_value(self, data, start):
        """
        Read the attribute value which begins at ``data[start]``.

        Returns ``(value, end)`` where ``end`` is the position scanning
        resumes from, or ``-1`` when the terminator was not found.
        """
        first = data[start]
        if first == '"':
            end = data.find('"', start + 1)
            return data[start + 1:end], end
        if first == '[':
            # Nested list: ends at the first ``]`` outside of quotes.
            inside_quotes = False
            j = start
            while j < len(data):
                char = data[j]
                if char == '"':
                    inside_quotes = not inside_quotes
                elif not inside_quotes and char == ']':
                    return data[start:j + 1], j + 1
                j += 1
            # Unterminated list: take the rest and stop parsing.
            return data[start:], -1
        # Bare value, ends at the next comma or after a "(...)" group.
        end = data.find(',', start + 1)
        if data.find('(', start, end) != -1:
            close = data.find(')', start)
            if close == -1:
                end = -1
            else:
                end = close + 1
        return data[start:end], end

    def _apply_song_attribute(self, name, value):
        """
        Store a song level attribute or descend into a ``CELL...`` list.
        """
        if name == 'title':
            self.title = self.decode(self.unescape(value))
        elif name == 'Author':
            author = self.decode(self.unescape(value))
            if author:
                self.addAuthor(author)
        elif name == 'Copyright':
            self.copyright = self.decode(self.unescape(value))
        elif name[0:4] == 'CELL':
            self.parse(value, cell=name[4:])

    def _apply_cell_attribute(self, name, value, verse_type):
        """
        Handle one attribute of a cell and return the current verse type.
        """
        if name == 'MARKER_NAME':
            value = value.strip()
            if value:
                verse_type = VerseType.Tags[
                    VerseType.from_loose_input(value[0])]
                if len(value) >= 2 and value[-1] in '0123456789':
                    verse_type = "%s%s" % (verse_type, value[-1])
        elif name == 'Hotkey':
            # Hotkey always appears after MARKER_NAME, so it
            # effectively overrides MARKER_NAME, if present.
            if value in self.hotkey_to_verse_type:
                verse_type = self.hotkey_to_verse_type[value]
        elif name == 'rtf':
            verse = self.strip_rtf(self.unescape(value), self.encoding)
            lines = self._clean_verse_lines(verse.strip())
            self.addVerse('\n'.join(lines).strip(), verse_type)
        return verse_type

    def _clean_verse_lines(self, verse):
        """
        Strip each verse line and drop CCLI / "public domain" lines.

        A line starting with ``CCLI`` that contains a number sets
        ``self.ccliNumber`` and is removed from the verse text.
        """
        kept = []
        for line in verse.split('\n'):
            line = line.strip()
            if line.startswith('CCLI'):
                match = re.search(r'[0-9]+', line)
                if match:
                    self.ccliNumber = int(match.group(0))
                    continue
            elif line.lower() == u'public domain':
                continue
            kept.append(line)
        return kept

    def title_from_filename(self, filename):
        """
        Derive a song title from the file name.

        Drops the directory, a ``.ptf`` extension and a trailing ``1-7``,
        then turns underscores into spaces.
        """
        filename = split(filename)[1]
        if len(filename) > 4 and filename[-4:].lower() == u'.ptf':
            title = filename[:-4]
        else:
            title = filename
        if title[-3:] == '1-7':
            title = title[:-3]
        return title.replace(u'_', u' ')

    def decode(self, blob):
        """
        Decode ``blob`` to unicode using ``self.encoding``.

        When decoding fails the user is asked for another encoding, which is
        remembered in ``self.encoding`` for the following values.
        """
        if isinstance(blob, unicode):
            return blob
        while True:
            try:
                return unicode(blob, self.encoding)
            except (UnicodeDecodeError, LookupError):
                # This is asked again every time the previously chosen
                # encoding does not work.
                chosen = retrieve_windows_encoding()
                if not chosen:
                    # NOTE(review): presumably a falsy result means the user
                    # dismissed the dialog - confirm. Fall back to a lossy
                    # decode instead of prompting forever.
                    try:
                        return unicode(blob, self.encoding, 'replace')
                    except LookupError:
                        return unicode(blob, 'us-ascii', 'replace')
                self.encoding = chosen

    def unescape(self, text):
        """
        Undo the quote escaping: ``^^`` becomes ``"`` and ``^`` becomes
        ``'``; surrounding whitespace is stripped.
        """
        text = text.replace('^^', '"')
        text = text.replace('^', '\'')
        return text.strip()

    def strip_rtf(self, text, encoding):
        """
        Convert RTF markup to plain unicode text.

        ``text``
            The RTF source.

        ``encoding``
            Codec used for ``\\'xx`` bytes until the RTF itself selects a
            code page or font charset.

        Unknown fonts and charsets keep the current encoding, and bytes
        which cannot be decoded are replaced rather than raising.
        """
        destinations = self._rtf_destinations
        specialchars = self._rtf_specialchars
        charset_mapping = self._rtf_charset_mapping
        # Character encoding is defined together with fonts.
        # font_table could contain eg '0': 'cp1252'
        font_table = {}
        stack = []
        # Whether this group (and all inside it) are "ignorable".
        ignorable = False
        # Whether we are inside the font table.
        inside_font_table = False
        current_font = ''
        # Number of ASCII characters to skip after an unicode character.
        ucskip = 1
        # Number of ASCII characters left to skip.
        curskip = 0
        # Output buffer.
        out = []
        for match in self._rtf_pattern.finditer(text):
            word, arg, hex_code, char, brace, tchar = match.groups()
            if brace:
                curskip = 0
                if brace == '{':
                    # Push state
                    stack.append((ucskip, ignorable, inside_font_table))
                elif stack:
                    # Pop state; a stray "}" is ignored.
                    ucskip, ignorable, inside_font_table = stack.pop()
            # \x (not a letter)
            elif char:
                curskip = 0
                if char == '~':
                    if not ignorable:
                        out.append(u'\xA0')
                elif char in '{}\\':
                    if not ignorable:
                        out.append(char)
                elif char == '*':
                    ignorable = True
            # \foo
            elif word:
                curskip = 0
                if word in destinations:
                    ignorable = True
                elif word in specialchars:
                    if not ignorable:
                        out.append(specialchars[word])
                elif word == 'uc':
                    if arg is not None:
                        ucskip = int(arg)
                elif word == 'u':
                    if arg is not None:
                        c = int(arg)
                        if c < 0:
                            c += 0x10000
                        if not ignorable:
                            out.append(unichr(c))
                        curskip = ucskip
                elif word == 'fonttbl':
                    inside_font_table = True
                    ignorable = True
                elif word == 'f':
                    current_font = arg
                    if not inside_font_table:
                        # Fonts missing from the table keep the encoding.
                        encoding = font_table.get(arg, encoding)
                elif word in ('ansicpg', 'fcharset'):
                    charset = None
                    if arg is not None:
                        charset = charset_mapping.get(word + arg)
                    if charset is not None:
                        if inside_font_table:
                            font_table[current_font] = charset
                        else:
                            encoding = charset
            # \'xx
            elif hex_code:
                if curskip > 0:
                    curskip -= 1
                elif not ignorable:
                    c = int(hex_code, 16)
                    out.append(chr(c).decode(encoding, 'replace'))
            elif tchar:
                if curskip > 0:
                    curskip -= 1
                elif not ignorable:
                    if not isinstance(tchar, unicode):
                        # Raw 8-bit bytes would break the unicode join.
                        tchar = tchar.decode(encoding, 'replace')
                    out.append(tchar)
        return u''.join(out)
_______________________________________________ Mailing list: https://launchpad.net/~openlp-core Post to : openlp-core@lists.launchpad.net Unsubscribe : https://launchpad.net/~openlp-core More help : https://help.launchpad.net/ListHelp