Date: Friday, September 20, 2013 @ 18:20:54 Author: jelle Revision: 97460
archrelease: copy trunk to community-staging-any Added: freevo/repos/community-staging-any/PKGBUILD (from rev 97459, freevo/trunk/PKGBUILD) freevo/repos/community-staging-any/freevo.desktop (from rev 97459, freevo/trunk/freevo.desktop) freevo/repos/community-staging-any/freevo.install (from rev 97459, freevo/trunk/freevo.install) freevo/repos/community-staging-any/imdb-html5lib.patch (from rev 97459, freevo/trunk/imdb-html5lib.patch) Deleted: freevo/repos/community-staging-any/PKGBUILD freevo/repos/community-staging-any/freevo.desktop freevo/repos/community-staging-any/freevo.install freevo/repos/community-staging-any/imdb-html5lib.patch ---------------------+ PKGBUILD | 102 ++++---- freevo.desktop | 22 - freevo.install | 50 ++-- imdb-html5lib.patch | 602 +++++++++++++++++++++++++------------------------- 4 files changed, 388 insertions(+), 388 deletions(-) Deleted: PKGBUILD =================================================================== --- PKGBUILD 2013-09-20 16:19:46 UTC (rev 97459) +++ PKGBUILD 2013-09-20 16:20:54 UTC (rev 97460) @@ -1,51 +0,0 @@ -# $Id$ -# Maintainer: Ronald van Haren <ronald.archlinux.org> - -pkgname=freevo -pkgver=1.9.0 -pkgrel=11 -pkgdesc="An open-source home theatre PC platform" -url="http://freevo.sourceforge.net/" -license=('GPL2') -arch=('any') -depends=('lame' 'flac' 'faad2' 'cdparanoia' 'vorbis-tools' 'fbset' - 'smpeg' 'python2-pillow' 'python2-pysqlite-legacy' 'pyxml' 'lsdvd' - 'twisted' 'python2-numpy' 'python2-pygame' 'kaa-metadata' - 'kaa-imlib2' 'python2-beautifulsoup3' 'python2-html5lib') -optdepends=('mplayer: most features' - 'xine-lib: can be used instead of mplayer' - 'tvtime: watching tv' - 'xmltv: parsing online tv guides') -install=freevo.install -options=('docs') -source=("http://downloads.sourceforge.net/freevo/$pkgname-$pkgver.tar.gz" - 'freevo.desktop' - 'imdb-html5lib.patch') -sha1sums=('0533a2d4ff8a7b09b3b233fdf303ff56bda22d16' - '5f6e76dc58496f072aca29257c24be5ffa9c6c7d' - '57e1099527603500c594fbffa2cb7233b636eed1') - -package() { - cd "${srcdir}"/${pkgname}-${pkgver} - - # patch from http://sourceforge.net/tracker/index.php?func=detail&aid=2924872&group_id=46652&atid=446895 - patch -p0 < "${srcdir}"/imdb-html5lib.patch - - # remove check for python-imaging - sed -i "/'Image'/d" "${srcdir}"/${pkgname}-${pkgver}/setup.py - python2 setup.py install --prefix="${pkgdir}"/usr - - install -Dm644 local_conf.py.example "${pkgdir}"/usr/share/doc/freevo/local_conf.py.example - - # install .desktop file and icon - install -Dm644 "${srcdir}"/${pkgname}-${pkgver}/share/icons/misc/freevo_app.png \ - "${pkgdir}"/usr/share/pixmaps/freevo.png - install -Dm644 "${srcdir}"/freevo.desktop \ - "${pkgdir}"/usr/share/applications/freevo.desktop - - # fix executable for python 2.7 - sed -i -e 's:\(#!/usr/bin/env[ ]\+python$\|#!/usr/bin/python$\):\12:g' \ - $(find "${pkgdir}" -regex ".*.py\|.*.recipe") - sed -i "s:python:python2:g" "${pkgdir}"/usr/share/freevo/htdocs/downloadurl - sed -i "s|search = ('python', 'python2')|search = ('python2', 'python2.7')|" "${pkgdir}"/usr/bin/freevo -} Copied: freevo/repos/community-staging-any/PKGBUILD (from rev 97459, freevo/trunk/PKGBUILD) =================================================================== --- PKGBUILD (rev 0) +++ PKGBUILD 2013-09-20 16:20:54 UTC (rev 97460) @@ -0,0 +1,51 @@ +# $Id$ +# Maintainer: Ronald van Haren <ronald.archlinux.org> + +pkgname=freevo +pkgver=1.9.0 +pkgrel=12 +pkgdesc="An open-source home theatre PC platform" +url="http://freevo.sourceforge.net/" +license=('GPL2') +arch=('any') +depends=('lame' 'flac' 'faad2' 'cdparanoia' 'vorbis-tools' 'fbset' + 'smpeg' 'python2-pillow' 'python2-pysqlite-legacy' 'pyxml' 'lsdvd' + 'twisted' 'python2-numpy' 'python2-pygame' 'kaa-metadata' + 'kaa-imlib2' 'python2-beautifulsoup3' 'python2-html5lib') +optdepends=('mplayer: most features' + 'xine-lib: can be used instead of mplayer' + 'tvtime: watching tv' + 'xmltv: parsing online tv guides') +install=freevo.install +options=('docs') +source=("http://downloads.sourceforge.net/freevo/$pkgname-$pkgver.tar.gz" + 'freevo.desktop' + 'imdb-html5lib.patch') +sha1sums=('0533a2d4ff8a7b09b3b233fdf303ff56bda22d16' + '5f6e76dc58496f072aca29257c24be5ffa9c6c7d' + '57e1099527603500c594fbffa2cb7233b636eed1') + +package() { + cd "${srcdir}"/${pkgname}-${pkgver} + + # patch from http://sourceforge.net/tracker/index.php?func=detail&aid=2924872&group_id=46652&atid=446895 + patch -p0 < "${srcdir}"/imdb-html5lib.patch + + # remove check for python-imaging + sed -i "/'Image'/d" "${srcdir}"/${pkgname}-${pkgver}/setup.py + python2 setup.py install --prefix="${pkgdir}"/usr + + install -Dm644 local_conf.py.example "${pkgdir}"/usr/share/doc/freevo/local_conf.py.example + + # install .desktop file and icon + install -Dm644 "${srcdir}"/${pkgname}-${pkgver}/share/icons/misc/freevo_app.png \ + "${pkgdir}"/usr/share/pixmaps/freevo.png + install -Dm644 "${srcdir}"/freevo.desktop \ + "${pkgdir}"/usr/share/applications/freevo.desktop + + # fix executable for python 2.7 + sed -i -e 's:\(#!/usr/bin/env[ ]\+python$\|#!/usr/bin/python$\):\12:g' \ + $(find "${pkgdir}" -regex ".*.py\|.*.recipe") + sed -i "s:python:python2:g" "${pkgdir}"/usr/share/freevo/htdocs/downloadurl + sed -i "s|search = ('python', 'python2')|search = ('python2', 'python2.7')|" "${pkgdir}"/usr/bin/freevo +} Deleted: freevo.desktop =================================================================== --- freevo.desktop 2013-09-20 16:19:46 UTC (rev 97459) +++ freevo.desktop 2013-09-20 16:20:54 UTC (rev 97460) @@ -1,11 +0,0 @@ -[Desktop Entry] -Version=1.0 -Name=Freevo -GenericName=Freevo -Comment=Home theatre -Exec=freevo -Terminal=false -Type=Application -Icon=/usr/share/pixmaps/freevo.png -Categories=AudioVideo; - Copied: freevo/repos/community-staging-any/freevo.desktop (from rev 97459, freevo/trunk/freevo.desktop) =================================================================== --- freevo.desktop (rev 0) +++ freevo.desktop 2013-09-20 16:20:54 UTC (rev 97460) @@ -0,0 +1,11 @@ +[Desktop Entry] +Version=1.0 +Name=Freevo +GenericName=Freevo +Comment=Home theatre +Exec=freevo +Terminal=false +Type=Application +Icon=/usr/share/pixmaps/freevo.png +Categories=AudioVideo; + Deleted: freevo.install =================================================================== --- freevo.install 2013-09-20 16:19:46 UTC (rev 97459) +++ freevo.install 2013-09-20 16:20:54 UTC (rev 97460) @@ -1,25 +0,0 @@ -post_install() { - if [ ! -d /etc/freevo ]; then - mkdir /etc/freevo - fi - - if [ ! -f /etc/freevo/local_conf.py ]; then - cp /usr/share/doc/freevo/local_conf.py.example /etc/freevo/local_conf.py - fi - -cat << _EOF - -==> freevo installation notes: ---------------------------------------- - Run 'freevo setup --help' and follow the instructions. Then edit - /etc/freevo/local_conf.py to suit your needs. This file can be - installed on a per-user basis in ~/.freevo and will take precedence - over the one in /etc/freevo/. - -_EOF - -} - -post_upgrade() { - post_install -} Copied: freevo/repos/community-staging-any/freevo.install (from rev 97459, freevo/trunk/freevo.install) =================================================================== --- freevo.install (rev 0) +++ freevo.install 2013-09-20 16:20:54 UTC (rev 97460) @@ -0,0 +1,25 @@ +post_install() { + if [ ! -d /etc/freevo ]; then + mkdir /etc/freevo + fi + + if [ ! -f /etc/freevo/local_conf.py ]; then + cp /usr/share/doc/freevo/local_conf.py.example /etc/freevo/local_conf.py + fi + +cat << _EOF + +==> freevo installation notes: +--------------------------------------- + Run 'freevo setup --help' and follow the instructions. Then edit + /etc/freevo/local_conf.py to suit your needs. This file can be + installed on a per-user basis in ~/.freevo and will take precedence + over the one in /etc/freevo/. + +_EOF + +} + +post_upgrade() { + post_install +} Deleted: imdb-html5lib.patch =================================================================== --- imdb-html5lib.patch 2013-09-20 16:19:46 UTC (rev 97459) +++ imdb-html5lib.patch 2013-09-20 16:20:54 UTC (rev 97460) @@ -1,301 +0,0 @@ -Index: src/helpers/imdb.py -=================================================================== ---- src/helpers/imdb.py (revision 11608) -+++ src/helpers/imdb.py (working copy) -@@ -53,19 +53,19 @@ - parser = OptionParser(version='%prog 1.0', conflict_handler='resolve', usage=""" - Search IMDB for a movie or a TV show - --freevo imdb [options] <search> [<output> <video file> [<video file>]] -+freevo imdb [options] | [<result> <fxd file> <video file> [<video file>]] - --Generate <output>.fxd for the movie. Files is a list of files that belongs to --this movie. Use [dvd|vcd] to add the whole disc or use [dvd|vcd][title] to add --a special DVD or VCD title to the list of files""") -+Generate a fxd for the movie. Files is a list of files that belongs to this -+movie. Use [dvd|vcd] to add the whole disc or use [dvd|vcd][title] to add a -+special DVD or VCD title to the list of files""") - parser.add_option('-v', '--verbose', action='count', default=0, - help='set the level of verbosity [default:%default]') - parser.add_option('-s', '--search', action='store_true', dest='search', default=False, - help='search imdb for string [default:%default]') - parser.add_option('-g', '--guess', action='store_true', dest='guess', default=False, - help='search imdb for possible filename match [default:%default]') -- parser.add_option('--tv', action='store_true', dest='tv', default=False, -- help='specify the search is a tv programme [default:%default]') -+ parser.add_option('--tv', action='store', dest='tv', default=None, -+ help='specify the id of a tv programme for a eipsode search [default:%default]') - parser.add_option('--season', dest='season', default=None, - help='specify the season in the search [default:%default]') - parser.add_option('--episode', dest='episode', default=None, -@@ -116,7 +116,9 @@ - sys.exit(u'--search requires <search pattern>') - elif opts.guess and len(args) < 1: - sys.exit(u'--guess requires <guess pattern>') -- tv_marker = (opts.tv or opts.season or opts.episode) and '"' or '' -+ #elif opts.tv and len(args) < 1: -+ # sys.exit(u'--tv requires <imdb id>') -+ tv_marker = (opts.season or opts.episode) and '"' or '' - - if opts.rom_drive is not None: - driveset = True -@@ -176,6 +178,23 @@ - print '%s' % title.encode(opts.encoding) - sys.exit(0) - -+ if opts.tv: -+ print "Searching IMDB for '%s' season:%s episode:%s..." % (opts.tv, opts.season, opts.episode) -+ results = fxd.getIMDBid(opts.tv, opts.season, opts.episode) -+ if len(results) == 0: -+ print 'No results' -+ #for result in results: -+ # if result[3]: -+ # title = 'http://www.imdb.com/title/tt%s/ %s %s (%s) %s' % (result[:1] + result[:4]) -+ # elif result[2]: -+ # title = 'http://www.imdb.com/title/tt%s/ %s %s (%s)' % (result[:1] + result[:3]) -+ # else: -+ # title = 'http://www.imdb.com/title/tt%s/ %s %s' % (result[:1] + result[:2]) -+ # title = results -+ title = 'http://www.imdb.com/title/tt%s/ %s' % (results, results) -+ print '%s' % title.encode(opts.encoding) -+ sys.exit(0) -+ - # normal usage - if len(args) < 3: - sys.exit(u'requires <imdb id> <fxd filename> <video file>|<cd id>') -Index: src/util/fxdimdb.py -=================================================================== ---- src/util/fxdimdb.py (revision 11608) -+++ src/util/fxdimdb.py (working copy) -@@ -48,8 +48,15 @@ - import codecs - import os - import traceback --from BeautifulSoup import BeautifulSoup, NavigableString --import HTMLParser -+from pprint import pprint, pformat -+try: -+ from html5lib import HTMLParser, treebuilders -+ from html5lib.treebuilders.soup import NavigableString -+ using_html5lib = True -+except ImportError: -+ import HTMLParser -+ from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, NavigableString -+ using_html5lib = False - - import config - import util -@@ -232,6 +239,7 @@ - response.close() - - _debug_('id_list has %s items' % (len(self.id_list))) -+ #print 'id_list=%s' % (pformat(self.id_list)) - if len(self.id_list) > 20: - # too many results, check if there are stupid results in the list - words = [] -@@ -311,7 +319,11 @@ - dvd = 0 - - try: -- soup = BeautifulSoup(results.read(), convertEntities='xml') -+ if using_html5lib: -+ parser = HTMLParser(tree=treebuilders.getTreeBuilder('beautifulsoup')) -+ soup = parser.parse(results.read()) -+ else: -+ soup = BeautifulSoup(results.read(), convertEntities=BeautifulStoneSoup.HTML_ENTITIES) - except UnicodeDecodeError: - print "Unicode error: check that /usr/lib/python2.x/site.py has the correct default encoding" - traceback.print_exc() -@@ -336,68 +348,61 @@ - self.info['year'] = y[1:-1] - except (AttributeError, TypeError, ValueError): - self.info['title'] = self.title -- self.info['year'] = title.find('a').string.strip() -+ try: -+ self.info['year'] = title.find('a').contents[0].strip() -+ except AttributeError: -+ self.info['year'] = '' - - # Find the <div> with class info, each <h5> under this provides info -+ wanted_keys = ('release_date', 'genre', 'tagline', 'plot', 'plot_keywords', -+ 'also_known_as', 'mpaa', 'runtime', 'country', 'language', -+ 'color', 'aspect_ratio', 'sound_mix', 'certification', -+ ) -+ - for info in main.findAll('div', {'class' : 'info'}): - infoh5 = info.find('h5') - if not infoh5: - continue - try: -- infostr = infoh5.next -- key = infostr.string.strip(':').lower().replace(' ', '_') -- nextsibling = nextsibling = infoh5.nextSibling.strip() -- sections = info.findAll('a', { 'href' : re.compile('/Sections') }) -- lists = info.findAll('a', { 'href' : re.compile('/List') }) -- if len(nextsibling) > 0: -- self.info[key] = nextsibling -+ infostr = infoh5.find(text=True) -+ key = infostr.strip().strip(':').lower().replace(' ', '_') -+ if key not in wanted_keys: -+ continue -+ content = info.find('div', {'class' : 'info-content'}) -+ infocontent = content.find(text=True) -+ if infocontent: -+ infocontent = infocontent.strip() -+ sections = info.findAll('a', { 'href' : re.compile('^/Sections') }) -+ lists = info.findAll('a', { 'href' : re.compile('^/List') }) -+ keywords = info.findAll('a', { 'href' : re.compile('^/keyword') }) -+ #print 'key=%s content=%r keywords=%r sections=%r lists=%r' % (key, infocontent, keywords, sections, lists) -+ if len(infocontent) > 0: -+ self.info[key] = infocontent - elif len(sections) > 0: - items = [] - for item in sections: -- items.append(item.string) -+ items.append(item.contents[0].strip()) - self.info[key] = ' / '.join(items) - elif len(lists) > 0: - items = [] - for item in lists: -- items.append(item.string) -+ items.append(item.contents[0].strip()) - self.info[key] = ' / '.join(items) -+ elif len(keywords) > 0: -+ items = [] -+ for item in keywords: -+ items.append(item.contents[0].strip()) -+ self.info[key] = ' / '.join(items) - except: - pass - -- # Find Plot Outline/Summary: -- # Normally the tag is named "Plot Outline:" - however sometimes -- # the tag is "Plot Summary:" or just "Plot:". Search for all strings. -- imdb_result = soup.find(text='Plot Outline:') -- if not imdb_result: -- imdb_result = soup.find(text='Plot Summary:') -- if not imdb_result: -- imdb_result = soup.find(text='Plot:') -- if imdb_result: -- self.info['plot'] = imdb_result.next.strip() -- else: -- self.info['plot'] = u'' -- -- # Find tagline - sometimes the tagline is missing. -- # Use an empty string if no tagline could be found. -- imdb_result = soup.find(text='Tagline:') -- if imdb_result: -- self.info['tagline'] = imdb_result.next.strip() -- else: -- self.info['tagline'] = u'' -- - rating = soup.find(text='User Rating:').findNext(text=re.compile('/10')) -- if rating: -+ try: - votes = rating.findNext('a') -- self.info['rating'] = rating.strip() + ' (' + votes.string.strip() + ')' -- else: -+ self.info['rating'] = rating.strip() + ' (' + votes.contents[0].strip() + ')' -+ except AttributeError: - self.info['rating'] = '' - -- runtime = soup.find(text='Runtime:') -- if runtime and runtime.next: -- self.info['runtime'] = runtime.next.strip() -- else: -- self.info['runtime'] = '' -- - # Replace special characters in the items - for (k,v) in self.info.items(): - self.info[k] = self.convert_entities(v) -@@ -794,10 +799,14 @@ - _debug_('parsesearchdata(results=%r, url=%r, id=%r)' % (results, url, id)) - - self.id_list = [] -- m = re.compile('/title/tt([0-9]*)/') -- y = re.compile('\(([^)]+)\)') -+ m = re.compile('/title/tt(\d+)/') -+ y = re.compile('\((\d+)\) *(.*)') - try: -- soup = BeautifulSoup(results.read(), convertEntities='xml') -+ if using_html5lib: -+ parser = HTMLParser(tree=treebuilders.getTreeBuilder('beautifulsoup')) -+ soup = parser.parse(results.read()) -+ else: -+ soup = BeautifulSoup(results.read(), convertEntities=BeautifulStoneSoup.HTML_ENTITIES) - except HTMLParser.HTMLParseError, why: - traceback.print_exc() - _debug_('Cannot parse %r: %s' % (url, why), DWARNING) -@@ -806,28 +815,37 @@ - traceback.print_exc() - _debug_('Cannot parse %r: %s' % (url, why), DWARNING) - return self.id_list -- items = soup.findAll('a', href=re.compile('/title/tt')) -+ items = soup.findAll('a', href=re.compile('^/title/tt')) - ids = set([]) - for item in items: -- idm = m.search(item['href']) -+ idm = item.attrMap['href'] - if not idm: - continue -- if isinstance(item.next.next, NavigableString): -- yrm = y.findall(item.next.next) -- -- id = idm.group(1) -- name = item.string -- # skip empty names -- if not name: -+ m_match = m.match(idm) -+ if not m_match: -+ # skip invalid titles - continue -- # skip duplicate ids -+ id = m_match.group(1) - if id in ids: -+ # skip duplicate ids - continue -+ name = item.contents[0] -+ if not isinstance(name, NavigableString): -+ # skip empty names -+ continue -+ if isinstance(item.next.next, NavigableString): -+ yrm = item.next.next.strip() - ids.add(id) -- year = len(yrm) > 0 and yrm[0] or '0000' -- type = len(yrm) > 1 and yrm[1] or '' -+ y_match = y.match(yrm) -+ if y_match: -+ year = y_match.group(1) -+ type = y_match.group(2) -+ else: -+ year = '0000' -+ type = '' - #print 'url', item['href'] - #print item.parent.findChildren(text=re.compile('[^ ]')) -+ #print 'id=%s name=%s year=%s type=%s' % (id, name, year, type) - self.id_list += [ ( id, name, year, type ) ] - - for item in self.id_list: -@@ -840,7 +858,11 @@ - Returns a new id for getIMDBid with TV series episode data - """ - try: -- soup = BeautifulSoup(results.read(), convertEntities='xml') -+ if using_html5lib: -+ parser = HTMLParser(tree=treebuilders.getTreeBuilder('beautifulsoup')) -+ soup = parser.parse(results.read()) -+ else: -+ soup = BeautifulSoup(results.read(), convertEntities=BeautifulStoneSoup.HTML_ENTITIES) - except UnicodeDecodeError: - print "Unicode error; check that /usr/lib/python2.x/site.py has the correct default encoding" - pass -@@ -968,9 +990,6 @@ - self.image = vfs.basename(self.image) - - _debug_('Downloaded cover image from %s' % (self.image_url)) -- print "Freevo knows nothing about the copyright of this image, please" -- print "go to %s to check for more information about private." % self.image_url -- print "use of this image" - - - def str2XML(self, line): Copied: freevo/repos/community-staging-any/imdb-html5lib.patch (from rev 97459, freevo/trunk/imdb-html5lib.patch) =================================================================== --- imdb-html5lib.patch (rev 0) +++ imdb-html5lib.patch 2013-09-20 16:20:54 UTC (rev 97460) @@ -0,0 +1,301 @@ +Index: src/helpers/imdb.py +=================================================================== +--- src/helpers/imdb.py (revision 11608) ++++ src/helpers/imdb.py (working copy) +@@ -53,19 +53,19 @@ + parser = OptionParser(version='%prog 1.0', conflict_handler='resolve', usage=""" + Search IMDB for a movie or a TV show + +-freevo imdb [options] <search> [<output> <video file> [<video file>]] ++freevo imdb [options] | [<result> <fxd file> <video file> [<video file>]] + +-Generate <output>.fxd for the movie. Files is a list of files that belongs to +-this movie. Use [dvd|vcd] to add the whole disc or use [dvd|vcd][title] to add +-a special DVD or VCD title to the list of files""") ++Generate a fxd for the movie. Files is a list of files that belongs to this ++movie. Use [dvd|vcd] to add the whole disc or use [dvd|vcd][title] to add a ++special DVD or VCD title to the list of files""") + parser.add_option('-v', '--verbose', action='count', default=0, + help='set the level of verbosity [default:%default]') + parser.add_option('-s', '--search', action='store_true', dest='search', default=False, + help='search imdb for string [default:%default]') + parser.add_option('-g', '--guess', action='store_true', dest='guess', default=False, + help='search imdb for possible filename match [default:%default]') +- parser.add_option('--tv', action='store_true', dest='tv', default=False, +- help='specify the search is a tv programme [default:%default]') ++ parser.add_option('--tv', action='store', dest='tv', default=None, ++ help='specify the id of a tv programme for a eipsode search [default:%default]') + parser.add_option('--season', dest='season', default=None, + help='specify the season in the search [default:%default]') + parser.add_option('--episode', dest='episode', default=None, +@@ -116,7 +116,9 @@ + sys.exit(u'--search requires <search pattern>') + elif opts.guess and len(args) < 1: + sys.exit(u'--guess requires <guess pattern>') +- tv_marker = (opts.tv or opts.season or opts.episode) and '"' or '' ++ #elif opts.tv and len(args) < 1: ++ # sys.exit(u'--tv requires <imdb id>') ++ tv_marker = (opts.season or opts.episode) and '"' or '' + + if opts.rom_drive is not None: + driveset = True +@@ -176,6 +178,23 @@ + print '%s' % title.encode(opts.encoding) + sys.exit(0) + ++ if opts.tv: ++ print "Searching IMDB for '%s' season:%s episode:%s..." % (opts.tv, opts.season, opts.episode) ++ results = fxd.getIMDBid(opts.tv, opts.season, opts.episode) ++ if len(results) == 0: ++ print 'No results' ++ #for result in results: ++ # if result[3]: ++ # title = 'http://www.imdb.com/title/tt%s/ %s %s (%s) %s' % (result[:1] + result[:4]) ++ # elif result[2]: ++ # title = 'http://www.imdb.com/title/tt%s/ %s %s (%s)' % (result[:1] + result[:3]) ++ # else: ++ # title = 'http://www.imdb.com/title/tt%s/ %s %s' % (result[:1] + result[:2]) ++ # title = results ++ title = 'http://www.imdb.com/title/tt%s/ %s' % (results, results) ++ print '%s' % title.encode(opts.encoding) ++ sys.exit(0) ++ + # normal usage + if len(args) < 3: + sys.exit(u'requires <imdb id> <fxd filename> <video file>|<cd id>') +Index: src/util/fxdimdb.py +=================================================================== +--- src/util/fxdimdb.py (revision 11608) ++++ src/util/fxdimdb.py (working copy) +@@ -48,8 +48,15 @@ + import codecs + import os + import traceback +-from BeautifulSoup import BeautifulSoup, NavigableString +-import HTMLParser ++from pprint import pprint, pformat ++try: ++ from html5lib import HTMLParser, treebuilders ++ from html5lib.treebuilders.soup import NavigableString ++ using_html5lib = True ++except ImportError: ++ import HTMLParser ++ from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, NavigableString ++ using_html5lib = False + + import config + import util +@@ -232,6 +239,7 @@ + response.close() + + _debug_('id_list has %s items' % (len(self.id_list))) ++ #print 'id_list=%s' % (pformat(self.id_list)) + if len(self.id_list) > 20: + # too many results, check if there are stupid results in the list + words = [] +@@ -311,7 +319,11 @@ + dvd = 0 + + try: +- soup = BeautifulSoup(results.read(), convertEntities='xml') ++ if using_html5lib: ++ parser = HTMLParser(tree=treebuilders.getTreeBuilder('beautifulsoup')) ++ soup = parser.parse(results.read()) ++ else: ++ soup = BeautifulSoup(results.read(), convertEntities=BeautifulStoneSoup.HTML_ENTITIES) + except UnicodeDecodeError: + print "Unicode error: check that /usr/lib/python2.x/site.py has the correct default encoding" + traceback.print_exc() +@@ -336,68 +348,61 @@ + self.info['year'] = y[1:-1] + except (AttributeError, TypeError, ValueError): + self.info['title'] = self.title +- self.info['year'] = title.find('a').string.strip() ++ try: ++ self.info['year'] = title.find('a').contents[0].strip() ++ except AttributeError: ++ self.info['year'] = '' + + # Find the <div> with class info, each <h5> under this provides info ++ wanted_keys = ('release_date', 'genre', 'tagline', 'plot', 'plot_keywords', ++ 'also_known_as', 'mpaa', 'runtime', 'country', 'language', ++ 'color', 'aspect_ratio', 'sound_mix', 'certification', ++ ) ++ + for info in main.findAll('div', {'class' : 'info'}): + infoh5 = info.find('h5') + if not infoh5: + continue + try: +- infostr = infoh5.next +- key = infostr.string.strip(':').lower().replace(' ', '_') +- nextsibling = nextsibling = infoh5.nextSibling.strip() +- sections = info.findAll('a', { 'href' : re.compile('/Sections') }) +- lists = info.findAll('a', { 'href' : re.compile('/List') }) +- if len(nextsibling) > 0: +- self.info[key] = nextsibling ++ infostr = infoh5.find(text=True) ++ key = infostr.strip().strip(':').lower().replace(' ', '_') ++ if key not in wanted_keys: ++ continue ++ content = info.find('div', {'class' : 'info-content'}) ++ infocontent = content.find(text=True) ++ if infocontent: ++ infocontent = infocontent.strip() ++ sections = info.findAll('a', { 'href' : re.compile('^/Sections') }) ++ lists = info.findAll('a', { 'href' : re.compile('^/List') }) ++ keywords = info.findAll('a', { 'href' : re.compile('^/keyword') }) ++ #print 'key=%s content=%r keywords=%r sections=%r lists=%r' % (key, infocontent, keywords, sections, lists) ++ if len(infocontent) > 0: ++ self.info[key] = infocontent + elif len(sections) > 0: + items = [] + for item in sections: +- items.append(item.string) ++ items.append(item.contents[0].strip()) + self.info[key] = ' / '.join(items) + elif len(lists) > 0: + items = [] + for item in lists: +- items.append(item.string) ++ items.append(item.contents[0].strip()) + self.info[key] = ' / '.join(items) ++ elif len(keywords) > 0: ++ items = [] ++ for item in keywords: ++ items.append(item.contents[0].strip()) ++ self.info[key] = ' / '.join(items) + except: + pass + +- # Find Plot Outline/Summary: +- # Normally the tag is named "Plot Outline:" - however sometimes +- # the tag is "Plot Summary:" or just "Plot:". Search for all strings. +- imdb_result = soup.find(text='Plot Outline:') +- if not imdb_result: +- imdb_result = soup.find(text='Plot Summary:') +- if not imdb_result: +- imdb_result = soup.find(text='Plot:') +- if imdb_result: +- self.info['plot'] = imdb_result.next.strip() +- else: +- self.info['plot'] = u'' +- +- # Find tagline - sometimes the tagline is missing. +- # Use an empty string if no tagline could be found. +- imdb_result = soup.find(text='Tagline:') +- if imdb_result: +- self.info['tagline'] = imdb_result.next.strip() +- else: +- self.info['tagline'] = u'' +- + rating = soup.find(text='User Rating:').findNext(text=re.compile('/10')) +- if rating: ++ try: + votes = rating.findNext('a') +- self.info['rating'] = rating.strip() + ' (' + votes.string.strip() + ')' +- else: ++ self.info['rating'] = rating.strip() + ' (' + votes.contents[0].strip() + ')' ++ except AttributeError: + self.info['rating'] = '' + +- runtime = soup.find(text='Runtime:') +- if runtime and runtime.next: +- self.info['runtime'] = runtime.next.strip() +- else: +- self.info['runtime'] = '' +- + # Replace special characters in the items + for (k,v) in self.info.items(): + self.info[k] = self.convert_entities(v) +@@ -794,10 +799,14 @@ + _debug_('parsesearchdata(results=%r, url=%r, id=%r)' % (results, url, id)) + + self.id_list = [] +- m = re.compile('/title/tt([0-9]*)/') +- y = re.compile('\(([^)]+)\)') ++ m = re.compile('/title/tt(\d+)/') ++ y = re.compile('\((\d+)\) *(.*)') + try: +- soup = BeautifulSoup(results.read(), convertEntities='xml') ++ if using_html5lib: ++ parser = HTMLParser(tree=treebuilders.getTreeBuilder('beautifulsoup')) ++ soup = parser.parse(results.read()) ++ else: ++ soup = BeautifulSoup(results.read(), convertEntities=BeautifulStoneSoup.HTML_ENTITIES) + except HTMLParser.HTMLParseError, why: + traceback.print_exc() + _debug_('Cannot parse %r: %s' % (url, why), DWARNING) +@@ -806,28 +815,37 @@ + traceback.print_exc() + _debug_('Cannot parse %r: %s' % (url, why), DWARNING) + return self.id_list +- items = soup.findAll('a', href=re.compile('/title/tt')) ++ items = soup.findAll('a', href=re.compile('^/title/tt')) + ids = set([]) + for item in items: +- idm = m.search(item['href']) ++ idm = item.attrMap['href'] + if not idm: + continue +- if isinstance(item.next.next, NavigableString): +- yrm = y.findall(item.next.next) +- +- id = idm.group(1) +- name = item.string +- # skip empty names +- if not name: ++ m_match = m.match(idm) ++ if not m_match: ++ # skip invalid titles + continue +- # skip duplicate ids ++ id = m_match.group(1) + if id in ids: ++ # skip duplicate ids + continue ++ name = item.contents[0] ++ if not isinstance(name, NavigableString): ++ # skip empty names ++ continue ++ if isinstance(item.next.next, NavigableString): ++ yrm = item.next.next.strip() + ids.add(id) +- year = len(yrm) > 0 and yrm[0] or '0000' +- type = len(yrm) > 1 and yrm[1] or '' ++ y_match = y.match(yrm) ++ if y_match: ++ year = y_match.group(1) ++ type = y_match.group(2) ++ else: ++ year = '0000' ++ type = '' + #print 'url', item['href'] + #print item.parent.findChildren(text=re.compile('[^ ]')) ++ #print 'id=%s name=%s year=%s type=%s' % (id, name, year, type) + self.id_list += [ ( id, name, year, type ) ] + + for item in self.id_list: +@@ -840,7 +858,11 @@ + Returns a new id for getIMDBid with TV series episode data + """ + try: +- soup = BeautifulSoup(results.read(), convertEntities='xml') ++ if using_html5lib: ++ parser = HTMLParser(tree=treebuilders.getTreeBuilder('beautifulsoup')) ++ soup = parser.parse(results.read()) ++ else: ++ soup = BeautifulSoup(results.read(), convertEntities=BeautifulStoneSoup.HTML_ENTITIES) + except UnicodeDecodeError: + print "Unicode error; check that /usr/lib/python2.x/site.py has the correct default encoding" + pass +@@ -968,9 +990,6 @@ + self.image = vfs.basename(self.image) + + _debug_('Downloaded cover image from %s' % (self.image_url)) +- print "Freevo knows nothing about the copyright of this image, please" +- print "go to %s to check for more information about private." % self.image_url +- print "use of this image" + + + def str2XML(self, line):