Hello community,

here is the log from the commit of package youtube-dl for openSUSE:Factory
checked in at 2015-04-25 14:38:50
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/youtube-dl (Old)
 and      /work/SRC/openSUSE:Factory/.youtube-dl.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "youtube-dl" Changes: -------- --- /work/SRC/openSUSE:Factory/youtube-dl/youtube-dl.changes 2015-04-13 20:30:18.000000000 +0200 +++ /work/SRC/openSUSE:Factory/.youtube-dl.new/youtube-dl.changes 2015-04-25 14:38:51.000000000 +0200 @@ -1,0 +2,8 @@ +Sat Apr 18 10:59:42 UTC 2015 - benoit.mo...@gmx.fr + +- update to new upstream release 2015.04.17: + * fix unicode warning about the configuration file + * new handlers for CrooksAndLiars, QQMusic, QQMusicAlbum, + QQMusicSinger, Srf + +------------------------------------------------------------------- Old: ---- youtube-dl-2015.04.09.tar.gz youtube-dl-2015.04.09.tar.gz.sig New: ---- youtube-dl-2015.04.17.tar.gz youtube-dl-2015.04.17.tar.gz.sig ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ youtube-dl.spec ++++++ --- /var/tmp/diff_new_pack.YjiFaf/_old 2015-04-25 14:38:52.000000000 +0200 +++ /var/tmp/diff_new_pack.YjiFaf/_new 2015-04-25 14:38:52.000000000 +0200 @@ -17,7 +17,7 @@ Name: youtube-dl -Version: 2015.04.09 +Version: 2015.04.17 Release: 0 Summary: A tool for downloading from Youtube License: SUSE-Public-Domain and CC-BY-SA-3.0 ++++++ youtube-dl-2015.04.09.tar.gz -> youtube-dl-2015.04.17.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/docs/supportedsites.md new/youtube-dl/docs/supportedsites.md --- old/youtube-dl/docs/supportedsites.md 2015-04-09 00:21:19.000000000 +0200 +++ new/youtube-dl/docs/supportedsites.md 2015-04-17 11:14:25.000000000 +0200 @@ -98,6 +98,7 @@ - **CondeNast**: Condé Nast media group: Condé Nast, GQ, Glamour, Vanity Fair, Vogue, W Magazine, WIRED - **Cracked** - **Criterion** + - **CrooksAndLiars** - **Crunchyroll** - **crunchyroll:playlist** - **CSpan**: C-SPAN @@ -359,6 +360,9 @@ - **prosiebensat1**: ProSiebenSat.1 Digital - **Puls4** - **Pyvideo** + - **QQMusic** + - **QQMusicAlbum** + - **QQMusicSinger** - **QuickVid** - **R7** - **radio.de** @@ -434,6 +438,7 @@ - **Sport5** - **SportBox** - **SportDeutschland** + - **Srf** - **SRMediathek**: Saarländischer Rundfunk - **SSA** - **stanfordoc**: Stanford Open ClassRoom Files old/youtube-dl/youtube-dl and new/youtube-dl/youtube-dl differ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/YoutubeDL.py new/youtube-dl/youtube_dl/YoutubeDL.py --- old/youtube-dl/youtube_dl/YoutubeDL.py 2015-04-07 13:38:08.000000000 +0200 +++ new/youtube-dl/youtube_dl/YoutubeDL.py 2015-04-17 11:14:17.000000000 +0200 @@ -919,6 +919,11 @@ if format_spec == 'best' or format_spec is None: return available_formats[-1] elif format_spec == 'worst': + audiovideo_formats = [ + f for f in available_formats + if f.get('vcodec') != 'none' and f.get('acodec') != 'none'] + if audiovideo_formats: + return audiovideo_formats[0] return available_formats[0] elif format_spec == 'bestaudio': audio_formats = [ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/__init__.py new/youtube-dl/youtube_dl/extractor/__init__.py --- old/youtube-dl/youtube_dl/extractor/__init__.py 2015-04-09 00:20:51.000000000 +0200 +++ new/youtube-dl/youtube_dl/extractor/__init__.py 2015-04-17 11:14:17.000000000 +0200 @@ -90,6 +90,7 @@ from .condenast import CondeNastIE from .cracked import CrackedIE from .criterion import CriterionIE +from .crooksandliars import CrooksAndLiarsIE from .crunchyroll import ( CrunchyrollIE, CrunchyrollShowPlaylistIE @@ 
-395,6 +396,11 @@ from .prosiebensat1 import ProSiebenSat1IE from .puls4 import Puls4IE from .pyvideo import PyvideoIE +from .qqmusic import ( + QQMusicIE, + QQMusicSingerIE, + QQMusicAlbumIE, +) from .quickvid import QuickVidIE from .r7 import R7IE from .radiode import RadioDeIE @@ -479,6 +485,7 @@ from .sport5 import Sport5IE from .sportbox import SportBoxIE from .sportdeutschland import SportDeutschlandIE +from .srf import SrfIE from .srmediathek import SRMediathekIE from .ssa import SSAIE from .stanfordoc import StanfordOpenClassroomIE diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/brightcove.py new/youtube-dl/youtube_dl/extractor/brightcove.py --- old/youtube-dl/youtube_dl/extractor/brightcove.py 2015-02-21 12:13:26.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/brightcove.py 2015-04-17 11:14:17.000000000 +0200 @@ -117,7 +117,10 @@ object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str) object_str = fix_xml_ampersands(object_str) - object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8')) + try: + object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8')) + except xml.etree.ElementTree.ParseError: + return fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars') if fv_el is not None: @@ -183,9 +186,9 @@ (?: [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] | [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/ - ).+?</object>''', + ).+?>\s*</object>''', webpage) - return [cls._build_brighcove_url(m) for m in matches] + return list(filter(None, [cls._build_brighcove_url(m) for m in matches])) def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/common.py new/youtube-dl/youtube_dl/extractor/common.py --- old/youtube-dl/youtube_dl/extractor/common.py 2015-04-07 13:38:08.000000000 +0200 +++ new/youtube-dl/youtube_dl/extractor/common.py 2015-04-17 11:14:17.000000000 +0200 @@ -324,7 +324,7 @@ self._downloader.report_warning(errmsg) return False - def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True): + def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None): """ Returns a tuple (page content as string, URL handle) """ # Strip hashes from the URL (#1038) if isinstance(url_or_request, (compat_str, str)): @@ -334,14 +334,11 @@ if urlh is False: assert not fatal return False - content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal) + content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding) return (content, urlh) - def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None): - content_type = urlh.headers.get('Content-Type', '') - webpage_bytes = urlh.read() - if prefix is not None: - webpage_bytes = prefix + webpage_bytes + @staticmethod + def _guess_encoding_from_content(content_type, webpage_bytes): m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type) if m: encoding = m.group(1) @@ -354,6 +351,16 @@ encoding = 'utf-16' else: encoding = 'utf-8' + + return encoding + + def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None): + content_type = 
urlh.headers.get('Content-Type', '') + webpage_bytes = urlh.read() + if prefix is not None: + webpage_bytes = prefix + webpage_bytes + if not encoding: + encoding = self._guess_encoding_from_content(content_type, webpage_bytes) if self._downloader.params.get('dump_intermediate_pages', False): try: url = url_or_request.get_full_url() @@ -410,13 +417,13 @@ return content - def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5): + def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None): """ Returns the data of the page as a string """ success = False try_count = 0 while success is False: try: - res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal) + res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding) success = True except compat_http_client.IncompleteRead as e: try_count += 1 @@ -431,10 +438,10 @@ def _download_xml(self, url_or_request, video_id, note='Downloading XML', errnote='Unable to download XML', - transform_source=None, fatal=True): + transform_source=None, fatal=True, encoding=None): """Return the xml as an xml.etree.ElementTree.Element""" xml_string = self._download_webpage( - url_or_request, video_id, note, errnote, fatal=fatal) + url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding) if xml_string is False: return xml_string if transform_source: @@ -445,9 +452,10 @@ note='Downloading JSON metadata', errnote='Unable to download JSON metadata', transform_source=None, - fatal=True): + fatal=True, encoding=None): json_string = self._download_webpage( - url_or_request, video_id, note, errnote, fatal=fatal) + url_or_request, video_id, note, errnote, fatal=fatal, + encoding=encoding) if (not fatal) and json_string is False: return None return self._parse_json( @@ -492,7 +500,7 @@ # Methods for following #608 @staticmethod - def url_result(url, ie=None, video_id=None): + def url_result(url, ie=None, video_id=None, video_title=None): """Returns a url that points to a page that should be processed""" # TODO: ie should be the class used for getting the info video_info = {'_type': 'url', @@ -500,6 +508,8 @@ 'ie_key': ie} if video_id is not None: video_info['id'] = video_id + if video_title is not None: + video_info['title'] = video_title return video_info @staticmethod diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/crooksandliars.py new/youtube-dl/youtube_dl/extractor/crooksandliars.py --- old/youtube-dl/youtube_dl/extractor/crooksandliars.py 1970-01-01 01:00:00.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/crooksandliars.py 2015-04-17 11:14:17.000000000 +0200 @@ -0,0 +1,60 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + qualities, +) + + +class CrooksAndLiarsIE(InfoExtractor): + _VALID_URL = r'https?://embed\.crooksandliars\.com/(?:embed|v)/(?P<id>[A-Za-z0-9]+)' + _TESTS = [{ + 'url': 'https://embed.crooksandliars.com/embed/8RUoRhRi', + 'info_dict': { + 'id': '8RUoRhRi', + 'ext': 'mp4', + 'title': 'Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!', + 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f', + 'thumbnail': 're:^https?://.*\.jpg', + 'timestamp': 1428207000, + 'upload_date': '20150405', + 'uploader': 'Heather', + 'duration': 236, + } + }, { + 'url': 
'http://embed.crooksandliars.com/v/MTE3MjUtMzQ2MzA', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage( + 'http://embed.crooksandliars.com/embed/%s' % video_id, video_id) + + manifest = self._parse_json( + self._search_regex( + r'var\s+manifest\s*=\s*({.+?})\n', webpage, 'manifest JSON'), + video_id) + + quality = qualities(('webm_low', 'mp4_low', 'webm_high', 'mp4_high')) + + formats = [{ + 'url': item['url'], + 'format_id': item['type'], + 'quality': quality(item['type']), + } for item in manifest['flavors'] if item['mime'].startswith('video/')] + self._sort_formats(formats) + + return { + 'url': url, + 'id': video_id, + 'title': manifest['title'], + 'description': manifest.get('description'), + 'thumbnail': self._proto_relative_url(manifest.get('poster')), + 'timestamp': int_or_none(manifest.get('created')), + 'uploader': manifest.get('author'), + 'duration': int_or_none(manifest.get('duration')), + 'formats': formats, + } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/dumpert.py new/youtube-dl/youtube_dl/extractor/dumpert.py --- old/youtube-dl/youtube_dl/extractor/dumpert.py 2015-04-03 10:22:07.000000000 +0200 +++ new/youtube-dl/youtube_dl/extractor/dumpert.py 2015-04-17 11:14:17.000000000 +0200 @@ -4,6 +4,7 @@ import base64 from .common import InfoExtractor +from ..compat import compat_urllib_request from ..utils import qualities @@ -23,7 +24,10 @@ def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + + req = compat_urllib_request.Request(url) + req.add_header('Cookie', 'nsfw=1') + webpage = self._download_webpage(req, video_id) files_base64 = self._search_regex( r'data-files="([^"]+)"', webpage, 'data files') diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/footyroom.py new/youtube-dl/youtube_dl/extractor/footyroom.py --- old/youtube-dl/youtube_dl/extractor/footyroom.py 2015-03-13 08:36:55.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/footyroom.py 2015-04-17 11:14:17.000000000 +0200 @@ -6,14 +6,21 @@ class FootyRoomIE(InfoExtractor): _VALID_URL = r'http://footyroom\.com/(?P<id>[^/]+)' - _TEST = { + _TESTS = [{ 'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/', 'info_dict': { 'id': 'schalke-04-0-2-real-madrid-2015-02', 'title': 'Schalke 04 0 – 2 Real Madrid', }, 'playlist_count': 3, - } + }, { + 'url': 'http://footyroom.com/georgia-0-2-germany-2015-03/', + 'info_dict': { + 'id': 'georgia-0-2-germany-2015-03', + 'title': 'Georgia 0 – 2 Germany', + }, + 'playlist_count': 1, + }] def _real_extract(self, url): playlist_id = self._match_id(url) @@ -36,6 +43,7 @@ r'data-config="([^"]+)"', payload, 'playwire url', default=None) if playwire_url: - entries.append(self.url_result(playwire_url, 'Playwire')) + entries.append(self.url_result(self._proto_relative_url( + playwire_url, 'http:'), 'Playwire')) return self.playlist_result(entries, playlist_id, playlist_title) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/generic.py new/youtube-dl/youtube_dl/extractor/generic.py --- old/youtube-dl/youtube_dl/extractor/generic.py 2015-04-09 00:20:51.000000000 +0200 +++ new/youtube-dl/youtube_dl/extractor/generic.py 2015-04-17 11:14:17.000000000 +0200 @@ -615,13 +615,24 @@ 'info_dict': { 'id': '100183293', 
                 'ext': 'mp4',
-                'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
+                'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
                 'thumbnail': 're:^https?://.*\.jpg$',
                 'duration': 694,
                 'age_limit': 0,
             },
         },
+        # Playwire embed
+        {
+            'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
+            'info_dict': {
+                'id': '3519514',
+                'ext': 'mp4',
+                'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
+                'thumbnail': 're:^https?://.*\.png$',
+                'duration': 45.115,
+            },
+        },
         # 5min embed
         {
             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
@@ -642,6 +653,32 @@
                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
             }
         },
+        # Crooks and Liars embed
+        {
+            'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
+            'info_dict': {
+                'id': '8RUoRhRi',
+                'ext': 'mp4',
+                'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
+                'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
+                'timestamp': 1428207000,
+                'upload_date': '20150405',
+                'uploader': 'Heather',
+            },
+        },
+        # Crooks and Liars external embed
+        {
+            'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
+            'info_dict': {
+                'id': 'MTE3MjUtMzQ2MzA',
+                'ext': 'mp4',
+                'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
+                'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
+                'timestamp': 1265032391,
+                'upload_date': '20100201',
+                'uploader': 'Heather',
+            },
+        },
         # NBC Sports vplayer embed
         {
             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
@@ -655,13 +692,41 @@
         # UDN embed
         {
             'url': 'http://www.udn.com/news/story/7314/822787',
-            'md5': 'de06b4c90b042c128395a88f0384817e',
+            'md5': 'fd2060e988c326991037b9aff9df21a6',
             'info_dict': {
-                'id': '300040',
+                'id': '300346',
                 'ext': 'mp4',
-                'title': '生物老師男變女 全校挺"做自己"',
+                'title': '中一中男師變性 全校師生力挺',
                 'thumbnail': 're:^https?://.*\.jpg$',
             }
+        },
+        # Ooyala embed
+        {
+            'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
+            'info_dict': {
+                'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
+                'ext': 'mp4',
+                'description': 'VIDEO: Index/Match versus VLOOKUP.',
+                'title': 'This is what separates the Excel masters from the wannabes',
+            },
+            'params': {
+                # m3u8 downloads
+                'skip_download': True,
+            }
+        },
+        # Contains a SMIL manifest
+        {
+            'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
+            'info_dict': {
+                'id': 'file',
+                'ext': 'flv',
+                'title': '+ Football: Lottery Champions League Europe',
+                'uploader': 'www.telewebion.com',
+            },
+            'params': {
+                # rtmpe downloads
+                'skip_download': True,
+            }
         }
     ]
@@ -1066,7 +1131,8 @@
         # Look for Ooyala videos
         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
-                re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage))
+                re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
+                re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
         if mobj is not None:
             return OoyalaIE._build_url_result(mobj.group('ec'))
@@ -1269,12 +1335,24 @@
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'Pladform')
 
+        # Look for Playwire embeds
+        mobj = re.search(
+            r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'))
+
         # Look for 5min embeds
         mobj = re.search(
             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
         if mobj is not None:
             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
 
+        # Look for Crooks and Liars embeds
+        mobj = re.search(
+            r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'))
+
         # Look for NBC Sports VPlayer embeds
         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
         if nbc_sports_url:
@@ -1376,13 +1454,22 @@
             # here's a fun little line of code for you:
             video_id = os.path.splitext(video_id)[0]
 
-            entries.append({
-                'id': video_id,
-                'url': video_url,
-                'uploader': video_uploader,
-                'title': video_title,
-                'age_limit': age_limit,
-            })
+            if determine_ext(video_url) == 'smil':
+                entries.append({
+                    'id': video_id,
+                    'formats': self._extract_smil_formats(video_url, video_id),
+                    'uploader': video_uploader,
+                    'title': video_title,
+                    'age_limit': age_limit,
+                })
+            else:
+                entries.append({
+                    'id': video_id,
+                    'url': video_url,
+                    'uploader': video_uploader,
+                    'title': video_title,
+                    'age_limit': age_limit,
+                })
 
         if len(entries) == 1:
             return entries[0]
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/hitbox.py new/youtube-dl/youtube_dl/extractor/hitbox.py
--- old/youtube-dl/youtube_dl/extractor/hitbox.py       2015-02-21 12:13:26.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/hitbox.py       2015-04-17 11:14:17.000000000 +0200
@@ -10,6 +10,7 @@
     float_or_none,
     int_or_none,
     compat_str,
+    determine_ext,
 )
 
@@ -147,12 +148,27 @@
                 servers.append(base_url)
                 for stream in cdn.get('bitrates'):
                     label = stream.get('label')
-                    if label != 'Auto':
+                    if label == 'Auto':
+                        continue
+                    stream_url = stream.get('url')
+                    if not stream_url:
+                        continue
+                    bitrate = int_or_none(stream.get('bitrate'))
+                    if stream.get('provider') == 'hls' or determine_ext(stream_url) == 'm3u8':
+                        if not stream_url.startswith('http'):
+                            continue
                         formats.append({
-                            'url': '%s/%s' % (base_url, stream.get('url')),
+                            'url': stream_url,
                             'ext': 'mp4',
-                            'vbr': stream.get('bitrate'),
-                            'resolution': label,
+                            'tbr': bitrate,
+                            'format_note': label,
+                            'rtmp_live': True,
+                        })
+                    else:
+                        formats.append({
+                            'url': '%s/%s' % (base_url, stream_url),
+                            'ext': 'mp4',
+                            'tbr': bitrate,
                             'rtmp_live': True,
                             'format_note': host,
                             'page_url': url,
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/miomio.py new/youtube-dl/youtube_dl/extractor/miomio.py
--- old/youtube-dl/youtube_dl/extractor/miomio.py       2015-04-07 13:38:08.000000000 +0200
+++ new/youtube-dl/youtube_dl/extractor/miomio.py       2015-04-17 11:14:17.000000000 +0200
@@ -7,6 +7,7 @@
 from ..utils import (
     xpath_text,
     int_or_none,
+    ExtractorError,
 )
 
@@ -14,13 +15,14 @@
     IE_NAME = 'miomio.tv'
     _VALID_URL = r'https?://(?:www\.)?miomio\.tv/watch/cc(?P<id>[0-9]+)'
     _TESTS = [{
-        'url': 'http://www.miomio.tv/watch/cc179734/',
-        'md5': '48de02137d0739c15b440a224ad364b9',
+        # "type=video" in flashvars
+        'url': 'http://www.miomio.tv/watch/cc88912/',
+        'md5': '317a5f7f6b544ce8419b784ca8edae65',
         'info_dict': {
-            'id': '179734',
+            'id': '88912',
             'ext': 'flv',
-            'title': '手绘动漫鬼泣但丁全程画法',
-            'duration': 354,
+            'title': '【SKY】字幕 铠武昭和VS平成 假面骑士大战FEAT战队 魔星字幕组 字幕',
+            'duration': 5923,
         },
     }, {
         'url': 'http://www.miomio.tv/watch/cc184024/',
@@ -42,7 +44,7 @@
             r'src="(/mioplayer/[^"]+)"', webpage, 'ref_path')
 
         xml_config = self._search_regex(
-            r'flashvars="type=sina&(.+?)&',
+            r'flashvars="type=(?:sina|video)&(.+?)&',
             webpage, 'xml config')
 
         # skipping the following page causes lags and eventually connection drop-outs
@@ -59,6 +61,9 @@
             'Referer': 'http://www.miomio.tv%s' % mioplayer_path,
         }
 
+        if not int_or_none(xpath_text(vid_config, 'timelength')):
+            raise ExtractorError('Unable to load videos!', expected=True)
+
         entries = []
         for f in vid_config.findall('./durl'):
             segment_url = xpath_text(f, 'url', 'video url')
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/mtv.py new/youtube-dl/youtube_dl/extractor/mtv.py
--- old/youtube-dl/youtube_dl/extractor/mtv.py  2015-02-23 17:25:54.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/mtv.py  2015-04-17 11:14:17.000000000 +0200
@@ -118,6 +118,14 @@
         mediagen_doc = self._download_xml(mediagen_url, video_id,
                                           'Downloading video urls')
 
+        item = mediagen_doc.find('./video/item')
+        if item is not None and item.get('type') == 'text':
+            message = '%s returned error: ' % self.IE_NAME
+            if item.get('code') is not None:
+                message += '%s - ' % item.get('code')
+            message += item.text
+            raise ExtractorError(message, expected=True)
+
         description_node = itemdoc.find('description')
         if description_node is not None:
             description = description_node.text.strip()
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/pladform.py new/youtube-dl/youtube_dl/extractor/pladform.py
--- old/youtube-dl/youtube_dl/extractor/pladform.py     2015-03-09 03:01:10.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/pladform.py     2015-04-17 11:14:17.000000000 +0200
@@ -30,7 +30,7 @@
         'info_dict': {
             'id': '100183293',
             'ext': 'mp4',
-            'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
+            'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
             'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
             'thumbnail': 're:^https?://.*\.jpg$',
             'duration': 694,
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/qqmusic.py new/youtube-dl/youtube_dl/extractor/qqmusic.py
--- old/youtube-dl/youtube_dl/extractor/qqmusic.py      1970-01-01 01:00:00.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/qqmusic.py      2015-04-17 11:14:17.000000000 +0200
@@ -0,0 +1,170 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+import time
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    strip_jsonp,
+    unescapeHTML,
+)
+from ..compat import compat_urllib_request
+
+
+class QQMusicIE(InfoExtractor):
+    _VALID_URL = r'http://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'
+    _TESTS = [{
+        'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD',
+        'md5': 'bed90b6db2a7a7a7e11bc585f471f63a',
+        'info_dict': {
+            'id': '004295Et37taLD',
+            'ext': 'm4a',
+            'title': '可惜没如果',
+            'upload_date': '20141227',
+            'creator': '林俊杰',
+            'description': 'md5:4348ff1dd24036906baa7b6f973f8d30',
+        }
+    }]
+
+    # Reference: m_r_GetRUin() in top_player.js
+    # http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js
+    @staticmethod
+    def m_r_get_ruin():
+        curMs = int(time.time() * 1000) % 1000
+        return int(round(random.random() * 2147483647) * curMs % 1E10)
+
+    def _real_extract(self, url):
+        mid = self._match_id(url)
+
+        detail_info_page = self._download_webpage(
+            'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=%s&play=0' % mid,
+            mid, note='Download song detail info',
+            errnote='Unable to get song detail info', encoding='gbk')
+
+        song_name = self._html_search_regex(
+            r"songname:\s*'([^']+)'", detail_info_page, 'song name')
+
+        publish_time = self._html_search_regex(
+            r'发行时间:(\d{4}-\d{2}-\d{2})', detail_info_page,
+            'publish time', default=None)
+        if publish_time:
+            publish_time = publish_time.replace('-', '')
+
+        singer = self._html_search_regex(
+            r"singer:\s*'([^']+)", detail_info_page, 'singer', default=None)
+
+        lrc_content = self._html_search_regex(
+            r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>',
+            detail_info_page, 'LRC lyrics', default=None)
+
+        guid = self.m_r_get_ruin()
+
+        vkey = self._download_json(
+            'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid=%s' % guid,
+            mid, note='Retrieve vkey', errnote='Unable to get vkey',
+            transform_source=strip_jsonp)['key']
+        song_url = 'http://cc.stream.qqmusic.qq.com/C200%s.m4a?vkey=%s&guid=%s&fromtag=0' % (mid, vkey, guid)
+
+        return {
+            'id': mid,
+            'url': song_url,
+            'title': song_name,
+            'upload_date': publish_time,
+            'creator': singer,
+            'description': lrc_content,
+        }
+
+
+class QQPlaylistBaseIE(InfoExtractor):
+    @staticmethod
+    def qq_static_url(category, mid):
+        return 'http://y.qq.com/y/static/%s/%s/%s/%s.html' % (category, mid[-2], mid[-1], mid)
+
+    @classmethod
+    def get_entries_from_page(cls, page):
+        entries = []
+
+        for item in re.findall(r'class="data"[^<>]*>([^<>]+)</', page):
+            song_mid = unescapeHTML(item).split('|')[-5]
+            entries.append(cls.url_result(
+                'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic',
+                song_mid))
+
+        return entries
+
+
+class QQMusicSingerIE(QQPlaylistBaseIE):
+    _VALID_URL = r'http://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)'
+    _TEST = {
+        'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2',
+        'info_dict': {
+            'id': '001BLpXF2DyJe2',
+            'title': '林俊杰',
+            'description': 'md5:2a222d89ba4455a3af19940c0481bb78',
+        },
+        'playlist_count': 12,
+    }
+
+    def _real_extract(self, url):
+        mid = self._match_id(url)
+
+        singer_page = self._download_webpage(
+            self.qq_static_url('singer', mid), mid, 'Download singer page')
+
+        entries = self.get_entries_from_page(singer_page)
+
+        singer_name = self._html_search_regex(
+            r"singername\s*:\s*'([^']+)'", singer_page, 'singer name',
+            default=None)
+
+        singer_id = self._html_search_regex(
+            r"singerid\s*:\s*'([0-9]+)'", singer_page, 'singer id',
+            default=None)
+
+        singer_desc = None
+
+        if singer_id:
+            req = compat_urllib_request.Request(
+                'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg?utf8=1&outCharset=utf-8&format=xml&singerid=%s' % singer_id)
+            req.add_header(
+                'Referer', 'http://s.plcloud.music.qq.com/xhr_proxy_utf8.html')
+            singer_desc_page = self._download_xml(
+                req, mid, 'Download singer description XML')
+
+            singer_desc = singer_desc_page.find('./data/info/desc').text
+
+        return self.playlist_result(entries, mid, singer_name, singer_desc)
+
+
+class QQMusicAlbumIE(QQPlaylistBaseIE):
+    _VALID_URL = r'http://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)'
+
+    _TEST = {
+        'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1&play=0',
+        'info_dict': {
+            'id': '000gXCTb2AhRR1',
+            'title': '我们都是这样长大的',
+            'description': 'md5:d216c55a2d4b3537fe4415b8767d74d6',
+        },
+        'playlist_count': 4,
+    }
+
+    def _real_extract(self, url):
+        mid = self._match_id(url)
+
+        album_page = self._download_webpage(
+            self.qq_static_url('album', mid), mid, 'Download album page')
+
+        entries = self.get_entries_from_page(album_page)
+
+        album_name = self._html_search_regex(
+            r"albumname\s*:\s*'([^']+)',", album_page, 'album name',
+            default=None)
+
+        album_detail = self._html_search_regex(
+            r'<div class="album_detail close_detail">\s*<p>((?:[^<>]+(?:<br />)?)+)</p>',
+            album_page, 'album details', default=None)
+
+        return self.playlist_result(entries, mid, album_name, album_detail)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/rai.py new/youtube-dl/youtube_dl/extractor/rai.py
--- old/youtube-dl/youtube_dl/extractor/rai.py  2015-02-23 17:25:54.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/rai.py  2015-04-17 11:14:17.000000000 +0200
@@ -13,7 +13,7 @@
 
 class RaiIE(InfoExtractor):
-    _VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)'
+    _VALID_URL = r'(?P<url>(?P<host>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it))/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)'
     _TESTS = [
         {
             'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
@@ -62,34 +62,78 @@
                 'description': 'Edizione delle ore 20:30 ',
             }
         },
+        {
+            'url': 'http://www.ilcandidato.rai.it/dl/ray/media/Il-Candidato---Primo-episodio-Le-Primarie-28e5525a-b495-45e8-a7c3-bc48ba45d2b6.html',
+            'md5': '02b64456f7cc09f96ff14e7dd489017e',
+            'info_dict': {
+                'id': '28e5525a-b495-45e8-a7c3-bc48ba45d2b6',
+                'ext': 'flv',
+                'title': 'Il Candidato - Primo episodio: "Le Primarie"',
+                'description': 'Primo appuntamento con "Il candidato" con Filippo Timi, alias Piero Zucca presidente!',
+                'uploader': 'RaiTre',
+            }
+        }
     ]
 
+    def _extract_relinker_url(self, webpage):
+        return self._proto_relative_url(self._search_regex(
+            [r'name="videourl" content="([^"]+)"', r'var\s+videoURL(?:_MP4)?\s*=\s*"([^"]+)"'],
+            webpage, 'relinker url', default=None))
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
+        host = mobj.group('host')
 
-        media = self._download_json('%s?json' % mobj.group('url'), video_id, 'Downloading video JSON')
+        webpage = self._download_webpage(url, video_id)
+
+        relinker_url = self._extract_relinker_url(webpage)
 
-        title = media.get('name')
-        description = media.get('desc')
-        thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image')
-        duration = parse_duration(media.get('length'))
-        uploader = media.get('author')
-        upload_date = unified_strdate(media.get('date'))
-
-        formats = []
-
-        for format_id in ['wmv', 'm3u8', 'mediaUri', 'h264']:
-            media_url = media.get(format_id)
-            if not media_url:
-                continue
-            formats.append({
+        if not relinker_url:
+            iframe_path = self._search_regex(
+                r'<iframe[^>]+src="/?(dl/[^"]+\?iframe\b[^"]*)"',
+                webpage, 'iframe')
+            webpage = self._download_webpage(
+                '%s/%s' % (host, iframe_path), video_id)
+            relinker_url = self._extract_relinker_url(webpage)
+
+        relinker = self._download_json(
+            '%s&output=47' % relinker_url, video_id)
+
+        media_url = relinker['video'][0]
+        ct = relinker.get('ct')
+        if ct == 'f4m':
+            formats = self._extract_f4m_formats(
+                media_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id)
+        else:
+            formats = [{
                 'url': media_url,
-                'format_id': format_id,
-                'ext': 'mp4',
-            })
+                'format_id': ct,
+            }]
 
-        subtitles = self.extract_subtitles(video_id, url)
+        json_link = self._html_search_meta(
+            'jsonlink', webpage, 'JSON link', default=None)
+        if json_link:
+            media = self._download_json(
+                host + json_link, video_id, 'Downloading video JSON')
+            title = media.get('name')
+            description = media.get('desc')
+            thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image')
+            duration = parse_duration(media.get('length'))
+            uploader = media.get('author')
+            upload_date = unified_strdate(media.get('date'))
+        else:
+            title = (self._search_regex(
+                r'var\s+videoTitolo\s*=\s*"(.+?)";',
+                webpage, 'title', default=None) or self._og_search_title(webpage)).replace('\\"', '"')
+            description = self._og_search_description(webpage)
+            thumbnail = self._og_search_thumbnail(webpage)
+            duration = None
+            uploader = self._html_search_meta('Editore', webpage, 'uploader')
+            upload_date = unified_strdate(self._html_search_meta(
+                'item-date', webpage, 'upload date', default=None))
+
+        subtitles = self.extract_subtitles(video_id, webpage)
 
         return {
             'id': video_id,
@@ -103,8 +147,7 @@
             'subtitles': subtitles,
         }
 
-    def _get_subtitles(self, video_id, url):
-        webpage = self._download_webpage(url, video_id)
+    def _get_subtitles(self, video_id, webpage):
         subtitles = {}
         m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage)
         if m:
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/rtve.py new/youtube-dl/youtube_dl/extractor/rtve.py
--- old/youtube-dl/youtube_dl/extractor/rtve.py 2015-04-07 13:38:08.000000000 +0200
+++ new/youtube-dl/youtube_dl/extractor/rtve.py 2015-04-17 11:14:17.000000000 +0200
@@ -8,6 +8,7 @@
 from .common import InfoExtractor
 from ..compat import compat_urlparse
 from ..utils import (
+    ExtractorError,
     float_or_none,
     remove_end,
     std_headers,
@@ -98,6 +99,8 @@
         info = self._download_json(
             'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
             video_id)['page']['items'][0]
+        if info['state'] == 'DESPU':
+            raise ExtractorError('The video is no longer available', expected=True)
         png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
         png = self._download_webpage(png_url, video_id, 'Downloading url information')
         video_url = _decrypt_url(png)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/soundcloud.py new/youtube-dl/youtube_dl/extractor/soundcloud.py
--- old/youtube-dl/youtube_dl/extractor/soundcloud.py   2015-04-03 10:22:07.000000000 +0200
+++ new/youtube-dl/youtube_dl/extractor/soundcloud.py   2015-04-17 11:14:17.000000000 +0200
@@ -221,7 +221,12 @@
                 info_json_url += "&secret_token=" + token
         elif mobj.group('player'):
             query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
-            return self.url_result(query['url'][0])
+            real_url = query['url'][0]
+            # If the token is in the query of the original url we have to
+            # manually add it
+            if 'secret_token' in query:
+                real_url += '?secret_token=' + query['secret_token'][0]
+            return self.url_result(real_url)
         else:
             # extract uploader (which is in the url)
             uploader = mobj.group('uploader')
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/spike.py new/youtube-dl/youtube_dl/extractor/spike.py
--- old/youtube-dl/youtube_dl/extractor/spike.py        2015-02-21 12:13:26.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/spike.py        2015-04-17 11:14:17.000000000 +0200
@@ -5,7 +5,7 @@
 
 class SpikeIE(MTVServicesInfoExtractor):
     _VALID_URL = r'''(?x)https?://
-        (?:www\.spike\.com/(?:video-clips|(?:full-)?episodes)/.+|
+        (?:www\.spike\.com/(?:video-(?:clips|playlists)|(?:full-)?episodes)/.+|
          m\.spike\.com/videos/video\.rbml\?id=(?P<id>[^&]+))
     '''
     _TEST = {
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/srf.py new/youtube-dl/youtube_dl/extractor/srf.py
--- old/youtube-dl/youtube_dl/extractor/srf.py  1970-01-01 01:00:00.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/srf.py  2015-04-17 11:14:17.000000000 +0200
@@ -0,0 +1,104 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    parse_iso8601,
+    xpath_text,
+)
+
+
+class SrfIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.srf\.ch/play(?:er)?/tv/[^/]+/video/(?P<display_id>[^?]+)\?id=|tp\.srgssr\.ch/p/flash\?urn=urn:srf:ais:video:)(?P<id>[0-9a-f\-]{36})'
+    _TESTS = [{
+        'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+        'md5': '4cd93523723beff51bb4bee974ee238d',
+        'info_dict': {
+            'id': '28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+            'display_id': 'snowden-beantragt-asyl-in-russland',
+            'ext': 'm4v',
+            'upload_date': '20130701',
+            'title': 'Snowden beantragt Asyl in Russland',
+            'timestamp': 1372713995,
+        }
+    }, {
+        # No Speichern (Save) button
+        'url': 'http://www.srf.ch/play/tv/top-gear/video/jaguar-xk120-shadow-und-tornado-dampflokomotive?id=677f5829-e473-4823-ac83-a1087fe97faa',
+        'md5': 'd97e236e80d1d24729e5d0953d276a4f',
+        'info_dict': {
+            'id': '677f5829-e473-4823-ac83-a1087fe97faa',
+            'display_id': 'jaguar-xk120-shadow-und-tornado-dampflokomotive',
+            'ext': 'flv',
+            'upload_date': '20130710',
+            'title': 'Jaguar XK120, Shadow und Tornado-Dampflokomotive',
+            'timestamp': 1373493600,
+        },
+    }, {
+        'url': 'http://www.srf.ch/player/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+        'only_matching': True,
+    }, {
+        'url': 'https://tp.srgssr.ch/p/flash?urn=urn:srf:ais:video:28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        display_id = re.match(self._VALID_URL, url).group('display_id') or video_id
+
+        video_data = self._download_xml(
+            'http://il.srgssr.ch/integrationlayer/1.0/ue/srf/video/play/%s.xml' % video_id,
+            display_id)
+
+        title = xpath_text(
+            video_data, './AssetMetadatas/AssetMetadata/title', fatal=True)
+        thumbnails = [{
+            'url': s.text
+        } for s in video_data.findall('.//ImageRepresentation/url')]
+        timestamp = parse_iso8601(xpath_text(video_data, './createdDate'))
+        # The <duration> field in XML is different from the exact duration, skipping
+
+        formats = []
+        for item in video_data.findall('./Playlists/Playlist') + video_data.findall('./Downloads/Download'):
+            for url_node in item.findall('url'):
+                quality = url_node.attrib['quality']
+                full_url = url_node.text
+                original_ext = determine_ext(full_url)
+                format_id = '%s-%s' % (quality, item.attrib['protocol'])
+                if original_ext == 'f4m':
+                    formats.extend(self._extract_f4m_formats(
+                        full_url + '?hdcore=3.4.0', display_id, f4m_id=format_id))
+                elif original_ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        full_url, display_id, 'mp4', m3u8_id=format_id))
+                else:
+                    formats.append({
+                        'url': full_url,
+                        'ext': original_ext,
+                        'format_id': format_id,
+                        'quality': 0 if 'HD' in quality else -1,
+                        'preference': 1,
+                    })
+
+        self._sort_formats(formats)
+
+        subtitles = {}
+        subtitles_data = video_data.find('Subtitles')
+        if subtitles_data is not None:
+            subtitles_list = [{
+                'url': sub.text,
+                'ext': determine_ext(sub.text),
+            } for sub in subtitles_data]
+            if subtitles_list:
+                subtitles['de'] = subtitles_list
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'formats': formats,
+            'title': title,
+            'thumbnails': thumbnails,
+            'timestamp': timestamp,
+            'subtitles': subtitles,
+        }
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/teamcoco.py new/youtube-dl/youtube_dl/extractor/teamcoco.py
--- old/youtube-dl/youtube_dl/extractor/teamcoco.py     2015-03-28 08:06:40.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/teamcoco.py     2015-04-17 11:14:17.000000000 +0200
@@ -1,10 +1,14 @@
+# -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
 import base64
 import re
 
 from .common import InfoExtractor
-from ..utils import qualities
+from ..utils import (
+    ExtractorError,
+    qualities,
+)
 
 
 class TeamcocoIE(InfoExtractor):
@@ -18,6 +22,7 @@
             'ext': 'mp4',
             'title': 'Conan Becomes A Mary Kay Beauty Consultant',
             'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.',
+            'duration': 504,
             'age_limit': 0,
         }
     }, {
@@ -28,8 +33,20 @@
             'ext': 'mp4',
             'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
             'title': 'Louis C.K. Interview Pt. 1 11/3/11',
+            'duration': 288,
             'age_limit': 0,
         }
+    }, {
+        'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey',
+        'info_dict': {
+            'id': '88748',
+            'ext': 'mp4',
+            'title': 'Timothy Olyphant Raises A Toast To “Justified”',
+            'description': 'md5:15501f23f020e793aeca761205e42c24',
+        },
+        'params': {
+            'skip_download': True,  # m3u8 downloads
+        }
     }
     ]
     _VIDEO_ID_REGEXES = (
@@ -49,35 +66,50 @@
         video_id = self._html_search_regex(
             self._VIDEO_ID_REGEXES, webpage, 'video id')
 
-        embed_url = 'http://teamcoco.com/embed/v/%s' % video_id
-        embed = self._download_webpage(
-            embed_url, video_id, 'Downloading embed page')
+        preload = None
+        preloads = re.findall(r'"preload":\s*"([^"]+)"', webpage)
+        if preloads:
+            preload = max([(len(p), p) for p in preloads])[1]
+
+        if not preload:
+            preload = ''.join(re.findall(r'this\.push\("([^"]+)"\);', webpage))
+
+        if not preload:
+            preload = self._html_search_regex([
+                r'player,\[?"([^"]+)"\]?', r'player.init\(\[?"([^"]+)"\]?\)'
+            ], webpage.replace('","', ''), 'preload data', default=None)
+
+        if not preload:
+            raise ExtractorError(
+                'Preload information could not be extracted', expected=True)
 
-        player_data = self._parse_json(self._search_regex(
-            r'Y\.Ginger\.Module\.Player(?:;var\s*player\s*=\s*new\s*m)?\((\{.*?\})\);', embed, 'player data'), video_id)
         data = self._parse_json(
-            base64.b64decode(player_data['preload'].encode('ascii')).decode('utf-8'), video_id)
+            base64.b64decode(preload.encode('ascii')).decode('utf-8'), video_id)
 
         formats = []
         get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
         for filed in data['files']:
-            m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
-            if m_format is not None:
-                format_id = m_format.group(1)
+            if filed['type'] == 'hls':
+                formats.extend(self._extract_m3u8_formats(
+                    filed['url'], video_id, ext='mp4'))
             else:
-                format_id = filed['bitrate']
-            tbr = (
-                int(filed['bitrate'])
-                if filed['bitrate'].isdigit()
-                else None)
-
-            formats.append({
-                'url': filed['url'],
-                'ext': 'mp4',
-                'tbr': tbr,
-                'format_id': format_id,
-                'quality': get_quality(format_id),
-            })
+                m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
+                if m_format is not None:
+                    format_id = m_format.group(1)
+                else:
+                    format_id = filed['bitrate']
+                tbr = (
+                    int(filed['bitrate'])
+                    if filed['bitrate'].isdigit()
+                    else None)
+
+                formats.append({
+                    'url': filed['url'],
+                    'ext': 'mp4',
+                    'tbr': tbr,
+                    'format_id': format_id,
+                    'quality': get_quality(format_id),
+                })
 
         self._sort_formats(formats)
@@ -88,5 +120,6 @@
             'title': data['title'],
             'thumbnail': data.get('thumb', {}).get('href'),
             'description': data.get('teaser'),
+            'duration': data.get('duration'),
             'age_limit': self._family_friendly_search(webpage),
         }
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/ted.py new/youtube-dl/youtube_dl/extractor/ted.py
--- old/youtube-dl/youtube_dl/extractor/ted.py  2015-02-23 17:25:54.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/ted.py  2015-04-17 11:14:17.000000000 +0200
@@ -5,9 +5,8 @@
 
 from .common import InfoExtractor
 
-from ..compat import (
-    compat_str,
-)
+from ..compat import compat_str
+from ..utils import int_or_none
 
 
 class TEDIE(InfoExtractor):
@@ -170,17 +169,41 @@
                 finfo = self._NATIVE_FORMATS.get(f['format_id'])
                 if finfo:
                     f.update(finfo)
-        else:
-            # Use rtmp downloads
-            formats = [{
-                'format_id': f['name'],
-                'url': talk_info['streamer'],
-                'play_path': f['file'],
-                'ext': 'flv',
-                'width': f['width'],
-                'height': f['height'],
-                'tbr': f['bitrate'],
-            } for f in talk_info['resources']['rtmp']]
+
+        for format_id, resources in talk_info['resources'].items():
+            if format_id == 'h264':
+                for resource in resources:
+                    bitrate = int_or_none(resource.get('bitrate'))
+                    formats.append({
+                        'url': resource['file'],
+                        'format_id': '%s-%sk' % (format_id, bitrate),
+                        'tbr': bitrate,
+                    })
+            elif format_id == 'rtmp':
+                streamer = talk_info.get('streamer')
+                if not streamer:
+                    continue
+                for resource in resources:
+                    formats.append({
+                        'format_id': '%s-%s' % (format_id, resource.get('name')),
+                        'url': streamer,
+                        'play_path': resource['file'],
+                        'ext': 'flv',
+                        'width': int_or_none(resource.get('width')),
+                        'height': int_or_none(resource.get('height')),
+                        'tbr': int_or_none(resource.get('bitrate')),
+                    })
+            elif format_id == 'hls':
+                formats.extend(self._extract_m3u8_formats(
+                    resources.get('stream'), video_name, 'mp4', m3u8_id=format_id))
+
+        audio_download = talk_info.get('audioDownload')
+        if audio_download:
+            formats.append({
+                'url': audio_download,
+                'format_id': 'audio',
+            })
+
         self._sort_formats(formats)
 
         video_id = compat_str(talk_info['id'])
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/tumblr.py new/youtube-dl/youtube_dl/extractor/tumblr.py
--- old/youtube-dl/youtube_dl/extractor/tumblr.py       2015-02-21 12:13:26.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/tumblr.py       2015-04-17 11:14:17.000000000 +0200
@@ -56,6 +56,6 @@
             'url': video_url,
             'ext': 'mp4',
             'title': video_title,
-            'description': self._og_search_description(webpage),
-            'thumbnail': self._og_search_thumbnail(webpage),
+            'description': self._og_search_description(webpage, default=None),
+            'thumbnail': self._og_search_thumbnail(webpage, default=None),
         }
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/udn.py new/youtube-dl/youtube_dl/extractor/udn.py
--- old/youtube-dl/youtube_dl/extractor/udn.py  2015-04-09 00:20:51.000000000 +0200
+++ new/youtube-dl/youtube_dl/extractor/udn.py  2015-04-17 11:14:17.000000000 +0200
@@ -3,12 +3,15 @@
 import json
 
 from .common import InfoExtractor
-from ..utils import js_to_json
+from ..utils import (
+    js_to_json,
+    ExtractorError,
+)
 from ..compat import compat_urlparse
 
 
 class UDNEmbedIE(InfoExtractor):
-    _VALID_URL = r'(?:https?:)?//video\.udn\.com/embed/news/(?P<id>\d+)'
+    _VALID_URL = r'https?://video\.udn\.com/(?:embed|play)/news/(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://video.udn.com/embed/news/300040',
         'md5': 'de06b4c90b042c128395a88f0384817e',
@@ -19,7 +22,11 @@
             'thumbnail': 're:^https?://.*\.jpg$',
         }
     }, {
-        'url': '//video.udn.com/embed/news/300040',
+        'url': 'https://video.udn.com/embed/news/300040',
+        'only_matching': True,
+    }, {
+        # From https://video.udn.com/news/303776
+        'url': 'https://video.udn.com/play/news/303776',
         'only_matching': True,
     }]
 
@@ -47,7 +54,10 @@
                 'retrieve url for %s video' % video_type),
             'format_id': video_type,
             'preference': 0 if video_type == 'mp4' else -1,
-        } for video_type, api_url in video_urls.items()]
+        } for video_type, api_url in video_urls.items() if api_url]
+
+        if not formats:
+            raise ExtractorError('No videos found', expected=True)
 
         self._sort_formats(formats)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/vine.py new/youtube-dl/youtube_dl/extractor/vine.py
--- old/youtube-dl/youtube_dl/extractor/vine.py 2015-03-24 16:32:06.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/vine.py 2015-04-17 11:14:17.000000000 +0200
@@ -1,7 +1,6 @@
 from __future__ import unicode_literals
 
 import re
-import json
 import itertools
 
 from .common import InfoExtractor
@@ -9,8 +8,8 @@
 
 class VineIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?vine\.co/v/(?P<id>\w+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?vine\.co/(?:v|oembed)/(?P<id>\w+)'
+    _TESTS = [{
         'url': 'https://vine.co/v/b9KOOWX7HUx',
         'md5': '2f36fed6235b16da96ce9b4dc890940d',
         'info_dict': {
@@ -23,21 +22,53 @@
             'uploader': 'Jack Dorsey',
             'uploader_id': '76',
         },
-    }
+    }, {
+        'url': 'https://vine.co/v/MYxVapFvz2z',
+        'md5': '7b9a7cbc76734424ff942eb52c8f1065',
+        'info_dict': {
+            'id': 'MYxVapFvz2z',
+            'ext': 'mp4',
+            'title': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14',
+            'alt_title': 'Vine by Luna',
+            'description': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14',
+            'upload_date': '20140815',
+            'uploader': 'Luna',
+            'uploader_id': '1102363502380728320',
+        },
+    }, {
+        'url': 'https://vine.co/v/bxVjBbZlPUH',
+        'md5': 'ea27decea3fa670625aac92771a96b73',
+        'info_dict': {
+            'id': 'bxVjBbZlPUH',
+            'ext': 'mp4',
+            'title': '#mw3 #ac130 #killcam #angelofdeath',
+            'alt_title': 'Vine by Z3k3',
+            'description': '#mw3 #ac130 #killcam #angelofdeath',
+            'upload_date': '20130430',
+            'uploader': 'Z3k3',
+            'uploader_id': '936470460173008896',
+        },
+    }, {
+        'url': 'https://vine.co/oembed/MYxVapFvz2z.json',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
         webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id)
 
-        data = json.loads(self._html_search_regex(
-            r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data'))
+        data = self._parse_json(
+            self._html_search_regex(
+                r'window\.POST_DATA = { %s: ({.+?}) };\s*</script>' % video_id,
+                webpage, 'vine data'),
+            video_id)
 
         formats = [{
             'format_id': '%(format)s-%(rate)s' % f,
             'vcodec': f['format'],
             'quality': f['rate'],
             'url': f['videoUrl'],
-        } for f in data['videoUrls'] if f.get('rate')]
+        } for f in data['videoUrls']]
 
         self._sort_formats(formats)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/youtube.py new/youtube-dl/youtube_dl/extractor/youtube.py
--- old/youtube-dl/youtube_dl/extractor/youtube.py      2015-04-09 00:20:51.000000000 +0200
+++ new/youtube-dl/youtube_dl/extractor/youtube.py      2015-04-17 11:14:17.000000000 +0200
@@ -1370,15 +1370,22 @@
 
     def extract_videos_from_page(self, page):
         ids_in_page = []
-        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
-            if mobj.group(1) not in ids_in_page:
-                ids_in_page.append(mobj.group(1))
-        return ids_in_page
+        titles_in_page = []
+        for mobj in re.finditer(r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?', page):
+            video_id = mobj.group('id')
+            video_title = unescapeHTML(mobj.group('title'))
+            try:
+                idx = ids_in_page.index(video_id)
+                if video_title and not titles_in_page[idx]:
+                    titles_in_page[idx] = video_title
+            except ValueError:
+                ids_in_page.append(video_id)
+                titles_in_page.append(video_title)
+        return zip(ids_in_page, titles_in_page)
 
     def _real_extract(self, url):
         channel_id = self._match_id(url)
-        video_ids = []
         url = 'https://www.youtube.com/channel/%s/videos' % channel_id
         channel_page = self._download_webpage(url, channel_id)
         autogenerated = re.search(r'''(?x)
@@ -1390,20 +1397,21 @@
         if autogenerated:
             # The videos are contained in a single page
             # the ajax pages can't be used, they are empty
-            video_ids = self.extract_videos_from_page(channel_page)
             entries = [
-                self.url_result(video_id, 'Youtube', video_id=video_id)
-                for video_id in video_ids]
+                self.url_result(
+                    video_id, 'Youtube', video_id=video_id,
+                    video_title=video_title)
+                for video_id, video_title in self.extract_videos_from_page(channel_page)]
             return self.playlist_result(entries, channel_id)
 
         def _entries():
             more_widget_html = content_html = channel_page
             for pagenum in itertools.count(1):
-                ids_in_page = self.extract_videos_from_page(content_html)
-                for video_id in ids_in_page:
+                for video_id, video_title in self.extract_videos_from_page(content_html):
                     yield self.url_result(
-                        video_id, 'Youtube', video_id=video_id)
+                        video_id, 'Youtube', video_id=video_id,
+                        video_title=video_title)
 
                 mobj = re.search(
                     r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/options.py new/youtube-dl/youtube_dl/options.py
--- old/youtube-dl/youtube_dl/options.py        2015-04-07 13:38:08.000000000 +0200
+++ new/youtube-dl/youtube_dl/options.py        2015-04-17 11:14:17.000000000 +0200
@@ -794,21 +794,22 @@
         if opts.verbose:
             write_string('[debug] Override config: ' + repr(overrideArguments) + '\n')
     else:
-        command_line_conf = sys.argv[1:]
-        # Workaround for Python 2.x, where argv is a byte list
-        if sys.version_info < (3,):
-            command_line_conf = [
-                a.decode(preferredencoding(), 'replace') for a in command_line_conf]
+        def compat_conf(conf):
+            if sys.version_info < (3,):
+                return [a.decode(preferredencoding(), 'replace') for a in conf]
+            return conf
+
+        command_line_conf = compat_conf(sys.argv[1:])
 
         if '--ignore-config' in command_line_conf:
             system_conf = []
             user_conf = []
         else:
-            system_conf = _readOptions('/etc/youtube-dl.conf')
+            system_conf = compat_conf(_readOptions('/etc/youtube-dl.conf'))
             if '--ignore-config' in system_conf:
                 user_conf = []
             else:
-                user_conf = _readUserConf()
+                user_conf = compat_conf(_readUserConf())
 
         argv = system_conf + user_conf + command_line_conf
         opts, args = parser.parse_args(argv)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/postprocessor/atomicparsley.py new/youtube-dl/youtube_dl/postprocessor/atomicparsley.py
--- old/youtube-dl/youtube_dl/postprocessor/atomicparsley.py    2015-02-21 12:13:26.000000000 +0100
+++ new/youtube-dl/youtube_dl/postprocessor/atomicparsley.py    2015-04-17 11:14:17.000000000 +0200
@@ -50,8 +50,13 @@
             msg = stderr.decode('utf-8', 'replace').strip()
             raise AtomicParsleyPPError(msg)
 
-        os.remove(encodeFilename(filename))
         os.remove(encodeFilename(temp_thumbnail))
-        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+        # for formats that don't support thumbnails (like 3gp) AtomicParsley
+        # won't create the temporary file
+        if b'No changes' in stdout:
+            self._downloader.report_warning('The file format doesn\'t support embedding a thumbnail')
+        else:
+            os.remove(encodeFilename(filename))
+            os.rename(encodeFilename(temp_filename), encodeFilename(filename))
 
         return True, info
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/utils.py new/youtube-dl/youtube_dl/utils.py
--- old/youtube-dl/youtube_dl/utils.py  2015-04-07 13:38:08.000000000 +0200
+++ new/youtube-dl/youtube_dl/utils.py  2015-04-17 11:14:17.000000000 +0200
@@ -312,17 +312,17 @@
     """Sanitizes and normalizes path on Windows"""
     if sys.platform != 'win32':
         return s
-    drive, _ = os.path.splitdrive(s)
-    unc, _ = os.path.splitunc(s)
-    unc_or_drive = unc or drive
-    norm_path = os.path.normpath(remove_start(s, unc_or_drive)).split(os.path.sep)
-    if unc_or_drive:
+    drive_or_unc, _ = os.path.splitdrive(s)
+    if sys.version_info < (2, 7) and not drive_or_unc:
+        drive_or_unc, _ = os.path.splitunc(s)
+    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
+    if drive_or_unc:
         norm_path.pop(0)
     sanitized_path = [
         path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|\.$)', '#', path_part)
         for path_part in norm_path]
-    if unc_or_drive:
-        sanitized_path.insert(0, unc_or_drive + os.path.sep)
+    if drive_or_unc:
+        sanitized_path.insert(0, drive_or_unc + os.path.sep)
     return os.path.join(*sanitized_path)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/version.py new/youtube-dl/youtube_dl/version.py
--- old/youtube-dl/youtube_dl/version.py        2015-04-09 00:21:18.000000000 +0200
+++ new/youtube-dl/youtube_dl/version.py        2015-04-17 11:14:24.000000000 +0200
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.04.09'
+__version__ = '2015.04.17'