commit youtube-dl for openSUSE:Factory

root Sun, 28 Jun 2020 14:09:02 -0700

Hello community,

here is the log from the commit of package youtube-dl for openSUSE:Factory 
checked in at 2020-06-28 23:07:55
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/youtube-dl (Old)
 and      /work/SRC/openSUSE:Factory/.youtube-dl.new.3060 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


Package is "youtube-dl"

Sun Jun 28 23:07:55 2020 rev:135 rq:817540 version:2020.06.16.1

Changes:
--------
--- /work/SRC/openSUSE:Factory/youtube-dl/python-youtube-dl.changes     
2020-06-07 21:38:54.133561154 +0200
+++ /work/SRC/openSUSE:Factory/.youtube-dl.new.3060/python-youtube-dl.changes   
2020-06-28 23:08:50.791360863 +0200
@@ -1,0 +2,17 @@
+Sun Jun 28 14:38:22 UTC 2020 - Hans-Peter Jansen <h...@urpla.net>
+
+- Update to release 2020.06.16.1
+  * [youtube] Force old layout (#25682, #25683, #25680, #25686)
+  * [youtube] Fix categories and improve tags extraction
+
+- Update to version 2020.06.16
+  * [youtube] Fix uploader id and uploader URL extraction
+  * [youtube] Improve view count extraction
+  * [youtube] Fix upload date extraction (#25677)
+  * [youtube] Fix thumbnails extraction (#25676)
+  * [youtube] Fix playlist and feed extraction (#25675)
+  * [facebook] Add support for single-video ID links
+  * [youtube] Extract chapters from JSON (#24819)
+  * [kaltura] Add support for multiple embeds on a webpage (#25523)
+
+-------------------------------------------------------------------
youtube-dl.changes: same change

Old:
----
  youtube-dl-2020.06.06.tar.gz
  youtube-dl-2020.06.06.tar.gz.sig

New:
----
  youtube-dl-2020.06.16.1.tar.gz
  youtube-dl-2020.06.16.1.tar.gz.sig

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ python-youtube-dl.spec ++++++
--- /var/tmp/diff_new_pack.OI2mHI/_old  2020-06-28 23:08:52.423366283 +0200
+++ /var/tmp/diff_new_pack.OI2mHI/_new  2020-06-28 23:08:52.427366296 +0200
@@ -19,7 +19,7 @@
 %define modname youtube-dl
 %{?!python_module:%define python_module() python-%{**} python3-%{**}}
 Name:           python-youtube-dl
-Version:        2020.06.06
+Version:        2020.06.16.1
 Release:        0
 Summary:        A Python module for downloading from video sites for offline 
watching
 License:        SUSE-Public-Domain AND CC-BY-SA-3.0

++++++ youtube-dl.spec ++++++
--- /var/tmp/diff_new_pack.OI2mHI/_old  2020-06-28 23:08:52.455366389 +0200
+++ /var/tmp/diff_new_pack.OI2mHI/_new  2020-06-28 23:08:52.459366402 +0200
@@ -17,7 +17,7 @@
 
 
 Name:           youtube-dl
-Version:        2020.06.06
+Version:        2020.06.16.1
 Release:        0
 Summary:        A tool for downloading from video sites for offline watching
 License:        SUSE-Public-Domain AND CC-BY-SA-3.0

++++++ youtube-dl-2020.06.06.tar.gz -> youtube-dl-2020.06.16.1.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/youtube-dl/ChangeLog new/youtube-dl/ChangeLog
--- old/youtube-dl/ChangeLog    2020-06-05 20:51:34.000000000 +0200
+++ new/youtube-dl/ChangeLog    2020-06-16 01:21:58.000000000 +0200
@@ -1,3 +1,23 @@
+version 2020.06.16.1
+
+Extractors
+* [youtube] Force old layout (#25682, #25683, #25680, #25686)
+* [youtube] Fix categories and improve tags extraction
+
+
+version 2020.06.16
+
+Extractors
+* [youtube] Fix uploader id and uploader URL extraction
+* [youtube] Improve view count extraction
+* [youtube] Fix upload date extraction (#25677)
+* [youtube] Fix thumbnails extraction (#25676)
+* [youtube] Fix playlist and feed extraction (#25675)
++ [facebook] Add support for single-video ID links
++ [youtube] Extract chapters from JSON (#24819)
++ [kaltura] Add support for multiple embeds on a webpage (#25523)
+
+
 version 2020.06.06
 
 Extractors
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/youtube-dl/test/test_youtube_chapters.py 
new/youtube-dl/test/test_youtube_chapters.py
--- old/youtube-dl/test/test_youtube_chapters.py        2020-06-01 
23:04:21.000000000 +0200
+++ new/youtube-dl/test/test_youtube_chapters.py        2020-06-16 
01:21:45.000000000 +0200
@@ -267,7 +267,7 @@
         for description, duration, expected_chapters in self._TEST_CASES:
             ie = YoutubeIE()
             expect_value(
-                self, ie._extract_chapters(description, duration),
+                self, ie._extract_chapters_from_description(description, 
duration),
                 expected_chapters, None)
 
 
Binary files old/youtube-dl/youtube-dl and new/youtube-dl/youtube-dl differ
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/facebook.py 
new/youtube-dl/youtube_dl/extractor/facebook.py
--- old/youtube-dl/youtube_dl/extractor/facebook.py     2020-06-01 
23:04:23.000000000 +0200
+++ new/youtube-dl/youtube_dl/extractor/facebook.py     2020-06-16 
01:21:45.000000000 +0200
@@ -466,15 +466,18 @@
             return info_dict
 
         if '/posts/' in url:
-            entries = [
-                self.url_result('facebook:%s' % vid, FacebookIE.ie_key())
-                for vid in self._parse_json(
-                    self._search_regex(
-                        r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])',
-                        webpage, 'video ids', group='ids'),
-                    video_id)]
+            video_id_json = self._search_regex(
+                r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])', webpage, 'video 
ids', group='ids',
+                default='')
+            if video_id_json:
+                entries = [
+                    self.url_result('facebook:%s' % vid, FacebookIE.ie_key())
+                    for vid in self._parse_json(video_id_json, video_id)]
+                return self.playlist_result(entries, video_id)
 
-            return self.playlist_result(entries, video_id)
+            # Single Video?
+            video_id = self._search_regex(r'video_id:\s*"([0-9]+)"', webpage, 
'single video id')
+            return self.url_result('facebook:%s' % video_id, 
FacebookIE.ie_key())
         else:
             _, info_dict = self._extract_from_url(
                 self._VIDEO_PAGE_TEMPLATE % video_id,
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/generic.py 
new/youtube-dl/youtube_dl/extractor/generic.py
--- old/youtube-dl/youtube_dl/extractor/generic.py      2020-06-01 
23:04:23.000000000 +0200
+++ new/youtube-dl/youtube_dl/extractor/generic.py      2020-06-16 
01:21:45.000000000 +0200
@@ -1709,6 +1709,15 @@
             'add_ie': ['Kaltura'],
         },
         {
+            # multiple kaltura embeds, nsfw
+            'url': 
'https://www.quartier-rouge.be/prive/femmes/kamila-avec-video-jaime-sadomie.html',
+            'info_dict': {
+                'id': 'kamila-avec-video-jaime-sadomie',
+                'title': "Kamila avec vídeo “J'aime sadomie”",
+            },
+            'playlist_count': 8,
+        },
+        {
             # Non-standard Vimeo embed
             'url': 'https://openclassrooms.com/courses/understanding-the-web',
             'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
@@ -2844,9 +2853,12 @@
             return self.url_result(mobj.group('url'), 'Zapiks')
 
         # Look for Kaltura embeds
-        kaltura_url = KalturaIE._extract_url(webpage)
-        if kaltura_url:
-            return self.url_result(smuggle_url(kaltura_url, {'source_url': 
url}), KalturaIE.ie_key())
+        kaltura_urls = KalturaIE._extract_urls(webpage)
+        if kaltura_urls:
+            return self.playlist_from_matches(
+                kaltura_urls, video_id, video_title,
+                getter=lambda x: smuggle_url(x, {'source_url': url}),
+                ie=KalturaIE.ie_key())
 
         # Look for EaglePlatform embeds
         eagleplatform_url = EaglePlatformIE._extract_url(webpage)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/kaltura.py 
new/youtube-dl/youtube_dl/extractor/kaltura.py
--- old/youtube-dl/youtube_dl/extractor/kaltura.py      2020-06-01 
23:04:24.000000000 +0200
+++ new/youtube-dl/youtube_dl/extractor/kaltura.py      2020-06-16 
01:21:45.000000000 +0200
@@ -113,9 +113,14 @@
 
     @staticmethod
     def _extract_url(webpage):
+        urls = KalturaIE._extract_urls(webpage)
+        return urls[0] if urls else None
+
+    @staticmethod
+    def _extract_urls(webpage):
         # Embed codes: 
https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
-        mobj = (
-            re.search(
+        finditer = (
+            re.finditer(
                 r"""(?xs)
                     kWidget\.(?:thumb)?[Ee]mbed\(
                     \{.*?
@@ -124,7 +129,7 @@
                         (?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
                         (?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
                 """, webpage)
-            or re.search(
+            or re.finditer(
                 r'''(?xs)
                     (?P<q1>["'])
                         
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
@@ -138,7 +143,7 @@
                     )
                     (?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
                 ''', webpage)
-            or re.search(
+            or re.finditer(
                 r'''(?xs)
                     <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
                       
(?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
@@ -148,7 +153,8 @@
                     (?P=q1)
                 ''', webpage)
         )
-        if mobj:
+        urls = []
+        for mobj in finditer:
             embed_info = mobj.groupdict()
             for k, v in embed_info.items():
                 if v:
@@ -160,7 +166,8 @@
                 webpage)
             if service_mobj:
                 url = smuggle_url(url, {'service_url': 
service_mobj.group('id')})
-            return url
+            urls.append(url)
+        return urls
 
     def _kaltura_api_call(self, video_id, actions, service_url=None, *args, 
**kwargs):
         params = actions[0]
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/youtube.py 
new/youtube-dl/youtube_dl/extractor/youtube.py
--- old/youtube-dl/youtube_dl/extractor/youtube.py      2020-06-01 
23:04:26.000000000 +0200
+++ new/youtube-dl/youtube_dl/extractor/youtube.py      2020-06-16 
01:21:45.000000000 +0200
@@ -70,9 +70,14 @@
 
     _PLAYLIST_ID_RE = 
r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
 
+    _YOUTUBE_CLIENT_HEADERS = {
+        'x-youtube-client-name': '1',
+        'x-youtube-client-version': '1.20200609.04.02',
+    }
+
     def _set_language(self):
         self._set_cookie(
-            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
+            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
             # YouTube sets the expire time to about two months
             expire_time=time.time() + 2 * 30 * 24 * 3600)
 
@@ -301,7 +306,8 @@
                         'https://youtube.com/%s' % mobj.group('more'), 
playlist_id,
                         'Downloading page #%s%s'
                         % (page_num, ' (retry #%d)' % count if count else ''),
-                        transform_source=uppercase_escape)
+                        transform_source=uppercase_escape,
+                        headers=self._YOUTUBE_CLIENT_HEADERS)
                     break
                 except ExtractorError as e:
                     if isinstance(e.cause, compat_HTTPError) and e.cause.code 
in (500, 503):
@@ -1652,8 +1658,63 @@
         video_id = mobj.group(2)
         return video_id
 
+    def _extract_chapters_from_json(self, webpage, video_id, duration):
+        if not webpage:
+            return
+        player = self._parse_json(
+            self._search_regex(
+                r'RELATED_PLAYER_ARGS["\']\s*:\s*({.+})\s*,?\s*\n', webpage,
+                'player args', default='{}'),
+            video_id, fatal=False)
+        if not player or not isinstance(player, dict):
+            return
+        watch_next_response = player.get('watch_next_response')
+        if not isinstance(watch_next_response, compat_str):
+            return
+        response = self._parse_json(watch_next_response, video_id, fatal=False)
+        if not response or not isinstance(response, dict):
+            return
+        chapters_list = try_get(
+            response,
+            lambda x: x['playerOverlays']
+                       ['playerOverlayRenderer']
+                       ['decoratedPlayerBarRenderer']
+                       ['decoratedPlayerBarRenderer']
+                       ['playerBar']
+                       ['chapteredPlayerBarRenderer']
+                       ['chapters'],
+            list)
+        if not chapters_list:
+            return
+
+        def chapter_time(chapter):
+            return float_or_none(
+                try_get(
+                    chapter,
+                    lambda x: x['chapterRenderer']['timeRangeStartMillis'],
+                    int),
+                scale=1000)
+        chapters = []
+        for next_num, chapter in enumerate(chapters_list, start=1):
+            start_time = chapter_time(chapter)
+            if start_time is None:
+                continue
+            end_time = (chapter_time(chapters_list[next_num])
+                        if next_num < len(chapters_list) else duration)
+            if end_time is None:
+                continue
+            title = try_get(
+                chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
+                compat_str)
+            chapters.append({
+                'start_time': start_time,
+                'end_time': end_time,
+                'title': title,
+            })
+        return chapters
+
     @staticmethod
-    def _extract_chapters(description, duration):
+    def _extract_chapters_from_description(description, duration):
         if not description:
             return None
         chapter_lines = re.findall(
@@ -1687,6 +1748,10 @@
             })
         return chapters
 
+    def _extract_chapters(self, webpage, description, video_id, duration):
+        return (self._extract_chapters_from_json(webpage, video_id, duration)
+                or self._extract_chapters_from_description(description, 
duration))
+
     def _real_extract(self, url):
         url, smuggled_data = unsmuggle_url(url, {})
 
@@ -1833,6 +1898,9 @@
         video_details = try_get(
             player_response, lambda x: x['videoDetails'], dict) or {}
 
+        microformat = try_get(
+            player_response, lambda x: 
x['microformat']['playerMicroformatRenderer'], dict) or {}
+
         video_title = video_info.get('title', [None])[0] or 
video_details.get('title')
         if not video_title:
             self._downloader.report_warning('Unable to extract video title')
@@ -1910,6 +1978,8 @@
             view_count = extract_view_count(video_info)
         if view_count is None and video_details:
             view_count = int_or_none(video_details.get('viewCount'))
+        if view_count is None and microformat:
+            view_count = int_or_none(microformat.get('viewCount'))
 
         if is_live is None:
             is_live = bool_or_none(video_details.get('isLive'))
@@ -2161,7 +2231,12 @@
             video_uploader_id = mobj.group('uploader_id')
             video_uploader_url = mobj.group('uploader_url')
         else:
-            self._downloader.report_warning('unable to extract uploader 
nickname')
+            owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
+            if owner_profile_url:
+                video_uploader_id = self._search_regex(
+                    r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader 
id',
+                    default=None)
+                video_uploader_url = owner_profile_url
 
         channel_id = (
             str_or_none(video_details.get('channelId'))
@@ -2172,17 +2247,33 @@
                 video_webpage, 'channel id', default=None, group='id'))
         channel_url = 'http://www.youtube.com/channel/%s' % channel_id if 
channel_id else None
 
-        # thumbnail image
-        # We try first to get a high quality image:
-        m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
-                            video_webpage, re.DOTALL)
-        if m_thumb is not None:
-            video_thumbnail = m_thumb.group(1)
-        elif 'thumbnail_url' not in video_info:
-            self._downloader.report_warning('unable to extract video 
thumbnail')
+        thumbnails = []
+        thumbnails_list = try_get(
+            video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
+        for t in thumbnails_list:
+            if not isinstance(t, dict):
+                continue
+            thumbnail_url = url_or_none(t.get('url'))
+            if not thumbnail_url:
+                continue
+            thumbnails.append({
+                'url': thumbnail_url,
+                'width': int_or_none(t.get('width')),
+                'height': int_or_none(t.get('height')),
+            })
+
+        if not thumbnails:
             video_thumbnail = None
-        else:   # don't panic if we can't find it
-            video_thumbnail = 
compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
+            # We try first to get a high quality image:
+            m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
+                                video_webpage, re.DOTALL)
+            if m_thumb is not None:
+                video_thumbnail = m_thumb.group(1)
+            thumbnail_url = try_get(video_info, lambda x: 
x['thumbnail_url'][0], compat_str)
+            if thumbnail_url:
+                video_thumbnail = 
compat_urllib_parse_unquote_plus(thumbnail_url)
+            if video_thumbnail:
+                thumbnails.append({'url': video_thumbnail})
 
         # upload date
         upload_date = self._html_search_meta(
@@ -2192,6 +2283,8 @@
                 [r'(?s)id="eow-date.*?>(.*?)</span>',
                  
r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed
 live|Started) on (.+?)[<"\']'],
                 video_webpage, 'upload date', default=None)
+        if not upload_date:
+            upload_date = microformat.get('publishDate') or 
microformat.get('uploadDate')
         upload_date = unified_strdate(upload_date)
 
         video_license = self._html_search_regex(
@@ -2263,17 +2356,21 @@
         m_cat_container = self._search_regex(
             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
             video_webpage, 'categories', default=None)
+        category = None
         if m_cat_container:
             category = self._html_search_regex(
                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
                 default=None)
-            video_categories = None if category is None else [category]
-        else:
-            video_categories = None
+        if not category:
+            category = try_get(
+                microformat, lambda x: x['category'], compat_str)
+        video_categories = None if category is None else [category]
 
         video_tags = [
             unescapeHTML(m.group('content'))
             for m in re.finditer(self._meta_regex('og:video:tag'), 
video_webpage)]
+        if not video_tags:
+            video_tags = try_get(video_details, lambda x: x['keywords'], list)
 
         def _extract_count(count_name):
             return str_to_int(self._search_regex(
@@ -2324,7 +2421,7 @@
                     errnote='Unable to download video annotations', 
fatal=False,
                     data=urlencode_postdata({xsrf_field_name: xsrf_token}))
 
-        chapters = self._extract_chapters(description_original, video_duration)
+        chapters = self._extract_chapters(video_webpage, description_original, 
video_id, video_duration)
 
         # Look for the DASH manifest
         if self._downloader.params.get('youtube_include_dash_manifest', True):
@@ -2415,7 +2512,7 @@
             'creator': video_creator or artist,
             'title': video_title,
             'alt_title': video_alt_title or track,
-            'thumbnail': video_thumbnail,
+            'thumbnails': thumbnails,
             'description': video_description,
             'categories': video_categories,
             'tags': video_tags,
@@ -3191,7 +3288,8 @@
             more = self._download_json(
                 'https://youtube.com/%s' % mobj.group('more'), 
self._PLAYLIST_TITLE,
                 'Downloading page #%s' % page_num,
-                transform_source=uppercase_escape)
+                transform_source=uppercase_escape,
+                headers=self._YOUTUBE_CLIENT_HEADERS)
             content_html = more['content_html']
             more_widget_html = more['load_more_widget_html']
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/youtube-dl/youtube_dl/version.py 
new/youtube-dl/youtube_dl/version.py
--- old/youtube-dl/youtube_dl/version.py        2020-06-05 20:51:34.000000000 
+0200
+++ new/youtube-dl/youtube_dl/version.py        2020-06-16 01:21:58.000000000 
+0200
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2020.06.06'
+__version__ = '2020.06.16.1'

commit youtube-dl for openSUSE:Factory

Reply via email to