Script 'mail_helper' called by obssrc

Hello community,

here is the log from the commit of package you-get for openSUSE:Factory checked in at 2021-04-01 14:17:07

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/you-get (Old)
 and      /work/SRC/openSUSE:Factory/.you-get.new.2401 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "you-get" Thu Apr 1 14:17:07 2021 rev:36 rq:882000 version:0.4.1520 Changes: -------- --- /work/SRC/openSUSE:Factory/you-get/you-get.changes 2020-12-28 00:28:34.153773706 +0100 +++ /work/SRC/openSUSE:Factory/.you-get.new.2401/you-get.changes 2021-04-01 14:18:25.448083923 +0200 @@ -1,0 +2,5 @@ +Mon Mar 29 14:55:07 UTC 2021 - Luigi Baldoni <aloi...@gmx.com> + +- Update to version 0.4.1520 (no changelog supplied) + +------------------------------------------------------------------- Old: ---- you-get-0.4.1500.tar.gz New: ---- you-get-0.4.1520.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ you-get.spec ++++++ --- /var/tmp/diff_new_pack.h2EyKA/_old 2021-04-01 14:18:26.072084894 +0200 +++ /var/tmp/diff_new_pack.h2EyKA/_new 2021-04-01 14:18:26.076084900 +0200 @@ -1,7 +1,7 @@ # # spec file for package you-get # -# Copyright (c) 2020 SUSE LLC +# Copyright (c) 2021 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -17,7 +17,7 @@ Name: you-get -Version: 0.4.1500 +Version: 0.4.1520 Release: 0 Summary: Dumb downloader that scrapes the web License: MIT ++++++ you-get-0.4.1500.tar.gz -> you-get-0.4.1520.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/.gitignore new/you-get-0.4.1520/.gitignore --- old/you-get-0.4.1500/.gitignore 2020-12-26 19:21:43.000000000 +0100 +++ new/you-get-0.4.1520/.gitignore 2021-03-29 16:44:07.000000000 +0200 @@ -84,6 +84,7 @@ *.m4a *.DS_Store *.txt +*.sw[a-p] *.zip diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/README.md new/you-get-0.4.1520/README.md --- old/you-get-0.4.1500/README.md 2020-12-26 19:21:43.000000000 +0100 +++ new/you-get-0.4.1520/README.md 2021-03-29 16:44:07.000000000 +0200 @@ -414,6 +414,7 @@ | ???????????? | <http://www.kuwo.cn/> | | |???| | ????????? | <http://www.le.com/> |???| | | | ??????FM | <http://www.lizhi.fm/> | | |???| +| ???????????? | <http://www.lrts.me/> | | |???| | ?????? | <http://www.miaopai.com/> |???| | | | MioMio????????? | <http://www.miomio.tv/> |???| | | | MissEvan<br/>??????FM | <http://www.missevan.com/> | | |???| @@ -425,7 +426,6 @@ | Sina<br/>????????????<br/>?????????????????? | <http://video.sina.com.cn/><br/><http://video.weibo.com/> |???| | | | Sohu<br/>???????????? | <http://tv.sohu.com/> |???| | | | **Tudou<br/>??????** | <http://www.tudou.com/> |???| | | -| ?????? | <http://www.xiami.com/> |???| |???| | ???????????? 
| <http://www.isuntv.com/> |???| | | | **Youku<br/>??????** | <http://www.youku.com/> |???| | | | ??????TV | <http://www.zhanqi.tv/lives> |???| | | diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/src/you_get/common.py new/you-get-0.4.1520/src/you_get/common.py --- old/you-get-0.4.1500/src/you_get/common.py 2020-12-26 19:21:43.000000000 +0100 +++ new/you-get-0.4.1520/src/you_get/common.py 2021-03-29 16:44:07.000000000 +0200 @@ -76,6 +76,7 @@ 'letv' : 'le', 'lizhi' : 'lizhi', 'longzhu' : 'longzhu', + 'lrts' : 'lrts', 'magisto' : 'magisto', 'metacafe' : 'metacafe', 'mgtv' : 'mgtv', @@ -112,7 +113,6 @@ 'veoh' : 'veoh', 'vine' : 'vine', 'vk' : 'vk', - 'xiami' : 'xiami', 'xiaokaxiu' : 'yixia', 'xiaojiadianvideo' : 'fc2video', 'ximalaya' : 'ximalaya', @@ -1555,6 +1555,21 @@ '-l', '--playlist', action='store_true', help='Prefer to download a playlist' ) + + playlist_grp = parser.add_argument_group('Playlist optional options') + playlist_grp.add_argument( + '--first', metavar='FIRST', + help='the first number' + ) + playlist_grp.add_argument( + '--last', metavar='LAST', + help='the last number' + ) + playlist_grp.add_argument( + '--size', '--page-size', metavar='PAGE_SIZE', + help='the page size number' + ) + download_grp.add_argument( '-a', '--auto-rename', action='store_true', default=False, help='Auto rename same name different files' @@ -1672,7 +1687,7 @@ socket.setdefaulttimeout(args.timeout) try: - extra = {} + extra = {'args': args} if extractor_proxy: extra['extractor_proxy'] = extractor_proxy if stream_id: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/src/you_get/extractors/__init__.py new/you-get-0.4.1520/src/you_get/extractors/__init__.py --- old/you-get-0.4.1500/src/you_get/extractors/__init__.py 2020-12-26 19:21:43.000000000 +0100 +++ new/you-get-0.4.1520/src/you_get/extractors/__init__.py 2021-03-29 16:44:07.000000000 +0200 @@ -78,7 +78,6 @@ from .vk import * from .w56 import * from .wanmen import * -from .xiami import * from .xinpianchang import * from .yixia import * from .youku import * diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/src/you_get/extractors/bilibili.py new/you-get-0.4.1520/src/you_get/extractors/bilibili.py --- old/you-get-0.4.1500/src/you_get/extractors/bilibili.py 2020-12-26 19:21:43.000000000 +0100 +++ new/you-get-0.4.1520/src/you_get/extractors/bilibili.py 2021-03-29 16:44:07.000000000 +0200 @@ -4,12 +4,16 @@ from ..extractor import VideoExtractor import hashlib +import math + class Bilibili(VideoExtractor): name = "Bilibili" # Bilibili media encoding options, in descending quality order. stream_types = [ + {'id': 'hdflv2', 'quality': 125, 'audio_quality': 30280, + 'container': 'FLV', 'video_resolution': '3840p', 'desc': '?????? HDR'}, {'id': 'hdflv2_4k', 'quality': 120, 'audio_quality': 30280, 'container': 'FLV', 'video_resolution': '2160p', 'desc': '?????? 
4K'}, {'id': 'flv_p60', 'quality': 116, 'audio_quality': 30280, @@ -734,7 +738,8 @@ api_url = self.bilibili_space_video_api(mid) api_content = get_content(api_url, headers=self.bilibili_headers()) videos_info = json.loads(api_content) - pc = videos_info['data']['page']['count'] // videos_info['data']['page']['ps'] + # pc = videos_info['data']['page']['count'] // videos_info['data']['page']['ps'] + pc = math.ceil(videos_info['data']['page']['count'] / videos_info['data']['page']['ps']) for pn in range(1, pc + 1): api_url = self.bilibili_space_video_api(mid, pn=pn) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/src/you_get/extractors/instagram.py new/you-get-0.4.1520/src/you_get/extractors/instagram.py --- old/you-get-0.4.1500/src/you_get/extractors/instagram.py 2020-12-26 19:21:43.000000000 +0100 +++ new/you-get-0.4.1520/src/you_get/extractors/instagram.py 2021-03-29 16:44:07.000000000 +0200 @@ -22,14 +22,15 @@ download_urls([stream], title, ext, size, output_dir, merge=merge) else: data = re.search(r'window\._sharedData\s*=\s*(.*);</script>', html) - if data is not None: + try: info = json.loads(data.group(1)) post = info['entry_data']['PostPage'][0] - else: + assert post + except: # with logged-in cookies data = re.search(r'window\.__additionalDataLoaded\(\'[^\']+\',(.*)\);</script>', html) if data is not None: - log.e('[Error] Cookies needed.') + log.e('[Warning] Cookies needed.') post = json.loads(data.group(1)) if 'edge_sidecar_to_children' in post['graphql']['shortcode_media']: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/src/you_get/extractors/lrts.py new/you-get-0.4.1520/src/you_get/extractors/lrts.py --- old/you-get-0.4.1500/src/you_get/extractors/lrts.py 1970-01-01 01:00:00.000000000 +0100 +++ new/you-get-0.4.1520/src/you_get/extractors/lrts.py 2021-03-29 16:44:07.000000000 +0200 @@ -0,0 +1,81 @@ +#!/usr/bin/env python + +__all__ = ['lrts_download'] + +import logging +from ..common import * +from ..util import log, term + +def lrts_download(url, output_dir='.', merge=True, info_only=False, **kwargs): + html = get_html(url) + args = kwargs.get('args') + if not args: args = {} + matched = re.search(r"/book/(\d+)", url) + if not matched: + raise AssertionError("not found book number: %s" % url) + book_no = matched.group(1) + book_title = book_no + matched = re.search(r"<title>([^-]*)[-](.*)[,](.*)</title>", html) + if matched: + book_title = matched.group(1) + + matched = re.search(r"var totalCount='(\d+)'", html) + if not matched: + raise AssertionError("not found total count in html") + total_count = int(matched.group(1)) + log.i('%s total: %s' % (book_title, total_count)) + first_page = 0 + if ('first' in args and args.first!= None): + first_page = int(args.first) + + page_size = 10 + if ('page_size' in args and args.page_size != None): + page_size = int(args.page_size) + last_page = (total_count // page_size) + 1 + if ('last' in args and args.last != None): + last_page = int(args.last) + + log.i('page size is %s, page from %s to %s' % (page_size, first_page, last_page)) + headers = { + 'Referer': url + } + items = [] + for page in range(first_page, last_page): + page_url = 'http://www.lrts.me/ajax/book/%s/%s/%s' % (book_no, page, page_size) + response_content = json.loads(post_content(page_url, headers)) + if response_content['status'] != 'success': + raise AssertionError("got the page failed: %s" % (page_url)) + data = response_content['data']['data'] + 
if data: + for i in data: + i['resName'] = parse.unquote(i['resName']) + items.extend(data) + else: + break + headers = { + 'Referer': 'http://www.lrts.me/playlist' + } + + for item in items: + i_url = 'http://www.lrts.me/ajax/path/4/%s/%s' % (item['fatherResId'], item['resId']) + response_content = json.loads(post_content(i_url, headers)) + if response_content['status'] == 'success' and response_content['data']: + item['ok'] = True + item['url'] = response_content['data'] + logging.debug('ok') + + items = list(filter(lambda i: 'ok' in i and i['ok'], items)) + log.i('Downloading %s: %s count ...' % (book_title, len(items))) + + for item in items: + title = item['resName'] + file_url = item['url'] + # if not file_url: continue + _, _, size = url_info(file_url) + print_info(site_info, title, 'mp3', size) + if not info_only: + download_urls([file_url], title, 'mp3', size, output_dir, merge=merge) + +site_info = "lrts.me" +download = lrts_download +download_playlist = lrts_download diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/src/you_get/extractors/mgtv.py new/you-get-0.4.1520/src/you_get/extractors/mgtv.py --- old/you-get-0.4.1500/src/you_get/extractors/mgtv.py 2020-12-26 19:21:43.000000000 +0100 +++ new/you-get-0.4.1520/src/you_get/extractors/mgtv.py 2021-03-29 16:44:07.000000000 +0200 @@ -9,19 +9,36 @@ from os.path import dirname import re +import base64 +import time +import uuid + + class MGTV(VideoExtractor): name = "?????? (MGTV)" # Last updated: 2016-11-13 stream_types = [ + {'id': 'fhd', 'container': 'ts', 'video_profile': '??????'}, {'id': 'hd', 'container': 'ts', 'video_profile': '??????'}, {'id': 'sd', 'container': 'ts', 'video_profile': '??????'}, {'id': 'ld', 'container': 'ts', 'video_profile': '??????'}, ] - - id_dic = {i['video_profile']:(i['id']) for i in stream_types} - - api_endpoint = 'http://pcweb.api.mgtv.com/player/video?video_id={video_id}' + + id_dic = {i['video_profile']: (i['id']) for i in stream_types} + + did = str(uuid.uuid4()) + ver = '0.3.0301' + pno = '1030' + + def tk2(self): + return base64.urlsafe_b64encode(b'did=%s|ver=%s|pno=%s|clit=%d' % ( + self.did.encode(), self.ver.encode(), self.pno.encode(), time.time())).decode('utf-8')[::-1] + + info_endpoint = 'https://pcweb.api.mgtv.com/video/info?vid={video_id}' + player_endpoint = 'https://pcweb.api.mgtv.com/player/video?did={did}&tk2={tk2}&video_id={video_id}' + source_endpoint = 'https://pcweb.api.mgtv.com/player/getSource?tk2={tk2}&pm2={pm2}&video_id={video_id}' + playlist_endpoint = 'https://pcweb.api.mgtv.com/episode/list?video_id={video_id}&page={page}&size=30' @staticmethod def get_vid_from_url(url): @@ -30,66 +47,92 @@ vid = match1(url, 'https?://www.mgtv.com/(?:b|l)/\d+/(\d+).html') if not vid: vid = match1(url, 'https?://www.mgtv.com/hz/bdpz/\d+/(\d+).html') + if not vid: + vid = match1(url, 'https?://www.mgtv.com/s/(\d+).html') return vid - - #---------------------------------------------------------------------- - @staticmethod - def get_mgtv_real_url(url): + + # ---------------------------------------------------------------------- + def get_mgtv_real_url(self, url): """str->list of str Give you the real URLs.""" content = loads(get_content(url)) m3u_url = content['info'] split = urlsplit(m3u_url) - - base_url = "{scheme}://{netloc}{path}/".format(scheme = split[0], - netloc = split[1], - path = dirname(split[2])) - content = get_content(content['info']) #get the REAL M3U url, maybe to be changed later? 
+ base_url = "{scheme}://{netloc}{path}/".format(scheme=split[0], + netloc=split[1], + path=dirname(split[2])) + + content = get_content(content['info'], + headers={'Referer': self.url}) # get the REAL M3U url, maybe to be changed later? segment_list = [] segments_size = 0 for i in content.split(): - if not i.startswith('#'): #not the best way, better we use the m3u8 package + if not i.startswith('#'): # not the best way, better we use the m3u8 package segment_list.append(base_url + i) # use ext-info for fast size calculate elif i.startswith('#EXT-MGTV-File-SIZE:'): - segments_size += int(i[i.rfind(':')+1:]) + segments_size += int(i[i.rfind(':') + 1:]) return m3u_url, segments_size, segment_list def download_playlist_by_url(self, url, **kwargs): - pass + self.url = url + self.vid = self.get_vid_from_url(self.url) + content_playlist = get_content(self.playlist_endpoint.format(video_id=self.vid, page=1)) + content_playlist = loads(content_playlist) + for ep in content_playlist['data']['list']: + self.download_by_url('https://www.mgtv.com' + ep['url'], **kwargs) + max_page = content_playlist['data']['total_page'] + for page in range(2, max_page + 1): + content_playlist = get_content(self.playlist_endpoint.format(video_id=self.vid, page=page)) + content_playlist = loads(content_playlist) + for ep in content_playlist['data']['list']: + self.download_by_url('https://www.mgtv.com' + ep['url'], **kwargs) def prepare(self, **kwargs): if self.url: self.vid = self.get_vid_from_url(self.url) - content = get_content(self.api_endpoint.format(video_id = self.vid)) - content = loads(content) - self.title = content['data']['info']['title'] - domain = content['data']['stream_domain'][0] - - #stream_available = [i['name'] for i in content['data']['stream']] + content_info = get_content(self.info_endpoint.format(video_id=self.vid)) + log.d(content_info) + content_info = loads(content_info) + self.title = content_info['data']['info']['videoName'] + + content_player = get_content(self.player_endpoint.format(did=self.did, video_id=self.vid, tk2=self.tk2())) + log.d(content_player) + content_player = loads(content_player) + pm2 = content_player['data']['atc']['pm2'] + + content_source = get_content(self.source_endpoint.format(video_id=self.vid, tk2=self.tk2(), pm2=pm2)) + log.d(content_source) + content_source = loads(content_source) + domain = content_source['data']['stream_domain'][0] + + # stream_available = [i['name'] for i in content['data']['stream']] stream_available = {} - for i in content['data']['stream']: + for i in content_source['data']['stream']: stream_available[i['name']] = i['url'] for s in self.stream_types: if s['video_profile'] in stream_available.keys(): quality_id = self.id_dic[s['video_profile']] url = stream_available[s['video_profile']] - url = domain + re.sub( r'(\&arange\=\d+)', '', url) #Un-Hum + if url is None or url == '': + # skip invalid profile with empty url + continue + url = domain + re.sub(r'(\&arange\=\d+)', '', url) # Un-Hum m3u8_url, m3u8_size, segment_list_this = self.get_mgtv_real_url(url) stream_fileid_list = [] for i in segment_list_this: stream_fileid_list.append(os.path.basename(i).split('.')[0]) - #make pieces - pieces = [] - for i in zip(stream_fileid_list, segment_list_this): - pieces.append({'fileid': i[0], 'segs': i[1],}) + # make pieces + pieces = [] + for i in zip(stream_fileid_list, segment_list_this): + pieces.append({'fileid': i[0], 'segs': i[1], }) - self.streams[quality_id] = { + self.streams[quality_id] = { 'container': s['container'], 'video_profile': 
s['video_profile'], 'size': m3u8_size, @@ -97,8 +140,8 @@ 'm3u8_url': m3u8_url } - if not kwargs['info_only']: - self.streams[quality_id]['src'] = segment_list_this + if not kwargs['info_only']: + self.streams[quality_id]['src'] = segment_list_this def extract(self, **kwargs): if 'stream_id' in kwargs and kwargs['stream_id']: @@ -132,7 +175,8 @@ if 'index' not in kwargs: self.p([]) else: - stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag'] + stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else \ + self.streams_sorted[0]['itag'] self.p_i(stream_id) # default to use the best quality @@ -148,8 +192,10 @@ else: download_urls(stream_info['src'], self.title, stream_info['container'], stream_info['size'], output_dir=kwargs['output_dir'], - merge=kwargs.get('merge', True)) - # av=stream_id in self.dash_streams) + merge=kwargs.get('merge', True), + headers={'Referer': self.url}) + # av=stream_id in self.dash_streams) + site = MGTV() download = site.download_by_url diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/src/you_get/extractors/tumblr.py new/you-get-0.4.1520/src/you_get/extractors/tumblr.py --- old/you-get-0.4.1500/src/you_get/extractors/tumblr.py 2020-12-26 19:21:43.000000000 +0100 +++ new/you-get-0.4.1520/src/you_get/extractors/tumblr.py 2021-03-29 16:44:07.000000000 +0200 @@ -14,7 +14,7 @@ return import ssl - ssl_context = request.HTTPSHandler(context=ssl.SSLContext(ssl.PROTOCOL_TLSv1)) + ssl_context = request.HTTPSHandler(context=ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)) # server requires TLS v1.2 cookie_handler = request.HTTPCookieProcessor() opener = request.build_opener(ssl_context, cookie_handler) request.install_opener(opener) @@ -45,23 +45,30 @@ r1(r'<title>([^<\n]*)', html) urls = re.findall(r'(https?://[^;"&]+/tumblr_[^;"&]+_\d+\.jpg)', html) +\ re.findall(r'(https?://[^;"&]+/tumblr_[^;"&]+_\d+\.png)', html) +\ - re.findall(r'(https?://[^;"&]+/tumblr_[^";&]+_\d+\.gif)', html) + re.findall(r'(https?://[^;"&]+/tumblr_[^;"&]+_\d+\.gif)', html) +\ + re.findall(r'(https?://\d+\.media\.tumblr\.com/[^;"&]+/s\d+x\d+/[^;"&]+\.jpg)', html) +\ + re.findall(r'(https?://\d+\.media\.tumblr\.com/[^;"&]+/s\d+x\d+/[^;"&]+\.png)', html) +\ + re.findall(r'(https?://\d+\.media\.tumblr\.com/[^;"&]+/s\d+x\d+/[^;"&]+\.gif)', html) tuggles = {} for url in urls: if url.endswith('.gif'): hd_url = url elif url.endswith('.jpg'): - hd_url = r1(r'(.+)_\d+\.jpg$', url) + '_1280.jpg' # FIXME: decide actual quality + hd_url = url # FIXME: decide actual quality # r1(r'(.+)_\d+\.jpg$', url) + '_1280.jpg' elif url.endswith('.png'): - hd_url = r1(r'(.+)_\d+\.png$', url) + '_1280.png' # FIXME: decide actual quality + hd_url = url # FIXME: decide actual quality # r1(r'(.+)_\d+\.png$', url) + '_1280.png' else: continue filename = parse.unquote(hd_url.split('/')[-1]) title = '.'.join(filename.split('.')[:-1]) - tumblr_id = r1(r'^tumblr_(.+)_\d+$', title) - quality = int(r1(r'^tumblr_.+_(\d+)$', title)) + tumblr_id = r1(r'^tumblr_(.+)_\d+$', title) or title + try: + quality = int(r1(r'^tumblr_.+_(\d+)$', title)) + except: + quality = int(r1(r'/s(\d+)x\d+/', hd_url)) ext = filename.split('.')[-1] + try: size = int(get_head(hd_url)['Content-Length']) if tumblr_id not in tuggles or tuggles[tumblr_id]['quality'] < quality: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/src/you_get/extractors/xiami.py 
new/you-get-0.4.1520/src/you_get/extractors/xiami.py --- old/you-get-0.4.1500/src/you_get/extractors/xiami.py 2020-12-26 19:21:43.000000000 +0100 +++ new/you-get-0.4.1520/src/you_get/extractors/xiami.py 1970-01-01 01:00:00.000000000 +0100 @@ -1,215 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__all__ = ['xiami_download'] - -from ..common import * - -from xml.dom.minidom import parseString -from urllib import parse - -def location_dec(str): - head = int(str[0]) - str = str[1:] - rows = head - cols = int(len(str)/rows) + 1 - - out = "" - full_row = len(str) % head - for c in range(cols): - for r in range(rows): - if c == (cols - 1) and r >= full_row: - continue - if r < full_row: - char = str[r*cols+c] - else: - char = str[cols*full_row+(r-full_row)*(cols-1)+c] - out += char - return parse.unquote(out).replace("^", "0") - -def xiami_download_lyric(lrc_url, file_name, output_dir): - lrc = get_content(lrc_url, headers=fake_headers) - filename = get_filename(file_name) - if len(lrc) > 0: - with open(output_dir + "/" + filename + '.lrc', 'w', encoding='utf-8') as x: - x.write(lrc) - -def xiami_download_pic(pic_url, file_name, output_dir): - from ..util.strings import get_filename - pic_url = pic_url.replace('_1', '') - pos = pic_url.rfind('.') - ext = pic_url[pos:] - pic = get_content(pic_url, headers=fake_headers, decoded=False) - if len(pic) > 0: - with open(output_dir + "/" + file_name.replace('/', '-') + ext, 'wb') as x: - x.write(pic) - -def xiami_download_song(sid, output_dir = '.', info_only = False): - xml = get_content('http://www.xiami.com/song/playlist/id/%s/object_name/default/object_id/0' % sid, headers=fake_headers) - doc = parseString(xml) - i = doc.getElementsByTagName("track")[0] - artist = i.getElementsByTagName("artist")[0].firstChild.nodeValue - album_name = i.getElementsByTagName("album_name")[0].firstChild.nodeValue - song_title = i.getElementsByTagName("name")[0].firstChild.nodeValue - url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue) - try: - lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue - except: - pass - type_, ext, size = url_info(url, headers=fake_headers) - if not ext: - ext = 'mp3' - - print_info(site_info, song_title, ext, size) - if not info_only: - file_name = "%s - %s - %s" % (song_title, artist, album_name) - download_urls([url], file_name, ext, size, output_dir, headers=fake_headers) - try: - xiami_download_lyric(lrc_url, file_name, output_dir) - except: - pass - -def xiami_download_showcollect(cid, output_dir = '.', info_only = False): - html = get_content('http://www.xiami.com/song/showcollect/id/' + cid, headers=fake_headers) - collect_name = r1(r'<title>(.*)</title>', html) - - xml = get_content('http://www.xiami.com/song/playlist/id/%s/type/3' % cid, headers=fake_headers) - doc = parseString(xml) - output_dir = output_dir + "/" + "[" + collect_name + "]" - tracks = doc.getElementsByTagName("track") - track_nr = 1 - for i in tracks: - artist=album_name=song_title=url="" - try: - song_id = i.getElementsByTagName("song_id")[0].firstChild.nodeValue - artist = i.getElementsByTagName("artist")[0].firstChild.nodeValue - album_name = i.getElementsByTagName("album_name")[0].firstChild.nodeValue - song_title = i.getElementsByTagName("title")[0].firstChild.nodeValue - url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue) - except: - log.e("Song %s failed. 
[Info Missing] artist:%s, album:%s, title:%s, url:%s" % (song_id, artist, album_name, song_title, url)) - continue - try: - lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue - except: - pass - type_, ext, size = url_info(url, headers=fake_headers) - if not ext: - ext = 'mp3' - - print_info(site_info, song_title, ext, size) - if not info_only: - file_name = "%02d.%s - %s - %s" % (track_nr, song_title, artist, album_name) - download_urls([url], file_name, ext, size, output_dir, headers=fake_headers) - try: - xiami_download_lyric(lrc_url, file_name, output_dir) - except: - pass - - track_nr += 1 - -def xiami_download_album(aid, output_dir='.', info_only=False): - xml = get_content('http://www.xiami.com/song/playlist/id/%s/type/1' % aid, headers=fake_headers) - album_name = r1(r'<album_name><!\[CDATA\[(.*)\]\]>', xml) - artist = r1(r'<artist><!\[CDATA\[(.*)\]\]>', xml) - doc = parseString(xml) - output_dir = output_dir + "/%s - %s" % (artist, album_name) - track_list = doc.getElementsByTagName('trackList')[0] - tracks = track_list.getElementsByTagName("track") - track_nr = 1 - pic_exist = False - for i in tracks: -#in this xml track tag is used for both "track in a trackList" and track no -#dirty here - if i.firstChild.nodeValue is not None: - continue - song_title = i.getElementsByTagName("songName")[0].firstChild.nodeValue - url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue) - try: - lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue - except: - pass - if not pic_exist: - pic_url = i.getElementsByTagName("pic")[0].firstChild.nodeValue - type_, ext, size = url_info(url, headers=fake_headers) - if not ext: - ext = 'mp3' - - print_info(site_info, song_title, ext, size) - if not info_only: - file_name = "%02d.%s" % (track_nr, song_title) - download_urls([url], file_name, ext, size, output_dir, headers=fake_headers) - try: - xiami_download_lyric(lrc_url, file_name, output_dir) - except: - pass - if not pic_exist: - xiami_download_pic(pic_url, 'cover', output_dir) - pic_exist = True - - track_nr += 1 - -def xiami_download_mv(url, output_dir='.', merge=True, info_only=False): - # FIXME: broken merge - page = get_content(url, headers=fake_headers) - title = re.findall('<title>([^<]+)', page)[0] - vid, uid = re.findall(r'vid:"(\d+)",uid:"(\d+)"', page)[0] - api_url = 'http://cloud.video.taobao.com/videoapi/info.php?vid=%s&uid=%s' % (vid, uid) - result = get_content(api_url, headers=fake_headers) - doc = parseString(result) - video_url = doc.getElementsByTagName("video_url")[-1].firstChild.nodeValue - length = int(doc.getElementsByTagName("length")[-1].firstChild.nodeValue) - - v_urls = [] - k_start = 0 - total_size = 0 - while True: - k_end = k_start + 20000000 - if k_end >= length: k_end = length - 1 - v_url = video_url + '/start_%s/end_%s/1.flv' % (k_start, k_end) - try: - _, ext, size = url_info(v_url) - except: - break - v_urls.append(v_url) - total_size += size - k_start = k_end + 1 - - print_info(site_info, title, ext, total_size) - if not info_only: - download_urls(v_urls, title, ext, total_size, output_dir, merge=merge, headers=fake_headers) - -def xiami_download(url, output_dir='.', merge=True, info_only=False, **kwargs): -#albums - if re.match(r'http://www.xiami.com/album/\d+', url): - id = r1(r'http://www.xiami.com/album/(\d+)', url) - xiami_download_album(id, output_dir, info_only) - elif re.match(r'http://www.xiami.com/album/\w+', url): - page = get_content(url, headers=fake_headers) - album_id = 
re.search(r'rel="canonical"\s+href="http://www.xiami.com/album/([^"]+)"', page).group(1) - xiami_download_album(album_id, output_dir, info_only) - -#collections - if re.match(r'http://www.xiami.com/collect/\d+', url): - id = r1(r'http://www.xiami.com/collect/(\d+)', url) - xiami_download_showcollect(id, output_dir, info_only) - -#single track - if re.match(r'http://www.xiami.com/song/\d+\b', url): - id = r1(r'http://www.xiami.com/song/(\d+)', url) - xiami_download_song(id, output_dir, info_only) - elif re.match(r'http://www.xiami.com/song/\w+', url): - html = get_content(url, headers=fake_headers) - id = r1(r'rel="canonical" href="http://www.xiami.com/song/([^"]+)"', html) - xiami_download_song(id, output_dir, info_only) - - if re.match('http://www.xiami.com/song/detail/id/\d+', url): - id = r1(r'http://www.xiami.com/song/detail/id/(\d+)', url) - xiami_download_song(id, output_dir, info_only) - - if re.match('http://www.xiami.com/mv', url): - xiami_download_mv(url, output_dir, merge=merge, info_only=info_only) - -site_info = "Xiami.com" -download = xiami_download -download_playlist = playlist_not_supported("xiami") diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/src/you_get/extractors/youku.py new/you-get-0.4.1520/src/you_get/extractors/youku.py --- old/you-get-0.4.1500/src/you_get/extractors/youku.py 2020-12-26 19:21:43.000000000 +0100 +++ new/you-get-0.4.1520/src/you_get/extractors/youku.py 2021-03-29 16:44:07.000000000 +0200 @@ -77,7 +77,7 @@ self.api_error_code = None self.api_error_msg = None - self.ccode = '0590' + self.ccode = '0532' # Found in http://g.alicdn.com/player/ykplayer/0.5.64/youku-player.min.js # grep -oE '"[0-9a-zA-Z+/=]{256}"' youku-player.min.js self.ckey = 'DIl58SLFxFNndSV1GFNnMQVYkx1PP5tKe1siZu/86PR1u/Wh1Ptd+WOZsHHWxysSfAOhNJpdVWsdVJNsfJ8Sxd8WKVvNfAS8aS8fAOzYARzPyPc3JvtnPHjTdKfESTdnuTW6ZPvk2pNDh4uFzotgdMEFkzQ5wZVXl2Pf1/Y6hLK0OnCNxBj3+nb0v72gZ6b0td+WOZsHHWxysSo/0y9D2K42SaB8Y/+aD2K42SaB8Y/+ahU+WOZsHcrxysooUeND' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/src/you_get/extractors/youtube.py new/you-get-0.4.1520/src/you_get/extractors/youtube.py --- old/you-get-0.4.1500/src/you_get/extractors/youtube.py 2020-12-26 19:21:43.000000000 +0100 +++ new/you-get-0.4.1520/src/you_get/extractors/youtube.py 2021-03-29 16:44:07.000000000 +0200 @@ -157,7 +157,12 @@ log.wtf('[Failed] Unsupported URL pattern.') video_page = get_content('https://www.youtube.com/playlist?list=%s' % playlist_id) - ytInitialData = json.loads(match1(video_page, r'window\["ytInitialData"\]\s*=\s*(.+);')) + playlist_json_serialized = match1(video_page, r'window\["ytInitialData"\]\s*=\s*(.+);', r'var\s+ytInitialData\s*=\s*([^;]+);') + + if len(playlist_json_serialized) == 0: + log.wtf('[Failed] Unable to extract playlist data') + + ytInitialData = json.loads(playlist_json_serialized[0]) tab0 = ytInitialData['contents']['twoColumnBrowseResultsRenderer']['tabs'][0] itemSection0 = tab0['tabRenderer']['content']['sectionListRenderer']['contents'][0] @@ -353,7 +358,10 @@ # Prepare caption tracks try: - caption_tracks = json.loads(ytplayer_config['args']['player_response'])['captions']['playerCaptionsTracklistRenderer']['captionTracks'] + try: + caption_tracks = json.loads(ytplayer_config['args']['player_response'])['captions']['playerCaptionsTracklistRenderer']['captionTracks'] + except: + caption_tracks = 
ytInitialPlayerResponse['captions']['playerCaptionsTracklistRenderer']['captionTracks'] for ct in caption_tracks: ttsurl, lang = ct['baseUrl'], ct['languageCode'] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/src/you_get/version.py new/you-get-0.4.1520/src/you_get/version.py --- old/you-get-0.4.1500/src/you_get/version.py 2020-12-26 19:21:43.000000000 +0100 +++ new/you-get-0.4.1520/src/you_get/version.py 2021-03-29 16:44:07.000000000 +0200 @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1500' +__version__ = '0.4.1520' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/tests/test.py new/you-get-0.4.1520/tests/test.py --- old/you-get-0.4.1500/tests/test.py 2020-12-26 19:21:43.000000000 +0100 +++ new/you-get-0.4.1520/tests/test.py 2021-03-29 16:44:07.000000000 +0200 @@ -40,14 +40,6 @@ def test_acfun(self): acfun.download('https://www.acfun.cn/v/ac11701912', info_only=True) - def test_bilibili(self): - bilibili.download( - "https://www.bilibili.com/watchlater/#/BV1PE411q7mZ/p6", info_only=True - ) - bilibili.download( - "https://www.bilibili.com/watchlater/#/av74906671/p6", info_only=True - ) - def test_soundcloud(self): ## single song soundcloud.download(
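
Note on the bilibili.py hunk above: replacing integer floor division with math.ceil fixes an off-by-one in the page count whenever the total number of videos is not an exact multiple of the page size. A minimal standalone sketch of the arithmetic (the counts below are made up for illustration, not taken from the Bilibili API):

    import math

    count, page_size = 25, 10             # hypothetical: 25 videos, 10 per page
    print(count // page_size)             # 2 -> floor division skips the last partial page
    print(math.ceil(count / page_size))   # 3 -> ceiling covers every page, as the new code does

The playlist options introduced in common.py (--first, --last and --size/--page-size) are passed down to extractors through extra['args']; a hypothetical invocation against the new lrts.me extractor (the book id below is made up) could look like:

    you-get --playlist --first 0 --last 3 --page-size 10 http://www.lrts.me/book/12345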