Script 'mail_helper' called by obssrc

Hello community,

here is the log from the commit of package you-get for openSUSE:Factory checked in at 2021-04-01 14:17:07

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/you-get (Old)
 and      /work/SRC/openSUSE:Factory/.you-get.new.2401 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "you-get" Thu Apr 1 14:17:07 2021 rev:36 rq:882000 version:0.4.1520 Changes: -------- --- /work/SRC/openSUSE:Factory/you-get/you-get.changes 2020-12-28 00:28:34.153773706 +0100 +++ /work/SRC/openSUSE:Factory/.you-get.new.2401/you-get.changes 2021-04-01 14:18:25.448083923 +0200 @@ -1,0 +2,5 @@ +Mon Mar 29 14:55:07 UTC 2021 - Luigi Baldoni <aloi...@gmx.com> + +- Update to version 0.4.1520 (no changelog supplied) + +------------------------------------------------------------------- Old: ---- you-get-0.4.1500.tar.gz New: ---- you-get-0.4.1520.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ you-get.spec ++++++ --- /var/tmp/diff_new_pack.h2EyKA/_old 2021-04-01 14:18:26.072084894 +0200 +++ /var/tmp/diff_new_pack.h2EyKA/_new 2021-04-01 14:18:26.076084900 +0200 @@ -1,7 +1,7 @@ # # spec file for package you-get # -# Copyright (c) 2020 SUSE LLC +# Copyright (c) 2021 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -17,7 +17,7 @@ Name: you-get -Version: 0.4.1500 +Version: 0.4.1520 Release: 0 Summary: Dumb downloader that scrapes the web License: MIT ++++++ you-get-0.4.1500.tar.gz -> you-get-0.4.1520.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/.gitignore new/you-get-0.4.1520/.gitignore --- old/you-get-0.4.1500/.gitignore 2020-12-26 19:21:43.000000000 +0100 +++ new/you-get-0.4.1520/.gitignore 2021-03-29 16:44:07.000000000 +0200 @@ -84,6 +84,7 @@ *.m4a *.DS_Store *.txt +*.sw[a-p] *.zip diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/README.md new/you-get-0.4.1520/README.md --- old/you-get-0.4.1500/README.md 2020-12-26 19:21:43.000000000 +0100 +++ new/you-get-0.4.1520/README.md 2021-03-29 16:44:07.000000000 +0200 @@ -414,6 +414,7 @@ | ???????????? | <http://www.kuwo.cn/> | | |???| | ????????? | <http://www.le.com/> |???| | | | ??????FM | <http://www.lizhi.fm/> | | |???| +| ???????????? | <http://www.lrts.me/> | | |???| | ?????? | <http://www.miaopai.com/> |???| | | | MioMio????????? | <http://www.miomio.tv/> |???| | | | MissEvan<br/>??????FM | <http://www.missevan.com/> | | |???| @@ -425,7 +426,6 @@ | Sina<br/>????????????<br/>?????????????????? | <http://video.sina.com.cn/><br/><http://video.weibo.com/> |???| | | | Sohu<br/>???????????? | <http://tv.sohu.com/> |???| | | | **Tudou<br/>??????** | <http://www.tudou.com/> |???| | | -| ?????? | <http://www.xiami.com/> |???| |???| | ???????????? 
| <http://www.isuntv.com/> |???| | | | **Youku<br/>??????** | <http://www.youku.com/> |???| | | | ??????TV | <http://www.zhanqi.tv/lives> |???| | | diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/src/you_get/common.py new/you-get-0.4.1520/src/you_get/common.py --- old/you-get-0.4.1500/src/you_get/common.py 2020-12-26 19:21:43.000000000 +0100 +++ new/you-get-0.4.1520/src/you_get/common.py 2021-03-29 16:44:07.000000000 +0200 @@ -76,6 +76,7 @@ 'letv' : 'le', 'lizhi' : 'lizhi', 'longzhu' : 'longzhu', + 'lrts' : 'lrts', 'magisto' : 'magisto', 'metacafe' : 'metacafe', 'mgtv' : 'mgtv', @@ -112,7 +113,6 @@ 'veoh' : 'veoh', 'vine' : 'vine', 'vk' : 'vk', - 'xiami' : 'xiami', 'xiaokaxiu' : 'yixia', 'xiaojiadianvideo' : 'fc2video', 'ximalaya' : 'ximalaya', @@ -1555,6 +1555,21 @@ '-l', '--playlist', action='store_true', help='Prefer to download a playlist' ) + + playlist_grp = parser.add_argument_group('Playlist optional options') + playlist_grp.add_argument( + '--first', metavar='FIRST', + help='the first number' + ) + playlist_grp.add_argument( + '--last', metavar='LAST', + help='the last number' + ) + playlist_grp.add_argument( + '--size', '--page-size', metavar='PAGE_SIZE', + help='the page size number' + ) + download_grp.add_argument( '-a', '--auto-rename', action='store_true', default=False, help='Auto rename same name different files' @@ -1672,7 +1687,7 @@ socket.setdefaulttimeout(args.timeout) try: - extra = {} + extra = {'args': args} if extractor_proxy: extra['extractor_proxy'] = extractor_proxy if stream_id: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/src/you_get/extractors/__init__.py new/you-get-0.4.1520/src/you_get/extractors/__init__.py --- old/you-get-0.4.1500/src/you_get/extractors/__init__.py 2020-12-26 19:21:43.000000000 +0100 +++ new/you-get-0.4.1520/src/you_get/extractors/__init__.py 2021-03-29 16:44:07.000000000 +0200 @@ -78,7 +78,6 @@ from .vk import * from .w56 import * from .wanmen import * -from .xiami import * from .xinpianchang import * from .yixia import * from .youku import * diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/src/you_get/extractors/bilibili.py new/you-get-0.4.1520/src/you_get/extractors/bilibili.py --- old/you-get-0.4.1500/src/you_get/extractors/bilibili.py 2020-12-26 19:21:43.000000000 +0100 +++ new/you-get-0.4.1520/src/you_get/extractors/bilibili.py 2021-03-29 16:44:07.000000000 +0200 @@ -4,12 +4,16 @@ from ..extractor import VideoExtractor import hashlib +import math + class Bilibili(VideoExtractor): name = "Bilibili" # Bilibili media encoding options, in descending quality order. stream_types = [ + {'id': 'hdflv2', 'quality': 125, 'audio_quality': 30280, + 'container': 'FLV', 'video_resolution': '3840p', 'desc': '?????? HDR'}, {'id': 'hdflv2_4k', 'quality': 120, 'audio_quality': 30280, 'container': 'FLV', 'video_resolution': '2160p', 'desc': '?????? 
4K'}, {'id': 'flv_p60', 'quality': 116, 'audio_quality': 30280, @@ -734,7 +738,8 @@ api_url = self.bilibili_space_video_api(mid) api_content = get_content(api_url, headers=self.bilibili_headers()) videos_info = json.loads(api_content) - pc = videos_info['data']['page']['count'] // videos_info['data']['page']['ps'] + # pc = videos_info['data']['page']['count'] // videos_info['data']['page']['ps'] + pc = math.ceil(videos_info['data']['page']['count'] / videos_info['data']['page']['ps']) for pn in range(1, pc + 1): api_url = self.bilibili_space_video_api(mid, pn=pn) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/src/you_get/extractors/instagram.py new/you-get-0.4.1520/src/you_get/extractors/instagram.py --- old/you-get-0.4.1500/src/you_get/extractors/instagram.py 2020-12-26 19:21:43.000000000 +0100 +++ new/you-get-0.4.1520/src/you_get/extractors/instagram.py 2021-03-29 16:44:07.000000000 +0200 @@ -22,14 +22,15 @@ download_urls([stream], title, ext, size, output_dir, merge=merge) else: data = re.search(r'window\._sharedData\s*=\s*(.*);</script>', html) - if data is not None: + try: info = json.loads(data.group(1)) post = info['entry_data']['PostPage'][0] - else: + assert post + except: # with logged-in cookies data = re.search(r'window\.__additionalDataLoaded\(\'[^\']+\',(.*)\);</script>', html) if data is not None: - log.e('[Error] Cookies needed.') + log.e('[Warning] Cookies needed.') post = json.loads(data.group(1)) if 'edge_sidecar_to_children' in post['graphql']['shortcode_media']: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/src/you_get/extractors/lrts.py new/you-get-0.4.1520/src/you_get/extractors/lrts.py --- old/you-get-0.4.1500/src/you_get/extractors/lrts.py 1970-01-01 01:00:00.000000000 +0100 +++ new/you-get-0.4.1520/src/you_get/extractors/lrts.py 2021-03-29 16:44:07.000000000 +0200 @@ -0,0 +1,81 @@ +#!/usr/bin/env python + +__all__ = ['lrts_download'] + +import logging +from ..common import * +from ..util import log, term + +def lrts_download(url, output_dir='.', merge=True, info_only=False, **kwargs): + html = get_html(url) + args = kwargs.get('args') + if not args: args = {} + matched = re.search(r"/book/(\d+)", url) + if not matched: + raise AssertionError("not found book number: %s" % url) + book_no = matched.group(1) + book_title = book_no + matched = re.search(r"<title>([^-]*)[-](.*)[,](.*)</title>", html) + if matched: + book_title = matched.group(1) + + matched = re.search(r"var totalCount='(\d+)'", html) + if not matched: + raise AssertionError("not found total count in html") + total_count = int(matched.group(1)) + log.i('%s total: %s' % (book_title, total_count)) + first_page = 0 + if ('first' in args and args.first!= None): + first_page = int(args.first) + + page_size = 10 + if ('page_size' in args and args.page_size != None): + page_size = int(args.page_size) + last_page = (total_count // page_size) + 1 + if ('last' in args and args.last != None): + last_page = int(args.last) + + log.i('page size is %s, page from %s to %s' % (page_size, first_page, last_page)) + headers = { + 'Referer': url + } + items = [] + for page in range(first_page, last_page): + page_url = 'http://www.lrts.me/ajax/book/%s/%s/%s' % (book_no, page, page_size) + response_content = json.loads(post_content(page_url, headers)) + if response_content['status'] != 'success': + raise AssertionError("got the page failed: %s" % (page_url)) + data = response_content['data']['data'] + 
if data: + for i in data: + i['resName'] = parse.unquote(i['resName']) + items.extend(data) + else: + break + headers = { + 'Referer': 'http://www.lrts.me/playlist' + } + + for item in items: + i_url = 'http://www.lrts.me/ajax/path/4/%s/%s' % (item['fatherResId'], item['resId']) + response_content = json.loads(post_content(i_url, headers)) + if response_content['status'] == 'success' and response_content['data']: + item['ok'] = True + item['url'] = response_content['data'] + logging.debug('ok') + + items = list(filter(lambda i: 'ok' in i and i['ok'], items)) + log.i('Downloading %s: %s count ...' % (book_title, len(items))) + + for item in items: + title = item['resName'] + file_url = item['url'] + # if not file_url: continue + _, _, size = url_info(file_url) + print_info(site_info, title, 'mp3', size) + if not info_only: + download_urls([file_url], title, 'mp3', size, output_dir, merge=merge) + +site_info = "lrts.me" +download = lrts_download +download_playlist = lrts_download diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/src/you_get/extractors/mgtv.py new/you-get-0.4.1520/src/you_get/extractors/mgtv.py --- old/you-get-0.4.1500/src/you_get/extractors/mgtv.py 2020-12-26 19:21:43.000000000 +0100 +++ new/you-get-0.4.1520/src/you_get/extractors/mgtv.py 2021-03-29 16:44:07.000000000 +0200 @@ -9,19 +9,36 @@ from os.path import dirname import re +import base64 +import time +import uuid + + class MGTV(VideoExtractor): name = "?????? (MGTV)" # Last updated: 2016-11-13 stream_types = [ + {'id': 'fhd', 'container': 'ts', 'video_profile': '??????'}, {'id': 'hd', 'container': 'ts', 'video_profile': '??????'}, {'id': 'sd', 'container': 'ts', 'video_profile': '??????'}, {'id': 'ld', 'container': 'ts', 'video_profile': '??????'}, ] - - id_dic = {i['video_profile']:(i['id']) for i in stream_types} - - api_endpoint = 'http://pcweb.api.mgtv.com/player/video?video_id={video_id}' + + id_dic = {i['video_profile']: (i['id']) for i in stream_types} + + did = str(uuid.uuid4()) + ver = '0.3.0301' + pno = '1030' + + def tk2(self): + return base64.urlsafe_b64encode(b'did=%s|ver=%s|pno=%s|clit=%d' % ( + self.did.encode(), self.ver.encode(), self.pno.encode(), time.time())).decode('utf-8')[::-1] + + info_endpoint = 'https://pcweb.api.mgtv.com/video/info?vid={video_id}' + player_endpoint = 'https://pcweb.api.mgtv.com/player/video?did={did}&tk2={tk2}&video_id={video_id}' + source_endpoint = 'https://pcweb.api.mgtv.com/player/getSource?tk2={tk2}&pm2={pm2}&video_id={video_id}' + playlist_endpoint = 'https://pcweb.api.mgtv.com/episode/list?video_id={video_id}&page={page}&size=30' @staticmethod def get_vid_from_url(url): @@ -30,66 +47,92 @@ vid = match1(url, 'https?://www.mgtv.com/(?:b|l)/\d+/(\d+).html') if not vid: vid = match1(url, 'https?://www.mgtv.com/hz/bdpz/\d+/(\d+).html') + if not vid: + vid = match1(url, 'https?://www.mgtv.com/s/(\d+).html') return vid - - #---------------------------------------------------------------------- - @staticmethod - def get_mgtv_real_url(url): + + # ---------------------------------------------------------------------- + def get_mgtv_real_url(self, url): """str->list of str Give you the real URLs.""" content = loads(get_content(url)) m3u_url = content['info'] split = urlsplit(m3u_url) - - base_url = "{scheme}://{netloc}{path}/".format(scheme = split[0], - netloc = split[1], - path = dirname(split[2])) - content = get_content(content['info']) #get the REAL M3U url, maybe to be changed later? 
+ base_url = "{scheme}://{netloc}{path}/".format(scheme=split[0], + netloc=split[1], + path=dirname(split[2])) + + content = get_content(content['info'], + headers={'Referer': self.url}) # get the REAL M3U url, maybe to be changed later? segment_list = [] segments_size = 0 for i in content.split(): - if not i.startswith('#'): #not the best way, better we use the m3u8 package + if not i.startswith('#'): # not the best way, better we use the m3u8 package segment_list.append(base_url + i) # use ext-info for fast size calculate elif i.startswith('#EXT-MGTV-File-SIZE:'): - segments_size += int(i[i.rfind(':')+1:]) + segments_size += int(i[i.rfind(':') + 1:]) return m3u_url, segments_size, segment_list def download_playlist_by_url(self, url, **kwargs): - pass + self.url = url + self.vid = self.get_vid_from_url(self.url) + content_playlist = get_content(self.playlist_endpoint.format(video_id=self.vid, page=1)) + content_playlist = loads(content_playlist) + for ep in content_playlist['data']['list']: + self.download_by_url('https://www.mgtv.com' + ep['url'], **kwargs) + max_page = content_playlist['data']['total_page'] + for page in range(2, max_page + 1): + content_playlist = get_content(self.playlist_endpoint.format(video_id=self.vid, page=page)) + content_playlist = loads(content_playlist) + for ep in content_playlist['data']['list']: + self.download_by_url('https://www.mgtv.com' + ep['url'], **kwargs) def prepare(self, **kwargs): if self.url: self.vid = self.get_vid_from_url(self.url) - content = get_content(self.api_endpoint.format(video_id = self.vid)) - content = loads(content) - self.title = content['data']['info']['title'] - domain = content['data']['stream_domain'][0] - - #stream_available = [i['name'] for i in content['data']['stream']] + content_info = get_content(self.info_endpoint.format(video_id=self.vid)) + log.d(content_info) + content_info = loads(content_info) + self.title = content_info['data']['info']['videoName'] + + content_player = get_content(self.player_endpoint.format(did=self.did, video_id=self.vid, tk2=self.tk2())) + log.d(content_player) + content_player = loads(content_player) + pm2 = content_player['data']['atc']['pm2'] + + content_source = get_content(self.source_endpoint.format(video_id=self.vid, tk2=self.tk2(), pm2=pm2)) + log.d(content_source) + content_source = loads(content_source) + domain = content_source['data']['stream_domain'][0] + + # stream_available = [i['name'] for i in content['data']['stream']] stream_available = {} - for i in content['data']['stream']: + for i in content_source['data']['stream']: stream_available[i['name']] = i['url'] for s in self.stream_types: if s['video_profile'] in stream_available.keys(): quality_id = self.id_dic[s['video_profile']] url = stream_available[s['video_profile']] - url = domain + re.sub( r'(\&arange\=\d+)', '', url) #Un-Hum + if url is None or url == '': + # skip invalid profile with empty url + continue + url = domain + re.sub(r'(\&arange\=\d+)', '', url) # Un-Hum m3u8_url, m3u8_size, segment_list_this = self.get_mgtv_real_url(url) stream_fileid_list = [] for i in segment_list_this: stream_fileid_list.append(os.path.basename(i).split('.')[0]) - #make pieces - pieces = [] - for i in zip(stream_fileid_list, segment_list_this): - pieces.append({'fileid': i[0], 'segs': i[1],}) + # make pieces + pieces = [] + for i in zip(stream_fileid_list, segment_list_this): + pieces.append({'fileid': i[0], 'segs': i[1], }) - self.streams[quality_id] = { + self.streams[quality_id] = { 'container': s['container'], 'video_profile': 
s['video_profile'], 'size': m3u8_size, @@ -97,8 +140,8 @@ 'm3u8_url': m3u8_url } - if not kwargs['info_only']: - self.streams[quality_id]['src'] = segment_list_this + if not kwargs['info_only']: + self.streams[quality_id]['src'] = segment_list_this def extract(self, **kwargs): if 'stream_id' in kwargs and kwargs['stream_id']: @@ -132,7 +175,8 @@ if 'index' not in kwargs: self.p([]) else: - stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag'] + stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else \ + self.streams_sorted[0]['itag'] self.p_i(stream_id) # default to use the best quality @@ -148,8 +192,10 @@ else: download_urls(stream_info['src'], self.title, stream_info['container'], stream_info['size'], output_dir=kwargs['output_dir'], - merge=kwargs.get('merge', True)) - # av=stream_id in self.dash_streams) + merge=kwargs.get('merge', True), + headers={'Referer': self.url}) + # av=stream_id in self.dash_streams) + site = MGTV() download = site.download_by_url diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/src/you_get/extractors/tumblr.py new/you-get-0.4.1520/src/you_get/extractors/tumblr.py --- old/you-get-0.4.1500/src/you_get/extractors/tumblr.py 2020-12-26 19:21:43.000000000 +0100 +++ new/you-get-0.4.1520/src/you_get/extractors/tumblr.py 2021-03-29 16:44:07.000000000 +0200 @@ -14,7 +14,7 @@ return import ssl - ssl_context = request.HTTPSHandler(context=ssl.SSLContext(ssl.PROTOCOL_TLSv1)) + ssl_context = request.HTTPSHandler(context=ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)) # server requires TLS v1.2 cookie_handler = request.HTTPCookieProcessor() opener = request.build_opener(ssl_context, cookie_handler) request.install_opener(opener) @@ -45,23 +45,30 @@ r1(r'<title>([^<\n]*)', html) urls = re.findall(r'(https?://[^;"&]+/tumblr_[^;"&]+_\d+\.jpg)', html) +\ re.findall(r'(https?://[^;"&]+/tumblr_[^;"&]+_\d+\.png)', html) +\ - re.findall(r'(https?://[^;"&]+/tumblr_[^";&]+_\d+\.gif)', html) + re.findall(r'(https?://[^;"&]+/tumblr_[^;"&]+_\d+\.gif)', html) +\ + re.findall(r'(https?://\d+\.media\.tumblr\.com/[^;"&]+/s\d+x\d+/[^;"&]+\.jpg)', html) +\ + re.findall(r'(https?://\d+\.media\.tumblr\.com/[^;"&]+/s\d+x\d+/[^;"&]+\.png)', html) +\ + re.findall(r'(https?://\d+\.media\.tumblr\.com/[^;"&]+/s\d+x\d+/[^;"&]+\.gif)', html) tuggles = {} for url in urls: if url.endswith('.gif'): hd_url = url elif url.endswith('.jpg'): - hd_url = r1(r'(.+)_\d+\.jpg$', url) + '_1280.jpg' # FIXME: decide actual quality + hd_url = url # FIXME: decide actual quality # r1(r'(.+)_\d+\.jpg$', url) + '_1280.jpg' elif url.endswith('.png'): - hd_url = r1(r'(.+)_\d+\.png$', url) + '_1280.png' # FIXME: decide actual quality + hd_url = url # FIXME: decide actual quality # r1(r'(.+)_\d+\.png$', url) + '_1280.png' else: continue filename = parse.unquote(hd_url.split('/')[-1]) title = '.'.join(filename.split('.')[:-1]) - tumblr_id = r1(r'^tumblr_(.+)_\d+$', title) - quality = int(r1(r'^tumblr_.+_(\d+)$', title)) + tumblr_id = r1(r'^tumblr_(.+)_\d+$', title) or title + try: + quality = int(r1(r'^tumblr_.+_(\d+)$', title)) + except: + quality = int(r1(r'/s(\d+)x\d+/', hd_url)) ext = filename.split('.')[-1] + try: size = int(get_head(hd_url)['Content-Length']) if tumblr_id not in tuggles or tuggles[tumblr_id]['quality'] < quality: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/src/you_get/extractors/xiami.py 
new/you-get-0.4.1520/src/you_get/extractors/xiami.py --- old/you-get-0.4.1500/src/you_get/extractors/xiami.py 2020-12-26 19:21:43.000000000 +0100 +++ new/you-get-0.4.1520/src/you_get/extractors/xiami.py 1970-01-01 01:00:00.000000000 +0100 @@ -1,215 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__all__ = ['xiami_download'] - -from ..common import * - -from xml.dom.minidom import parseString -from urllib import parse - -def location_dec(str): - head = int(str[0]) - str = str[1:] - rows = head - cols = int(len(str)/rows) + 1 - - out = "" - full_row = len(str) % head - for c in range(cols): - for r in range(rows): - if c == (cols - 1) and r >= full_row: - continue - if r < full_row: - char = str[r*cols+c] - else: - char = str[cols*full_row+(r-full_row)*(cols-1)+c] - out += char - return parse.unquote(out).replace("^", "0") - -def xiami_download_lyric(lrc_url, file_name, output_dir): - lrc = get_content(lrc_url, headers=fake_headers) - filename = get_filename(file_name) - if len(lrc) > 0: - with open(output_dir + "/" + filename + '.lrc', 'w', encoding='utf-8') as x: - x.write(lrc) - -def xiami_download_pic(pic_url, file_name, output_dir): - from ..util.strings import get_filename - pic_url = pic_url.replace('_1', '') - pos = pic_url.rfind('.') - ext = pic_url[pos:] - pic = get_content(pic_url, headers=fake_headers, decoded=False) - if len(pic) > 0: - with open(output_dir + "/" + file_name.replace('/', '-') + ext, 'wb') as x: - x.write(pic) - -def xiami_download_song(sid, output_dir = '.', info_only = False): - xml = get_content('http://www.xiami.com/song/playlist/id/%s/object_name/default/object_id/0' % sid, headers=fake_headers) - doc = parseString(xml) - i = doc.getElementsByTagName("track")[0] - artist = i.getElementsByTagName("artist")[0].firstChild.nodeValue - album_name = i.getElementsByTagName("album_name")[0].firstChild.nodeValue - song_title = i.getElementsByTagName("name")[0].firstChild.nodeValue - url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue) - try: - lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue - except: - pass - type_, ext, size = url_info(url, headers=fake_headers) - if not ext: - ext = 'mp3' - - print_info(site_info, song_title, ext, size) - if not info_only: - file_name = "%s - %s - %s" % (song_title, artist, album_name) - download_urls([url], file_name, ext, size, output_dir, headers=fake_headers) - try: - xiami_download_lyric(lrc_url, file_name, output_dir) - except: - pass - -def xiami_download_showcollect(cid, output_dir = '.', info_only = False): - html = get_content('http://www.xiami.com/song/showcollect/id/' + cid, headers=fake_headers) - collect_name = r1(r'<title>(.*)</title>', html) - - xml = get_content('http://www.xiami.com/song/playlist/id/%s/type/3' % cid, headers=fake_headers) - doc = parseString(xml) - output_dir = output_dir + "/" + "[" + collect_name + "]" - tracks = doc.getElementsByTagName("track") - track_nr = 1 - for i in tracks: - artist=album_name=song_title=url="" - try: - song_id = i.getElementsByTagName("song_id")[0].firstChild.nodeValue - artist = i.getElementsByTagName("artist")[0].firstChild.nodeValue - album_name = i.getElementsByTagName("album_name")[0].firstChild.nodeValue - song_title = i.getElementsByTagName("title")[0].firstChild.nodeValue - url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue) - except: - log.e("Song %s failed. 
[Info Missing] artist:%s, album:%s, title:%s, url:%s" % (song_id, artist, album_name, song_title, url)) - continue - try: - lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue - except: - pass - type_, ext, size = url_info(url, headers=fake_headers) - if not ext: - ext = 'mp3' - - print_info(site_info, song_title, ext, size) - if not info_only: - file_name = "%02d.%s - %s - %s" % (track_nr, song_title, artist, album_name) - download_urls([url], file_name, ext, size, output_dir, headers=fake_headers) - try: - xiami_download_lyric(lrc_url, file_name, output_dir) - except: - pass - - track_nr += 1 - -def xiami_download_album(aid, output_dir='.', info_only=False): - xml = get_content('http://www.xiami.com/song/playlist/id/%s/type/1' % aid, headers=fake_headers) - album_name = r1(r'<album_name><!\[CDATA\[(.*)\]\]>', xml) - artist = r1(r'<artist><!\[CDATA\[(.*)\]\]>', xml) - doc = parseString(xml) - output_dir = output_dir + "/%s - %s" % (artist, album_name) - track_list = doc.getElementsByTagName('trackList')[0] - tracks = track_list.getElementsByTagName("track") - track_nr = 1 - pic_exist = False - for i in tracks: -#in this xml track tag is used for both "track in a trackList" and track no -#dirty here - if i.firstChild.nodeValue is not None: - continue - song_title = i.getElementsByTagName("songName")[0].firstChild.nodeValue - url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue) - try: - lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue - except: - pass - if not pic_exist: - pic_url = i.getElementsByTagName("pic")[0].firstChild.nodeValue - type_, ext, size = url_info(url, headers=fake_headers) - if not ext: - ext = 'mp3' - - print_info(site_info, song_title, ext, size) - if not info_only: - file_name = "%02d.%s" % (track_nr, song_title) - download_urls([url], file_name, ext, size, output_dir, headers=fake_headers) - try: - xiami_download_lyric(lrc_url, file_name, output_dir) - except: - pass - if not pic_exist: - xiami_download_pic(pic_url, 'cover', output_dir) - pic_exist = True - - track_nr += 1 - -def xiami_download_mv(url, output_dir='.', merge=True, info_only=False): - # FIXME: broken merge - page = get_content(url, headers=fake_headers) - title = re.findall('<title>([^<]+)', page)[0] - vid, uid = re.findall(r'vid:"(\d+)",uid:"(\d+)"', page)[0] - api_url = 'http://cloud.video.taobao.com/videoapi/info.php?vid=%s&uid=%s' % (vid, uid) - result = get_content(api_url, headers=fake_headers) - doc = parseString(result) - video_url = doc.getElementsByTagName("video_url")[-1].firstChild.nodeValue - length = int(doc.getElementsByTagName("length")[-1].firstChild.nodeValue) - - v_urls = [] - k_start = 0 - total_size = 0 - while True: - k_end = k_start + 20000000 - if k_end >= length: k_end = length - 1 - v_url = video_url + '/start_%s/end_%s/1.flv' % (k_start, k_end) - try: - _, ext, size = url_info(v_url) - except: - break - v_urls.append(v_url) - total_size += size - k_start = k_end + 1 - - print_info(site_info, title, ext, total_size) - if not info_only: - download_urls(v_urls, title, ext, total_size, output_dir, merge=merge, headers=fake_headers) - -def xiami_download(url, output_dir='.', merge=True, info_only=False, **kwargs): -#albums - if re.match(r'http://www.xiami.com/album/\d+', url): - id = r1(r'http://www.xiami.com/album/(\d+)', url) - xiami_download_album(id, output_dir, info_only) - elif re.match(r'http://www.xiami.com/album/\w+', url): - page = get_content(url, headers=fake_headers) - album_id = 
re.search(r'rel="canonical"\s+href="http://www.xiami.com/album/([^"]+)"', page).group(1) - xiami_download_album(album_id, output_dir, info_only) - -#collections - if re.match(r'http://www.xiami.com/collect/\d+', url): - id = r1(r'http://www.xiami.com/collect/(\d+)', url) - xiami_download_showcollect(id, output_dir, info_only) - -#single track - if re.match(r'http://www.xiami.com/song/\d+\b', url): - id = r1(r'http://www.xiami.com/song/(\d+)', url) - xiami_download_song(id, output_dir, info_only) - elif re.match(r'http://www.xiami.com/song/\w+', url): - html = get_content(url, headers=fake_headers) - id = r1(r'rel="canonical" href="http://www.xiami.com/song/([^"]+)"', html) - xiami_download_song(id, output_dir, info_only) - - if re.match('http://www.xiami.com/song/detail/id/\d+', url): - id = r1(r'http://www.xiami.com/song/detail/id/(\d+)', url) - xiami_download_song(id, output_dir, info_only) - - if re.match('http://www.xiami.com/mv', url): - xiami_download_mv(url, output_dir, merge=merge, info_only=info_only) - -site_info = "Xiami.com" -download = xiami_download -download_playlist = playlist_not_supported("xiami") diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/src/you_get/extractors/youku.py new/you-get-0.4.1520/src/you_get/extractors/youku.py --- old/you-get-0.4.1500/src/you_get/extractors/youku.py 2020-12-26 19:21:43.000000000 +0100 +++ new/you-get-0.4.1520/src/you_get/extractors/youku.py 2021-03-29 16:44:07.000000000 +0200 @@ -77,7 +77,7 @@ self.api_error_code = None self.api_error_msg = None - self.ccode = '0590' + self.ccode = '0532' # Found in http://g.alicdn.com/player/ykplayer/0.5.64/youku-player.min.js # grep -oE '"[0-9a-zA-Z+/=]{256}"' youku-player.min.js self.ckey = 'DIl58SLFxFNndSV1GFNnMQVYkx1PP5tKe1siZu/86PR1u/Wh1Ptd+WOZsHHWxysSfAOhNJpdVWsdVJNsfJ8Sxd8WKVvNfAS8aS8fAOzYARzPyPc3JvtnPHjTdKfESTdnuTW6ZPvk2pNDh4uFzotgdMEFkzQ5wZVXl2Pf1/Y6hLK0OnCNxBj3+nb0v72gZ6b0td+WOZsHHWxysSo/0y9D2K42SaB8Y/+aD2K42SaB8Y/+ahU+WOZsHcrxysooUeND' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/src/you_get/extractors/youtube.py new/you-get-0.4.1520/src/you_get/extractors/youtube.py --- old/you-get-0.4.1500/src/you_get/extractors/youtube.py 2020-12-26 19:21:43.000000000 +0100 +++ new/you-get-0.4.1520/src/you_get/extractors/youtube.py 2021-03-29 16:44:07.000000000 +0200 @@ -157,7 +157,12 @@ log.wtf('[Failed] Unsupported URL pattern.') video_page = get_content('https://www.youtube.com/playlist?list=%s' % playlist_id) - ytInitialData = json.loads(match1(video_page, r'window\["ytInitialData"\]\s*=\s*(.+);')) + playlist_json_serialized = match1(video_page, r'window\["ytInitialData"\]\s*=\s*(.+);', r'var\s+ytInitialData\s*=\s*([^;]+);') + + if len(playlist_json_serialized) == 0: + log.wtf('[Failed] Unable to extract playlist data') + + ytInitialData = json.loads(playlist_json_serialized[0]) tab0 = ytInitialData['contents']['twoColumnBrowseResultsRenderer']['tabs'][0] itemSection0 = tab0['tabRenderer']['content']['sectionListRenderer']['contents'][0] @@ -353,7 +358,10 @@ # Prepare caption tracks try: - caption_tracks = json.loads(ytplayer_config['args']['player_response'])['captions']['playerCaptionsTracklistRenderer']['captionTracks'] + try: + caption_tracks = json.loads(ytplayer_config['args']['player_response'])['captions']['playerCaptionsTracklistRenderer']['captionTracks'] + except: + caption_tracks = 
ytInitialPlayerResponse['captions']['playerCaptionsTracklistRenderer']['captionTracks'] for ct in caption_tracks: ttsurl, lang = ct['baseUrl'], ct['languageCode'] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/src/you_get/version.py new/you-get-0.4.1520/src/you_get/version.py --- old/you-get-0.4.1500/src/you_get/version.py 2020-12-26 19:21:43.000000000 +0100 +++ new/you-get-0.4.1520/src/you_get/version.py 2021-03-29 16:44:07.000000000 +0200 @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1500' +__version__ = '0.4.1520' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1500/tests/test.py new/you-get-0.4.1520/tests/test.py --- old/you-get-0.4.1500/tests/test.py 2020-12-26 19:21:43.000000000 +0100 +++ new/you-get-0.4.1520/tests/test.py 2021-03-29 16:44:07.000000000 +0200 @@ -40,14 +40,6 @@ def test_acfun(self): acfun.download('https://www.acfun.cn/v/ac11701912', info_only=True) - def test_bilibili(self): - bilibili.download( - "https://www.bilibili.com/watchlater/#/BV1PE411q7mZ/p6", info_only=True - ) - bilibili.download( - "https://www.bilibili.com/watchlater/#/av74906671/p6", info_only=True - ) - def test_soundcloud(self): ## single song soundcloud.download(
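
Note on the bilibili.py hunk above: replacing integer floor division with math.ceil fixes an off-by-one in the page count whenever the total number of videos is not an exact multiple of the page size. A minimal standalone sketch of the arithmetic (the counts below are made up for illustration, not taken from the Bilibili API):

    import math

    count, page_size = 25, 10             # hypothetical: 25 videos, 10 per page
    print(count // page_size)             # 2 -> floor division skips the last partial page
    print(math.ceil(count / page_size))   # 3 -> ceiling covers every page, as the new code does

The playlist options introduced in common.py (--first, --last and --size/--page-size) are passed down to extractors through extra['args']; a hypothetical invocation against the new lrts.me extractor (the book id below is made up) could look like:

    you-get --playlist --first 0 --last 3 --page-size 10 http://www.lrts.me/book/12345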