diff --git a/script.module.youtube.dl/addon.xml b/script.module.youtube.dl/addon.xml index 565b80141..0bbbef4ae 100644 --- a/script.module.youtube.dl/addon.xml +++ b/script.module.youtube.dl/addon.xml @@ -1,5 +1,5 @@ - + diff --git a/script.module.youtube.dl/lib/YDStreamExtractor.py b/script.module.youtube.dl/lib/YDStreamExtractor.py index 11e198570..6a50860b8 100644 --- a/script.module.youtube.dl/lib/YDStreamExtractor.py +++ b/script.module.youtube.dl/lib/YDStreamExtractor.py @@ -189,7 +189,7 @@ def _getYoutubeDLVideo(url, quality=None, resolve_redirects=False): try: url = resolve_http_redirect(url) except Exception: - util.ERROR('_getYoutubeDLVideo(): Failed to resolve URL') + util.ERROR('_getYoutubeDLVideo(): Failed to resolve URL'+' '+url) return None ytdl = YoutubeDLWrapper._getYTDL() ytdl.clearDownloadParams() diff --git a/script.module.youtube.dl/lib/youtube_dl/YoutubeDL.py b/script.module.youtube.dl/lib/youtube_dl/YoutubeDL.py index 8367b6e53..9c5e8e69f 100644 --- a/script.module.youtube.dl/lib/youtube_dl/YoutubeDL.py +++ b/script.module.youtube.dl/lib/youtube_dl/YoutubeDL.py @@ -357,7 +357,7 @@ class YoutubeDL(object): _NUMERIC_FIELDS = set(( 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx', - 'timestamp', 'upload_year', 'upload_month', 'upload_day', + 'timestamp', 'upload_year', 'upload_month', 'upload_day', 'available_at', 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count', 'average_rating', 'comment_count', 'age_limit', 'start_time', 'end_time', @@ -681,8 +681,7 @@ def write_debug(self, message, only_once=False): message = '[debug] {0}'.format(message) if self.params.get('logger'): self.params['logger'].debug(message) - else: - self.to_stderr(message, only_once) + def report_unscoped_cookies(self, *args, **kwargs): # message=None, tb=False, is_error=False @@ -2404,60 +2403,52 @@ def format_resolution(format, default='unknown'): return res def _format_note(self, fdict): - res = '' - if fdict.get('ext') in ['f4f', 'f4m']: - res += '(unsupported) ' - if fdict.get('language'): - if res: - res += ' ' - res += '[%s] ' % fdict['language'] - if fdict.get('format_note') is not None: - res += fdict['format_note'] + ' ' - if fdict.get('tbr') is not None: - res += '%4dk ' % fdict['tbr'] + + def simplified_codec(f, field): + assert field in ('acodec', 'vcodec') + codec = f.get(field) + return ( + 'unknown' if not codec + else '.'.join(codec.split('.')[:4]) if codec != 'none' + else 'images' if field == 'vcodec' and f.get('acodec') == 'none' + else None if field == 'acodec' and f.get('vcodec') == 'none' + else 'audio only' if field == 'vcodec' + else 'video only') + + res = join_nonempty( + fdict.get('ext') in ('f4f', 'f4m') and '(unsupported)', + fdict.get('language') and ('[%s]' % (fdict['language'],)), + fdict.get('format_note') is not None and fdict['format_note'], + fdict.get('tbr') is not None and ('%4dk' % fdict['tbr']), + delim=' ') + res = [res] if res else [] if fdict.get('container') is not None: - if res: - res += ', ' - res += '%s container' % fdict['container'] - if (fdict.get('vcodec') is not None - and fdict.get('vcodec') != 'none'): - if res: - res += ', ' - res += fdict['vcodec'] - if fdict.get('vbr') is not None: - res += '@' + res.append('%s container' % (fdict['container'],)) + if fdict.get('vcodec') not in (None, 'none'): + codec = simplified_codec(fdict, 'vcodec') + if codec and fdict.get('vbr') is not None: + codec += '@' elif fdict.get('vbr') is not None and fdict.get('abr') is not None: - res += 'video@' - if 
fdict.get('vbr') is not None: - res += '%4dk' % fdict['vbr'] + codec = 'video@' + else: + codec = None + codec = join_nonempty(codec, fdict.get('vbr') is not None and ('%4dk' % fdict['vbr'])) + if codec: + res.append(codec) if fdict.get('fps') is not None: - if res: - res += ', ' - res += '%sfps' % fdict['fps'] - if fdict.get('acodec') is not None: - if res: - res += ', ' - if fdict['acodec'] == 'none': - res += 'video only' - else: - res += '%-5s' % fdict['acodec'] - elif fdict.get('abr') is not None: - if res: - res += ', ' - res += 'audio' - if fdict.get('abr') is not None: - res += '@%3dk' % fdict['abr'] - if fdict.get('asr') is not None: - res += ' (%5dHz)' % fdict['asr'] + res.append('%sfps' % (fdict['fps'],)) + codec = ( + simplified_codec(fdict, 'acodec') if fdict.get('acodec') is not None + else 'audio' if fdict.get('abr') is not None else None) + if codec: + res.append(join_nonempty( + '%-4s' % (codec + (('@%3dk' % fdict['abr']) if fdict.get('abr') else ''),), + fdict.get('asr') and '(%5dHz)' % fdict['asr'], delim=' ')) if fdict.get('filesize') is not None: - if res: - res += ', ' - res += format_bytes(fdict['filesize']) + res.append(format_bytes(fdict['filesize'])) elif fdict.get('filesize_approx') is not None: - if res: - res += ', ' - res += '~' + format_bytes(fdict['filesize_approx']) - return res + res.append('~' + format_bytes(fdict['filesize_approx'])) + return ', '.join(res) def list_formats(self, info_dict): formats = info_dict.get('formats', [info_dict]) diff --git a/script.module.youtube.dl/lib/youtube_dl/__init__.py b/script.module.youtube.dl/lib/youtube_dl/__init__.py index 3c1272e7b..202f2c9b9 100644 --- a/script.module.youtube.dl/lib/youtube_dl/__init__.py +++ b/script.module.youtube.dl/lib/youtube_dl/__init__.py @@ -409,6 +409,8 @@ def parse_retries(retries): 'include_ads': opts.include_ads, 'default_search': opts.default_search, 'youtube_include_dash_manifest': opts.youtube_include_dash_manifest, + 'youtube_player_js_version': opts.youtube_player_js_version, + 'youtube_player_js_variant': opts.youtube_player_js_variant, 'encoding': opts.encoding, 'extract_flat': opts.extract_flat, 'mark_watched': opts.mark_watched, diff --git a/script.module.youtube.dl/lib/youtube_dl/compat.py b/script.module.youtube.dl/lib/youtube_dl/compat.py index ebe22bdf9..a985cb03e 100644 --- a/script.module.youtube.dl/lib/youtube_dl/compat.py +++ b/script.module.youtube.dl/lib/youtube_dl/compat.py @@ -55,7 +55,7 @@ try: import collections.abc as compat_collections_abc except ImportError: - import collections as compat_collections_abc + compat_collections_abc = collections # compat_urllib_request @@ -3452,6 +3452,8 @@ def unpack(self, string): except ImportError: compat_map = map + +# compat_filter, compat_filter_fns try: from future_builtins import filter as compat_filter except ImportError: @@ -3459,6 +3461,9 @@ def unpack(self, string): from itertools import ifilter as compat_filter except ImportError: compat_filter = filter +# "Is this function one or maybe the other filter()?" 
+compat_filter_fns = tuple(set((filter, compat_filter))) + # compat_zip try: @@ -3478,6 +3483,40 @@ def unpack(self, string): from itertools import izip_longest as compat_itertools_zip_longest +# compat_abc_ABC +try: + from abc import ABC as compat_abc_ABC +except ImportError: + # Py < 3.4 + from abc import ABCMeta as _ABCMeta + compat_abc_ABC = _ABCMeta(str('ABC'), (object,), {}) + + +# dict mixin used here +# like UserDict.DictMixin, without methods created by MutableMapping +class _DictMixin(compat_abc_ABC): + def has_key(self, key): + return key in self + + # get(), clear(), setdefault() in MM + + def iterkeys(self): + return (k for k in self) + + def itervalues(self): + return (self[k] for k in self) + + def iteritems(self): + return ((k, self[k]) for k in self) + + # pop(), popitem() in MM + + def copy(self): + return type(self)(self) + + # update() in MM + + # compat_collections_chain_map # collections.ChainMap: new class try: @@ -3632,6 +3671,129 @@ def compat_datetime_timedelta_total_seconds(td): compat_zstandard = None +# compat_thread +try: + import _thread as compat_thread +except ImportError: + try: + import thread as compat_thread + except ImportError: + import dummy_thread as compat_thread + + +# compat_dict +# compat_builtins_dict +# compat_dict_items +if sys.version_info >= (3, 6): + compat_dict = compat_builtins_dict = dict + compat_dict_items = dict.items +else: + _get_ident = compat_thread.get_ident + + class compat_dict(compat_collections_abc.MutableMapping, _DictMixin, dict): + """`dict` that preserves insertion order with interface like Py3.7+""" + + _order = [] # default that should never be used + + def __init__(self, *mappings_or_iterables, **kwargs): + # order an unordered dict using a list of keys: actual Py 2.7+ + # OrderedDict uses a doubly linked list for better performance + self._order = [] + for arg in mappings_or_iterables: + self.__update(arg) + if kwargs: + self.__update(kwargs) + + def __getitem__(self, key): + return dict.__getitem__(self, key) + + def __setitem__(self, key, value): + try: + if key not in self._order: + self._order.append(key) + dict.__setitem__(self, key, value) + except Exception: + if key in self._order[-1:] and key not in self: + del self._order[-1] + raise + + def __len__(self): + return dict.__len__(self) + + def __delitem__(self, key): + dict.__delitem__(self, key) + try: + # expected case, O(len(self)), but who dels anyway? + self._order.remove(key) + except ValueError: + pass + + def __iter__(self): + for from_ in self._order: + if from_ in self: + yield from_ + + def __del__(self): + for attr in ('_order',): + try: + delattr(self, attr) + except Exception: + pass + + def __repr__(self, _repr_running={}): + # skip recursive items ... + call_key = id(self), _get_ident() + if _repr_running.get(call_key): + return '...' 
+ _repr_running[call_key] = True + try: + return '%s({%s})' % ( + type(self).__name__, + ','.join('%r: %r' % k_v for k_v in self.items())) + finally: + del _repr_running[call_key] + + # merge/update (PEP 584) + + def __or__(self, other): + if not isinstance(other, compat_collections_abc.Mapping): + return NotImplemented + new = type(self)(self) + new.update(other) + return new + + def __ror__(self, other): + if not isinstance(other, compat_collections_abc.Mapping): + return NotImplemented + new = type(other)(other) + new.update(self) + return new + + def __ior__(self, other): + self.update(other) + return self + + # optimisations + + def __reversed__(self): + for from_ in reversed(self._order): + if from_ in self: + yield from_ + + def __contains__(self, item): + return dict.__contains__(self, item) + + # allow overriding update without breaking __init__ + def __update(self, *args, **kwargs): + super(compat_dict, self).update(*args, **kwargs) + + compat_builtins_dict = dict + # Using the object's method, not dict's: + # an ordered dict's items can be returned unstably by unordered + # dict.items as if the method was not ((k, self[k]) for k in self) + compat_dict_items = lambda d: d.items() + + legacy = [ 'compat_HTMLParseError', 'compat_HTMLParser', @@ -3662,9 +3824,11 @@ def compat_datetime_timedelta_total_seconds(td): __all__ = [ 'compat_Struct', + 'compat_abc_ABC', 'compat_base64_b64decode', 'compat_basestring', 'compat_brotli', + 'compat_builtins_dict', 'compat_casefold', 'compat_chr', 'compat_collections_abc', @@ -3672,9 +3836,12 @@ def compat_datetime_timedelta_total_seconds(td): 'compat_contextlib_suppress', 'compat_ctypes_WINFUNCTYPE', 'compat_datetime_timedelta_total_seconds', + 'compat_dict', + 'compat_dict_items', 'compat_etree_fromstring', 'compat_etree_iterfind', 'compat_filter', + 'compat_filter_fns', 'compat_get_terminal_size', 'compat_getenv', 'compat_getpass_getpass', @@ -3716,6 +3883,7 @@ def compat_datetime_timedelta_total_seconds(td): 'compat_struct_unpack', 'compat_subprocess_get_DEVNULL', 'compat_subprocess_Popen', + 'compat_thread', 'compat_tokenize_tokenize', 'compat_urllib_error', 'compat_urllib_parse', diff --git a/script.module.youtube.dl/lib/youtube_dl/downloader/common.py b/script.module.youtube.dl/lib/youtube_dl/downloader/common.py index 91e691776..8354030a9 100644 --- a/script.module.youtube.dl/lib/youtube_dl/downloader/common.py +++ b/script.module.youtube.dl/lib/youtube_dl/downloader/common.py @@ -11,6 +11,7 @@ decodeArgument, encodeFilename, error_to_compat_str, + float_or_none, format_bytes, shell_quote, timeconvert, @@ -367,14 +368,27 @@ def download(self, filename, info_dict): }) return True - min_sleep_interval = self.params.get('sleep_interval') - if min_sleep_interval: - max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) - sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) + min_sleep_interval, max_sleep_interval = ( + float_or_none(self.params.get(interval), default=0) + for interval in ('sleep_interval', 'max_sleep_interval')) + + sleep_note = '' + available_at = info_dict.get('available_at') + if available_at: + forced_sleep_interval = available_at - int(time.time()) + if forced_sleep_interval > min_sleep_interval: + sleep_note = 'as required by the site' + min_sleep_interval = forced_sleep_interval + if forced_sleep_interval > max_sleep_interval: + max_sleep_interval = forced_sleep_interval + + sleep_interval = random.uniform( + min_sleep_interval, max_sleep_interval or min_sleep_interval) + + if 
sleep_interval > 0: self.to_screen( - '[download] Sleeping %s seconds...' % ( - int(sleep_interval) if sleep_interval.is_integer() - else '%.2f' % sleep_interval)) + '[download] Sleeping %.2f seconds %s...' % ( + sleep_interval, sleep_note)) time.sleep(sleep_interval) return self.real_download(filename, info_dict) diff --git a/script.module.youtube.dl/lib/youtube_dl/extractor/common.py b/script.module.youtube.dl/lib/youtube_dl/extractor/common.py index a64fcfccc..9723da530 100644 --- a/script.module.youtube.dl/lib/youtube_dl/extractor/common.py +++ b/script.module.youtube.dl/lib/youtube_dl/extractor/common.py @@ -214,6 +214,7 @@ class InfoExtractor(object): width : height ratio as float. * no_resume The server does not support resuming the (HTTP or RTMP) download. Boolean. + * available_at Unix timestamp of when a format will be available to download * downloader_options A dictionary of downloader options as described in FileDownloader @@ -969,8 +970,6 @@ def report_warning(self, msg, *args, **kwargs): else: video_id = kwargs.pop('video_id', None) idstr = '' if video_id is None else '%s: ' % video_id - self._downloader.report_warning( - self.__ie_msg(idstr, msg), *args, **kwargs) def to_screen(self, msg): """Print msg to screen, prefixing it with '[ie_name]'""" diff --git a/script.module.youtube.dl/lib/youtube_dl/extractor/youtube.py b/script.module.youtube.dl/lib/youtube_dl/extractor/youtube.py index b31798729..81a019143 100644 --- a/script.module.youtube.dl/lib/youtube_dl/extractor/youtube.py +++ b/script.module.youtube.dl/lib/youtube_dl/extractor/youtube.py @@ -1,5 +1,4 @@ # coding: utf-8 - from __future__ import unicode_literals import collections @@ -18,6 +17,8 @@ compat_chr, compat_HTTPError, compat_map as map, + compat_dict as o_dict, + compat_dict_items as dict_items, compat_str, compat_urllib_parse, compat_urllib_parse_parse_qs as compat_parse_qs, @@ -87,8 +88,24 @@ class YoutubeBaseInfoExtractor(InfoExtractor): _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM)' - _INNERTUBE_CLIENTS = { - 'ios': { + # priority order for now + _INNERTUBE_CLIENTS = o_dict(( + # Doesn't require a PoToken for some reason: thx yt-dlp/yt-dlp#14693 + ('android_sdkless', { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'ANDROID', + 'clientVersion': '20.10.38', + 'userAgent': 'com.google.android.youtube/20.10.38 (Linux; U; Android 11) gzip', + 'osName': 'Android', + 'osVersion': '11', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 3, + 'REQUIRE_JS_PLAYER': False, + 'WITH_COOKIES': False, + }), + ('ios', { 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'IOS', @@ -101,47 +118,95 @@ class YoutubeBaseInfoExtractor(InfoExtractor): }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 5, - 'REQUIRE_JS_PLAYER': False, 'REQUIRE_PO_TOKEN': True, - }, + 'REQUIRE_JS_PLAYER': False, + 'WITH_COOKIES': False, + }), # mweb has 'ultralow' formats # See: https://github.com/yt-dlp/yt-dlp/pull/557 - 'mweb': { + ('mweb', { 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'MWEB', - 'clientVersion': '2.20250311.03.00', + 'clientVersion': '2.2.20250925.01.00', # mweb previously did not require PO Token with this UA 'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 2, 'REQUIRE_PO_TOKEN': True, - 'SUPPORTS_COOKIES': True, - }, - 'tv': { + }), + ('tv_downgraded', { 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'TVHTML5', - 'clientVersion': 
'7.20250312.16.00', + 'clientVersion': '4', # avoids SABR formats, thx yt-dlp/yt-dlp#14887 'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 7, 'SUPPORTS_COOKIES': True, - }, - 'web': { + }), + ('tv', { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'TVHTML5', + 'clientVersion': '7.20250312.16.00', + # See: https://github.com/youtube/cobalt/blob/main/cobalt/browser/user_agent/user_agent_platform_info.cc#L506 + 'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/25.lts.30.1034943-gold (unlike Gecko), Unknown_TV_Unknown_0/Unknown (Unknown, Unknown)', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 7, + }), + ('web', { 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'WEB', - 'clientVersion': '2.20250312.04.00', + 'clientVersion': '2.20250925.01.00', + 'userAgent': 'Mozilla/5.0', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 1, 'REQUIRE_PO_TOKEN': True, + }), + ('web_embedded', { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'WEB_EMBEDDED_PLAYER', + 'clientVersion': '1.20250923.21.00', + 'embedUrl': 'https://www.youtube.com/', # Can be any valid URL + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 56, 'SUPPORTS_COOKIES': True, - }, - } + }), + # Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats + ('web_safari', { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'WEB', + 'clientVersion': '2.20250925.01.00', + 'userAgent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15,gzip(gfe)', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 1, + 'SUPPORTS_COOKIES': True, + 'REQUIRE_PO': True, + }), + # This client now requires sign-in for every video + ('web_creator', { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'WEB_CREATOR', + 'clientVersion': '1.20250922.03.00', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 62, + 'REQUIRE_AUTH': True, + 'SUPPORTS_COOKIES': True, + 'WITH_COOKIES': True, + }), + )) def _login(self): """ @@ -418,11 +483,22 @@ def _extract_visitor_data(self, *args): ('responseContext', 'visitorData')), T(compat_str))) + # @functools.cached_property + def is_authenticated(self, _cache={}): + if self not in _cache: + _cache[self] = bool(self._generate_sapisidhash_header()) + return _cache[self] + def _extract_ytcfg(self, video_id, webpage): - return self._parse_json( - self._search_regex( - r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', - default='{}'), video_id, fatal=False) or {} + ytcfg = self._search_json( + r'ytcfg\.set\s*\(', webpage, 'ytcfg', video_id, + end_pattern=r'\)\s*;', default={}) + + traverse_obj(ytcfg, ( + 'INNERTUBE_CONTEXT', 'client', 'configInfo', + T(lambda x: x.pop('appInstallData', None)))) + + return ytcfg def _extract_video(self, renderer): video_id = renderer['videoId'] @@ -457,10 +533,32 @@ def _extract_video(self, renderer): 'uploader': uploader, } + @staticmethod + def _get_text(data, *path_list, **kw_max_runs): + max_runs = kw_max_runs.get('max_runs') + + for path in path_list or [None]: + if path is None: + obj = [data] # shortcut + else: + obj = traverse_obj(data, tuple(variadic(path) + (all,))) + for runs in traverse_obj( + obj, ('simpleText', {'text': T(compat_str)}, all, filter), + ('runs', lambda _, r: isinstance(r.get('text'), compat_str), all, filter), + (T(list), lambda _, r: isinstance(r.get('text'), compat_str)), + default=[]): + max_runs = int_or_none(max_runs, default=len(runs)) + if max_runs < len(runs): + runs = runs[:max_runs] + text = ''.join(traverse_obj(runs, 
(Ellipsis, 'text')))
+                if text:
+                    return text
+
     @staticmethod
     def _extract_thumbnails(data, *path_list, **kw_final_key):
         """
         Extract thumbnails from thumbnails dict
+        @param path_list: path list to level that contains 'thumbnails' key
         """
         final_key = kw_final_key.get('final_key', 'thumbnails')
@@ -503,34 +601,26 @@ def _search_results(self, query, params):
                 headers={'content-type': 'application/json'})
             if not search:
                 break
-            slr_contents = try_get(
+            slr_contents = traverse_obj(
                 search,
-                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
-                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
-                list)
+                ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents',
+                 'sectionListRenderer', 'contents'),
+                ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction',
+                 'continuationItems'),
+                expected_type=list)
             if not slr_contents:
                 break
-            for slr_content in slr_contents:
-                isr_contents = try_get(
-                    slr_content,
-                    lambda x: x['itemSectionRenderer']['contents'],
-                    list)
-                if not isr_contents:
-                    continue
-                for content in isr_contents:
-                    if not isinstance(content, dict):
-                        continue
-                    video = content.get('videoRenderer')
-                    if not isinstance(video, dict):
-                        continue
-                    video_id = video.get('videoId')
-                    if not video_id:
-                        continue
-                    yield self._extract_video(video)
-            token = try_get(
+            for video in traverse_obj(
+                    slr_contents,
+                    (Ellipsis, 'itemSectionRenderer', 'contents',
+                     Ellipsis, 'videoRenderer',
+                     T(lambda v: v if v.get('videoId') else None))):
+                yield self._extract_video(video)
+
+            token = traverse_obj(
                 slr_contents,
-                lambda x: x[-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
-                compat_str)
+                (-1, 'continuationItemRenderer', 'continuationEndpoint',
+                 'continuationCommand', 'token', T(compat_str)))
             if not token:
                 break
             data['continuation'] = token
@@ -694,7 +784,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias(?:_tce)?\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
         r'\b(?P<id>vfl[a-zA-Z0-9_-]{6,})\b.*?\.js$',
     )
-    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
+    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'srt', 'vtt')

     _GEO_BYPASS = False
@@ -1579,15 +1669,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         '_rtmp': {'protocol': 'rtmp'},

         # av01 video only formats sometimes served with "unknown" codecs
-        '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
-        '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
-        '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
-        '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
+        '394': {'acodec': 'none', 'vcodec': 'av01.0.00M.08'},
+        '395': {'acodec': 'none', 'vcodec': 'av01.0.00M.08'},
+        '396': {'acodec': 'none', 'vcodec': 'av01.0.01M.08'},
+        '397': {'acodec': 'none', 'vcodec': 'av01.0.04M.08'},
+        '398': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
+        '399': {'acodec': 'none', 'vcodec': 'av01.0.08M.08'},
     }

     _PLAYER_JS_VARIANT_MAP = (
         ('main', 'player_ias.vflset/en_US/base.js'),
+        ('tcc', 'player_ias_tcc.vflset/en_US/base.js'),
         ('tce', 'player_ias_tce.vflset/en_US/base.js'),
+        ('es5', 'player_es5.vflset/en_US/base.js'),
+        ('es6', 'player_es6.vflset/en_US/base.js'),
         ('tv', 'tv-player-ias.vflset/tv-player-ias.js'),
         ('tv_es6', 'tv-player-es6.vflset/tv-player-es6.js'),
         ('phone', 'player-plasma-ias-phone-en_US.vflset/base.js'),
@@ -1605,6 +1700,20 @@ def 
__init__(self, *args, **kwargs): self._code_cache = {} self._player_cache = {} + def _get_player_js_version(self): + player_js_version = self.get_param('youtube_player_js_version') + if player_js_version: + sts_hash = self._search_regex( + ('^actual$(^)?(^)?', r'^([0-9]{5,})@([0-9a-f]{8,})$'), + player_js_version, 'player_js_version', group=(1, 2), default=None) + if sts_hash: + return sts_hash + self.report_warning( + 'Invalid player JS version "{0}" specified. ' + 'It should be "{1}" or in the format of {2}'.format( + player_js_version, 'actual', 'SignatureTimeStamp@Hash'), only_once=True) + return None, None + # *ytcfgs, webpage=None def _extract_player_url(self, *ytcfgs, **kw_webpage): if ytcfgs and not isinstance(ytcfgs[0], dict): @@ -1615,9 +1724,34 @@ def _extract_player_url(self, *ytcfgs, **kw_webpage): webpage or '', 'player URL', fatal=False) if player_url: ytcfgs = ytcfgs + ({'PLAYER_JS_URL': player_url},) - return traverse_obj( + player_url = traverse_obj( ytcfgs, (Ellipsis, 'PLAYER_JS_URL'), (Ellipsis, 'WEB_PLAYER_CONTEXT_CONFIGS', Ellipsis, 'jsUrl'), - get_all=False, expected_type=lambda u: urljoin('https://www.youtube.com', u)) + get_all=False, expected_type=self._yt_urljoin) + + requested_js_variant = self.get_param('youtube_player_js_variant') + variant_js = next( + (v for k, v in self._PLAYER_JS_VARIANT_MAP if k == requested_js_variant), + None) + if variant_js: + player_id_override = self._get_player_js_version()[1] + player_id = player_id_override or self._extract_player_info(player_url) + original_url = player_url + player_url = self._yt_urljoin( + '/s/player/{0}/{1}'.format(player_id, variant_js)) + if original_url != player_url: + self.write_debug( + 'Forcing "{0}" player JS variant for player {1}\n' + ' original url = {2}'.format( + requested_js_variant, player_id, original_url), + only_once=True) + elif requested_js_variant != 'actual': + self.report_warning( + 'Invalid player JS variant name "{0}" requested. ' + 'Valid choices are: {1}'.format( + requested_js_variant, ','.join(k for k, _ in self._PLAYER_JS_VARIANT_MAP)), + only_once=True) + + return player_url def _download_player_url(self, video_id, fatal=False): res = self._download_webpage( @@ -1625,9 +1759,8 @@ def _download_player_url(self, video_id, fatal=False): note='Downloading iframe API JS', video_id=video_id, fatal=fatal) player_version = self._search_regex( r'player\\?/([0-9a-fA-F]{8})\\?/', res or '', 'player version', fatal=fatal, - default=NO_DEFAULT if res else None) - if player_version: - return 'https://www.youtube.com/s/player/{0}/player_ias.vflset/en_US/base.js'.format(player_version) + default=NO_DEFAULT if res else None) or None + return player_version and 'https://www.youtube.com/s/player/{0}/player_ias.vflset/en_US/base.js'.format(player_version) def _signature_cache_id(self, example_sig): """ Return a string representation of a signature """ @@ -1998,8 +2131,7 @@ def decrypt_nsig(n): return self._cached(self._decrypt_nsig, 'nsig', n, player_url) for fmt in formats: - parsed_fmt_url = compat_urllib_parse.urlparse(fmt['url']) - n_param = compat_parse_qs(parsed_fmt_url.query).get('n') + n_param = parse_qs(fmt['url']).get('n') if not n_param: continue n_param = n_param[-1] @@ -2014,9 +2146,15 @@ def decrypt_nsig(n): def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False): """ Extract signatureTimestamp (sts) + Required to tell API what sig/player version is in use. 
""" - sts = traverse_obj(ytcfg, 'STS', expected_type=int) + sts = traverse_obj( + (self._get_player_js_version(), ytcfg), + (0, 0), + (1, 'STS'), + expected_type=int_or_none) + if sts: return sts @@ -2042,32 +2180,35 @@ def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=F return sts def _mark_watched(self, video_id, player_response): - playback_url = url_or_none(try_get( - player_response, - lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl'])) - if not playback_url: - return - # cpn generation algorithm is reverse engineered from base.js. # In fact it works even with dummy cpn. CPN_ALPHABET = string.ascii_letters + string.digits + '-_' cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)) - # more consistent results setting it to right before the end - qs = parse_qs(playback_url) - video_length = '{0}'.format(float((qs.get('len') or ['1.5'])[0]) - 1) + for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')): + label = 'fully ' if is_full > 0 else '' - playback_url = update_url_query( - playback_url, { - 'ver': '2', - 'cpn': cpn, - 'cmt': video_length, - 'el': 'detailpage', # otherwise defaults to "shorts" - }) + playback_url = traverse_obj(player_response, ( + 'playbackTracking'. key, 'baseUrl', T(url_or_none))) + if not playback_url: + self.report_warning('Unable to mark {0}watched'.format(label)) + continue + + # more consistent results setting it to right before the end + qs = parse_qs(playback_url) + video_length = '{0}'.format(float((qs.get('len') or ['1.5'])[0]) - 1) - self._download_webpage( - playback_url, video_id, 'Marking watched', - 'Unable to mark watched', fatal=False) + playback_url = update_url_query( + playback_url, { + 'ver': '2', + 'cpn': cpn, + 'cmt': video_length, + 'el': 'detailpage', # otherwise defaults to "shorts" + }) + + self._download_webpage( + playback_url, video_id, 'Marking {0}watched'.format(label), + 'Unable to mark watched', fatal=False) @staticmethod def _extract_urls(webpage): @@ -2113,7 +2254,8 @@ def extract_id(cls, url): raise ExtractorError('Invalid URL: %s' % url) return mobj.group(2) - def _extract_chapters_from_json(self, data, video_id, duration): + @staticmethod + def _extract_chapters_from_json(data, video_id, duration): chapters_list = try_get( data, lambda x: x['playerOverlays'] @@ -2158,13 +2300,60 @@ def _extract_yt_initial_variable(self, webpage, regex, video_id, name): (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE), regex), webpage, name, default='{}'), video_id, fatal=False) + def _get_preroll_length(self, ad_slot_lists): + + def parse_instream_ad_renderer(instream_renderer): + for skippable, path in ( + ('', ('skipOffsetMilliseconds', T(int))), + ('non-', ('playerVars', T(compat_parse_qs), + 'length_seconds', -1, T(int_or_none(invscale=1000))))): + length_ms = traverse_obj(instream_renderer, path) + if length_ms is not None: + self.write_debug('Detected a %ds %sskippable ad' % ( + length_ms // 1000, skippable)) + break + return length_ms + + for slot_renderer in traverse_obj(ad_slot_lists, ('adSlots', Ellipsis, 'adSlotRenderer', T(dict))): + if traverse_obj(slot_renderer, ('adSlotMetadata', 'triggerEvent')) != 'SLOT_TRIGGER_EVENT_BEFORE_CONTENT': + continue + rendering_content = traverse_obj(slot_renderer, ( + 'fulfillmentContent', 'fulfilledLayout', 'playerBytesAdLayoutRenderer', + 'renderingContent', 'instreamVideoAdRenderer', T(dict))) + length_ms = parse_instream_ad_renderer(rendering_content) + if length_ms is not None: + return 
length_ms + times = traverse_obj(rendering_content, (( + ('playerBytesSequentialLayoutRenderer', 'sequentialLayouts'), + None), any, Ellipsis, 'playerBytesAdLayoutRenderer', + 'renderingContent', 'instreamVideoAdRenderer', + T(parse_instream_ad_renderer))) + if times: + return sum(times) + return 0 + + def _is_premium_subscriber(self, initial_data): + if not self.is_authenticated or not initial_data: + return False + + tlr = traverse_obj( + initial_data, ('topbar', 'desktopTopbarRenderer', 'logo', 'topbarLogoRenderer')) + return ( + traverse_obj(tlr, ('iconImage', 'iconType')) == 'YOUTUBE_PREMIUM_LOGO' + or 'premium' in (self._get_text(tlr, 'tooltipText') or '').lower() + ) + def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) video_id = self._match_id(url) base_url = self.http_scheme() + '//www.youtube.com/' webpage_url = base_url + 'watch?v=' + video_id + ua = traverse_obj(self._INNERTUBE_CLIENTS, ( + 'web', 'INNERTUBE_CONTEXT', 'client', 'userAgent')) + headers = {'User-Agent': ua} if ua else None webpage = self._download_webpage( - webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False) + webpage_url + '&bpctr=9999999999&has_verified=1', video_id, + headers=headers, fatal=False) player_response = None player_url = None @@ -2174,37 +2363,43 @@ def _real_extract(self, url): video_id, 'initial player response') is_live = traverse_obj(player_response, ('videoDetails', 'isLive')) + fetched_timestamp = None if False and not player_response: player_response = self._call_api( 'player', {'videoId': video_id}, video_id) if True or not player_response: origin = 'https://www.youtube.com' pb_context = {'html5Preference': 'HTML5_PREF_WANTS'} - player_url = self._extract_player_url(webpage) ytcfg = self._extract_ytcfg(video_id, webpage or '') sts = self._extract_signature_timestamp(video_id, player_url, ytcfg) if sts: pb_context['signatureTimestamp'] = sts - client_names = traverse_obj(self._INNERTUBE_CLIENTS, ( - T(dict.items), lambda _, k_v: not k_v[1].get('REQUIRE_PO_TOKEN'), - 0))[:1] + auth = self._generate_sapisidhash_header(origin) + + client_names = [] + if auth or self._is_premium_subscriber(player_response): + client_names = traverse_obj(self._INNERTUBE_CLIENTS, ( + T(dict_items), lambda _, k_v: k_v[0] == 'web_safari', 0))[:1] + if not client_names: + client_names = traverse_obj(self._INNERTUBE_CLIENTS, ( + T(dict_items), lambda _, k_v: not ( + k_v[1].get('REQUIRE_PO_TOKEN') + or (bool(k_v[1].get('WITH_COOKIES', auth)) ^ bool(auth)) + ), 0))[:1] if 'web' not in client_names: - # webpage links won't download: ignore links and playability + # only live HLS webpage links will download: ignore playability player_response = filter_dict( player_response or {}, - lambda k, _: k not in ('streamingData', 'playabilityStatus')) - - if is_live and 'ios' not in client_names: - client_names.append('ios') + lambda k, _: k != 'playabilityStatus') headers = { 'Sec-Fetch-Mode': 'navigate', 'Origin': origin, 'X-Goog-Visitor-Id': self._extract_visitor_data(ytcfg) or '', } - auth = self._generate_sapisidhash_header(origin) + if auth is not None: headers['Authorization'] = auth headers['X-Origin'] = origin @@ -2234,7 +2429,7 @@ def _real_extract(self, url): 'INNERTUBE_CONTEXT', 'client', 'clientVersion'), 'User-Agent': ( 'INNERTUBE_CONTEXT', 'client', 'userAgent'), - })) + }) or {}) api_player_response = self._call_api( 'player', query, video_id, fatal=False, headers=api_headers, @@ -2243,27 +2438,32 @@ def _real_extract(self, url): 'context', 'client', 'clientName')), 'API 
JSON', delim=' ')) - hls = traverse_obj( + # be sure to find HLS in case of is_live + hls = traverse_obj(player_response, ( + 'streamingData', 'hlsManifestUrl', T(url_or_none))) + fetched_timestamp = int(time.time()) + preroll_length_ms = ( + self._get_preroll_length(api_player_response) + or self._get_preroll_length(player_response)) + video_details = merge_dicts(*traverse_obj( (player_response, api_player_response), - (Ellipsis, 'streamingData', 'hlsManifestUrl', T(url_or_none))) - if len(hls) == 2 and not hls[0] and hls[1]: - player_response['streamingData']['hlsManifestUrl'] = hls[1] - else: - video_details = merge_dicts(*traverse_obj( - (player_response, api_player_response), - (Ellipsis, 'videoDetails', T(dict)))) - player_response.update(filter_dict( - api_player_response or {}, cndn=lambda k, _: k != 'captions')) - player_response['videoDetails'] = video_details + (Ellipsis, 'videoDetails', T(dict)))) + player_response.update(filter_dict( + api_player_response or {}, cndn=lambda k, _: k != 'captions')) + player_response['videoDetails'] = video_details + if hls and not traverse_obj(player_response, ( + 'streamingData', 'hlsManifestUrl', T(url_or_none))): + player_response['streamingData']['hlsManifestUrl'] = hls def is_agegated(playability): - if not isinstance(playability, dict): - return + # playability: dict + if not playability: + return False if playability.get('desktopLegacyAgeGateReason'): return True - reasons = filter(None, (playability.get(r) for r in ('status', 'reason'))) + reasons = traverse_obj(playability, (('status', 'reason'),)) AGE_GATE_REASONS = ( 'confirm your age', 'age-restricted', 'inappropriate', # reason 'age_verification_required', 'age_check_required', # status @@ -2320,16 +2520,7 @@ def get_playability_status(response): return self.url_result( trailer_video_id, self.ie_key(), trailer_video_id) - def get_text(x): - if not x: - return - text = x.get('simpleText') - if text and isinstance(text, compat_str): - return text - runs = x.get('runs') - if not isinstance(runs, list): - return - return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)]) + get_text = lambda x: self._get_text(x) or '' search_meta = ( (lambda x: self._html_search_meta(x, webpage, default=None)) @@ -2407,11 +2598,19 @@ def build_fragments(f): return LazyList({ 'url': update_url_query(f['url'], { 'range': '{0}-{1}'.format(range_start, min(range_start + CHUNK_SIZE - 1, f['filesize'])), - }) + }), } for range_start in range(0, f['filesize'], CHUNK_SIZE)) lower = lambda s: s.lower() + if is_live: + fetched_timestamp = None + elif fetched_timestamp is not None: + # Handle preroll waiting period + preroll_sleep = self.get_param('youtube_preroll_sleep') + preroll_sleep = min(6, int_or_none(preroll_sleep, default=preroll_length_ms / 1000)) + fetched_timestamp += preroll_sleep + for fmt in streaming_formats: if fmt.get('targetDurationSec'): continue @@ -2455,6 +2654,10 @@ def build_fragments(f): self.write_debug(error_to_compat_str(e), only_once=True) continue + if parse_qs(fmt_url).get('n'): + # this and (we assume) all the formats here are n-scrambled + break + language_preference = ( 10 if audio_track.get('audioIsDefault') else -10 if 'descriptive' in (traverse_obj(audio_track, ('displayName', T(lower))) or '') @@ -2508,6 +2711,9 @@ def build_fragments(f): 'downloader_options': {'http_chunk_size': CHUNK_SIZE}, # No longer useful? 
}) + if fetched_timestamp: + dct['available_at'] = fetched_timestamp + formats.append(dct) def process_manifest_format(f, proto, client_name, itag, all_formats=False): @@ -2525,6 +2731,10 @@ def process_manifest_format(f, proto, client_name, itag, all_formats=False): if f.get('source_preference') is None: f['source_preference'] = -1 + # Deprioritize since its pre-merged m3u8 formats may have lower quality audio streams + if client_name == 'web_safari' and proto == 'hls' and not is_live: + f['source_preference'] -= 1 + if itag in ('616', '235'): f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ') f['source_preference'] += 100 @@ -2541,15 +2751,15 @@ def process_manifest_format(f, proto, client_name, itag, all_formats=False): hls_manifest_url = streaming_data.get('hlsManifestUrl') if hls_manifest_url: - for f in self._extract_m3u8_formats( + formats.extend( + f for f in self._extract_m3u8_formats( hls_manifest_url, video_id, 'mp4', - entry_protocol='m3u8_native', live=is_live, fatal=False): + entry_protocol='m3u8_native', live=is_live, fatal=False) if process_manifest_format( - f, 'hls', None, self._search_regex( - r'/itag/(\d+)', f['url'], 'itag', default=None)): - formats.append(f) + f, 'hls', None, self._search_regex( + r'/itag/(\d+)', f['url'], 'itag', default=None))) - if self._downloader.params.get('youtube_include_dash_manifest', True): + if self.get_param('youtube_include_dash_manifest', True): dash_manifest_url = streaming_data.get('dashManifestUrl') if dash_manifest_url: for f in self._extract_mpd_formats( @@ -2576,11 +2786,11 @@ def process_manifest_format(f, proto, client_name, itag, all_formats=False): playability_status, lambda x: x['errorScreen']['playerErrorMessageRenderer'], dict) or {} - reason = get_text(pemr.get('reason')) or playability_status.get('reason') + reason = get_text(pemr.get('reason')) or playability_status.get('reason') or '' subreason = pemr.get('subreason') if subreason: subreason = clean_html(get_text(subreason)) - if subreason == 'The uploader has not made this video available in your country.': + if subreason.startswith('The uploader has not made this video available in your country'): countries = microformat.get('availableCountries') if not countries: regions_allowed = search_meta('regionsAllowed') @@ -2588,7 +2798,12 @@ def process_manifest_format(f, proto, client_name, itag, all_formats=False): self.raise_geo_restricted( subreason, countries) reason += '\n' + subreason + if reason: + if 'sign in' in reason.lower(): + self.raise_login_required(remove_end(reason, 'This helps protect our community. Learn more')) + elif traverse_obj(playability_status, ('errorScreen', 'playerCaptchaViewModel', T(dict))): + reason += '. 
YouTube is requiring a captcha challenge before playback' raise ExtractorError(reason, expected=True) self._sort_formats(formats) @@ -2691,6 +2906,9 @@ def process_language(container, base_url, lang_code, query): for fmt in self._SUBTITLE_FORMATS: query.update({ 'fmt': fmt, + # xosf=1 causes undesirable text position data for vtt, json3 & srv* subtitles + # See: https://github.com/yt-dlp/yt-dlp/issues/13654 + 'xosf': [], }) lang_subs.append({ 'ext': fmt, @@ -2732,7 +2950,7 @@ def process_subtitles(): for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]: d_k += '_time' if d_k not in info and k in s_ks: - info[d_k] = parse_duration(query[k][0]) + info[d_k] = parse_duration(v[0]) if video_description: # Youtube Music Auto-generated description @@ -2761,28 +2979,26 @@ def process_subtitles(): initial_data = self._call_api( 'next', {'videoId': video_id}, video_id, fatal=False) + initial_sdcr = None if initial_data: chapters = self._extract_chapters_from_json( initial_data, video_id, duration) if not chapters: - for engagment_pannel in (initial_data.get('engagementPanels') or []): - contents = try_get( - engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'], - list) - if not contents: - continue + def chapter_time(mmlir): + return parse_duration( + get_text(mmlir.get('timeDescription'))) - def chapter_time(mmlir): - return parse_duration( - get_text(mmlir.get('timeDescription'))) + for markers in traverse_obj(initial_data, ( + 'engagementPanels', Ellipsis, 'engagementPanelSectionListRenderer', + 'content', 'macroMarkersListRenderer', 'contents', T(list))): chapters = [] - for next_num, content in enumerate(contents, start=1): + for next_num, content in enumerate(markers, start=1): mmlir = content.get('macroMarkersListItemRenderer') or {} start_time = chapter_time(mmlir) - end_time = chapter_time(try_get( - contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \ - if next_num < len(contents) else duration + end_time = (traverse_obj(markers, ( + next_num, 'macroMarkersListItemRenderer', T(chapter_time))) + if next_num < len(markers) else duration) if start_time is None or end_time is None: continue chapters.append({ @@ -2888,12 +3104,13 @@ def chapter_time(mmlir): info['track'] = mrr_contents_text # this is not extraction but spelunking! - carousel_lockups = traverse_obj( - initial_data, - ('engagementPanels', Ellipsis, 'engagementPanelSectionListRenderer', - 'content', 'structuredDescriptionContentRenderer', 'items', Ellipsis, - 'videoDescriptionMusicSectionRenderer', 'carouselLockups', Ellipsis), - expected_type=dict) or [] + initial_sdcr = traverse_obj(initial_data, ( + 'engagementPanels', Ellipsis, 'engagementPanelSectionListRenderer', + 'content', 'structuredDescriptionContentRenderer', T(dict)), + get_all=False) + carousel_lockups = traverse_obj(initial_sdcr, ( + 'items', Ellipsis, 'videoDescriptionMusicSectionRenderer', + 'carouselLockups', Ellipsis, T(dict))) or [] # try to reproduce logic from metadataRowContainerRenderer above (if it still is) fields = (('ALBUM', 'album'), ('ARTIST', 'artist'), ('SONG', 'track'), ('LICENSES', 'license')) # multiple_songs ? 
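
Note: many hunks in this file replace chained try_get lambdas with declarative traverse_obj paths, as in the carouselLockups and chapter extraction above. Below is a minimal, self-contained sketch of that pattern, assuming this add-on's bundled youtube_dl package is on sys.path; the input dict is a made-up miniature of initial_data:

    from youtube_dl.traversal import T, traverse_obj

    data = {'engagementPanels': [
        {'somethingElse': {}},
        {'engagementPanelSectionListRenderer': {
            'content': {'structuredDescriptionContentRenderer': {'items': []}}}},
    ]}

    # Ellipsis branches over every list item, T(dict) keeps only dict values,
    # and get_all=False returns the first match instead of a list of matches.
    sdcr = traverse_obj(data, (
        'engagementPanels', Ellipsis, 'engagementPanelSectionListRenderer',
        'content', 'structuredDescriptionContentRenderer', T(dict)),
        get_all=False)
    print(sdcr)  # -> {'items': []}
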
@@ -2918,6 +3135,23 @@ def chapter_time(mmlir): self.mark_watched(video_id, player_response) + # Fallbacks for missing metadata + if initial_sdcr: + if info.get('description') is None: + info['description'] = traverse_obj(initial_sdcr, ( + 'items', Ellipsis, 'expandableVideoDescriptionBodyRenderer', + 'attributedDescriptionBodyText', 'content', T(compat_str)), + get_all=False) + # videoDescriptionHeaderRenderer also has publishDate/channel/handle/ucid, but not needed + if info.get('title') is None: + info['title'] = traverse_obj( + (initial_sdcr, initial_data), + (0, 'items', Ellipsis, 'videoDescriptionHeaderRenderer', T(dict)), + (1, 'playerOverlays', 'playerOverlayRenderer', 'videoDetails', + 'playerOverlayVideoDetailsRenderer', T(dict)), + expected_type=lambda x: self._get_text(x, 'title'), + get_all=False) + return merge_dicts( info, { 'uploader_id': self._extract_uploader_id(owner_profile_url), @@ -3319,19 +3553,9 @@ def suitable(cls, url): @staticmethod def _extract_grid_item_renderer(item): - assert isinstance(item, dict) - for key, renderer in item.items(): - if not key.startswith('grid') or not key.endswith('Renderer'): - continue - if not isinstance(renderer, dict): - continue - return renderer - - @staticmethod - def _get_text(r, k): - return traverse_obj( - r, (k, 'runs', 0, 'text'), (k, 'simpleText'), - expected_type=txt_or_none) + return traverse_obj(item, ( + T(dict.items), lambda _, k_v: k_v[0].startswith('grid') and k_v[0].endswith('Renderer'), + 1, T(dict)), get_all=False) def _grid_entries(self, grid_renderer): for item in traverse_obj(grid_renderer, ('items', Ellipsis, T(dict))): @@ -3408,8 +3632,8 @@ def _shelf_entries(self, shelf_renderer, skip_channels=False): shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str) yield self.url_result(shelf_url, video_title=title) # Shelf may not contain shelf URL, fallback to extraction from content - for entry in self._shelf_entries_from_content(shelf_renderer): - yield entry + for from_ in self._shelf_entries_from_content(shelf_renderer): + yield from_ def _playlist_entries(self, video_list_renderer): for content in video_list_renderer['contents']: @@ -3428,38 +3652,56 @@ def _extract_lockup_view_model(self, view_model): if not content_id: return content_type = view_model.get('contentType') - if content_type not in ('LOCKUP_CONTENT_TYPE_PLAYLIST', 'LOCKUP_CONTENT_TYPE_PODCAST'): + if content_type == 'LOCKUP_CONTENT_TYPE_VIDEO': + ie = YoutubeIE + url = update_url_query( + 'https://www.youtube.com/watch', {'v': content_id}) + thumb_keys = (None,) + elif content_type in ('LOCKUP_CONTENT_TYPE_PLAYLIST', 'LOCKUP_CONTENT_TYPE_PODCAST'): + ie = YoutubeTabIE + url = update_url_query( + 'https://www.youtube.com/playlist', {'list': content_id}) + thumb_keys = ('collectionThumbnailViewModel', 'primaryThumbnail') + else: self.report_warning( - 'Unsupported lockup view model content type "{0}"{1}'.format(content_type, bug_reports_message()), only_once=True) + 'Unsupported lockup view model content type "{0}"{1}'.format(content_type, bug_reports_message()), + only_once=True) return + + thumb_keys = ('contentImage',) + thumb_keys + ('thumbnailViewModel', 'image') + return merge_dicts(self.url_result( - update_url_query('https://www.youtube.com/playlist', {'list': content_id}), - ie=YoutubeTabIE.ie_key(), video_id=content_id), { - 'title': traverse_obj(view_model, ( - 'metadata', 'lockupMetadataViewModel', 'title', 'content', T(compat_str))), - 'thumbnails': self._extract_thumbnails(view_model, ( - 'contentImage', 
'collectionThumbnailViewModel', 'primaryThumbnail', - 'thumbnailViewModel', 'image'), final_key='sources'), - }) + url, ie=ie.ie_key(), video_id=content_id), + traverse_obj(view_model, { + 'title': ('metadata', 'lockupMetadataViewModel', 'title', + 'content', T(compat_str)), + 'thumbnails': T(lambda vm: self._extract_thumbnails( + vm, thumb_keys, final_key='sources')), + 'duration': ( + 'contentImage', 'thumbnailViewModel', 'overlays', + Ellipsis, ( + ('thumbnailBottomOverlayViewModel', 'badges'), + ('thumbnailOverlayBadgeViewModel', 'thumbnailBadges') + ), Ellipsis, 'thumbnailBadgeViewModel', 'text', + T(parse_duration), any), + }) + ) def _extract_shorts_lockup_view_model(self, view_model): content_id = traverse_obj(view_model, ( 'onTap', 'innertubeCommand', 'reelWatchEndpoint', 'videoId', T(lambda v: v if YoutubeIE.suitable(v) else None))) - if not content_id: - return return merge_dicts(self.url_result( content_id, ie=YoutubeIE.ie_key(), video_id=content_id), { 'title': traverse_obj(view_model, ( 'overlayMetadata', 'primaryText', 'content', T(compat_str))), 'thumbnails': self._extract_thumbnails( view_model, 'thumbnail', final_key='sources'), - }) + }) if content_id else None def _video_entry(self, video_renderer): video_id = video_renderer.get('videoId') - if video_id: - return self._extract_video(video_renderer) + return self._extract_video(video_renderer) if video_id else None def _post_thread_entries(self, post_thread_renderer): post_renderer = try_get( @@ -3491,15 +3733,10 @@ def _post_thread_entries(self, post_thread_renderer): yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=video_id) def _post_thread_continuation_entries(self, post_thread_continuation): - contents = post_thread_continuation.get('contents') - if not isinstance(contents, list): - return - for content in contents: - renderer = content.get('backstagePostThreadRenderer') - if not isinstance(renderer, dict): - continue - for entry in self._post_thread_entries(renderer): - yield entry + for renderer in traverse_obj(post_thread_continuation, ( + 'contents', Ellipsis, 'backstagePostThreadRenderer', T(dict))): + for from_ in self._post_thread_entries(renderer): + yield from_ def _rich_grid_entries(self, contents): for content in traverse_obj( @@ -3574,17 +3811,10 @@ def _entries(self, tab, item_id, webpage): if slr_renderer: is_channels_tab = tab.get('title') == 'Channels' continuation = None - slr_contents = try_get(slr_renderer, lambda x: x['contents'], list) or [] - for slr_content in slr_contents: - if not isinstance(slr_content, dict): - continue - is_renderer = try_get(slr_content, lambda x: x['itemSectionRenderer'], dict) - if not is_renderer: - continue - isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or [] - for isr_content in isr_contents: - if not isinstance(isr_content, dict): - continue + for is_renderer in traverse_obj(slr_renderer, ( + 'contents', Ellipsis, 'itemSectionRenderer', T(dict))): + for isr_content in traverse_obj(is_renderer, ( + 'contents', Ellipsis, T(dict))): renderer = isr_content.get('playlistVideoListRenderer') if renderer: for entry in self._playlist_entries(renderer): @@ -3779,18 +4009,34 @@ def _extract_uploader(self, metadata, data): uploader['channel'] = uploader['uploader'] return uploader - @classmethod - def _extract_alert(cls, data): - alerts = [] - for alert in traverse_obj(data, ('alerts', Ellipsis), expected_type=dict): - alert_text = traverse_obj( - alert, (None, lambda x: x['alertRenderer']['text']), get_all=False) - if not alert_text: - 
continue - text = cls._get_text(alert_text, 'text') - if text: - alerts.append(text) - return '\n'.join(alerts) + def _extract_and_report_alerts(self, data, expected=True, fatal=True, only_once=False): + + def alerts(): + for alert in traverse_obj(data, ('alerts', Ellipsis), expected_type=dict): + alert_dict = traverse_obj( + alert, 'alertRenderer', None, expected_type=dict, get_all=False) + alert_type = traverse_obj(alert_dict, 'type') + if not alert_type: + continue + message = self._get_text(alert_dict, 'text') + if message: + yield alert_type, message + + errors, warnings = [], [] + _IGNORED_WARNINGS = T('Unavailable videos will be hidden during playback') + for alert_type, alert_message in alerts(): + if alert_type.lower() == 'error' and fatal: + errors.append([alert_type, alert_message]) + elif alert_message not in _IGNORED_WARNINGS: + warnings.append([alert_type, alert_message]) + + for alert_type, alert_message in itertools.chain(warnings, errors[:-1]): + self.report_warning( + 'YouTube said: %s - %s' % (alert_type, alert_message), + only_once=only_once) + if errors: + raise ExtractorError( + 'YouTube said: %s' % (errors[-1][1],), expected=expected) def _extract_from_tabs(self, item_id, webpage, data, tabs): selected_tab = self._extract_selected_tab(tabs) @@ -3890,10 +4136,10 @@ def qs_get(key, default=None): compat_str) or video_id if video_id: return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id) + # Capture and output alerts - alert = self._extract_alert(data) - if alert: - raise ExtractorError(alert, expected=True) + self._extract_and_report_alerts(data) + # Failed to recognize raise ExtractorError('Unable to recognize tab page') @@ -4047,7 +4293,7 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): 'only_matching': True, }] - def _real_extract(self, url): + def _real_extract(self, _): return self.url_result( 'https://www.youtube.com/playlist?list=LL', ie=YoutubeTabIE.ie_key()) @@ -4119,6 +4365,7 @@ class YoutubeFeedsInfoExtractor(YoutubeTabIE): Subclasses must define the _FEED_NAME property. """ + _LOGIN_REQUIRED = True @property @@ -4128,7 +4375,7 @@ def IE_NAME(self): def _real_initialize(self): self._login() - def _real_extract(self, url): + def _real_extract(self, _): return self.url_result( 'https://www.youtube.com/feed/%s' % self._FEED_NAME, ie=YoutubeTabIE.ie_key()) @@ -4143,7 +4390,7 @@ class YoutubeWatchLaterIE(InfoExtractor): 'only_matching': True, }] - def _real_extract(self, url): + def _real_extract(self, _): return self.url_result( 'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key()) @@ -4223,7 +4470,7 @@ class YoutubeTruncatedURLIE(InfoExtractor): 'only_matching': True, }] - def _real_extract(self, url): + def _real_extract(self, _): raise ExtractorError( 'Did you forget to quote the URL? 
Remember that & is a meta ' 'character in most shells, so you want to put the URL in quotes, ' diff --git a/script.module.youtube.dl/lib/youtube_dl/options.py b/script.module.youtube.dl/lib/youtube_dl/options.py index 61705d1f0..9b0d77a23 100644 --- a/script.module.youtube.dl/lib/youtube_dl/options.py +++ b/script.module.youtube.dl/lib/youtube_dl/options.py @@ -404,6 +404,10 @@ def _comma_separated_values_options_callback(option, opt_str, value, parser): '-F', '--list-formats', action='store_true', dest='listformats', help='List all available formats of requested videos') + video_format.add_option( + '--no-list-formats', + action='store_false', dest='listformats', + help='Do not list available formats of requested videos (default)') video_format.add_option( '--youtube-include-dash-manifest', action='store_true', dest='youtube_include_dash_manifest', default=True, @@ -412,6 +416,17 @@ def _comma_separated_values_options_callback(option, opt_str, value, parser): '--youtube-skip-dash-manifest', action='store_false', dest='youtube_include_dash_manifest', help='Do not download the DASH manifests and related data on YouTube videos') + video_format.add_option( + '--youtube-player-js-variant', + action='store', dest='youtube_player_js_variant', + help='For YouTube, the player javascript variant to use for n/sig deciphering; `actual` to follow the site; default `%default`.', + choices=('actual', 'main', 'tcc', 'tce', 'es5', 'es6', 'tv', 'tv_es6', 'phone', 'tablet'), + default='actual', metavar='VARIANT') + video_format.add_option( + '--youtube-player-js-version', + action='store', dest='youtube_player_js_version', + help='For YouTube, the player javascript version to use for n/sig deciphering, specified as `signature_timestamp@hash`, or `actual` to follow the site; default `%default`', + default='actual', metavar='STS@HASH') video_format.add_option( '--merge-output-format', action='store', dest='merge_output_format', metavar='FORMAT', default=None, diff --git a/script.module.youtube.dl/lib/youtube_dl/traversal.py b/script.module.youtube.dl/lib/youtube_dl/traversal.py index 834cfef7f..1de48b145 100644 --- a/script.module.youtube.dl/lib/youtube_dl/traversal.py +++ b/script.module.youtube.dl/lib/youtube_dl/traversal.py @@ -5,6 +5,10 @@ from .utils import ( dict_get, get_first, + require, + subs_list_to_dict, T, traverse_obj, + unpack, + value, ) diff --git a/script.module.youtube.dl/lib/youtube_dl/utils.py b/script.module.youtube.dl/lib/youtube_dl/utils.py index c4262936e..02a49ff49 100644 --- a/script.module.youtube.dl/lib/youtube_dl/utils.py +++ b/script.module.youtube.dl/lib/youtube_dl/utils.py @@ -53,6 +53,8 @@ compat_etree_fromstring, compat_etree_iterfind, compat_expanduser, + compat_filter as filter, + compat_filter_fns, compat_html_entities, compat_html_entities_html5, compat_http_client, @@ -1859,6 +1861,39 @@ def write_json_file(obj, fn): raise +class partial_application(object): + """Allow a function to use pre-set argument values""" + + # see _try_bind_args() + try: + inspect.signature + + @staticmethod + def required_args(fn): + return [ + param.name for param in inspect.signature(fn).parameters.values() + if (param.kind in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD) + and param.default is inspect.Parameter.empty)] + + except AttributeError: + + # Py < 3.3 + @staticmethod + def required_args(fn): + fn_args = inspect.getargspec(fn) + n_defaults = len(fn_args.defaults or []) + return (fn_args.args or [])[:-n_defaults if n_defaults > 0 else None] + + def 
@@ -3152,6 +3187,7 @@ def extract_timezone(date_str):
     return timezone, date_str


+@partial_application
 def parse_iso8601(date_str, delimiter='T', timezone=None):
     """ Return a UNIX timestamp from the given date """

@@ -3229,6 +3265,7 @@ def unified_timestamp(date_str, day_first=True):
     return calendar.timegm(timetuple) + pm_delta * 3600 - compat_datetime_timedelta_total_seconds(timezone)


+@partial_application
 def determine_ext(url, default_ext='unknown_video'):
     if url is None or '.' not in url:
         return default_ext
@@ -3807,6 +3844,7 @@ def base_url(url):
     return re.match(r'https?://[^?#&]+/', url).group()


+@partial_application
 def urljoin(base, path):
     path = _decode_compat_str(path, encoding='utf-8', or_none=True)
     if not path:
@@ -3831,6 +3869,7 @@ def get_method(self):
         return 'PUT'


+@partial_application
 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1, base=None):
     if get_attr:
         if v is not None:
@@ -3857,6 +3896,7 @@ def str_to_int(int_str):
     return int_or_none(int_str)


+@partial_application
 def float_or_none(v, scale=1, invscale=1, default=None):
     if v is None:
         return default
@@ -3891,38 +3931,46 @@ def parse_duration(s):
         return None

     s = s.strip()
+    if not s:
+        return None

     days, hours, mins, secs, ms = [None] * 5

-    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
+    m = re.match(r'''(?x)
+        (?P<before_secs>
+            (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?
+            (?P<mins>[0-9]+):)?
+        (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
+        (?:[.:](?P<ms>[0-9]+))?Z?$
+    ''', s)
     if m:
-        days, hours, mins, secs, ms = m.groups()
+        days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
     else:
         m = re.match(
             r'''(?ix)(?:P?
                 (?:
-                    [0-9]+\s*y(?:ears?)?\s*
+                    [0-9]+\s*y(?:ears?)?,?\s*
                 )?
                 (?:
-                    [0-9]+\s*m(?:onths?)?\s*
+                    [0-9]+\s*m(?:onths?)?,?\s*
                 )?
                 (?:
-                    [0-9]+\s*w(?:eeks?)?\s*
+                    [0-9]+\s*w(?:eeks?)?,?\s*
                 )?
                 (?:
-                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
+                    (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
                 )?
                 T)?
                 (?:
-                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
+                    (?P<hours>[0-9]+)\s*h(?:(?:ou)?rs?)?,?\s*
                 )?
                 (?:
-                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
+                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
                 )?
                 (?:
-                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
+                    (?P<secs>[0-9]+)(?:\.(?P<ms>[0-9]+))?\s*s(?:ec(?:ond)?s?)?\s*
                 )?Z?$''', s)
         if m:
-            days, hours, mins, secs, ms = m.groups()
+            days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
         else:
             m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
             if m:
@@ -3930,17 +3978,13 @@ def parse_duration(s):
         else:
             return None

-    duration = 0
-    if secs:
-        duration += float(secs)
-    if mins:
-        duration += float(mins) * 60
-    if hours:
-        duration += float(hours) * 60 * 60
-    if days:
-        duration += float(days) * 24 * 60 * 60
-    if ms:
-        duration += float(ms)
+    duration = (
+        ((((float(days) * 24) if days else 0)
+          + (float(hours) if hours else 0)) * 60
+         + (float(mins) if mins else 0)) * 60
+        + (float(secs) if secs else 0)
+        + (float(ms) / 10 ** len(ms) if ms else 0))

     return duration
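A few inputs the reworked `parse_duration` should now accept, with results traced by hand through the regexes and arithmetic above (illustrative, not fixtures from the patch):

    parse_duration('1:23:45')         # -> 5025.0
    parse_duration('01:02:03.05')     # -> 3723.05 (fraction may follow '.' or ':')
    parse_duration('3h11m53s')        # -> 11513.0 ('hr'/'hrs' now accepted too)
    parse_duration('2 hrs, 30 mins')  # -> 9000.0  (commas after units tolerated)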
@@ -4251,6 +4295,7 @@ def urlencode_postdata(*args, **kargs):
     return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')


+@partial_application
 def update_url(url, **kwargs):
     """Replace URL components specified by kwargs
        url: compat_str or parsed URL tuple
@@ -4272,6 +4317,7 @@ def update_url(url, **kwargs):
     return compat_urllib_parse.urlunparse(url._replace(**kwargs))


+@partial_application
 def update_url_query(url, query):
     return update_url(url, query_update=query)
@@ -4698,30 +4744,45 @@ def parse_codecs(codecs_str):
     if not codecs_str:
         return {}
     split_codecs = list(filter(None, map(
-        lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
-    vcodec, acodec = None, None
+        lambda s: s.strip(), codecs_str.strip().split(','))))
+    vcodec, acodec, hdr = None, None, None
     for full_codec in split_codecs:
-        codec = full_codec.split('.')[0]
-        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
-            if not vcodec:
-                vcodec = full_codec
-        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
+        codec, rest = full_codec.partition('.')[::2]
+        codec = codec.lower()
+        full_codec = '.'.join((codec, rest)) if rest else codec
+        codec = re.sub(r'0+(?=\d)', '', codec)
+        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
+                     'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
+            if vcodec:
+                continue
+            vcodec = full_codec
+            if codec in ('dvh1', 'dvhe'):
+                hdr = 'DV'
+            elif codec in ('av1', 'vp9'):
+                n, m = {
+                    'av1': (2, '10'),
+                    'vp9': (0, '2'),
+                }[codec]
+                if (rest.split('.', n + 1)[n:] or [''])[0].lstrip('0') == m:
+                    hdr = 'HDR10'
+        elif codec in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-4',
+                       'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
             if not acodec:
                 acodec = full_codec
         else:
-            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
-    if not vcodec and not acodec:
-        if len(split_codecs) == 2:
-            return {
-                'vcodec': split_codecs[0],
-                'acodec': split_codecs[1],
-            }
-        else:
-            return {
+            write_string('WARNING: Unknown codec %s\n' % (full_codec,), sys.stderr)
+
+    return (
+        filter_dict({
             'vcodec': vcodec or 'none',
             'acodec': acodec or 'none',
-        }
-    return {}
+            'dynamic_range': hdr,
+        }) if vcodec or acodec
+        else {
+            'vcodec': split_codecs[0],
+            'acodec': split_codecs[1],
+        } if len(split_codecs) == 2
+        else {})


 def urlhandle_detect_ext(url_handle):
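Traced examples for the reworked `parse_codecs` (worked out by hand from the code above; the codec strings are typical DASH/HLS values, not fixtures from the patch):

    parse_codecs('avc1.640028, mp4a.40.2')
    # -> {'vcodec': 'avc1.640028', 'acodec': 'mp4a.40.2'}
    parse_codecs('av01.0.12M.10')  # third field '10' marks 10-bit
    # -> {'vcodec': 'av01.0.12M.10', 'acodec': 'none', 'dynamic_range': 'HDR10'}
    parse_codecs('dvh1.05.06')     # Dolby Vision fourcc
    # -> {'vcodec': 'dvh1.05.06', 'acodec': 'none', 'dynamic_range': 'DV'}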
@@ -6283,6 +6344,7 @@ def traverse_obj(obj, *paths, **kwargs):
             Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.
         - `any`-builtin: Take the first matching object and return it, resetting branching.
         - `all`-builtin: Take all matching objects and return them as a list, resetting branching.
+        - `filter`-builtin: Return the value if it is truthy, `None` otherwise.

         `tuple`, `list`, and `dict` all support nested paths and branches.
@@ -6324,6 +6386,11 @@ def traverse_obj(obj, *paths, **kwargs):
     # instant compat
     str = compat_str

+    from .compat import (
+        compat_builtins_dict as dict_,  # the basic dict type
+        compat_dict as dict,  # dict preserving insertion order
+    )
+
     casefold = lambda k: compat_casefold(k) if isinstance(k, str) else k

     if isinstance(expected_type, type):
@@ -6406,7 +6473,7 @@ def apply_key(key, obj, is_last):
                 if not branching:  # string traversal
                     result = ''.join(result)

-            elif isinstance(key, dict):
+            elif isinstance(key, dict_):
                 iter_obj = ((k, _traverse_obj(obj, v, False, is_last))
                             for k, v in key.items())
                 result = dict((k, v if v is not None else default) for k, v in iter_obj
                               if v is not None or default is not NO_DEFAULT) or None
@@ -6484,7 +6551,7 @@ def apply_path(start_obj, path, test_type):
         has_branched = False

         key = None
-        for last, key in lazy_last(variadic(path, (str, bytes, dict, set))):
+        for last, key in lazy_last(variadic(path, (str, bytes, dict_, set))):
             if not casesense and isinstance(key, str):
                 key = compat_casefold(key)

@@ -6497,6 +6564,11 @@ def apply_path(start_obj, path, test_type):
                 objs = (list(filtered_objs),)
                 continue

+            # filter might be from __builtin__, future_builtins, or itertools.ifilter
+            if key in compat_filter_fns:
+                objs = filter(None, objs)
+                continue
+
             if __debug__ and callable(key):
                 # Verify function signature
                 _try_bind_args(key, None, None)
@@ -6509,10 +6581,10 @@ def apply_path(start_obj, path, test_type):

             objs = from_iterable(new_objs)

-        if test_type and not isinstance(key, (dict, list, tuple)):
+        if test_type and not isinstance(key, (dict_, list, tuple)):
             objs = map(type_test, objs)

-        return objs, has_branched, isinstance(key, dict)
+        return objs, has_branched, isinstance(key, dict_)

     def _traverse_obj(obj, path, allow_empty, test_type):
         results, has_branched, is_dict = apply_path(obj, path, test_type)
@@ -6535,6 +6607,76 @@ def _traverse_obj(obj, path, allow_empty, test_type):
     return None if default is NO_DEFAULT else default


+def value(value):
+    return lambda _: value
+
+
+class require(ExtractorError):
+    def __init__(self, name, expected=False):
+        super(require, self).__init__(
+            'Unable to extract {0}'.format(name), expected=expected)
+
+    def __call__(self, value):
+        if value is None:
+            raise self
+
+        return value
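A sketch of how these additions compose in a `traverse_obj` path; the input dict is invented, `int_or_none` relies on its `@partial_application` decoration earlier in this patch, and `value`/`require` are wrapped in `T(...)` because a bare callable in a path acts as a key/value filter instead:

    info = {'title': '', 'duration': '123'}
    traverse_obj(info, ('title', filter))  # -> None: '' is falsy
    traverse_obj(info, {
        'id': T(value('xyz123')),          # constant, ignores its input
        'duration': ('duration', T(int_or_none)),
    })
    # -> {'id': 'xyz123', 'duration': 123}
    require('duration')(None)  # raises ExtractorError('Unable to extract duration')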
+@partial_application
+# typing: (subs: list[dict], /, *, lang='und', ext=None) -> dict[str, list[dict]]
+def subs_list_to_dict(subs, lang='und', ext=None):
+    """
+    Convert subtitles from a traversal into a subtitle dict.
+    The path should have an `all` immediately before this function.
+
+    Arguments:
+    `lang`      The default language tag for subtitle dicts with no
+                `id` (`und`: undetermined)
+    `ext`       The default value for `ext` in the subtitle dicts
+
+    In the dict you can set the following additional items:
+    `id`        The language tag to which the subtitle dict should be added
+    `quality`   The sort order for each subtitle dict
+    """
+
+    result = collections.defaultdict(list)
+
+    for sub in subs:
+        sub_url = url_or_none(sub.pop('url', None))
+        if sub_url:
+            sub['url'] = sub_url
+        elif not sub.get('data'):
+            continue
+        sub_lang = sub.pop('id', None)
+        if not isinstance(sub_lang, compat_str):
+            if not lang:
+                continue
+            sub_lang = lang
+        sub_ext = sub.get('ext')
+        if not isinstance(sub_ext, compat_str):
+            if not ext:
+                sub.pop('ext', None)
+            else:
+                sub['ext'] = ext
+        result[sub_lang].append(sub)
+    result = dict(result)
+
+    for subs in result.values():
+        subs.sort(key=lambda x: x.pop('quality', 0) or 0)
+
+    return result
+
+
+def unpack(func, **kwargs):
+    """Make a function that applies `partial(func, **kwargs)` to its argument as *args"""
+    @functools.wraps(func)
+    def inner(items):
+        return func(*items, **kwargs)
+
+    return inner
+
+
 def T(*x):
     """ For use in yt-dl instead of {type, ...} or set((type, ...)) """
     return set(x)
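Finally, a hedged sketch of the two new helpers in use; the caption data and URLs are invented for illustration:

    raw = [
        {'url': 'https://example.com/subs.vtt', 'id': 'en', 'quality': 1},
        {'url': 'not a url'},                  # dropped: no usable URL or data
        {'data': 'WEBVTT ...', 'quality': 0},  # no 'id' -> filed under `lang`
    ]
    subs_list_to_dict(raw, lang='und', ext='vtt')
    # -> {'en': [{'url': 'https://example.com/subs.vtt', 'ext': 'vtt'}],
    #     'und': [{'data': 'WEBVTT ...', 'ext': 'vtt'}]}

    # unpack() adapts a multi-argument function to take one sequence, e.g. a
    # (base, path) pair produced by a traversal:
    unpack(urljoin)(('https://example.com/a/', 'b'))  # -> 'https://example.com/a/b'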