diff --git a/README.md b/README.md index 9625437382..e094ccba7f 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ [![License: Unlicense](https://img.shields.io/badge/-Unlicense-blue.svg?style=for-the-badge)](LICENSE "License") [![CI Status](https://img.shields.io/github/workflow/status/yt-dlp/yt-dlp/Core%20Tests/master?label=Tests&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/actions "CI Status") [![Commits](https://img.shields.io/github/commit-activity/m/yt-dlp/yt-dlp?label=commits&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/commits "Commit History") -[![Last Commit](https://img.shields.io/github/last-commit/yt-dlp/yt-dlp/master?label=&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/commits "Commit History") +[![Last Commit](https://img.shields.io/github/last-commit/yt-dlp/yt-dlp/master?label=&style=for-the-badge&display_timestamp=committer)](https://github.com/yt-dlp/yt-dlp/commits "Commit History") @@ -1642,9 +1642,9 @@ # MODIFYING METADATA `--replace-in-metadata FIELDS REGEX REPLACE` is used to replace text in any metadata field using [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax). [Backreferences](https://docs.python.org/3/library/re.html?highlight=backreferences#re.sub) can be used in the replace string for advanced use. -The general syntax of `--parse-metadata FROM:TO` is to give the name of a field or an [output template](#output-template) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields. 
+The general syntax of `--parse-metadata FROM:TO` is to give the name of a field or an [output template](#output-template) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups, a single field name, or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields. -Note that any field created by this can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--embed-metadata`. +Note that these options preserve their relative order, allowing replacements to be made in parsed fields and vice versa. Also, any field thus created can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--embed-metadata`. 
This option also has a few special uses: diff --git a/supportedsites.md b/supportedsites.md index 48888f61fa..44fc1d4849 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -23,7 +23,7 @@ # Supported sites - **9now.com.au** - **abc.net.au** - **abc.net.au:iview** - - **abc.net.au:iview:showseries** + - **abc.net.au:​iview:showseries** - **abcnews** - **abcnews:video** - **abcotvs**: ABC Owned Television Stations @@ -124,8 +124,8 @@ # Supported sites - **bbc**: [bbc] BBC - **bbc.co.uk**: [bbc] BBC iPlayer - **bbc.co.uk:article**: BBC articles - - **bbc.co.uk:iplayer:episodes** - - **bbc.co.uk:iplayer:group** + - **bbc.co.uk:​iplayer:episodes** + - **bbc.co.uk:​iplayer:group** - **bbc.co.uk:playlist** - **BBVTV**: [bbvtv] - **BBVTVLive**: [bbvtv] @@ -274,7 +274,7 @@ # Supported sites - **crunchyroll**: [crunchyroll] - **crunchyroll:beta**: [crunchyroll] - **crunchyroll:playlist**: [crunchyroll] - - **crunchyroll:playlist:beta**: [crunchyroll] + - **crunchyroll:​playlist:beta**: [crunchyroll] - **CSpan**: C-SPAN - **CSpanCongress** - **CtsNews**: 華視新聞 @@ -483,7 +483,7 @@ # Supported sites - **Golem** - **goodgame:stream** - **google:podcasts** - - **google:podcasts:feed** + - **google:​podcasts:feed** - **GoogleDrive** - **GoogleDrive:Folder** - **GoPlay**: [goplay] @@ -618,7 +618,7 @@ # Supported sites - **kuwo:singer**: 酷我音乐 - 歌手 - **kuwo:song**: 酷我音乐 - **la7.it** - - **la7.it:pod:episode** + - **la7.it:​pod:episode** - **la7.it:podcast** - **laola1tv** - **laola1tv:embed** @@ -652,7 +652,7 @@ # Supported sites - **LineLiveChannel** - **LinkedIn**: [linkedin] - **linkedin:learning**: [linkedin] - - **linkedin:learning:course**: [linkedin] + - **linkedin:​learning:course**: [linkedin] - **LinuxAcademy**: [linuxacademy] - **Liputan6** - **LiTV** @@ -673,7 +673,7 @@ # Supported sites - **MagentaMusik360** - **mailru**: Видео@Mail.Ru - **mailru:music**: Музыка@Mail.Ru - - **mailru:music:search**: Музыка@Mail.Ru + - **mailru:​music:search**: Музыка@Mail.Ru - 
**MainStreaming**: MainStreaming Player - **MallTV** - **mangomolo:live** @@ -718,7 +718,7 @@ # Supported sites - **microsoftstream**: Microsoft Stream - **mildom**: Record ongoing live by specific user in Mildom - **mildom:clip**: Clip in Mildom - - **mildom:user:vod**: Download all VODs from specific user in Mildom + - **mildom:​user:vod**: Download all VODs from specific user in Mildom - **mildom:vod**: VOD in Mildom - **minds** - **minds:channel** @@ -803,7 +803,7 @@ # Supported sites - **navernow** - **NBA** - **nba:watch** - - **nba:watch:collection** + - **nba:​watch:collection** - **NBAChannel** - **NBAEmbed** - **NBAWatchEmbed** @@ -817,7 +817,7 @@ # Supported sites - **NBCStations** - **ndr**: NDR.de - Norddeutscher Rundfunk - **ndr:embed** - - **ndr:embed:base** + - **ndr:​embed:base** - **NDTV** - **Nebula**: [watchnebula] - **nebula:channel**: [watchnebula] @@ -869,7 +869,7 @@ # Supported sites - **niconico:tag**: NicoNico video tag URLs - **NiconicoUser** - **nicovideo:search**: Nico video search; "nicosearch:" prefix - - **nicovideo:search:date**: Nico video search, newest first; "nicosearchdate:" prefix + - **nicovideo:​search:date**: Nico video search, newest first; "nicosearchdate:" prefix - **nicovideo:search_url**: Nico video search URLs - **Nintendo** - **Nitter** @@ -892,7 +892,7 @@ # Supported sites - **npo**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **npo.nl:live** - **npo.nl:radio** - - **npo.nl:radio:fragment** + - **npo.nl:​radio:fragment** - **Npr** - **NRK** - **NRKPlaylist** @@ -933,7 +933,7 @@ # Supported sites - **openrec:capture** - **openrec:movie** - **OraTV** - - **orf:fm4:story**: fm4.orf.at stories + - **orf:​fm4:story**: fm4.orf.at stories - **orf:iptv**: iptv.ORF.at - **orf:radio** - **orf:tvthek**: ORF TVthek @@ -981,7 +981,7 @@ # Supported sites - **Pinterest** - **PinterestCollection** - **pixiv:sketch** - - **pixiv:sketch:user** + - **pixiv:​sketch:user** - **Pladform** - **PlanetMarathi** - **Platzi**: 
[platzi] @@ -1010,7 +1010,7 @@ # Supported sites - **polskieradio:kierowcow** - **polskieradio:player** - **polskieradio:podcast** - - **polskieradio:podcast:list** + - **polskieradio:​podcast:list** - **PolskieRadioCategory** - **Popcorntimes** - **PopcornTV** @@ -1122,7 +1122,7 @@ # Supported sites - **rtl.nl**: rtl.nl and rtlxl.nl - **rtl2** - **rtl2:you** - - **rtl2:you:series** + - **rtl2:​you:series** - **RTLLuLive** - **RTLLuRadio** - **RTNews** @@ -1198,9 +1198,9 @@ # Supported sites - **Skeb** - **sky.it** - **sky:news** - - **sky:news:story** + - **sky:​news:story** - **sky:sports** - - **sky:sports:news** + - **sky:​sports:news** - **skyacademy.it** - **SkylineWebcams** - **skynewsarabia:article** @@ -1289,7 +1289,7 @@ # Supported sites - **Teachable**: [teachable] - **TeachableCourse**: [teachable] - **teachertube**: teachertube.com videos - - **teachertube:user:collection**: teachertube.com user and collection videos + - **teachertube:​user:collection**: teachertube.com user and collection videos - **TeachingChannel** - **Teamcoco** - **TeamTreeHouse**: [teamtreehouse] @@ -1614,12 +1614,12 @@ # Supported sites - **XXXYMovies** - **Yahoo**: Yahoo screen and movies - **yahoo:gyao** - - **yahoo:gyao:player** + - **yahoo:​gyao:player** - **yahoo:japannews**: Yahoo! 
Japan News - **YandexDisk** - **yandexmusic:album**: Яндекс.Музыка - Альбом - - **yandexmusic:artist:albums**: Яндекс.Музыка - Артист - Альбомы - - **yandexmusic:artist:tracks**: Яндекс.Музыка - Артист - Треки + - **yandexmusic:​artist:albums**: Яндекс.Музыка - Артист - Альбомы + - **yandexmusic:​artist:tracks**: Яндекс.Музыка - Артист - Треки - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист - **yandexmusic:track**: Яндекс.Музыка - Трек - **YandexVideo** @@ -1641,14 +1641,14 @@ # Supported sites - **youtube:clip** - **youtube:favorites**: YouTube liked videos; ":ytfav" keyword (requires cookies) - **youtube:history**: Youtube watch history; ":ythis" keyword (requires cookies) - - **youtube:music:search_url**: YouTube music search URLs with selectable sections, e.g. #songs + - **youtube:​music:search_url**: YouTube music search URLs with selectable sections, e.g. #songs - **youtube:notif**: YouTube notifications; ":ytnotif" keyword (requires cookies) - **youtube:playlist**: YouTube playlists - **youtube:recommended**: YouTube recommended videos; ":ytrec" keyword - **youtube:search**: YouTube search; "ytsearch:" prefix - - **youtube:search:date**: YouTube search, newest videos first; "ytsearchdate:" prefix + - **youtube:​search:date**: YouTube search, newest videos first; "ytsearchdate:" prefix - **youtube:search_url**: YouTube search URLs with sorting and filter support - - **youtube:shorts:pivot:audio**: YouTube Shorts audio pivot (Shorts using audio of a given video) + - **youtube:​shorts:pivot:audio**: YouTube Shorts audio pivot (Shorts using audio of a given video) - **youtube:stories**: YouTube channel stories; "ytstories:" prefix - **youtube:subscriptions**: YouTube subscriptions feed; ":ytsubs" keyword (requires cookies) - **youtube:tab**: YouTube Tabs diff --git a/test/helper.py b/test/helper.py index e918d8c469..3b3b44580d 100644 --- a/test/helper.py +++ b/test/helper.py @@ -260,8 +260,8 @@ def _repr(v): info_dict_str += ''.join( f' {_repr(k)}: 
{_repr(test_info_dict[k])},\n' for k in missing_keys) - write_string( - '\n\'info_dict\': {\n' + info_dict_str + '},\n', out=sys.stderr) + info_dict_str = '\n\'info_dict\': {\n' + info_dict_str + '},\n' + write_string(info_dict_str.replace('\n', '\n '), out=sys.stderr) self.assertFalse( missing_keys, 'Missing keys in test definition: %s' % ( diff --git a/yt_dlp/extractor/acfun.py b/yt_dlp/extractor/acfun.py index 92b905fa7c..9ec259a75f 100644 --- a/yt_dlp/extractor/acfun.py +++ b/yt_dlp/extractor/acfun.py @@ -161,7 +161,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE): def _real_extract(self, url): video_id = self._match_id(url) ac_idx = parse_qs(url).get('ac', [None])[-1] - video_id = f'{video_id}{format_field(ac_idx, template="__%s")}' + video_id = f'{video_id}{format_field(ac_idx, None, "__%s")}' webpage = self._download_webpage(url, video_id) json_bangumi_data = self._search_json(r'window.bangumiData\s*=', webpage, 'bangumiData', video_id) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index a237343c61..de28aa4b70 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -368,7 +368,7 @@ def _real_extract(self, url): or '正在观看预览,大会员免费看全片' in webpage): self.raise_login_required('This video is for premium members only') - play_info = self._search_json(r'window\.__playinfo__\s*=\s*', webpage, 'play info', video_id)['data'] + play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data'] formats = self.extract_formats(play_info) if (not formats and '成为大会员抢先看' in webpage and play_info.get('durl') and not play_info.get('dash')): diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 84a2b95af5..20ed522163 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3725,7 +3725,8 @@ def description(cls, *, markdown=True, search_examples=None): if not cls.working(): desc += ' (**Currently broken**)' if markdown else ' (Currently broken)' - name = f' - 
**{cls.IE_NAME}**' if markdown else cls.IE_NAME + # Escape emojis. Ref: https://github.com/github/markup/issues/1153 + name = (' - **%s**' % re.sub(r':(\w+:)', ':\u200B\\g<1>', cls.IE_NAME)) if markdown else cls.IE_NAME return f'{name}:{desc}' if desc else name def extract_subtitles(self, *args, **kwargs): diff --git a/yt_dlp/extractor/manyvids.py b/yt_dlp/extractor/manyvids.py index c713805c54..63ff5f054e 100644 --- a/yt_dlp/extractor/manyvids.py +++ b/yt_dlp/extractor/manyvids.py @@ -68,7 +68,7 @@ def _real_extract(self, url): ) def txt_or_none(s, default=None): - return (s.strip() or default) if isinstance(s, compat_str) else default + return (s.strip() or default) if isinstance(s, str) else default uploader = txt_or_none(info.get('data-meta-author')) diff --git a/yt_dlp/extractor/neteasemusic.py b/yt_dlp/extractor/neteasemusic.py index 44fa60ce9b..5cf96ad7ef 100644 --- a/yt_dlp/extractor/neteasemusic.py +++ b/yt_dlp/extractor/neteasemusic.py @@ -1,3 +1,4 @@ +import itertools import json import re import time @@ -39,8 +40,7 @@ def _encrypt(cls, dfsid): result = b64encode(m.digest()).decode('ascii') return result.replace('/', '_').replace('+', '-') - @classmethod - def make_player_api_request_data_and_headers(cls, song_id, bitrate): + def make_player_api_request_data_and_headers(self, song_id, bitrate): KEY = b'e82ckenh8dichen8' URL = '/api/song/enhance/player/url' now = int(time.time() * 1000) diff --git a/yt_dlp/extractor/yandexvideo.py b/yt_dlp/extractor/yandexvideo.py index 7932edf335..5e6cf6edd5 100644 --- a/yt_dlp/extractor/yandexvideo.py +++ b/yt_dlp/extractor/yandexvideo.py @@ -255,7 +255,7 @@ class ZenYandexIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - redirect = self._search_json(r'var it\s*=\s*', webpage, 'redirect', id, default={}).get('retpath') + redirect = self._search_json(r'var it\s*=', webpage, 'redirect', id, default={}).get('retpath') if redirect: video_id = 
self._match_id(redirect) webpage = self._download_webpage(redirect, video_id, note='Redirecting') @@ -373,7 +373,7 @@ def _real_extract(self, url): item_id = self._match_id(url) webpage = self._download_webpage(url, item_id) redirect = self._search_json( - r'var it\s*=\s*', webpage, 'redirect', item_id, default={}).get('retpath') + r'var it\s*=', webpage, 'redirect', item_id, default={}).get('retpath') if redirect: item_id = self._match_id(redirect) webpage = self._download_webpage(redirect, item_id, note='Redirecting') diff --git a/yt_dlp/options.py b/yt_dlp/options.py index d3dfee820a..bee867aa94 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -294,9 +294,10 @@ def _create_alias(option, opt_str, value, parser): aliases = (x if x.startswith('-') else f'--{x}' for x in map(str.strip, aliases.split(','))) try: + args = [f'ARG{i}' for i in range(nargs)] alias_group.add_option( - *aliases, help=opts, nargs=nargs, dest=parser.ALIAS_DEST, type='str' if nargs else None, - metavar=' '.join(f'ARG{i}' for i in range(nargs)), action='callback', + *aliases, nargs=nargs, dest=parser.ALIAS_DEST, type='str' if nargs else None, + metavar=' '.join(args), help=opts.format(*args), action='callback', callback=_alias_callback, callback_kwargs={'opts': opts, 'nargs': nargs}) except Exception as err: raise optparse.OptionValueError(f'wrong {opt_str} formatting; {err}') @@ -549,11 +550,11 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): selection.add_option( '--min-filesize', metavar='SIZE', dest='min_filesize', default=None, - help='Do not download any videos smaller than SIZE, e.g. 50k or 44.6M') + help='Abort download if filesize is smaller than SIZE, e.g. 50k or 44.6M') selection.add_option( '--max-filesize', metavar='SIZE', dest='max_filesize', default=None, - help='Do not download any videos larger than SIZE, e.g. 50k or 44.6M') + help='Abort download if filesize is larger than SIZE, e.g. 
50k or 44.6M') selection.add_option( '--date', metavar='DATE', dest='date', default=None, diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 4d1247eea3..d0513496e1 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -480,6 +480,7 @@ def handle_endtag(self, tag): raise self.HTMLBreakOnClosingTagException() +# XXX: This should be far less strict def get_element_text_and_html_by_tag(tag, html): """ For the first element with the specified tag in the passed HTML document