Merge branch 'yt-dlp:master' into pr/malformed-manifest-fix

bashonly 2024-03-01 09:25:38 -06:00
commit 2f7c93c01c
No known key found for this signature in database
GPG Key ID: 783F096F253D15B0
47 changed files with 1215 additions and 500 deletions

.github/workflows/build.yml

@@ -164,7 +164,7 @@ jobs:
       - name: Upload artifacts
         uses: actions/upload-artifact@v4
         with:
-          name: build-${{ github.job }}
+          name: build-bin-${{ github.job }}
           path: |
             yt-dlp
             yt-dlp.tar.gz
@@ -227,7 +227,7 @@ jobs:
       - name: Upload artifacts
         uses: actions/upload-artifact@v4
         with:
-          name: build-linux_${{ matrix.architecture }}
+          name: build-bin-linux_${{ matrix.architecture }}
           path: | # run-on-arch-action designates armv7l as armv7
             repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}
           compression-level: 0
@@ -271,7 +271,7 @@ jobs:
       - name: Upload artifacts
         uses: actions/upload-artifact@v4
         with:
-          name: build-${{ github.job }}
+          name: build-bin-${{ github.job }}
           path: |
             dist/yt-dlp_macos
             dist/yt-dlp_macos.zip
@@ -324,7 +324,7 @@ jobs:
       - name: Upload artifacts
         uses: actions/upload-artifact@v4
         with:
-          name: build-${{ github.job }}
+          name: build-bin-${{ github.job }}
           path: |
             dist/yt-dlp_macos_legacy
           compression-level: 0
@@ -373,7 +373,7 @@ jobs:
       - name: Upload artifacts
         uses: actions/upload-artifact@v4
         with:
-          name: build-${{ github.job }}
+          name: build-bin-${{ github.job }}
           path: |
             dist/yt-dlp.exe
             dist/yt-dlp_min.exe
@@ -421,7 +421,7 @@ jobs:
       - name: Upload artifacts
         uses: actions/upload-artifact@v4
         with:
-          name: build-${{ github.job }}
+          name: build-bin-${{ github.job }}
           path: |
             dist/yt-dlp_x86.exe
           compression-level: 0
@@ -441,7 +441,7 @@ jobs:
       - uses: actions/download-artifact@v4
         with:
           path: artifact
-          pattern: build-*
+          pattern: build-bin-*
           merge-multiple: true
       - name: Make SHA2-SUMS files
@@ -484,3 +484,4 @@ jobs:
             _update_spec
             SHA*SUMS*
           compression-level: 0
+          overwrite: true

Makefile

@@ -37,14 +37,15 @@ BINDIR ?= $(PREFIX)/bin
 MANDIR ?= $(PREFIX)/man
 SHAREDIR ?= $(PREFIX)/share
 PYTHON ?= /usr/bin/env python3
+GNUTAR ?= tar

+# $(shell) and $(error) are no-ops in BSD Make and the != variable assignment operator is not supported by GNU Make <4.0
+VERSION_CHECK != echo supported
+VERSION_CHECK ?= $(error GNU Make 4+ or BSD Make is required)
+CHECK_VERSION := $(VERSION_CHECK)

 # set markdown input format to "markdown-smart" for pandoc version 2+ and to "markdown" for pandoc prior to version 2
-MARKDOWN != if [ "`pandoc -v | head -n1 | cut -d' ' -f2 | head -c1`" -ge "2" ]; then echo markdown-smart; else echo markdown; fi
+PANDOC_VERSION_CMD = pandoc -v 2>/dev/null | head -n1 | cut -d' ' -f2 | head -c1
+PANDOC_VERSION != $(PANDOC_VERSION_CMD)
+PANDOC_VERSION ?= $(shell $(PANDOC_VERSION_CMD))
+MARKDOWN_CMD = if [ "$(PANDOC_VERSION)" = "1" -o "$(PANDOC_VERSION)" = "0" ]; then echo markdown; else echo markdown-smart; fi
+MARKDOWN != $(MARKDOWN_CMD)
+MARKDOWN ?= $(shell $(MARKDOWN_CMD))

 install: lazy-extractors yt-dlp yt-dlp.1 completions
 	mkdir -p $(DESTDIR)$(BINDIR)
@@ -75,8 +76,12 @@ test:
 offlinetest: codetest
 	$(PYTHON) -m pytest -k "not download"

-CODE_FOLDERS != find yt_dlp -type f -name '__init__.py' -exec dirname {} \+ | grep -v '/__' | sort
-CODE_FILES != for f in $(CODE_FOLDERS) ; do echo "$$f" | sed 's,$$,/*.py,' ; done
+CODE_FOLDERS_CMD = find yt_dlp -type f -name '__init__.py' | sed 's,/__init__.py,,' | grep -v '/__' | sort
+CODE_FOLDERS != $(CODE_FOLDERS_CMD)
+CODE_FOLDERS ?= $(shell $(CODE_FOLDERS_CMD))
+CODE_FILES_CMD = for f in $(CODE_FOLDERS) ; do echo "$$f" | sed 's,$$,/*.py,' ; done
+CODE_FILES != $(CODE_FILES_CMD)
+CODE_FILES ?= $(shell $(CODE_FILES_CMD))

 yt-dlp: $(CODE_FILES)
 	mkdir -p zip
 	for d in $(CODE_FOLDERS) ; do \
@@ -129,12 +134,14 @@ completions/fish/yt-dlp.fish: $(CODE_FILES) devscripts/fish-completion.in
 	mkdir -p completions/fish
 	$(PYTHON) devscripts/fish-completion.py

-_EXTRACTOR_FILES != find yt_dlp/extractor -name '*.py' -and -not -name 'lazy_extractors.py'
+_EXTRACTOR_FILES_CMD = find yt_dlp/extractor -name '*.py' -and -not -name 'lazy_extractors.py'
+_EXTRACTOR_FILES != $(_EXTRACTOR_FILES_CMD)
+_EXTRACTOR_FILES ?= $(shell $(_EXTRACTOR_FILES_CMD))

 yt_dlp/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
 	$(PYTHON) devscripts/make_lazy_extractors.py $@

 yt-dlp.tar.gz: all
-	@tar -czf yt-dlp.tar.gz --transform "s|^|yt-dlp/|" --owner 0 --group 0 \
+	@$(GNUTAR) -czf yt-dlp.tar.gz --transform "s|^|yt-dlp/|" --owner 0 --group 0 \
 		--exclude '*.DS_Store' \
 		--exclude '*.kate-swp' \
 		--exclude '*.pyc' \
@@ -143,7 +150,6 @@ yt-dlp.tar.gz: all
 		--exclude '__pycache__' \
 		--exclude '.pytest_cache' \
 		--exclude '.git' \
-		--exclude '__pyinstaller' \
 		-- \
 		README.md supportedsites.md Changelog.md LICENSE \
 		CONTRIBUTING.md Collaborators.md CONTRIBUTORS AUTHORS \

README.md

@@ -167,8 +167,8 @@ For ease of use, a few more compat options are available:
 * `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx`
 * `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx`
 * `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
-* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress`
-* `--compat-options 2023`: Same as `--compat-options prefer-legacy-http-handler,manifest-filesize-approx`. Use this to enable all future compat options
+* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`
+* `--compat-options 2023`: Currently does nothing. Use this to enable all future compat options

 # INSTALLATION
@@ -1311,7 +1311,8 @@ The available fields are:
 - `display_id` (string): An alternative identifier for the video
 - `uploader` (string): Full name of the video uploader
 - `license` (string): License name the video is licensed under
-- `creator` (string): The creator of the video
+- `creators` (list): The creators of the video
+- `creator` (string): The creators of the video; comma-separated
 - `timestamp` (numeric): UNIX timestamp of the moment the video became available
 - `upload_date` (string): Video upload date in UTC (YYYYMMDD)
 - `release_timestamp` (numeric): UNIX timestamp of the moment the video was released
@@ -1385,11 +1386,16 @@ Available for the media that is a track or a part of a music album:
 - `track` (string): Title of the track
 - `track_number` (numeric): Number of the track within an album or a disc
 - `track_id` (string): Id of the track
-- `artist` (string): Artist(s) of the track
-- `genre` (string): Genre(s) of the track
+- `artists` (list): Artist(s) of the track
+- `artist` (string): Artist(s) of the track; comma-separated
+- `genres` (list): Genre(s) of the track
+- `genre` (string): Genre(s) of the track; comma-separated
+- `composers` (list): Composer(s) of the piece
+- `composer` (string): Composer(s) of the piece; comma-separated
 - `album` (string): Title of the album the track belongs to
 - `album_type` (string): Type of the album
-- `album_artist` (string): List of all artists appeared on the album
+- `album_artists` (list): All artists appeared on the album
+- `album_artist` (string): All artists appeared on the album; comma-separated
 - `disc_number` (numeric): Number of the disc or other physical medium the track belongs to

 Available only when using `--download-sections` and for `chapter:` prefix when using `--split-chapters` for videos with internal chapters:
@@ -1767,10 +1773,11 @@ Metadata fields | From
 `description`, `synopsis` | `description`
 `purl`, `comment` | `webpage_url`
 `track` | `track_number`
-`artist` | `artist`, `creator`, `uploader` or `uploader_id`
-`genre` | `genre`
+`artist` | `artist`, `artists`, `creator`, `creators`, `uploader` or `uploader_id`
+`composer` | `composer` or `composers`
+`genre` | `genre` or `genres`
 `album` | `album`
-`album_artist` | `album_artist`
+`album_artist` | `album_artist` or `album_artists`
 `disc` | `disc_number`
 `show` | `series`
 `season_number` | `season_number`
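
A quick, hedged illustration of how the two spellings of the new multi-value fields interact (the URL and artist values here are invented; `artists`/`artist` are the fields documented above). In an output template, the comma-separated form is a plain string while the list form supports element access:

    yt-dlp -o '%(artists.0|Unknown)s - %(title)s.%(ext)s' 'https://example.com/watch/xyz'

For a track extracted with `artists: ['Foo', 'Bar']`, `%(artist)s` expands to `Foo, Bar` while `%(artists.0)s` expands to `Foo`.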

pyproject.toml

@@ -94,7 +94,6 @@ include = [
     "/setup.cfg",
     "/supportedsites.md",
 ]
-exclude = ["/yt_dlp/__pyinstaller"]
 artifacts = [
     "/yt_dlp/extractor/lazy_extractors.py",
     "/completions",
@@ -105,7 +104,6 @@ artifacts = [

 [tool.hatch.build.targets.wheel]
 packages = ["yt_dlp"]
-exclude = ["/yt_dlp/__pyinstaller"]
 artifacts = ["/yt_dlp/extractor/lazy_extractors.py"]

 [tool.hatch.build.targets.wheel.shared-data]

test/helper.py

@@ -223,6 +223,10 @@ def sanitize_got_info_dict(got_dict):
     if test_info_dict.get('display_id') == test_info_dict.get('id'):
         test_info_dict.pop('display_id')

+    # Remove deprecated fields
+    for old in YoutubeDL._deprecated_multivalue_fields.keys():
+        test_info_dict.pop(old, None)
+
     # release_year may be generated from release_date
     if try_call(lambda: test_info_dict['release_year'] == int(test_info_dict['release_date'][:4])):
         test_info_dict.pop('release_year')

test/test_YoutubeDL.py

@@ -941,7 +941,7 @@ class TestYoutubeDL(unittest.TestCase):
         def get_videos(filter_=None):
             ydl = YDL({'match_filter': filter_, 'simulate': True})
             for v in videos:
-                ydl.process_ie_result(v, download=True)
+                ydl.process_ie_result(v.copy(), download=True)
             return [v['id'] for v in ydl.downloaded_info_dicts]

         res = get_videos()

test/test_networking.py

@@ -13,6 +13,7 @@ import http.client
 import http.cookiejar
 import http.server
 import io
+import logging
 import pathlib
 import random
 import ssl
@@ -752,6 +753,25 @@ class TestClientCertificate:
         })


+class TestRequestHandlerMisc:
+    """Misc generic tests for request handlers, not related to request or validation testing"""
+    @pytest.mark.parametrize('handler,logger_name', [
+        ('Requests', 'urllib3'),
+        ('Websockets', 'websockets.client'),
+        ('Websockets', 'websockets.server')
+    ], indirect=['handler'])
+    def test_remove_logging_handler(self, handler, logger_name):
+        # Ensure any logging handlers, which may contain a YoutubeDL instance,
+        # are removed when we close the request handler
+        # See: https://github.com/yt-dlp/yt-dlp/issues/8922
+        logging_handlers = logging.getLogger(logger_name).handlers
+        before_count = len(logging_handlers)
+        rh = handler()
+        assert len(logging_handlers) == before_count + 1
+        rh.close()
+        assert len(logging_handlers) == before_count
+
+
 class TestUrllibRequestHandler(TestRequestHandlerBase):
     @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
     def test_file_urls(self, handler):
@@ -827,6 +847,7 @@ class TestUrllibRequestHandler(TestRequestHandlerBase):
         assert not isinstance(exc_info.value, TransportError)


+@pytest.mark.parametrize('handler', ['Requests'], indirect=True)
 class TestRequestsRequestHandler(TestRequestHandlerBase):
     @pytest.mark.parametrize('raised,expected', [
         (lambda: requests.exceptions.ConnectTimeout(), TransportError),
@@ -843,7 +864,6 @@ class TestRequestsRequestHandler(TestRequestHandlerBase):
         (lambda: requests.exceptions.RequestException(), RequestError)
         # (lambda: requests.exceptions.TooManyRedirects(), HTTPError) - Needs a response object
     ])
-    @pytest.mark.parametrize('handler', ['Requests'], indirect=True)
     def test_request_error_mapping(self, handler, monkeypatch, raised, expected):
         with handler() as rh:
             def mock_get_instance(*args, **kwargs):
@@ -877,7 +897,6 @@ class TestRequestsRequestHandler(TestRequestHandlerBase):
             '3 bytes read, 5 more expected'
         ),
     ])
-    @pytest.mark.parametrize('handler', ['Requests'], indirect=True)
     def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match):
         from requests.models import Response as RequestsResponse
         from urllib3.response import HTTPResponse as Urllib3Response
@@ -896,6 +915,21 @@ class TestRequestsRequestHandler(TestRequestHandlerBase):

         assert exc_info.type is expected

+    def test_close(self, handler, monkeypatch):
+        rh = handler()
+        session = rh._get_instance(cookiejar=rh.cookiejar)
+        called = False
+        original_close = session.close
+
+        def mock_close(*args, **kwargs):
+            nonlocal called
+            called = True
+            return original_close(*args, **kwargs)
+
+        monkeypatch.setattr(session, 'close', mock_close)
+        rh.close()
+        assert called
+

 def run_validation(handler, error, req, **handler_kwargs):
     with handler(**handler_kwargs) as rh:
@@ -1205,6 +1239,19 @@ class TestRequestDirector:
         assert director.send(Request('http://')).read() == b''
         assert director.send(Request('http://', headers={'prefer': '1'})).read() == b'supported'

+    def test_close(self, monkeypatch):
+        director = RequestDirector(logger=FakeLogger())
+        director.add_handler(FakeRH(logger=FakeLogger()))
+        called = False
+
+        def mock_close(*args, **kwargs):
+            nonlocal called
+            called = True
+
+        monkeypatch.setattr(director.handlers[FakeRH.RH_KEY], 'close', mock_close)
+        director.close()
+        assert called
+

 # XXX: do we want to move this to test_YoutubeDL.py?
 class TestYoutubeDLNetworking:

yt_dlp/YoutubeDL.py

@@ -580,6 +580,13 @@ class YoutubeDL:
         'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
         'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
     }
+    _deprecated_multivalue_fields = {
+        'album_artist': 'album_artists',
+        'artist': 'artists',
+        'composer': 'composers',
+        'creator': 'creators',
+        'genre': 'genres',
+    }
     _format_selection_exts = {
         'audio': set(MEDIA_EXTENSIONS.common_audio),
         'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
@@ -683,7 +690,6 @@ class YoutubeDL:
         self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
         self._load_cookies(self.params['http_headers'].get('Cookie'))  # compat
         self.params['http_headers'].pop('Cookie', None)
-        self._request_director = self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)

         if auto_init and auto_init != 'no_verbose_header':
             self.print_debug_header()
@@ -957,6 +963,7 @@ class YoutubeDL:
     def close(self):
         self.save_cookies()
         self._request_director.close()
+        del self._request_director

     def trouble(self, message=None, tb=None, is_error=True):
         """Determine action to take when a download problem appears.
@@ -2640,6 +2647,14 @@ class YoutubeDL:
             if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

+        for old_key, new_key in self._deprecated_multivalue_fields.items():
+            if new_key in info_dict and old_key in info_dict:
+                self.deprecation_warning(f'Do not return {old_key!r} when {new_key!r} is present')
+            elif old_value := info_dict.get(old_key):
+                info_dict[new_key] = old_value.split(', ')
+            elif new_value := info_dict.get(new_key):
+                info_dict[old_key] = ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in new_value)
+
     def _raise_pending_errors(self, info):
         err = info.pop('__pending_error', None)
         if err:
@@ -3483,7 +3498,8 @@ class YoutubeDL:
                     or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
                     'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                     FFmpegFixupM3u8PP)
-                ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
+                ffmpeg_fixup(downloader == 'dashsegments'
+                             and (info_dict.get('is_live') or info_dict.get('is_dash_periods')),
                              'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)

             ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
@@ -4144,6 +4160,10 @@ class YoutubeDL:
         director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
         return director

+    @functools.cached_property
+    def _request_director(self):
+        return self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
+
     def encode(self, s):
         if isinstance(s, bytes):
             return s  # Already encoded
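
A minimal standalone sketch of what the `_deprecated_multivalue_fields` hunk above does (the helper names here are illustrative, not yt-dlp API): each legacy comma-separated field is kept in sync with its list-valued replacement, in whichever direction data is available:

    # demo of the normalization rules; not part of yt-dlp itself
    DEPRECATED = {'artist': 'artists', 'genre': 'genres'}

    def normalize(info):
        for old_key, new_key in DEPRECATED.items():
            if old_value := info.get(old_key):
                info[new_key] = old_value.split(', ')
            elif new_value := info.get(new_key):
                # embedded commas become fullwidth commas so the join stays unambiguous
                info[old_key] = ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in new_value)
        return info

    print(normalize({'artist': 'Foo, Bar'}))       # adds 'artists': ['Foo', 'Bar']
    print(normalize({'genres': ['Rock', 'Pop']}))  # adds 'genre': 'Rock, Pop'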

yt_dlp/__init__.py

@@ -14,7 +14,7 @@ import os
 import re
 import traceback

-from .compat import compat_shlex_quote
+from .compat import compat_os_name, compat_shlex_quote
 from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
 from .downloader.external import get_external_downloader
 from .extractor import list_extractor_classes
@@ -984,7 +984,28 @@ def _real_main(argv=None):
         if pre_process:
             return ydl._download_retcode

-        ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
+        args = sys.argv[1:] if argv is None else argv
+        ydl.warn_if_short_id(args)
+
+        # Show a useful error message and wait for keypress if not launched from shell on Windows
+        if not args and compat_os_name == 'nt' and getattr(sys, 'frozen', False):
+            import ctypes.wintypes
+            import msvcrt
+
+            kernel32 = ctypes.WinDLL('Kernel32')
+
+            buffer = (1 * ctypes.wintypes.DWORD)()
+            attached_processes = kernel32.GetConsoleProcessList(buffer, 1)
+            # If we only have a single process attached, then the executable was double clicked
+            # When using `pyinstaller` with `--onefile`, two processes get attached
+            is_onefile = hasattr(sys, '_MEIPASS') and os.path.basename(sys._MEIPASS).startswith('_MEI')
+            if attached_processes == 1 or is_onefile and attached_processes == 2:
+                print(parser._generate_error_message(
+                    'Do not double-click the executable, instead call it from a command line.\n'
+                    'Please read the README for further information on how to use yt-dlp: '
+                    'https://github.com/yt-dlp/yt-dlp#readme'))
+                msvcrt.getch()
+                _exit(2)
+
         parser.error(
             'You must provide at least one URL.\n'
             'Type yt-dlp --help to see a list of all options.')

yt_dlp/__pyinstaller/hook-yt_dlp.py

@@ -31,4 +31,4 @@ def get_hidden_imports():
 hiddenimports = list(get_hidden_imports())
 print(f'Adding imports: {hiddenimports}')

-excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts']
+excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts', 'bundle']

yt_dlp/extractor/_extractors.py

@@ -379,7 +379,6 @@ from .clubic import ClubicIE
 from .clyp import ClypIE
 from .cmt import CMTIE
 from .cnbc import (
-    CNBCIE,
     CNBCVideoIE,
 )
 from .cnn import (
@@ -618,6 +617,7 @@ from .filmon import (
 from .filmweb import FilmwebIE
 from .firsttv import FirstTVIE
 from .fivetv import FiveTVIE
+from .flextv import FlexTVIE
 from .flickr import FlickrIE
 from .floatplane import (
     FloatplaneIE,
@@ -2499,6 +2499,7 @@ from .zee5 import (
     Zee5SeriesIE,
 )
 from .zeenews import ZeeNewsIE
+from .zenporn import ZenPornIE
 from .zetland import ZetlandDKArticleIE
 from .zhihu import ZhihuIE
 from .zingmp3 import (

yt_dlp/extractor/altcensored.py

@@ -22,7 +22,7 @@ class AltCensoredIE(InfoExtractor):
         'title': "QUELLES SONT LES CONSÉQUENCES DE L'HYPERSEXUALISATION DE LA SOCIÉTÉ ?",
         'display_id': 'k0srjLSkga8.webm',
         'release_date': '20180403',
-        'creator': 'Virginie Vota',
+        'creators': ['Virginie Vota'],
         'release_year': 2018,
         'upload_date': '20230318',
         'uploader': 'admin@altcensored.com',
@@ -32,7 +32,7 @@ class AltCensoredIE(InfoExtractor):
         'duration': 926.09,
         'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg',
         'view_count': int,
-        'categories': ['News & Politics'],
+        'categories': ['News & Politics'],  # FIXME
         }
     }]
@@ -62,14 +62,21 @@ class AltCensoredChannelIE(InfoExtractor):
             'title': 'Virginie Vota',
             'id': 'UCFPTO55xxHqFqkzRZHu4kcw',
         },
-        'playlist_count': 91
+        'playlist_count': 85,
     }, {
         'url': 'https://altcensored.com/channel/UC9CcJ96HKMWn0LZlcxlpFTw',
         'info_dict': {
             'title': 'yukikaze775',
             'id': 'UC9CcJ96HKMWn0LZlcxlpFTw',
         },
-        'playlist_count': 4
+        'playlist_count': 4,
+    }, {
+        'url': 'https://altcensored.com/channel/UCfYbb7nga6-icsFWWgS-kWw',
+        'info_dict': {
+            'title': 'Mister Metokur',
+            'id': 'UCfYbb7nga6-icsFWWgS-kWw',
+        },
+        'playlist_count': 121,
     }]

     def _real_extract(self, url):
@@ -78,7 +85,7 @@ class AltCensoredChannelIE(InfoExtractor):
             url, channel_id, 'Download channel webpage', 'Unable to get channel webpage')
         title = self._html_search_meta('altcen_title', webpage, 'title', fatal=False)
         page_count = int_or_none(self._html_search_regex(
-            r'<a[^>]+href="/channel/\w+/page/(\d+)">(?:\1)</a>',
+            r'<a[^>]+href="/channel/[\w-]+/page/(\d+)">(?:\1)</a>',
             webpage, 'page count', default='1'))

         def page_func(page_num):

yt_dlp/extractor/archiveorg.py

@@ -300,7 +300,7 @@ class ArchiveOrgIE(InfoExtractor):
             is_logged_in = bool(self._get_cookies('https://archive.org').get('logged-in-sig'))
             if extension in KNOWN_EXTENSIONS and (not f.get('private') or is_logged_in):
                 entry['formats'].append({
-                    'url': 'https://archive.org/download/' + identifier + '/' + f['name'],
+                    'url': 'https://archive.org/download/' + identifier + '/' + urllib.parse.quote(f['name']),
                     'format': f.get('format'),
                     'width': int_or_none(f.get('width')),
                     'height': int_or_none(f.get('height')),

yt_dlp/extractor/bilibili.py

@@ -1996,7 +1996,7 @@ class BiliIntlIE(BiliIntlBaseIE):
             'title': get_element_by_class(
                 'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
             'description': get_element_by_class(
-                'bstar-meta__desc', webpage) or self._html_search_meta('og:description'),
+                'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
         }, self._search_json_ld(webpage, video_id, default={}))

     def _get_comments_reply(self, root_id, next_id=0, display_id=None):
View File

@@ -2,7 +2,7 @@ import functools

 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
+    UserNotLive,
     float_or_none,
     int_or_none,
     parse_iso8601,
@@ -40,7 +40,7 @@ class CHZZKLiveIE(InfoExtractor):
             note='Downloading channel info', errnote='Unable to download channel info')['content']

         if live_detail.get('status') == 'CLOSE':
-            raise ExtractorError('The channel is not currently live', expected=True)
+            raise UserNotLive(video_id=channel_id)

         live_playback = self._parse_json(live_detail['livePlaybackJson'], channel_id)

yt_dlp/extractor/cloudflarestream.py

@@ -4,27 +4,25 @@ from .common import InfoExtractor

 class CloudflareStreamIE(InfoExtractor):
+    _SUBDOMAIN_RE = r'(?:(?:watch|iframe|customer-\w+)\.)?'
     _DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
-    _EMBED_RE = r'embed\.%s/embed/[^/]+\.js\?.*?\bvideo=' % _DOMAIN_RE
+    _EMBED_RE = rf'embed\.{_DOMAIN_RE}/embed/[^/]+\.js\?.*?\bvideo='
     _ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
-    _VALID_URL = r'''(?x)
-                    https?://
-                        (?:
-                            (?:watch\.)?%s/|
-                            %s
-                        )
-                        (?P<id>%s)
-                    ''' % (_DOMAIN_RE, _EMBED_RE, _ID_RE)
-    _EMBED_REGEX = [fr'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1']
+    _VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})'
+    _EMBED_REGEX = [
+        rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1',
+        rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
+    ]
     _TESTS = [{
         'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
         'info_dict': {
             'id': '31c9291ab41fac05471db4e73aa11717',
             'ext': 'mp4',
             'title': '31c9291ab41fac05471db4e73aa11717',
+            'thumbnail': 'https://videodelivery.net/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg',
         },
         'params': {
-            'skip_download': True,
+            'skip_download': 'm3u8',
         },
     }, {
         'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
@@ -35,6 +33,21 @@ class CloudflareStreamIE(InfoExtractor):
     }, {
         'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e',
         'only_matching': True,
+    }, {
+        'url': 'https://customer-aw5py76sw8wyqzmh.cloudflarestream.com/2463f6d3e06fa29710a337f5f5389fd8/iframe',
+        'only_matching': True,
+    }]
+    _WEBPAGE_TESTS = [{
+        'url': 'https://upride.cc/incident/shoulder-pass-at-light/',
+        'info_dict': {
+            'id': 'eaef9dea5159cf968be84241b5cedfe7',
+            'ext': 'mp4',
+            'title': 'eaef9dea5159cf968be84241b5cedfe7',
+            'thumbnail': 'https://videodelivery.net/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg',
+        },
+        'params': {
+            'skip_download': 'm3u8',
+        },
     }]

     def _real_extract(self, url):

yt_dlp/extractor/cnbc.py

@@ -1,68 +1,97 @@
 from .common import InfoExtractor
-from ..utils import smuggle_url
-
-
-class CNBCIE(InfoExtractor):
-    _VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)'
-    _TEST = {
-        'url': 'http://video.cnbc.com/gallery/?video=3000503714',
-        'info_dict': {
-            'id': '3000503714',
-            'ext': 'mp4',
-            'title': 'Fighting zombies is big business',
-            'description': 'md5:0c100d8e1a7947bd2feec9a5550e519e',
-            'timestamp': 1459332000,
-            'upload_date': '20160330',
-            'uploader': 'NBCU-CNBC',
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-        'skip': 'Dead link',
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        return {
-            '_type': 'url_transparent',
-            'ie_key': 'ThePlatform',
-            'url': smuggle_url(
-                'http://link.theplatform.com/s/gZWlPC/media/guid/2408950221/%s?mbr=true&manifest=m3u' % video_id,
-                {'force_smil_url': True}),
-            'id': video_id,
-        }
+from ..utils import int_or_none, parse_iso8601, str_or_none, url_or_none
+from ..utils.traversal import traverse_obj


 class CNBCVideoIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?cnbc\.com(?P<path>/video/(?:[^/]+/)+(?P<id>[^./?#&]+)\.html)'
-    _TEST = {
-        'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
+    _VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/?#]+/)+(?P<id>[^./?#&]+)\.html'
+
+    _TESTS = [{
+        'url': 'https://www.cnbc.com/video/2023/12/07/mcdonalds-just-unveiled-cosmcsits-new-spinoff-brand.html',
         'info_dict': {
-            'id': '7000031301',
             'ext': 'mp4',
-            'title': "Trump: I don't necessarily agree with raising rates",
-            'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3',
-            'timestamp': 1531958400,
-            'upload_date': '20180719',
-            'uploader': 'NBCU-CNBC',
+            'id': '107344774',
+            'display_id': 'mcdonalds-just-unveiled-cosmcsits-new-spinoff-brand',
+            'modified_timestamp': 1702053483,
+            'timestamp': 1701977810,
+            'channel': 'News Videos',
+            'upload_date': '20231207',
+            'description': 'md5:882c001d85cb43d7579b514307b3e78b',
+            'release_timestamp': 1701977375,
+            'modified_date': '20231208',
+            'release_date': '20231207',
+            'duration': 65,
+            'author': 'Sean Conlon',
+            'title': 'Here\'s a first look at McDonald\'s new spinoff brand, CosMc\'s',
+            'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107344192-1701894812493-CosMcsskyHero_2336x1040_hero-desktop.jpg?v=1701894855',
         },
-        'params': {
-            'skip_download': True,
+        'expected_warnings': ['Unable to download f4m manifest'],
+    }, {
+        'url': 'https://www.cnbc.com/video/2023/12/08/jim-cramer-shares-his-take-on-seattles-tech-scene.html',
+        'info_dict': {
+            'author': 'Jim Cramer',
+            'channel': 'Mad Money with Jim Cramer',
+            'description': 'md5:72925be21b952e95eba51178dddf4e3e',
+            'duration': 299.0,
+            'ext': 'mp4',
+            'id': '107345451',
+            'display_id': 'jim-cramer-shares-his-take-on-seattles-tech-scene',
+            'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107345481-1702079431MM-B-120823.jpg?v=1702079430',
+            'timestamp': 1702080139,
+            'title': 'Jim Cramer shares his take on Seattle\'s tech scene',
+            'release_date': '20231208',
+            'upload_date': '20231209',
+            'modified_timestamp': 1702080139,
+            'modified_date': '20231209',
+            'release_timestamp': 1702073551,
         },
-        'skip': 'Dead link',
-    }
+        'expected_warnings': ['Unable to download f4m manifest'],
+    }, {
+        'url': 'https://www.cnbc.com/video/2023/12/08/the-epicenter-of-ai-is-in-seattle-says-jim-cramer.html',
+        'info_dict': {
+            'author': 'Jim Cramer',
+            'channel': 'Mad Money with Jim Cramer',
+            'description': 'md5:72925be21b952e95eba51178dddf4e3e',
+            'duration': 113.0,
+            'ext': 'mp4',
+            'id': '107345474',
+            'display_id': 'the-epicenter-of-ai-is-in-seattle-says-jim-cramer',
+            'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107345486-Screenshot_2023-12-08_at_70339_PM.png?v=1702080248',
+            'timestamp': 1702080535,
+            'title': 'The epicenter of AI is in Seattle, says Jim Cramer',
+            'release_timestamp': 1702077347,
+            'modified_timestamp': 1702080535,
+            'release_date': '20231208',
+            'upload_date': '20231209',
+            'modified_date': '20231209',
+        },
+        'expected_warnings': ['Unable to download f4m manifest'],
+    }]

     def _real_extract(self, url):
-        path, display_id = self._match_valid_url(url).groups()
-        video_id = self._download_json(
-            'https://webql-redesign.cnbcfm.com/graphql', display_id, query={
-                'query': '''{
-  page(path: "%s") {
-    vcpsId
-  }
-}''' % path,
-            })['data']['page']['vcpsId']
-        return self.url_result(
-            'http://video.cnbc.com/gallery/?video=%d' % video_id,
-            CNBCIE.ie_key())
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        data = self._search_json(r'window\.__s_data=', webpage, 'video data', display_id)
+
+        player_data = traverse_obj(data, (
+            'page', 'page', 'layout', ..., 'columns', ..., 'modules',
+            lambda _, v: v['name'] == 'clipPlayer', 'data', {dict}), get_all=False)
+
+        return {
+            'id': display_id,
+            'display_id': display_id,
+            'formats': self._extract_akamai_formats(player_data['playbackURL'], display_id),
+            **self._search_json_ld(webpage, display_id, fatal=False),
+            **traverse_obj(player_data, {
+                'id': ('id', {str_or_none}),
+                'title': ('title', {str}),
+                'description': ('description', {str}),
+                'author': ('author', ..., 'name', {str}),
+                'timestamp': ('datePublished', {parse_iso8601}),
+                'release_timestamp': ('uploadDate', {parse_iso8601}),
+                'modified_timestamp': ('dateLastPublished', {parse_iso8601}),
+                'thumbnail': ('thumbnail', {url_or_none}),
+                'duration': ('duration', {int_or_none}),
+                'channel': ('section', 'title', {str}),
+            }, get_all=False),
        }
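
The rewrite above leans on the `traverse_obj` traversal mini-language; here is a rough, hedged sketch of its semantics as used there (toy data, not CNBC's real payload): tuple elements walk nested keys, `...` branches over every item, a lambda filters items, `{func}` coerces the value, and `get_all=False` returns the first hit instead of a list:

    from yt_dlp.utils import int_or_none
    from yt_dlp.utils.traversal import traverse_obj

    data = {'layout': [{'modules': [{'name': 'clipPlayer', 'data': {'duration': '65'}}]}]}
    duration = traverse_obj(data, (
        'layout', ..., 'modules', lambda _, v: v['name'] == 'clipPlayer',
        'data', 'duration', {int_or_none}), get_all=False)
    assert duration == 65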

yt_dlp/extractor/common.py

@@ -247,6 +247,8 @@ class InfoExtractor:
                                  (For internal use only)
                     * http_chunk_size Chunk size for HTTP downloads
                     * ffmpeg_args     Extra arguments for ffmpeg downloader
+                    * is_dash_periods  Whether the format is a result of merging
+                                       multiple DASH periods.
                     RTMP formats can also have the additional fields: page_url,
                     app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn,
                     rtmp_protocol, rtmp_real_time
@@ -278,7 +280,7 @@ class InfoExtractor:
     description:    Full video description.
     uploader:       Full name of the video uploader.
     license:        License name the video is licensed under.
-    creator:        The creator of the video.
+    creators:       List of creators of the video.
     timestamp:      UNIX timestamp of the moment the video was uploaded
     upload_date:    Video upload date in UTC (YYYYMMDD).
                     If not explicitly set, calculated from timestamp
@@ -422,16 +424,16 @@ class InfoExtractor:
     track_number:   Number of the track within an album or a disc, as an integer.
     track_id:       Id of the track (useful in case of custom indexing, e.g. 6.iii),
                     as a unicode string.
-    artist:         Artist(s) of the track.
-    genre:          Genre(s) of the track.
+    artists:        List of artists of the track.
+    composers:      List of composers of the piece.
+    genres:         List of genres of the track.
     album:          Title of the album the track belongs to.
     album_type:     Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc).
-    album_artist:   List of all artists appeared on the album (e.g.
-                    "Ash Borer / Fell Voices" or "Various Artists", useful for splits
-                    and compilations).
+    album_artists:  List of all artists appeared on the album.
+                    E.g. ["Ash Borer", "Fell Voices"] or ["Various Artists"].
+                    Useful for splits and compilations.
     disc_number:    Number of the disc or other physical medium the track belongs to,
                     as an integer.
-    composer:       Composer of the piece
@@ -442,6 +444,18 @@ class InfoExtractor:
     rows:           Number of rows in each storyboard fragment, as an integer
     columns:        Number of columns in each storyboard fragment, as an integer

+    The following fields are deprecated and should not be set by new code:
+    composer:       Use "composers" instead.
+                    Composer(s) of the piece, comma-separated.
+    artist:         Use "artists" instead.
+                    Artist(s) of the track, comma-separated.
+    genre:          Use "genres" instead.
+                    Genre(s) of the track, comma-separated.
+    album_artist:   Use "album_artists" instead.
+                    All artists appeared on the album, comma-separated.
+    creator:        Use "creators" instead.
+                    The creator of the video.
+
     Unless mentioned otherwise, the fields should be Unicode strings.

     Unless mentioned otherwise, None is equivalent to absence of information.
@@ -2547,7 +2561,11 @@ class InfoExtractor:
             self._report_ignoring_subs('DASH')
         return fmts

-    def _extract_mpd_formats_and_subtitles(
+    def _extract_mpd_formats_and_subtitles(self, *args, **kwargs):
+        periods = self._extract_mpd_periods(*args, **kwargs)
+        return self._merge_mpd_periods(periods)
+
+    def _extract_mpd_periods(
             self, mpd_url, video_id, mpd_id=None, note=None, errnote=None,
             fatal=True, data=None, headers={}, query={}):
@@ -2560,17 +2578,16 @@ class InfoExtractor:
             errnote='Failed to download MPD manifest' if errnote is None else errnote,
             fatal=fatal, data=data, headers=headers, query=query)
         if res is False:
-            return [], {}
+            return []
         mpd_doc, urlh = res
         if mpd_doc is None:
-            return [], {}
+            return []

         # We could have been redirected to a new url when we retrieved our mpd file.
         mpd_url = urlh.url
         mpd_base_url = base_url(mpd_url)

-        return self._parse_mpd_formats_and_subtitles(
-            mpd_doc, mpd_id, mpd_base_url, mpd_url)
+        return self._parse_mpd_periods(mpd_doc, mpd_id, mpd_base_url, mpd_url)

     def _parse_mpd_formats(self, *args, **kwargs):
         fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs)
@@ -2578,8 +2595,39 @@ class InfoExtractor:
             self._report_ignoring_subs('DASH')
         return fmts

-    def _parse_mpd_formats_and_subtitles(
-            self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
+    def _parse_mpd_formats_and_subtitles(self, *args, **kwargs):
+        periods = self._parse_mpd_periods(*args, **kwargs)
+        return self._merge_mpd_periods(periods)
+
+    def _merge_mpd_periods(self, periods):
+        """
+        Combine all formats and subtitles from an MPD manifest into a single list,
+        by concatenating streams with similar formats.
+        """
+        formats, subtitles = {}, {}
+        for period in periods:
+            for f in period['formats']:
+                assert 'is_dash_periods' not in f, 'format already processed'
+                f['is_dash_periods'] = True
+                format_key = tuple(v for k, v in f.items() if k not in (
+                    ('format_id', 'fragments', 'manifest_stream_number')))
+                if format_key not in formats:
+                    formats[format_key] = f
+                elif 'fragments' in f:
+                    formats[format_key].setdefault('fragments', []).extend(f['fragments'])
+
+            if subtitles and period['subtitles']:
+                self.report_warning(bug_reports_message(
+                    'Found subtitles in multiple periods in the DASH manifest; '
+                    'if part of the subtitles are missing,'
+                ), only_once=True)
+
+            for sub_lang, sub_info in period['subtitles'].items():
+                subtitles.setdefault(sub_lang, []).extend(sub_info)
+
+        return list(formats.values()), subtitles
+
+    def _parse_mpd_periods(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
         """
         Parse formats from MPD manifest.
         References:
@@ -2658,9 +2706,13 @@ class InfoExtractor:
             return ms_info

         mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
-        formats, subtitles = [], {}
         stream_numbers = collections.defaultdict(int)
-        for period in mpd_doc.findall(_add_ns('Period')):
+        for period_idx, period in enumerate(mpd_doc.findall(_add_ns('Period'))):
+            period_entry = {
+                'id': period.get('id', f'period-{period_idx}'),
+                'formats': [],
+                'subtitles': collections.defaultdict(list),
+            }
             period_duration = parse_duration(period.get('duration')) or mpd_duration
             period_ms_info = extract_multisegment_info(period, {
                 'start_number': 1,
@@ -2910,11 +2962,10 @@ class InfoExtractor:
                     if content_type in ('video', 'audio', 'image/jpeg'):
                         f['manifest_stream_number'] = stream_numbers[f['url']]
                         stream_numbers[f['url']] += 1
-                        formats.append(f)
+                        period_entry['formats'].append(f)
                     elif content_type == 'text':
-                        subtitles.setdefault(lang or 'und', []).append(f)
+                        period_entry['subtitles'][lang or 'und'].append(f)

-        return formats, subtitles
+            yield period_entry

     def _extract_ism_formats(self, *args, **kwargs):
         fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)
View File

@ -9,7 +9,7 @@ from ..utils.traversal import traverse_obj
class ERRJupiterIE(InfoExtractor): class ERRJupiterIE(InfoExtractor):
_VALID_URL = r'https?://jupiter(?:pluss)?\.err\.ee/(?P<id>\d+)' _VALID_URL = r'https?://(?:jupiter(?:pluss)?|lasteekraan)\.err\.ee/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'note': 'Jupiter: Movie: siin-me-oleme', 'note': 'Jupiter: Movie: siin-me-oleme',
'url': 'https://jupiter.err.ee/1211107/siin-me-oleme', 'url': 'https://jupiter.err.ee/1211107/siin-me-oleme',
@ -145,6 +145,31 @@ class ERRJupiterIE(InfoExtractor):
'season_number': 0, 'season_number': 0,
'series': 'Лесные истории | Аисты', 'series': 'Лесные истории | Аисты',
'series_id': '1037497', 'series_id': '1037497',
}
}, {
'note': 'Lasteekraan: Pätu',
'url': 'https://lasteekraan.err.ee/1092243/patu',
'md5': 'a67eb9b9bcb3d201718c15d1638edf77',
'info_dict': {
'id': '1092243',
'ext': 'mp4',
'title': 'Pätu',
'alt_title': '',
'description': 'md5:64a7b5a80afd7042d3f8ec48c77befd9',
'release_date': '20230614',
'upload_date': '20200520',
'modified_date': '20200520',
'release_timestamp': 1686745800,
'timestamp': 1589975640,
'modified_timestamp': 1589975640,
'release_year': 1990,
'episode': 'Episode 1',
'episode_id': '1092243',
'episode_number': 1,
'season': 'Season 1',
'season_number': 1,
'series': 'Pätu',
'series_id': '1092236',
}, },
}] }]

yt_dlp/extractor/flextv.py

@@ -0,0 +1,62 @@
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+    ExtractorError,
+    UserNotLive,
+    parse_iso8601,
+    str_or_none,
+    traverse_obj,
+    url_or_none,
+)
+
+
+class FlexTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?flextv\.co\.kr/channels/(?P<id>\d+)/live'
+    _TESTS = [{
+        'url': 'https://www.flextv.co.kr/channels/231638/live',
+        'info_dict': {
+            'id': '231638',
+            'ext': 'mp4',
+            'title': r're:^214하나만\.\.\. ',
+            'thumbnail': r're:^https?://.+\.jpg',
+            'upload_date': r're:\d{8}',
+            'timestamp': int,
+            'live_status': 'is_live',
+            'channel': 'Hi별',
+            'channel_id': '244396',
+        },
+        'skip': 'The channel is offline',
+    }, {
+        'url': 'https://www.flextv.co.kr/channels/746/live',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        channel_id = self._match_id(url)
+
+        try:
+            stream_data = self._download_json(
+                f'https://api.flextv.co.kr/api/channels/{channel_id}/stream',
+                channel_id, query={'option': 'all'})
+        except ExtractorError as e:
+            if isinstance(e.cause, HTTPError) and e.cause.status == 400:
+                raise UserNotLive(video_id=channel_id)
+            raise
+
+        playlist_url = stream_data['sources'][0]['url']
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+            playlist_url, channel_id, 'mp4')
+
+        return {
+            'id': channel_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            'is_live': True,
+            **traverse_obj(stream_data, {
+                'title': ('stream', 'title', {str}),
+                'timestamp': ('stream', 'createdAt', {parse_iso8601}),
+                'thumbnail': ('thumbUrl', {url_or_none}),
+                'channel': ('owner', 'name', {str}),
+                'channel_id': ('owner', 'id', {str_or_none}),
+            }),
+        }

yt_dlp/extractor/goplay.py

@@ -40,6 +40,22 @@ class GoPlayIE(InfoExtractor):
             'title': 'A Family for the Holidays',
         },
         'skip': 'This video is only available for registered users'
+    }, {
+        'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay',
+        'info_dict': {
+            'id': '03eb8f2f-153e-41cb-9805-0d3a29dab656',
+            'ext': 'mp4',
+            'title': 'S11 - Aflevering 1',
+            'episode': 'Episode 1',
+            'series': 'De Mol',
+            'season_number': 11,
+            'episode_number': 1,
+            'season': 'Season 11'
+        },
+        'params': {
+            'skip_download': True
+        },
+        'skip': 'This video is only available for registered users'
     }]

     _id_token = None
@@ -77,16 +93,39 @@ class GoPlayIE(InfoExtractor):

         api = self._download_json(
             f'https://api.goplay.be/web/v1/videos/long-form/{video_id}',
-            video_id, headers={'Authorization': 'Bearer %s' % self._id_token})
+            video_id, headers={
+                'Authorization': 'Bearer %s' % self._id_token,
+                **self.geo_verification_headers(),
+            })

-        formats, subs = self._extract_m3u8_formats_and_subtitles(
-            api['manifestUrls']['hls'], video_id, ext='mp4', m3u8_id='HLS')
+        if 'manifestUrls' in api:
+            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+                api['manifestUrls']['hls'], video_id, ext='mp4', m3u8_id='HLS')
+
+        else:
+            if 'ssai' not in api:
+                raise ExtractorError('expecting Google SSAI stream')
+
+            ssai_content_source_id = api['ssai']['contentSourceID']
+            ssai_video_id = api['ssai']['videoID']
+
+            dai = self._download_json(
+                f'https://dai.google.com/ondemand/dash/content/{ssai_content_source_id}/vid/{ssai_video_id}/streams',
+                video_id, data=b'{"api-key":"null"}',
+                headers={'content-type': 'application/json'})
+
+            periods = self._extract_mpd_periods(dai['stream_manifest'], video_id)
+
+            # skip pre-roll and mid-roll ads
+            periods = [p for p in periods if '-ad-' not in p['id']]
+
+            formats, subtitles = self._merge_mpd_periods(periods)

         info_dict.update({
             'id': video_id,
             'formats': formats,
+            'subtitles': subtitles,
         })

         return info_dict

yt_dlp/extractor/nebula.py

@ -1,6 +1,7 @@
import itertools import itertools
import json import json
from .art19 import Art19IE
from .common import InfoExtractor from .common import InfoExtractor
from ..networking.exceptions import HTTPError from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
@ -112,7 +113,8 @@ class NebulaBaseIE(InfoExtractor):
class NebulaIE(NebulaBaseIE): class NebulaIE(NebulaBaseIE):
_VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[-\w]+)' IE_NAME = 'nebula:video'
_VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[\w-]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast', 'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
'info_dict': { 'info_dict': {
@ -236,8 +238,8 @@ class NebulaIE(NebulaBaseIE):
class NebulaClassIE(NebulaBaseIE): class NebulaClassIE(NebulaBaseIE):
IE_NAME = 'nebula:class' IE_NAME = 'nebula:media'
_VALID_URL = rf'{_BASE_URL_RE}/(?P<id>[-\w]+)/(?P<ep>\d+)' _VALID_URL = rf'{_BASE_URL_RE}/(?!(?:myshows|library|videos)/)(?P<id>[\w-]+)/(?P<ep>[\w-]+)/?(?:$|[?#])'
_TESTS = [{ _TESTS = [{
'url': 'https://nebula.tv/copyright-for-fun-and-profit/14', 'url': 'https://nebula.tv/copyright-for-fun-and-profit/14',
'info_dict': { 'info_dict': {
@ -253,6 +255,46 @@ class NebulaClassIE(NebulaBaseIE):
'title': 'Photos, Sculpture, and Video', 'title': 'Photos, Sculpture, and Video',
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://nebula.tv/extremitiespodcast/pyramiden-the-high-arctic-soviet-ghost-town',
'info_dict': {
'ext': 'mp3',
'id': '018f65f0-0033-4021-8f87-2d132beb19aa',
'description': 'md5:05d2b23ab780c955e2511a2b9127acff',
'series_id': '335e8159-d663-491a-888f-1732285706ac',
'modified_timestamp': 1599091504,
'episode_id': '018f65f0-0033-4021-8f87-2d132beb19aa',
'series': 'Extremities',
'modified_date': '20200903',
'upload_date': '20200902',
'title': 'Pyramiden: The High-Arctic Soviet Ghost Town',
'release_timestamp': 1571237958,
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'duration': 1546.05714,
'timestamp': 1599085608,
'release_date': '20191016',
},
}, {
'url': 'https://nebula.tv/thelayover/the-layover-episode-1',
'info_dict': {
'ext': 'mp3',
'id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
'episode_number': 1,
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'release_date': '20230304',
'modified_date': '20230403',
'series': 'The Layover',
'episode_id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
'modified_timestamp': 1680554566,
'duration': 3130.46401,
'release_timestamp': 1677943800,
'title': 'The Layover — Episode 1',
'series_id': '874303a5-4900-4626-a4b6-2aacac34466a',
'upload_date': '20230303',
'episode': 'Episode 1',
'timestamp': 1677883672,
'description': 'md5:002cca89258e3bc7c268d5b8c24ba482',
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -268,16 +310,38 @@ class NebulaClassIE(NebulaBaseIE):
metadata = self._call_api( metadata = self._call_api(
f'https://content.api.nebula.app/content/{slug}/{episode}/?include=lessons', f'https://content.api.nebula.app/content/{slug}/{episode}/?include=lessons',
slug, note='Fetching video metadata') slug, note='Fetching class/podcast metadata')
return { content_type = metadata.get('type')
**self._extract_video_metadata(metadata), if content_type == 'lesson':
**self._extract_formats(metadata['id'], slug), return {
} **self._extract_video_metadata(metadata),
**self._extract_formats(metadata['id'], slug),
}
elif content_type == 'podcast_episode':
episode_url = metadata['episode_url']
if not episode_url and metadata.get('premium'):
self.raise_login_required()
if Art19IE.suitable(episode_url):
return self.url_result(episode_url, Art19IE)
return traverse_obj(metadata, {
'id': ('id', {str}),
'url': ('episode_url', {url_or_none}),
'title': ('title', {str}),
'description': ('description', {str}),
'timestamp': ('published_at', {parse_iso8601}),
'duration': ('duration', {int_or_none}),
'channel_id': ('channel_id', {str}),
'channel': ('channel_title', {str}),
'thumbnail': ('assets', 'regular', {url_or_none}),
})
raise ExtractorError(f'Unexpected content type {content_type!r}')
class NebulaSubscriptionsIE(NebulaBaseIE): class NebulaSubscriptionsIE(NebulaBaseIE):
IE_NAME = 'nebula:subscriptions' IE_NAME = 'nebula:subscriptions'
_VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)' _VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)/?(?:$|[?#])'
_TESTS = [{ _TESTS = [{
'url': 'https://nebula.tv/myshows', 'url': 'https://nebula.tv/myshows',
'playlist_mincount': 1, 'playlist_mincount': 1,
@ -310,7 +374,7 @@ class NebulaSubscriptionsIE(NebulaBaseIE):
class NebulaChannelIE(NebulaBaseIE): class NebulaChannelIE(NebulaBaseIE):
IE_NAME = 'nebula:channel' IE_NAME = 'nebula:channel'
_VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|library|videos/)(?P<id>[-\w]+)/?(?:$|[?#])' _VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|library|videos)(?P<id>[\w-]+)/?(?:$|[?#])'
_TESTS = [{ _TESTS = [{
'url': 'https://nebula.tv/tom-scott-presents-money', 'url': 'https://nebula.tv/tom-scott-presents-money',
'info_dict': { 'info_dict': {
@ -343,6 +407,14 @@ class NebulaChannelIE(NebulaBaseIE):
'description': 'md5:6690248223eed044a9f11cd5a24f9742', 'description': 'md5:6690248223eed044a9f11cd5a24f9742',
}, },
'playlist_count': 23, 'playlist_count': 23,
}, {
'url': 'https://nebula.tv/trussissuespodcast',
'info_dict': {
'id': 'trussissuespodcast',
'title': 'The TLDR News Podcast',
'description': 'md5:a08c4483bc0b705881d3e0199e721385',
},
'playlist_mincount': 80,
}] }]
def _generate_playlist_entries(self, collection_id, collection_slug): def _generate_playlist_entries(self, collection_id, collection_slug):
@ -365,6 +437,17 @@ class NebulaChannelIE(NebulaBaseIE):
lesson.get('share_url') or f'https://nebula.tv/{metadata["class_slug"]}/{metadata["slug"]}', lesson.get('share_url') or f'https://nebula.tv/{metadata["class_slug"]}/{metadata["slug"]}',
{'id': lesson['id']}), NebulaClassIE, url_transparent=True, **metadata) {'id': lesson['id']}), NebulaClassIE, url_transparent=True, **metadata)
def _generate_podcast_entries(self, collection_id, collection_slug):
next_url = f'https://content.api.nebula.app/podcast_channels/{collection_id}/podcast_episodes/?ordering=-published_at&premium=true'
for page_num in itertools.count(1):
episodes = self._call_api(next_url, collection_slug, note=f'Retrieving podcast page {page_num}')
for episode in traverse_obj(episodes, ('results', lambda _, v: url_or_none(v['share_url']))):
yield self.url_result(episode['share_url'], NebulaClassIE)
next_url = episodes.get('next')
if not next_url:
break
def _real_extract(self, url): def _real_extract(self, url):
collection_slug = self._match_id(url) collection_slug = self._match_id(url)
channel = self._call_api( channel = self._call_api(
@ -373,6 +456,8 @@ class NebulaChannelIE(NebulaBaseIE):
if channel.get('type') == 'class': if channel.get('type') == 'class':
entries = self._generate_class_entries(channel) entries = self._generate_class_entries(channel)
elif channel.get('type') == 'podcast_channel':
entries = self._generate_podcast_entries(channel['id'], collection_slug)
else: else:
entries = self._generate_playlist_entries(channel['id'], collection_slug) entries = self._generate_playlist_entries(channel['id'], collection_slug)
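The new _generate_podcast_entries pages through Nebula's podcast API by following each response's `next` URL until the server stops returning one. A minimal standalone sketch of that cursor-following loop, assuming only what the hunk shows (a JSON page with `results` and `next` keys); `fetch` is a hypothetical stand-in for the extractor's JSON download helper:

    import itertools

    def paginate(fetch, first_url):
        # Follow the API's "next" cursor, yielding every item on every page;
        # page_num exists only for progress notes, as in the diff.
        next_url = first_url
        for page_num in itertools.count(1):
            page = fetch(next_url)
            yield from page.get('results') or []
            next_url = page.get('next')
            if not next_url:
                break

    pages = iter([{'results': [1, 2], 'next': 'page2'}, {'results': [3], 'next': None}])
    assert list(paginate(lambda _url: next(pages), 'page1')) == [1, 2, 3]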

View File: yt_dlp/extractor/nerdcubed.py

@ -1,33 +1,38 @@
import datetime
from .common import InfoExtractor from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import parse_iso8601, url_or_none
from ..utils.traversal import traverse_obj
class NerdCubedFeedIE(InfoExtractor): class NerdCubedFeedIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/feed\.json' _VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/?(?:$|[#?])'
_TEST = { _TEST = {
'url': 'http://www.nerdcubed.co.uk/feed.json', 'url': 'http://www.nerdcubed.co.uk/',
'info_dict': { 'info_dict': {
'id': 'nerdcubed-feed', 'id': 'nerdcubed-feed',
'title': 'nerdcubed.co.uk feed', 'title': 'nerdcubed.co.uk feed',
}, },
'playlist_mincount': 1300, 'playlist_mincount': 5500,
} }
def _extract_video(self, feed_entry):
return self.url_result(
f'https://www.youtube.com/watch?v={feed_entry["id"]}', YoutubeIE,
**traverse_obj(feed_entry, {
'id': ('id', {str}),
'title': ('title', {str}),
'description': ('description', {str}),
'timestamp': ('publishedAt', {parse_iso8601}),
'channel': ('source', 'name', {str}),
'channel_id': ('source', 'id', {str}),
'channel_url': ('source', 'url', {str}),
'thumbnail': ('thumbnail', 'source', {url_or_none}),
}), url_transparent=True)
def _real_extract(self, url): def _real_extract(self, url):
feed = self._download_json(url, url, 'Downloading NerdCubed JSON feed') video_id = 'nerdcubed-feed'
feed = self._download_json('https://www.nerdcubed.co.uk/_/cdn/videos.json', video_id)
entries = [{ return self.playlist_result(
'_type': 'url', map(self._extract_video, traverse_obj(feed, ('videos', lambda _, v: v['id']))),
'title': feed_entry['title'], video_id, 'nerdcubed.co.uk feed')
'uploader': feed_entry['source']['name'] if feed_entry['source'] else None,
'upload_date': datetime.datetime.strptime(feed_entry['date'], '%Y-%m-%d').strftime('%Y%m%d'),
'url': 'http://www.youtube.com/watch?v=' + feed_entry['youtube_id'],
} for feed_entry in feed]
return {
'_type': 'playlist',
'title': 'nerdcubed.co.uk feed',
'id': 'nerdcubed-feed',
'entries': entries,
}
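The rewrite above leans on traverse_obj to map feed fields onto the url_result without crashing on missing keys. A rough stdlib approximation of that safe nested lookup, to illustrate the behaviour it relies on (yt-dlp's real helper is considerably more general):

    def pick(obj, *path, default=None):
        # Walk nested dicts/lists; return `default` instead of raising
        # when any step along the path is missing.
        for key in path:
            try:
                obj = obj[key]
            except (KeyError, IndexError, TypeError):
                return default
        return obj

    entry = {'source': {'name': 'nerdcubed'}}
    assert pick(entry, 'source', 'name') == 'nerdcubed'
    assert pick(entry, 'thumbnail', 'source') is None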

View File: yt_dlp/extractor/nhk.py

@ -9,6 +9,7 @@ from ..utils import (
join_nonempty, join_nonempty,
parse_duration, parse_duration,
traverse_obj, traverse_obj,
try_call,
unescapeHTML, unescapeHTML,
unified_timestamp, unified_timestamp,
url_or_none, url_or_none,
@ -473,22 +474,21 @@ class NhkRadiruIE(InfoExtractor):
IE_DESC = 'NHK らじる (Radiru/Rajiru)' IE_DESC = 'NHK らじる (Radiru/Rajiru)'
_VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?' _VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?'
_TESTS = [{ _TESTS = [{
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_3853544', 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_3926210',
'skip': 'Episode expired on 2023-04-16', 'skip': 'Episode expired on 2024-02-24',
'info_dict': { 'info_dict': {
'channel': 'NHK-FM', 'title': 'ジャズ・トゥナイト シリーズJAZZジャイアンツ 56 ジョニー・ホッジス',
'uploader': 'NHK-FM', 'id': '0449_01_3926210',
'description': 'md5:94b08bdeadde81a97df4ec882acce3e9',
'ext': 'm4a', 'ext': 'm4a',
'id': '0449_01_3853544',
'series': 'ジャズ・トゥナイト', 'series': 'ジャズ・トゥナイト',
'uploader': 'NHK-FM',
'channel': 'NHK-FM',
'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg', 'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg',
'timestamp': 1680969600, 'release_date': '20240217',
'title': 'ジャズ・トゥナイト NEWジャズ特集', 'description': 'md5:a456ee8e5e59e6dd2a7d32e62386e811',
'upload_date': '20230408', 'timestamp': 1708185600,
'release_timestamp': 1680962400, 'release_timestamp': 1708178400,
'release_date': '20230408', 'upload_date': '20240217',
'was_live': True,
}, },
}, { }, {
# playlist, airs every weekday so it should _hopefully_ be okay forever # playlist, airs every weekday so it should _hopefully_ be okay forever
@ -519,7 +519,8 @@ class NhkRadiruIE(InfoExtractor):
'series': 'らじる文庫 by ラジオ深夜便 ', 'series': 'らじる文庫 by ラジオ深夜便 ',
'release_timestamp': 1481126700, 'release_timestamp': 1481126700,
'upload_date': '20211101', 'upload_date': '20211101',
} },
'expected_warnings': ['Unable to download JSON metadata', 'Failed to get extended description'],
}, { }, {
# news # news
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_3855109', 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_3855109',
@ -539,9 +540,28 @@ class NhkRadiruIE(InfoExtractor):
}, },
}] }]
_API_URL_TMPL = None
def _extract_extended_description(self, episode_id, episode):
service, _, area = traverse_obj(episode, ('aa_vinfo2', {str}, {lambda x: (x or '').partition(',')}))
aa_vinfo3 = traverse_obj(episode, ('aa_vinfo3', {str}))
detail_url = try_call(
lambda: self._API_URL_TMPL.format(service=service, area=area, dateid=aa_vinfo3))
if not detail_url:
return
full_meta = traverse_obj(
self._download_json(detail_url, episode_id, 'Downloading extended metadata', fatal=False),
('list', service, 0, {dict})) or {}
return join_nonempty('subtitle', 'content', 'act', 'music', delim='\n\n', from_dict=full_meta)
def _extract_episode_info(self, headline, programme_id, series_meta): def _extract_episode_info(self, headline, programme_id, series_meta):
episode_id = f'{programme_id}_{headline["headline_id"]}' episode_id = f'{programme_id}_{headline["headline_id"]}'
episode = traverse_obj(headline, ('file_list', 0, {dict})) episode = traverse_obj(headline, ('file_list', 0, {dict}))
description = self._extract_extended_description(episode_id, episode)
if not description:
self.report_warning('Failed to get extended description, falling back to summary')
description = traverse_obj(episode, ('file_title_sub', {str}))
return { return {
**series_meta, **series_meta,
@ -551,14 +571,21 @@ class NhkRadiruIE(InfoExtractor):
'was_live': True, 'was_live': True,
'series': series_meta.get('title'), 'series': series_meta.get('title'),
'thumbnail': url_or_none(headline.get('headline_image')) or series_meta.get('thumbnail'), 'thumbnail': url_or_none(headline.get('headline_image')) or series_meta.get('thumbnail'),
'description': description,
**traverse_obj(episode, { **traverse_obj(episode, {
'title': 'file_title', 'title': 'file_title',
'description': 'file_title_sub',
'timestamp': ('open_time', {unified_timestamp}), 'timestamp': ('open_time', {unified_timestamp}),
'release_timestamp': ('aa_vinfo4', {lambda x: x.split('_')[0]}, {unified_timestamp}), 'release_timestamp': ('aa_vinfo4', {lambda x: x.split('_')[0]}, {unified_timestamp}),
}), }),
} }
def _real_initialize(self):
if self._API_URL_TMPL:
return
api_config = self._download_xml(
'https://www.nhk.or.jp/radio/config/config_web.xml', None, 'Downloading API config', fatal=False)
NhkRadiruIE._API_URL_TMPL = try_call(lambda: f'https:{api_config.find(".//url_program_detail").text}')
def _real_extract(self, url): def _real_extract(self, url):
site_id, corner_id, headline_id = self._match_valid_url(url).group('site', 'corner', 'headline') site_id, corner_id, headline_id = self._match_valid_url(url).group('site', 'corner', 'headline')
programme_id = f'{site_id}_{corner_id}' programme_id = f'{site_id}_{corner_id}'
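_real_initialize above stores the detail-URL template on the class rather than the instance, so a playlist's worth of extractions downloads config_web.xml at most once. A sketch of that cache-on-the-class pattern with a placeholder template string (the real template comes from the XML's url_program_detail node):

    class ConfigCachingClient:
        _API_URL_TMPL = None  # class attribute, shared by every instance

        def _real_initialize(self):
            if ConfigCachingClient._API_URL_TMPL:
                return  # an earlier extraction already fetched the config
            # Stand-in for downloading and parsing config_web.xml:
            ConfigCachingClient._API_URL_TMPL = 'https://example.invalid/{service}/{area}/{dateid}.json'

        def detail_url(self, service, area, dateid):
            self._real_initialize()
            return self._API_URL_TMPL.format(service=service, area=area, dateid=dateid)

    client = ConfigCachingClient()
    assert client.detail_url('r1', '130', '20240217').endswith('/r1/130/20240217.json')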

View File: yt_dlp/extractor/niconico.py

@ -13,13 +13,11 @@ from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
OnDemandPagedList, OnDemandPagedList,
bug_reports_message,
clean_html, clean_html,
float_or_none, float_or_none,
int_or_none, int_or_none,
join_nonempty, join_nonempty,
parse_duration, parse_duration,
parse_filesize,
parse_iso8601, parse_iso8601,
parse_resolution, parse_resolution,
qualities, qualities,
@ -55,25 +53,31 @@ class NiconicoIE(InfoExtractor):
'duration': 33, 'duration': 33,
'view_count': int, 'view_count': int,
'comment_count': int, 'comment_count': int,
'genres': ['未設定'],
'tags': [],
'expected_protocol': str,
}, },
'skip': 'Requires an account',
}, { }, {
# File downloaded with and without credentials are different, so omit # File downloaded with and without credentials are different, so omit
# the md5 field # the md5 field
'url': 'http://www.nicovideo.jp/watch/nm14296458', 'url': 'http://www.nicovideo.jp/watch/nm14296458',
'info_dict': { 'info_dict': {
'id': 'nm14296458', 'id': 'nm14296458',
'ext': 'swf', 'ext': 'mp4',
'title': '【鏡音リン】Dance on media【オリジナル】take2!', 'title': '【Kagamine Rin】Dance on media【Original】take2!',
'description': 'md5:689f066d74610b3b22e0f1739add0f58', 'description': 'md5:9368f2b1f4178de64f2602c2f3d6cbf5',
'thumbnail': r're:https?://.*', 'thumbnail': r're:https?://.*',
'uploader': 'りょうた', 'uploader': 'りょうた',
'uploader_id': '18822557', 'uploader_id': '18822557',
'upload_date': '20110429', 'upload_date': '20110429',
'timestamp': 1304065916, 'timestamp': 1304065916,
'duration': 209, 'duration': 208.0,
'comment_count': int,
'view_count': int,
'genres': ['音楽・サウンド'],
'tags': ['Translation_Request', 'Kagamine_Rin', 'Rin_Original'],
'expected_protocol': str,
}, },
'skip': 'Requires an account',
}, { }, {
# video exists but is marked as "deleted"
# md5 is unstable # md5 is unstable
@ -107,22 +111,24 @@ class NiconicoIE(InfoExtractor):
}, { }, {
# video not available via `getflv`; "old" HTML5 video # video not available via `getflv`; "old" HTML5 video
'url': 'http://www.nicovideo.jp/watch/sm1151009', 'url': 'http://www.nicovideo.jp/watch/sm1151009',
'md5': '8fa81c364eb619d4085354eab075598a', 'md5': 'f95a3d259172667b293530cc2e41ebda',
'info_dict': { 'info_dict': {
'id': 'sm1151009', 'id': 'sm1151009',
'ext': 'mp4', 'ext': 'mp4',
'title': 'マスターシステム本体内蔵のスペハリのメインテーマ(PSG版)', 'title': 'マスターシステム本体内蔵のスペハリのメインテーマ(PSG版)',
'description': 'md5:6ee077e0581ff5019773e2e714cdd0b7', 'description': 'md5:f95a3d259172667b293530cc2e41ebda',
'thumbnail': r're:https?://.*', 'thumbnail': r're:https?://.*',
'duration': 184, 'duration': 184,
'timestamp': 1190868283, 'timestamp': 1190835883,
'upload_date': '20070927', 'upload_date': '20070926',
'uploader': 'denden2', 'uploader': 'denden2',
'uploader_id': '1392194', 'uploader_id': '1392194',
'view_count': int, 'view_count': int,
'comment_count': int, 'comment_count': int,
'genres': ['ゲーム'],
'tags': [],
'expected_protocol': str,
}, },
'skip': 'Requires an account',
}, { }, {
# "New" HTML5 video # "New" HTML5 video
# md5 is unstable # md5 is unstable
@ -132,16 +138,18 @@ class NiconicoIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': '新作TVアニメ「戦姫絶唱シンフォギアAXZ」PV 最高画質', 'title': '新作TVアニメ「戦姫絶唱シンフォギアAXZ」PV 最高画質',
'description': 'md5:e52974af9a96e739196b2c1ca72b5feb', 'description': 'md5:e52974af9a96e739196b2c1ca72b5feb',
'timestamp': 1498514060, 'timestamp': 1498481660,
'upload_date': '20170626', 'upload_date': '20170626',
'uploader': 'ゲスト', 'uploader': 'no-namamae',
'uploader_id': '40826363', 'uploader_id': '40826363',
'thumbnail': r're:https?://.*', 'thumbnail': r're:https?://.*',
'duration': 198, 'duration': 198,
'view_count': int, 'view_count': int,
'comment_count': int, 'comment_count': int,
'genres': ['アニメ'],
'tags': [],
'expected_protocol': str,
}, },
'skip': 'Requires an account',
}, { }, {
# Video without owner # Video without owner
'url': 'http://www.nicovideo.jp/watch/sm18238488', 'url': 'http://www.nicovideo.jp/watch/sm18238488',
@ -151,7 +159,7 @@ class NiconicoIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': '【実写版】ミュータントタートルズ', 'title': '【実写版】ミュータントタートルズ',
'description': 'md5:15df8988e47a86f9e978af2064bf6d8e', 'description': 'md5:15df8988e47a86f9e978af2064bf6d8e',
'timestamp': 1341160408, 'timestamp': 1341128008,
'upload_date': '20120701', 'upload_date': '20120701',
'uploader': None, 'uploader': None,
'uploader_id': None, 'uploader_id': None,
@ -159,8 +167,10 @@ class NiconicoIE(InfoExtractor):
'duration': 5271, 'duration': 5271,
'view_count': int, 'view_count': int,
'comment_count': int, 'comment_count': int,
'genres': ['エンターテイメント'],
'tags': [],
'expected_protocol': str,
}, },
'skip': 'Requires an account',
}, { }, {
'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg', 'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
'only_matching': True, 'only_matching': True,
@ -172,9 +182,6 @@ class NiconicoIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch|nico\.ms)/(?P<id>(?:[a-z]{2})?[0-9]+)' _VALID_URL = r'https?://(?:(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch|nico\.ms)/(?P<id>(?:[a-z]{2})?[0-9]+)'
_NETRC_MACHINE = 'niconico' _NETRC_MACHINE = 'niconico'
_COMMENT_API_ENDPOINTS = (
'https://nvcomment.nicovideo.jp/legacy/api.json',
'https://nmsg.nicovideo.jp/api.json',)
_API_HEADERS = { _API_HEADERS = {
'X-Frontend-ID': '6', 'X-Frontend-ID': '6',
'X-Frontend-Version': '0', 'X-Frontend-Version': '0',
@ -356,15 +363,10 @@ class NiconicoIE(InfoExtractor):
if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'): if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
return None return None
def extract_video_quality(video_quality):
return parse_filesize('%sB' % self._search_regex(
r'\| ([0-9]*\.?[0-9]*[MK])', video_quality, 'vbr', default=''))
format_id = '-'.join( format_id = '-'.join(
[remove_start(s['id'], 'archive_') for s in (video_quality, audio_quality)] + [dmc_protocol]) [remove_start(s['id'], 'archive_') for s in (video_quality, audio_quality)] + [dmc_protocol])
vid_qual_label = traverse_obj(video_quality, ('metadata', 'label')) vid_qual_label = traverse_obj(video_quality, ('metadata', 'label'))
vid_quality = traverse_obj(video_quality, ('metadata', 'bitrate'))
return { return {
'url': 'niconico_dmc:%s/%s/%s' % (video_id, video_quality['id'], audio_quality['id']), 'url': 'niconico_dmc:%s/%s/%s' % (video_id, video_quality['id'], audio_quality['id']),
@ -373,10 +375,15 @@ class NiconicoIE(InfoExtractor):
'ext': 'mp4', # Session API are used in HTML5, which always serves mp4 'ext': 'mp4', # Session API are used in HTML5, which always serves mp4
'acodec': 'aac', 'acodec': 'aac',
'vcodec': 'h264', 'vcodec': 'h264',
'abr': float_or_none(traverse_obj(audio_quality, ('metadata', 'bitrate')), 1000), **traverse_obj(audio_quality, ('metadata', {
'vbr': float_or_none(vid_quality if vid_quality > 0 else extract_video_quality(vid_qual_label), 1000), 'abr': ('bitrate', {functools.partial(float_or_none, scale=1000)}),
'height': traverse_obj(video_quality, ('metadata', 'resolution', 'height')), 'asr': ('samplingRate', {int_or_none}),
'width': traverse_obj(video_quality, ('metadata', 'resolution', 'width')), })),
**traverse_obj(video_quality, ('metadata', {
'vbr': ('bitrate', {functools.partial(float_or_none, scale=1000)}),
'height': ('resolution', 'height', {int_or_none}),
'width': ('resolution', 'width', {int_or_none}),
})),
'quality': -2 if 'low' in video_quality['id'] else None, 'quality': -2 if 'low' in video_quality['id'] else None,
'protocol': 'niconico_dmc', 'protocol': 'niconico_dmc',
'expected_protocol': dmc_protocol, # XXX: This is not a documented field 'expected_protocol': dmc_protocol, # XXX: This is not a documented field
@ -386,6 +393,63 @@ class NiconicoIE(InfoExtractor):
} }
} }
def _yield_dmc_formats(self, api_data, video_id):
dmc_data = traverse_obj(api_data, ('media', 'delivery', 'movie'))
audios = traverse_obj(dmc_data, ('audios', ..., {dict}))
videos = traverse_obj(dmc_data, ('videos', ..., {dict}))
protocols = traverse_obj(dmc_data, ('session', 'protocols', ..., {str}))
if not all((audios, videos, protocols)):
return
for audio_quality, video_quality, protocol in itertools.product(audios, videos, protocols):
if fmt := self._extract_format_for_quality(video_id, audio_quality, video_quality, protocol):
yield fmt
def _yield_dms_formats(self, api_data, video_id):
fmt_filter = lambda _, v: v['isAvailable'] and v['id']
videos = traverse_obj(api_data, ('media', 'domand', 'videos', fmt_filter))
audios = traverse_obj(api_data, ('media', 'domand', 'audios', fmt_filter))
access_key = traverse_obj(api_data, ('media', 'domand', 'accessRightKey', {str}))
track_id = traverse_obj(api_data, ('client', 'watchTrackId', {str}))
if not all((videos, audios, access_key, track_id)):
return
dms_m3u8_url = self._download_json(
f'https://nvapi.nicovideo.jp/v1/watch/{video_id}/access-rights/hls', video_id,
data=json.dumps({
'outputs': list(itertools.product((v['id'] for v in videos), (a['id'] for a in audios)))
}).encode(), query={'actionTrackId': track_id}, headers={
'x-access-right-key': access_key,
'x-frontend-id': 6,
'x-frontend-version': 0,
'x-request-with': 'https://www.nicovideo.jp',
})['data']['contentUrl']
# Getting all audio formats results in duplicate video formats which we filter out later
dms_fmts = self._extract_m3u8_formats(dms_m3u8_url, video_id)
# m3u8 extraction does not provide audio bitrates, so extract from the API data and fix
for audio_fmt in traverse_obj(dms_fmts, lambda _, v: v['vcodec'] == 'none'):
yield {
**audio_fmt,
**traverse_obj(audios, (lambda _, v: audio_fmt['format_id'].startswith(v['id']), {
'format_id': ('id', {str}),
'abr': ('bitRate', {functools.partial(float_or_none, scale=1000)}),
'asr': ('samplingRate', {int_or_none}),
}), get_all=False),
'acodec': 'aac',
'ext': 'm4a',
}
# Sort before removing dupes to keep the format dicts with the lowest tbr
video_fmts = sorted((fmt for fmt in dms_fmts if fmt['vcodec'] != 'none'), key=lambda f: f['tbr'])
self._remove_duplicate_formats(video_fmts)
# Calculate the true vbr/tbr by subtracting the lowest abr
min_abr = min(traverse_obj(audios, (..., 'bitRate', {float_or_none})), default=0) / 1000
for video_fmt in video_fmts:
video_fmt['tbr'] -= min_abr
video_fmt['format_id'] = f'video-{video_fmt["tbr"]:.0f}'
yield video_fmt
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
@ -412,19 +476,17 @@ class NiconicoIE(InfoExtractor):
webpage, 'error reason', default=None) webpage, 'error reason', default=None)
if not error_msg: if not error_msg:
raise raise
raise ExtractorError(re.sub(r'\s+', ' ', error_msg), expected=True) raise ExtractorError(clean_html(error_msg), expected=True)
formats = [] club_joined = traverse_obj(api_data, ('channel', 'viewer', 'follow', 'isFollowed', {bool}))
if club_joined is None:
def get_video_info(*items, get_first=True, **kwargs): fail_msg = self._html_search_regex(
return traverse_obj(api_data, ('video', *items), get_all=not get_first, **kwargs) r'<p[^>]+\bclass="fail-message"[^>]*>(?P<msg>.+?)</p>',
webpage, 'fail message', default=None, group='msg')
quality_info = api_data['media']['delivery']['movie'] if fail_msg:
session_api_data = quality_info['session'] self.raise_login_required(clean_html(fail_msg), metadata_available=True)
for (audio_quality, video_quality, protocol) in itertools.product(quality_info['audios'], quality_info['videos'], session_api_data['protocols']): elif not club_joined:
fmt = self._extract_format_for_quality(video_id, audio_quality, video_quality, protocol) self.raise_login_required('This video is for members only', metadata_available=True)
if fmt:
formats.append(fmt)
# Start extracting information # Start extracting information
tags = None tags = None
@ -443,11 +505,15 @@ class NiconicoIE(InfoExtractor):
thumb_prefs = qualities(['url', 'middleUrl', 'largeUrl', 'player', 'ogp']) thumb_prefs = qualities(['url', 'middleUrl', 'largeUrl', 'player', 'ogp'])
def get_video_info(*items, get_first=True, **kwargs):
return traverse_obj(api_data, ('video', *items), get_all=not get_first, **kwargs)
return { return {
'id': video_id, 'id': video_id,
'_api_data': api_data, '_api_data': api_data,
'title': get_video_info(('originalTitle', 'title')) or self._og_search_title(webpage, default=None), 'title': get_video_info(('originalTitle', 'title')) or self._og_search_title(webpage, default=None),
'formats': formats, 'formats': [*self._yield_dmc_formats(api_data, video_id),
*self._yield_dms_formats(api_data, video_id)],
'thumbnails': [{ 'thumbnails': [{
'id': key, 'id': key,
'url': url, 'url': url,
@ -470,93 +536,19 @@ class NiconicoIE(InfoExtractor):
parse_duration(self._html_search_meta('video:duration', webpage, 'video duration', default=None)) parse_duration(self._html_search_meta('video:duration', webpage, 'video duration', default=None))
or get_video_info('duration')), or get_video_info('duration')),
'webpage_url': url_or_none(url) or f'https://www.nicovideo.jp/watch/{video_id}', 'webpage_url': url_or_none(url) or f'https://www.nicovideo.jp/watch/{video_id}',
'subtitles': self.extract_subtitles(video_id, api_data, session_api_data), 'subtitles': self.extract_subtitles(video_id, api_data),
} }
def _get_subtitles(self, video_id, api_data, session_api_data): def _get_subtitles(self, video_id, api_data):
comment_user_key = traverse_obj(api_data, ('comment', 'keys', 'userKey')) comments_info = traverse_obj(api_data, ('comment', 'nvComment', {dict})) or {}
user_id_str = session_api_data.get('serviceUserId') if not comments_info.get('server'):
thread_ids = traverse_obj(api_data, ('comment', 'threads', lambda _, v: v['isActive']))
legacy_danmaku = self._extract_legacy_comments(video_id, thread_ids, user_id_str, comment_user_key) or []
new_comments = traverse_obj(api_data, ('comment', 'nvComment'))
new_danmaku = self._extract_new_comments(
new_comments.get('server'), video_id,
new_comments.get('params'), new_comments.get('threadKey'))
if not legacy_danmaku and not new_danmaku:
self.report_warning(f'Failed to get comments. {bug_reports_message()}')
return return
return { danmaku = traverse_obj(self._download_json(
'comments': [{ f'{comments_info["server"]}/v1/threads', video_id, data=json.dumps({
'ext': 'json',
'data': json.dumps(legacy_danmaku + new_danmaku),
}],
}
def _extract_legacy_comments(self, video_id, threads, user_id, user_key):
auth_data = {
'user_id': user_id,
'userkey': user_key,
} if user_id and user_key else {'user_id': ''}
api_url = traverse_obj(threads, (..., 'server'), get_all=False)
# Request Start
post_data = [{'ping': {'content': 'rs:0'}}]
for i, thread in enumerate(threads):
thread_id = thread['id']
thread_fork = thread['fork']
# Post Start (2N)
post_data.append({'ping': {'content': f'ps:{i * 2}'}})
post_data.append({'thread': {
'fork': thread_fork,
'language': 0,
'nicoru': 3,
'scores': 1,
'thread': thread_id,
'version': '20090904',
'with_global': 1,
**auth_data,
}})
# Post Final (2N)
post_data.append({'ping': {'content': f'pf:{i * 2}'}})
# Post Start (2N+1)
post_data.append({'ping': {'content': f'ps:{i * 2 + 1}'}})
post_data.append({'thread_leaves': {
# format is '<bottom of minute range>-<top of minute range>:<comments per minute>,<total last comments>'
# unfortunately NND limits (deletes?) comment returns this way, so you're only able to grab the last 1000 per language
'content': '0-999999:999999,999999,nicoru:999999',
'fork': thread_fork,
'language': 0,
'nicoru': 3,
'scores': 1,
'thread': thread_id,
**auth_data,
}})
# Post Final (2N+1)
post_data.append({'ping': {'content': f'pf:{i * 2 + 1}'}})
# Request Final
post_data.append({'ping': {'content': 'rf:0'}})
return self._download_json(
f'{api_url}/api.json', video_id, data=json.dumps(post_data).encode(), fatal=False,
headers={
'Referer': f'https://www.nicovideo.jp/watch/{video_id}',
'Origin': 'https://www.nicovideo.jp',
'Content-Type': 'text/plain;charset=UTF-8',
},
note='Downloading comments', errnote=f'Failed to access endpoint {api_url}')
def _extract_new_comments(self, endpoint, video_id, params, thread_key):
comments = self._download_json(
f'{endpoint}/v1/threads', video_id, data=json.dumps({
'additionals': {}, 'additionals': {},
'params': params, 'params': comments_info.get('params'),
'threadKey': thread_key, 'threadKey': comments_info.get('threadKey'),
}).encode(), fatal=False, }).encode(), fatal=False,
headers={ headers={
'Referer': 'https://www.nicovideo.jp/', 'Referer': 'https://www.nicovideo.jp/',
@ -566,8 +558,15 @@ class NiconicoIE(InfoExtractor):
'x-frontend-id': '6', 'x-frontend-id': '6',
'x-frontend-version': '0', 'x-frontend-version': '0',
}, },
note='Downloading comments (new)', errnote='Failed to download comments (new)') note='Downloading comments', errnote='Failed to download comments'),
return traverse_obj(comments, ('data', 'threads', ..., 'comments', ...)) ('data', 'threads', ..., 'comments', ...))
return {
'comments': [{
'ext': 'json',
'data': json.dumps(danmaku),
}],
}
class NiconicoPlaylistBaseIE(InfoExtractor): class NiconicoPlaylistBaseIE(InfoExtractor):
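The DMS format code above recovers per-stream bitrates that the HLS manifest hides: every variant is muxed with the lowest-bitrate audio track, so subtracting that audio rate from each variant's tbr approximates the video-only rate. A worked sketch of the arithmetic with invented bitrates:

    def video_only_tbrs(variant_tbrs_kbps, audio_bitrates_bps):
        # Each muxed variant's tbr overstates the video bitrate by the
        # share of the lowest-bitrate audio track, so subtract it back out.
        min_abr = min(audio_bitrates_bps, default=0) / 1000  # bps -> kbps
        return [round(tbr - min_abr) for tbr in sorted(variant_tbrs_kbps)]

    # Variants advertised at 1100/2100/4100 kbps muxed with a 96 kbps track:
    assert video_only_tbrs([2100, 1100, 4100], [96000, 192000]) == [1004, 2004, 4004]

Sorting first also mirrors the diff's trick of keeping the lowest-tbr duplicate when _remove_duplicate_formats runs.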

View File: yt_dlp/extractor/nova.py

@ -135,14 +135,15 @@ class NovaIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)' _VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
_TESTS = [{ _TESTS = [{
'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260', 'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260',
'md5': '249baab7d0104e186e78b0899c7d5f28', 'md5': 'da8f3f1fcdaf9fb0f112a32a165760a3',
'info_dict': { 'info_dict': {
'id': '1757139', 'id': '8OvQqEvV3MW',
'display_id': 'tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci', 'display_id': '8OvQqEvV3MW',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Podzemní nemocnice v pražské Krči', 'title': 'Podzemní nemocnice v pražské Krči',
'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53', 'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53',
'thumbnail': r're:^https?://.*\.(?:jpg)', 'thumbnail': r're:^https?://.*\.(?:jpg)',
'duration': 151,
} }
}, { }, {
'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html', 'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html',
@ -210,7 +211,7 @@ class NovaIE(InfoExtractor):
# novaplus # novaplus
embed_id = self._search_regex( embed_id = self._search_regex(
r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)', r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media(?:tn)?\.cms\.nova\.cz/embed/([^/?#&"\']+)',
webpage, 'embed url', default=None) webpage, 'embed url', default=None)
if embed_id: if embed_id:
return { return {

View File: yt_dlp/extractor/ntvru.py

@ -35,6 +35,7 @@ class NTVRuIE(InfoExtractor):
'duration': 172, 'duration': 172,
'view_count': int, 'view_count': int,
}, },
'skip': '404 Not Found',
}, { }, {
'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416', 'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
'md5': '82dbd49b38e3af1d00df16acbeab260c', 'md5': '82dbd49b38e3af1d00df16acbeab260c',
@ -78,7 +79,8 @@ class NTVRuIE(InfoExtractor):
}] }]
_VIDEO_ID_REGEXES = [ _VIDEO_ID_REGEXES = [
r'<meta property="og:url" content="http://www\.ntv\.ru/video/(\d+)', r'<meta property="og:url" content="https?://www\.ntv\.ru/video/(\d+)',
r'<meta property="og:video:(?:url|iframe)" content="https?://www\.ntv\.ru/embed/(\d+)',
r'<video embed=[^>]+><id>(\d+)</id>', r'<video embed=[^>]+><id>(\d+)</id>',
r'<video restriction[^>]+><key>(\d+)</key>', r'<video restriction[^>]+><key>(\d+)</key>',
] ]

View File: yt_dlp/extractor/onefootball.py

@ -1,4 +1,6 @@
from .common import InfoExtractor from .common import InfoExtractor
from .jwplatform import JWPlatformIE
from ..utils import make_archive_id
class OneFootballIE(InfoExtractor): class OneFootballIE(InfoExtractor):
@ -7,41 +9,43 @@ class OneFootballIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'https://onefootball.com/en/video/highlights-fc-zuerich-3-3-fc-basel-34012334', 'url': 'https://onefootball.com/en/video/highlights-fc-zuerich-3-3-fc-basel-34012334',
'info_dict': { 'info_dict': {
'id': '34012334', 'id': 'Y2VtcWAT',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Highlights: FC Zürich 3-3 FC Basel', 'title': 'Highlights: FC Zürich 3-3 FC Basel',
'description': 'md5:33d9855cb790702c4fe42a513700aba8', 'description': 'md5:33d9855cb790702c4fe42a513700aba8',
'thumbnail': 'https://photobooth-api.onefootball.com/api/screenshot/https:%2F%2Fperegrine-api.onefootball.com%2Fv2%2Fphotobooth%2Fcms%2Fen%2F34012334', 'thumbnail': 'https://cdn.jwplayer.com/v2/media/Y2VtcWAT/poster.jpg?width=720',
'timestamp': 1635874604, 'timestamp': 1635874895,
'upload_date': '20211102' 'upload_date': '20211102',
'duration': 375.0,
'tags': ['Football', 'Soccer', 'OneFootball'],
'_old_archive_ids': ['onefootball 34012334'],
}, },
'params': {'skip_download': True} 'params': {'skip_download': True},
'expected_warnings': ['Failed to download m3u8 information'],
}, { }, {
'url': 'https://onefootball.com/en/video/klopp-fumes-at-var-decisions-in-west-ham-defeat-34041020', 'url': 'https://onefootball.com/en/video/klopp-fumes-at-var-decisions-in-west-ham-defeat-34041020',
'info_dict': { 'info_dict': {
'id': '34041020', 'id': 'leVJrMho',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Klopp fumes at VAR decisions in West Ham defeat', 'title': 'Klopp fumes at VAR decisions in West Ham defeat',
'description': 'md5:9c50371095a01ad3f63311c73d8f51a5', 'description': 'md5:9c50371095a01ad3f63311c73d8f51a5',
'thumbnail': 'https://photobooth-api.onefootball.com/api/screenshot/https:%2F%2Fperegrine-api.onefootball.com%2Fv2%2Fphotobooth%2Fcms%2Fen%2F34041020', 'thumbnail': 'https://cdn.jwplayer.com/v2/media/leVJrMho/poster.jpg?width=720',
'timestamp': 1636314103, 'timestamp': 1636315232,
'upload_date': '20211107' 'upload_date': '20211107',
'duration': 93.0,
'tags': ['Football', 'Soccer', 'OneFootball'],
'_old_archive_ids': ['onefootball 34041020'],
}, },
'params': {'skip_download': True} 'params': {'skip_download': True}
}] }]
def _real_extract(self, url): def _real_extract(self, url):
id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, id) webpage = self._download_webpage(url, video_id)
data_json = self._search_json_ld(webpage, id) data_json = self._search_json_ld(webpage, video_id, fatal=False)
m3u8_url = self._html_search_regex(r'(https://cdn\.jwplayer\.com/manifests/.+\.m3u8)', webpage, 'm3u8_url') data_json.pop('url', None)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, id) m3u8_url = self._html_search_regex(r'(https://cdn\.jwplayer\.com/manifests/\w+\.m3u8)', webpage, 'm3u8_url')
return {
'id': id, return self.url_result(
'title': data_json.get('title'), m3u8_url, JWPlatformIE, video_id, _old_archive_ids=[make_archive_id(self, video_id)],
'description': data_json.get('description'), **data_json, url_transparent=True)
'thumbnail': data_json.get('thumbnail'),
'timestamp': data_json.get('timestamp'),
'formats': formats,
'subtitles': subtitles,
}
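Because the extractor now hands off to JWPlatform, each video's ID changes from the OneFootball article number to the JW media ID; _old_archive_ids keeps entries already recorded in a download archive recognizable. Judging by the test data above ('onefootball 34012334'), the archive key is the lowercased extractor key plus the old ID, which a sketch can mimic:

    def make_archive_id_sketch(ie_key, video_id):
        # Download archives store one "<extractor key, lowercased> <id>"
        # entry per line; re-emitting old IDs in that shape prevents
        # yt-dlp from re-downloading already-archived videos.
        return f'{ie_key.lower()} {video_id}'

    assert make_archive_id_sketch('OneFootball', '34012334') == 'onefootball 34012334'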

View File: yt_dlp/extractor/openrec.py

@ -12,6 +12,8 @@ from ..compat import compat_str
class OpenRecBaseIE(InfoExtractor): class OpenRecBaseIE(InfoExtractor):
_M3U8_HEADERS = {'Referer': 'https://www.openrec.tv/'}
def _extract_pagestore(self, webpage, video_id): def _extract_pagestore(self, webpage, video_id):
return self._parse_json( return self._parse_json(
self._search_regex(r'(?m)window\.pageStore\s*=\s*(\{.+?\});$', webpage, 'window.pageStore'), video_id) self._search_regex(r'(?m)window\.pageStore\s*=\s*(\{.+?\});$', webpage, 'window.pageStore'), video_id)
@ -21,7 +23,7 @@ class OpenRecBaseIE(InfoExtractor):
if not m3u8_url: if not m3u8_url:
continue continue
yield from self._extract_m3u8_formats( yield from self._extract_m3u8_formats(
m3u8_url, video_id, ext='mp4', m3u8_id=name) m3u8_url, video_id, ext='mp4', m3u8_id=name, headers=self._M3U8_HEADERS)
def _extract_movie(self, webpage, video_id, name, is_live): def _extract_movie(self, webpage, video_id, name, is_live):
window_stores = self._extract_pagestore(webpage, video_id) window_stores = self._extract_pagestore(webpage, video_id)
@ -60,6 +62,7 @@ class OpenRecBaseIE(InfoExtractor):
'uploader_id': get_first(movie_stores, ('channel', 'user', 'id')), 'uploader_id': get_first(movie_stores, ('channel', 'user', 'id')),
'timestamp': int_or_none(get_first(movie_stores, ['publishedAt', 'time']), scale=1000) or unified_timestamp(get_first(movie_stores, 'publishedAt')), 'timestamp': int_or_none(get_first(movie_stores, ['publishedAt', 'time']), scale=1000) or unified_timestamp(get_first(movie_stores, 'publishedAt')),
'is_live': is_live, 'is_live': is_live,
'http_headers': self._M3U8_HEADERS,
} }
@ -110,7 +113,7 @@ class OpenRecCaptureIE(OpenRecBaseIE):
raise ExtractorError('Cannot extract title') raise ExtractorError('Cannot extract title')
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
capture_data.get('source'), video_id, ext='mp4') capture_data.get('source'), video_id, ext='mp4', headers=self._M3U8_HEADERS)
return { return {
'id': video_id, 'id': video_id,
@ -121,6 +124,7 @@ class OpenRecCaptureIE(OpenRecBaseIE):
'uploader': traverse_obj(movie_store, ('channel', 'name'), expected_type=compat_str), 'uploader': traverse_obj(movie_store, ('channel', 'name'), expected_type=compat_str),
'uploader_id': traverse_obj(movie_store, ('channel', 'id'), expected_type=compat_str), 'uploader_id': traverse_obj(movie_store, ('channel', 'id'), expected_type=compat_str),
'upload_date': unified_strdate(capture_data.get('createdAt')), 'upload_date': unified_strdate(capture_data.get('createdAt')),
'http_headers': self._M3U8_HEADERS,
} }
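The OpenRec changes thread a Referer header through both the manifest fetch and the returned http_headers, which suggests the CDN refuses manifest or segment requests that arrive without it. An illustrative stdlib fetch with that header (real code would add retries and error handling):

    from urllib.request import Request, urlopen

    M3U8_HEADERS = {'Referer': 'https://www.openrec.tv/'}

    def fetch_manifest(url):
        # Present the player page as the referrer, matching what a
        # browser would send when the embedded player loads the playlist.
        req = Request(url, headers=M3U8_HEADERS)
        with urlopen(req) as resp:
            return resp.read().decode('utf-8', 'replace')

Setting http_headers on the info dict matters too: it makes the downloader send the same Referer for the media segments, not just the manifest.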

View File: yt_dlp/extractor/pornhub.py

@ -87,8 +87,8 @@ class PornHubBaseIE(InfoExtractor):
def is_logged(webpage): def is_logged(webpage):
return any(re.search(p, webpage) for p in ( return any(re.search(p, webpage) for p in (
r'class=["\']signOut', r'id="profileMenuDropdown"',
r'>Sign\s+[Oo]ut\s*<')) r'class="ph-icon-logout"'))
if is_logged(login_page): if is_logged(login_page):
self._logged_in = True self._logged_in = True

View File: yt_dlp/extractor/rai.py

@ -28,6 +28,29 @@ class RaiBaseIE(InfoExtractor):
_GEO_COUNTRIES = ['IT'] _GEO_COUNTRIES = ['IT']
_GEO_BYPASS = False _GEO_BYPASS = False
def _fix_m3u8_formats(self, media_url, video_id):
fmts = self._extract_m3u8_formats(
media_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
# Fix malformed m3u8 manifests by setting audio-only/video-only formats
for f in fmts:
if not f.get('acodec'):
f['acodec'] = 'mp4a'
if not f.get('vcodec'):
f['vcodec'] = 'avc1'
man_url = f['url']
if re.search(r'chunklist(?:_b\d+)*_ao[_.]', man_url): # audio only
f['vcodec'] = 'none'
elif re.search(r'chunklist(?:_b\d+)*_vo[_.]', man_url): # video only
f['acodec'] = 'none'
else: # video+audio
if f['acodec'] == 'none':
f['acodec'] = 'mp4a'
if f['vcodec'] == 'none':
f['vcodec'] = 'avc1'
return fmts
def _extract_relinker_info(self, relinker_url, video_id, audio_only=False): def _extract_relinker_info(self, relinker_url, video_id, audio_only=False):
def fix_cdata(s): def fix_cdata(s):
# remove \r\n\t before and after <![CDATA[ ]]> to avoid # remove \r\n\t before and after <![CDATA[ ]]> to avoid
@ -69,8 +92,7 @@ class RaiBaseIE(InfoExtractor):
'format_id': 'https-mp3', 'format_id': 'https-mp3',
}) })
elif ext == 'm3u8' or 'format=m3u8' in media_url: elif ext == 'm3u8' or 'format=m3u8' in media_url:
formats.extend(self._extract_m3u8_formats( formats.extend(self._fix_m3u8_formats(media_url, video_id))
media_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
elif ext == 'f4m': elif ext == 'f4m':
# very likely no longer needed. Cannot find any url that uses it. # very likely no longer needed. Cannot find any url that uses it.
manifest_url = update_url_query( manifest_url = update_url_query(
@ -153,10 +175,10 @@ class RaiBaseIE(InfoExtractor):
'format_id': f'https-{tbr}', 'format_id': f'https-{tbr}',
'width': format_copy.get('width'), 'width': format_copy.get('width'),
'height': format_copy.get('height'), 'height': format_copy.get('height'),
'tbr': format_copy.get('tbr'), 'tbr': format_copy.get('tbr') or tbr,
'vcodec': format_copy.get('vcodec'), 'vcodec': format_copy.get('vcodec') or 'avc1',
'acodec': format_copy.get('acodec'), 'acodec': format_copy.get('acodec') or 'mp4a',
'fps': format_copy.get('fps'), 'fps': format_copy.get('fps') or 25,
} if format_copy else { } if format_copy else {
'format_id': f'https-{tbr}', 'format_id': f'https-{tbr}',
'width': _QUALITY[tbr][0], 'width': _QUALITY[tbr][0],
@ -245,7 +267,7 @@ class RaiPlayIE(RaiBaseIE):
'series': 'Report', 'series': 'Report',
'season': '2013/14', 'season': '2013/14',
'subtitles': {'it': 'count:4'}, 'subtitles': {'it': 'count:4'},
'release_year': 2022, 'release_year': 2024,
'episode': 'Espresso nel caffè - 07/04/2014', 'episode': 'Espresso nel caffè - 07/04/2014',
'timestamp': 1396919880, 'timestamp': 1396919880,
'upload_date': '20140408', 'upload_date': '20140408',
@ -253,7 +275,7 @@ class RaiPlayIE(RaiBaseIE):
}, },
'params': {'skip_download': True}, 'params': {'skip_download': True},
}, { }, {
# 1080p direct mp4 url # 1080p
'url': 'https://www.raiplay.it/video/2021/11/Blanca-S1E1-Senza-occhi-b1255a4a-8e72-4a2f-b9f3-fc1308e00736.html', 'url': 'https://www.raiplay.it/video/2021/11/Blanca-S1E1-Senza-occhi-b1255a4a-8e72-4a2f-b9f3-fc1308e00736.html',
'md5': 'aeda7243115380b2dd5e881fd42d949a', 'md5': 'aeda7243115380b2dd5e881fd42d949a',
'info_dict': { 'info_dict': {
@ -274,7 +296,7 @@ class RaiPlayIE(RaiBaseIE):
'episode': 'Senza occhi', 'episode': 'Senza occhi',
'timestamp': 1637318940, 'timestamp': 1637318940,
'upload_date': '20211119', 'upload_date': '20211119',
'formats': 'count:12', 'formats': 'count:7',
}, },
'params': {'skip_download': True}, 'params': {'skip_download': True},
'expected_warnings': ['Video not available. Likely due to geo-restriction.'] 'expected_warnings': ['Video not available. Likely due to geo-restriction.']
@ -527,7 +549,7 @@ class RaiPlaySoundPlaylistIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'ilruggitodelconiglio', 'id': 'ilruggitodelconiglio',
'title': 'Il Ruggito del Coniglio', 'title': 'Il Ruggito del Coniglio',
'description': 'md5:48cff6972435964284614d70474132e6', 'description': 'md5:62a627b3a2d0635d08fa8b6e0a04f27e',
}, },
'playlist_mincount': 65, 'playlist_mincount': 65,
}, { }, {
@ -634,19 +656,20 @@ class RaiIE(RaiBaseIE):
} }
class RaiNewsIE(RaiIE): # XXX: Do not subclass from concrete IE class RaiNewsIE(RaiBaseIE):
_VALID_URL = rf'https?://(www\.)?rainews\.it/(?!articoli)[^?#]+-(?P<id>{RaiBaseIE._UUID_RE})(?:-[^/?#]+)?\.html' _VALID_URL = rf'https?://(www\.)?rainews\.it/(?!articoli)[^?#]+-(?P<id>{RaiBaseIE._UUID_RE})(?:-[^/?#]+)?\.html'
_EMBED_REGEX = [rf'<iframe[^>]+data-src="(?P<url>/iframe/[^?#]+?{RaiBaseIE._UUID_RE}\.html)'] _EMBED_REGEX = [rf'<iframe[^>]+data-src="(?P<url>/iframe/[^?#]+?{RaiBaseIE._UUID_RE}\.html)']
_TESTS = [{ _TESTS = [{
# new rainews player (#3911) # new rainews player (#3911)
'url': 'https://www.rainews.it/rubriche/24mm/video/2022/05/24mm-del-29052022-12cf645d-1ffd-4220-b27c-07c226dbdecf.html', 'url': 'https://www.rainews.it/video/2024/02/membri-della-croce-rossa-evacuano-gli-abitanti-di-un-villaggio-nella-regione-ucraina-di-kharkiv-il-filmato-dallucraina--31e8017c-845c-43f5-9c48-245b43c3a079.html',
'info_dict': { 'info_dict': {
'id': '12cf645d-1ffd-4220-b27c-07c226dbdecf', 'id': '31e8017c-845c-43f5-9c48-245b43c3a079',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Puntata del 29/05/2022', 'title': 'md5:1e81364b09de4a149042bac3c7d36f0b',
'duration': 1589, 'duration': 196,
'upload_date': '20220529', 'upload_date': '20240225',
'uploader': 'rainews', 'uploader': 'rainews',
'formats': 'count:2',
}, },
'params': {'skip_download': True}, 'params': {'skip_download': True},
}, { }, {
@ -659,7 +682,8 @@ class RaiNewsIE(RaiIE): # XXX: Do not subclass from concrete IE
'description': 'I film in uscita questa settimana.', 'description': 'I film in uscita questa settimana.',
'thumbnail': r're:^https?://.*\.png$', 'thumbnail': r're:^https?://.*\.png$',
'duration': 833, 'duration': 833,
'upload_date': '20161103' 'upload_date': '20161103',
'formats': 'count:8',
}, },
'params': {'skip_download': True}, 'params': {'skip_download': True},
'expected_warnings': ['unable to extract player_data'], 'expected_warnings': ['unable to extract player_data'],
@ -684,7 +708,7 @@ class RaiNewsIE(RaiIE): # XXX: Do not subclass from concrete IE
if not relinker_url: if not relinker_url:
# fallback on old implementation for some old content # fallback on old implementation for some old content
try: try:
return self._extract_from_content_id(video_id, url) return RaiIE._real_extract(self, url)
except GeoRestrictedError: except GeoRestrictedError:
raise raise
except ExtractorError as e: except ExtractorError as e:
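_fix_m3u8_formats above classifies each variant by its chunklist URL, since the _ao/_vo tokens are the only codec hints Rai's malformed manifests provide. A standalone sketch of that classification using the same regexes, with the diff's mp4a/avc1 defaults for the muxed case:

    import re

    def classify_variant(chunklist_url):
        # Rai chunklist names look like chunklist_b1800000_ao_xxx.m3u8;
        # _ao marks audio-only renditions and _vo video-only ones.
        if re.search(r'chunklist(?:_b\d+)*_ao[_.]', chunklist_url):
            return {'vcodec': 'none', 'acodec': 'mp4a'}  # audio only
        if re.search(r'chunklist(?:_b\d+)*_vo[_.]', chunklist_url):
            return {'vcodec': 'avc1', 'acodec': 'none'}  # video only
        return {'vcodec': 'avc1', 'acodec': 'mp4a'}      # assume muxed

    assert classify_variant('chunklist_b1800000_ao_x.m3u8')['vcodec'] == 'none'
    assert classify_variant('chunklist_b1200000.m3u8')['acodec'] == 'mp4a'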

View File: yt_dlp/extractor/rozhlas.py

@ -247,17 +247,17 @@ class MujRozhlasIE(RozhlasBaseIE):
'url': 'https://www.mujrozhlas.cz/vykopavky/ach-jo-zase-teleci-rizek-je-mnohem-min-cesky-nez-jsme-si-mysleli', 'url': 'https://www.mujrozhlas.cz/vykopavky/ach-jo-zase-teleci-rizek-je-mnohem-min-cesky-nez-jsme-si-mysleli',
'md5': '6f8fd68663e64936623e67c152a669e0', 'md5': '6f8fd68663e64936623e67c152a669e0',
'info_dict': { 'info_dict': {
'id': '10739193', 'id': '10787730',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Ach jo, zase to telecí! Řízek je mnohem míň český, než jsme si mysleli', 'title': 'Ach jo, zase to telecí! Řízek je mnohem míň český, než jsme si mysleli',
'description': 'md5:db7141e9caaedc9041ec7cefb9a62908', 'description': 'md5:db7141e9caaedc9041ec7cefb9a62908',
'timestamp': 1684915200, 'timestamp': 1684915200,
'modified_timestamp': 1684922446, 'modified_timestamp': 1687550432,
'series': 'Vykopávky', 'series': 'Vykopávky',
'thumbnail': 'https://portal.rozhlas.cz/sites/default/files/images/84377046610af6ddc54d910b1dd7a22b.jpg', 'thumbnail': 'https://portal.rozhlas.cz/sites/default/files/images/84377046610af6ddc54d910b1dd7a22b.jpg',
'channel_id': 'radio-wave', 'channel_id': 'radio-wave',
'upload_date': '20230524', 'upload_date': '20230524',
'modified_date': '20230524', 'modified_date': '20230623',
}, },
}, { }, {
# serial extraction # serial extraction
@ -277,6 +277,26 @@ class MujRozhlasIE(RozhlasBaseIE):
'title': 'Nespavci', 'title': 'Nespavci',
'description': 'md5:c430adcbf9e2b9eac88b745881e814dc', 'description': 'md5:c430adcbf9e2b9eac88b745881e814dc',
}, },
}, {
# serialPart
'url': 'https://www.mujrozhlas.cz/povidka/gustavo-adolfo-becquer-hora-duchu',
'info_dict': {
'id': '8889035',
'ext': 'm4a',
'title': 'Gustavo Adolfo Bécquer: Hora duchů',
'description': 'md5:343a15257b376c276e210b78e900ffea',
'chapter': 'Hora duchů a Polibek dva tajemné příběhy Gustava Adolfa Bécquera',
'thumbnail': 'https://portal.rozhlas.cz/sites/default/files/images/2adfe1387fb140634be725c1ccf26214.jpg',
'timestamp': 1708173000,
'episode': 'Episode 1',
'episode_number': 1,
'series': 'Povídka',
'modified_date': '20240217',
'upload_date': '20240217',
'modified_timestamp': 1708173198,
'channel_id': 'vltava',
},
'params': {'skip_download': 'dash'},
}] }]
def _call_api(self, path, item_id, msg='API JSON'): def _call_api(self, path, item_id, msg='API JSON'):
@ -322,7 +342,7 @@ class MujRozhlasIE(RozhlasBaseIE):
entity = info['siteEntityBundle'] entity = info['siteEntityBundle']
if entity == 'episode': if entity in ('episode', 'serialPart'):
return self._extract_audio_entry(self._call_api( return self._extract_audio_entry(self._call_api(
'episodes', info['contentId'], 'episode info API JSON')) 'episodes', info['contentId'], 'episode info API JSON'))

View File: yt_dlp/extractor/rule34video.py

@ -9,7 +9,6 @@ from ..utils import (
get_element_html_by_class, get_element_html_by_class,
get_elements_by_class, get_elements_by_class,
int_or_none, int_or_none,
join_nonempty,
parse_count, parse_count,
parse_duration, parse_duration,
unescapeHTML, unescapeHTML,
@ -57,7 +56,7 @@ class Rule34VideoIE(InfoExtractor):
'comment_count': int, 'comment_count': int,
'timestamp': 1640131200, 'timestamp': 1640131200,
'description': '', 'description': '',
'creator': 'WildeerStudio', 'creators': ['WildeerStudio'],
'upload_date': '20211222', 'upload_date': '20211222',
'uploader': 'CerZule', 'uploader': 'CerZule',
'uploader_url': 'https://rule34video.com/members/36281/', 'uploader_url': 'https://rule34video.com/members/36281/',
@ -81,13 +80,13 @@ class Rule34VideoIE(InfoExtractor):
'quality': quality, 'quality': quality,
}) })
categories, creator, uploader, uploader_url = [None] * 4 categories, creators, uploader, uploader_url = [None] * 4
for col in get_elements_by_class('col', webpage): for col in get_elements_by_class('col', webpage):
label = clean_html(get_element_by_class('label', col)) label = clean_html(get_element_by_class('label', col))
if label == 'Categories:': if label == 'Categories:':
categories = list(map(clean_html, get_elements_by_class('item', col))) categories = list(map(clean_html, get_elements_by_class('item', col)))
elif label == 'Artist:': elif label == 'Artist:':
creator = join_nonempty(*map(clean_html, get_elements_by_class('item', col)), delim=', ') creators = list(map(clean_html, get_elements_by_class('item', col)))
elif label == 'Uploaded By:': elif label == 'Uploaded By:':
uploader = clean_html(get_element_by_class('name', col)) uploader = clean_html(get_element_by_class('name', col))
uploader_url = extract_attributes(get_element_html_by_class('name', col) or '').get('href') uploader_url = extract_attributes(get_element_html_by_class('name', col) or '').get('href')
@ -115,7 +114,7 @@ class Rule34VideoIE(InfoExtractor):
'comment_count': int_or_none(self._search_regex( 'comment_count': int_or_none(self._search_regex(
r'[^(]+\((\d+)\)', get_element_by_attribute('href', '#tab_comments', webpage), 'comment count', fatal=False)), r'[^(]+\((\d+)\)', get_element_by_attribute('href', '#tab_comments', webpage), 'comment count', fatal=False)),
'age_limit': 18, 'age_limit': 18,
'creator': creator, 'creators': creators,
'uploader': uploader, 'uploader': uploader,
'uploader_url': uploader_url, 'uploader_url': uploader_url,
'categories': categories, 'categories': categories,

View File: yt_dlp/extractor/screencastify.py

@ -5,7 +5,10 @@ from ..utils import traverse_obj, update_url_query
class ScreencastifyIE(InfoExtractor): class ScreencastifyIE(InfoExtractor):
_VALID_URL = r'https?://watch\.screencastify\.com/v/(?P<id>[^/?#]+)' _VALID_URL = [
r'https?://watch\.screencastify\.com/v/(?P<id>[^/?#]+)',
r'https?://app\.screencastify\.com/v[23]/watch/(?P<id>[^/?#]+)',
]
_TESTS = [{ _TESTS = [{
'url': 'https://watch.screencastify.com/v/sYVkZip3quLKhHw4Ybk8', 'url': 'https://watch.screencastify.com/v/sYVkZip3quLKhHw4Ybk8',
'info_dict': { 'info_dict': {
@ -19,6 +22,21 @@ class ScreencastifyIE(InfoExtractor):
'params': { 'params': {
'skip_download': 'm3u8', 'skip_download': 'm3u8',
}, },
}, {
'url': 'https://app.screencastify.com/v3/watch/J5N7H11wofDN1jZUCr3t',
'info_dict': {
'id': 'J5N7H11wofDN1jZUCr3t',
'ext': 'mp4',
'uploader': 'Scott Piesen',
'description': '',
'title': 'Lesson Recording 1-17 Burrr...',
},
'params': {
'skip_download': 'm3u8',
},
}, {
'url': 'https://app.screencastify.com/v2/watch/BQ26VbUdfbQLhKzkktOk',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
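The _VALID_URL above becomes a list so one extractor can claim both the watch. and app. URL shapes. A plain-re sketch of the equivalent first-match-wins lookup (yt-dlp handles list-valued _VALID_URL internally; this only illustrates the matching):

    import re

    VALID_URLS = [
        r'https?://watch\.screencastify\.com/v/(?P<id>[^/?#]+)',
        r'https?://app\.screencastify\.com/v[23]/watch/(?P<id>[^/?#]+)',
    ]

    def match_id(url):
        for pattern in VALID_URLS:
            mobj = re.match(pattern, url)
            if mobj:
                return mobj.group('id')
        return None

    assert match_id('https://app.screencastify.com/v3/watch/J5N7H11wofDN1jZUCr3t') == 'J5N7H11wofDN1jZUCr3t'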

View File: yt_dlp/extractor/swearnet.py

@ -1,5 +1,5 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none, traverse_obj from ..utils import ExtractorError, int_or_none, traverse_obj
class SwearnetEpisodeIE(InfoExtractor): class SwearnetEpisodeIE(InfoExtractor):
@ -51,7 +51,13 @@ class SwearnetEpisodeIE(InfoExtractor):
display_id, season_number, episode_number = self._match_valid_url(url).group('id', 'season_num', 'episode_num') display_id, season_number, episode_number = self._match_valid_url(url).group('id', 'season_num', 'episode_num')
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid') try:
external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid')
except ExtractorError:
if 'Upgrade Now' in webpage:
self.raise_login_required()
raise
json_data = self._download_json( json_data = self._download_json(
f'https://play.vidyard.com/player/{external_id}.json', display_id)['payload']['chapters'][0] f'https://play.vidyard.com/player/{external_id}.json', display_id)['payload']['chapters'][0]

View File: yt_dlp/extractor/tiktok.py

@ -6,7 +6,7 @@ import string
import time import time
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse from ..compat import compat_urllib_parse_urlparse
from ..networking import HEADRequest from ..networking import HEADRequest
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
@ -15,7 +15,6 @@ from ..utils import (
UserNotLive, UserNotLive,
determine_ext, determine_ext,
format_field, format_field,
get_first,
int_or_none, int_or_none,
join_nonempty, join_nonempty,
merge_dicts, merge_dicts,
@ -219,8 +218,8 @@ class TikTokBaseIE(InfoExtractor):
def extract_addr(addr, add_meta={}): def extract_addr(addr, add_meta={}):
parsed_meta, res = parse_url_key(addr.get('url_key', '')) parsed_meta, res = parse_url_key(addr.get('url_key', ''))
if res: if res:
known_resolutions.setdefault(res, {}).setdefault('height', add_meta.get('height') or addr.get('height')) known_resolutions.setdefault(res, {}).setdefault('height', int_or_none(addr.get('height')))
known_resolutions[res].setdefault('width', add_meta.get('width') or addr.get('width')) known_resolutions[res].setdefault('width', int_or_none(addr.get('width')))
parsed_meta.update(known_resolutions.get(res, {})) parsed_meta.update(known_resolutions.get(res, {}))
add_meta.setdefault('height', int_or_none(res[:-1])) add_meta.setdefault('height', int_or_none(res[:-1]))
return [{ return [{
@ -237,22 +236,26 @@ class TikTokBaseIE(InfoExtractor):
# Hack: Add direct video links first to prioritize them when removing duplicate formats # Hack: Add direct video links first to prioritize them when removing duplicate formats
formats = [] formats = []
width = int_or_none(video_info.get('width'))
height = int_or_none(video_info.get('height'))
if video_info.get('play_addr'): if video_info.get('play_addr'):
formats.extend(extract_addr(video_info['play_addr'], { formats.extend(extract_addr(video_info['play_addr'], {
'format_id': 'play_addr', 'format_id': 'play_addr',
'format_note': 'Direct video', 'format_note': 'Direct video',
'vcodec': 'h265' if traverse_obj( 'vcodec': 'h265' if traverse_obj(
video_info, 'is_bytevc1', 'is_h265') else 'h264', # TODO: Check for "direct iOS" videos, like https://www.tiktok.com/@cookierun_dev/video/7039716639834656002 video_info, 'is_bytevc1', 'is_h265') else 'h264', # TODO: Check for "direct iOS" videos, like https://www.tiktok.com/@cookierun_dev/video/7039716639834656002
'width': video_info.get('width'), 'width': width,
'height': video_info.get('height'), 'height': height,
})) }))
if video_info.get('download_addr'): if video_info.get('download_addr'):
formats.extend(extract_addr(video_info['download_addr'], { download_addr = video_info['download_addr']
dl_width = int_or_none(download_addr.get('width'))
formats.extend(extract_addr(download_addr, {
'format_id': 'download_addr', 'format_id': 'download_addr',
'format_note': 'Download video%s' % (', watermarked' if video_info.get('has_watermark') else ''), 'format_note': 'Download video%s' % (', watermarked' if video_info.get('has_watermark') else ''),
'vcodec': 'h264', 'vcodec': 'h264',
'width': video_info.get('width'), 'width': dl_width or width,
'height': video_info.get('height'), 'height': try_call(lambda: int(dl_width / 0.5625)) or height, # download_addr['height'] is wrong
'preference': -2 if video_info.get('has_watermark') else -1, 'preference': -2 if video_info.get('has_watermark') else -1,
})) }))
if video_info.get('play_addr_h264'): if video_info.get('play_addr_h264'):
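The download_addr hunk above distrusts the API's height and recomputes it from the width, assuming TikTok's 9:16 portrait frame: 0.5625 is 9/16, so dividing the width by it recovers the height. Worked examples of that arithmetic:

    def portrait_height(width):
        # 0.5625 == 9/16, so width / 0.5625 yields the height of a
        # 9:16 portrait frame; int() matches the diff's truncation.
        return int(width / 0.5625)

    assert portrait_height(576) == 1024
    assert portrait_height(1080) == 1920

Wrapping the real call in try_call additionally guards against a missing width, returning None so the `or height` fallback applies.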
@ -315,9 +318,6 @@ class TikTokBaseIE(InfoExtractor):
return { return {
'id': aweme_id, 'id': aweme_id,
'extractor_key': TikTokIE.ie_key(),
'extractor': TikTokIE.IE_NAME,
'webpage_url': self._create_url(author_info.get('uid'), aweme_id),
**traverse_obj(aweme_detail, { **traverse_obj(aweme_detail, {
'title': ('desc', {str}), 'title': ('desc', {str}),
'description': ('desc', {str}), 'description': ('desc', {str}),
@@ -921,20 +921,23 @@ class DouyinIE(TikTokBaseIE):
     _VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)'
     _TESTS = [{
         'url': 'https://www.douyin.com/video/6961737553342991651',
-        'md5': 'a97db7e3e67eb57bf40735c022ffa228',
+        'md5': '9ecce7bc5b302601018ecb2871c63a75',
         'info_dict': {
             'id': '6961737553342991651',
             'ext': 'mp4',
             'title': '#杨超越 小小水手带你去远航❤️',
             'description': '#杨超越 小小水手带你去远航❤️',
-            'uploader': '6897520xka',
             'uploader_id': '110403406559',
             'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
             'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
             'creator': '杨超越',
-            'duration': 19782,
+            'creators': ['杨超越'],
+            'duration': 19,
             'timestamp': 1620905839,
             'upload_date': '20210513',
             'track': '@杨超越创作的原声',
+            'artists': ['杨超越'],
             'view_count': int,
             'like_count': int,
             'repost_count': int,
@@ -943,20 +946,23 @@ class DouyinIE(TikTokBaseIE):
         },
     }, {
         'url': 'https://www.douyin.com/video/6982497745948921092',
-        'md5': '34a87ebff3833357733da3fe17e37c0e',
+        'md5': '15c5e660b7048af3707304e3cc02bbb5',
         'info_dict': {
             'id': '6982497745948921092',
             'ext': 'mp4',
             'title': '这个夏日和小羊@杨超越 一起遇见白色幻想',
             'description': '这个夏日和小羊@杨超越 一起遇见白色幻想',
-            'uploader': '0731chaoyue',
             'uploader_id': '408654318141572',
             'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
             'channel_id': 'MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
             'creator': '杨超越工作室',
-            'duration': 42479,
+            'creators': ['杨超越工作室'],
+            'duration': 42,
             'timestamp': 1625739481,
             'upload_date': '20210708',
             'track': '@杨超越工作室创作的原声',
+            'artists': ['杨超越工作室'],
             'view_count': int,
             'like_count': int,
             'repost_count': int,
@@ -965,20 +971,23 @@ class DouyinIE(TikTokBaseIE):
         },
     }, {
         'url': 'https://www.douyin.com/video/6953975910773099811',
-        'md5': 'dde3302460f19db59c47060ff013b902',
+        'md5': '0e6443758b8355db9a3c34864a4276be',
         'info_dict': {
             'id': '6953975910773099811',
             'ext': 'mp4',
             'title': '#一起看海 出现在你的夏日里',
             'description': '#一起看海 出现在你的夏日里',
-            'uploader': '6897520xka',
             'uploader_id': '110403406559',
             'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
             'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
             'creator': '杨超越',
-            'duration': 17343,
+            'creators': ['杨超越'],
+            'duration': 17,
             'timestamp': 1619098692,
             'upload_date': '20210422',
             'track': '@杨超越创作的原声',
+            'artists': ['杨超越'],
             'view_count': int,
             'like_count': int,
             'repost_count': int,
@@ -1004,20 +1013,23 @@ class DouyinIE(TikTokBaseIE):
         'skip': 'No longer available',
     }, {
         'url': 'https://www.douyin.com/video/6963263655114722595',
-        'md5': 'cf9f11f0ec45d131445ec2f06766e122',
+        'md5': '1440bcf59d8700f8e014da073a4dfea8',
         'info_dict': {
             'id': '6963263655114722595',
             'ext': 'mp4',
             'title': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
             'description': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
-            'uploader': '6897520xka',
             'uploader_id': '110403406559',
             'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
             'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
             'creator': '杨超越',
-            'duration': 15115,
+            'creators': ['杨超越'],
+            'duration': 15,
             'timestamp': 1621261163,
             'upload_date': '20210517',
             'track': '@杨超越创作的原声',
+            'artists': ['杨超越'],
             'view_count': int,
             'like_count': int,
             'repost_count': int,
@@ -1025,34 +1037,23 @@ class DouyinIE(TikTokBaseIE):
             'thumbnail': r're:https?://.+\.jpe?g',
         },
     }]
-    _APP_VERSIONS = [('23.3.0', '230300')]
-    _APP_NAME = 'aweme'
-    _AID = 1128
-    _API_HOSTNAME = 'aweme.snssdk.com'
     _UPLOADER_URL_FORMAT = 'https://www.douyin.com/user/%s'
     _WEBPAGE_HOST = 'https://www.douyin.com/'
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        try:
-            return self._extract_aweme_app(video_id)
-        except ExtractorError as e:
-            e.expected = True
-            self.to_screen(f'{e}; trying with webpage')
-
-        webpage = self._download_webpage(url, video_id)
-        render_data = self._search_json(
-            r'<script [^>]*\bid=[\'"]RENDER_DATA[\'"][^>]*>', webpage, 'render data', video_id,
-            contains_pattern=r'%7B(?s:.+)%7D', fatal=False, transform_source=compat_urllib_parse_unquote)
-        if not render_data:
+        detail = traverse_obj(self._download_json(
+            'https://www.douyin.com/aweme/v1/web/aweme/detail/', video_id,
+            'Downloading web detail JSON', 'Failed to download web detail JSON',
+            query={'aweme_id': video_id}, fatal=False), ('aweme_detail', {dict}))
+        if not detail:
             # TODO: Run verification challenge code to generate signature cookies
-            cookies = self._get_cookies(self._WEBPAGE_HOST)
-            expected = not cookies.get('s_v_web_id') or not cookies.get('ttwid')
             raise ExtractorError(
-                'Fresh cookies (not necessarily logged in) are needed', expected=expected)
+                'Fresh cookies (not necessarily logged in) are needed',
+                expected=not self._get_cookies(self._WEBPAGE_HOST).get('s_v_web_id'))
 
-        return self._parse_aweme_video_web(get_first(render_data, ('aweme', 'detail')), url, video_id)
+        return self._parse_aweme_video_app(detail)
 
 
 class TikTokVMIE(InfoExtractor):
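For illustration, a minimal standalone sketch of the web detail call the extractor now relies on. This uses `urllib` directly (yt-dlp goes through its own networking stack), and real requests additionally need fresh `s_v_web_id`/`ttwid` cookies or the server returns an empty payload:

```python
import json
import urllib.parse
import urllib.request

def fetch_douyin_detail(video_id, cookie_header):
    # Sketch of the endpoint used above; assumes fresh Douyin web cookies
    url = ('https://www.douyin.com/aweme/v1/web/aweme/detail/?'
           + urllib.parse.urlencode({'aweme_id': video_id}))
    req = urllib.request.Request(url, headers={'Cookie': cookie_header})
    with urllib.request.urlopen(req) as resp:
        return (json.load(resp) or {}).get('aweme_detail')
```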

diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py

@@ -100,9 +100,13 @@ class TwitterBaseIE(InfoExtractor):
         if not variant_url:
             return [], {}
         elif '.m3u8' in variant_url:
-            return self._extract_m3u8_formats_and_subtitles(
+            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                 variant_url, video_id, 'mp4', 'm3u8_native',
                 m3u8_id='hls', fatal=False)
+            for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None):
+                if mobj := re.match(r'hls-[Aa]udio-(?P<bitrate>\d{4,})', f['format_id']):
+                    f['tbr'] = int_or_none(mobj.group('bitrate'), 1000)
+            return fmts, subs
         else:
             tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
             f = {
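The backfill loop above recovers the audio bitrate that Twitter only encodes in the HLS format name; `int_or_none(v, 1000)` floor-divides by 1000, so for example:

```python
import re

# e.g. format IDs produced by the HLS manifest parser
for format_id in ('hls-audio-32000', 'hls-Audio-128000'):
    mobj = re.match(r'hls-[Aa]udio-(?P<bitrate>\d{4,})', format_id)
    print(int(mobj.group('bitrate')) // 1000)  # -> 32, then 128
```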
@@ -471,6 +475,7 @@ class TwitterIE(TwitterBaseIE):
             'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
             'thumbnail': r're:^https?://.*\.jpg',
             'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
+            'channel_id': '549749560',
             'uploader': 'FREE THE NIPPLE',
             'uploader_id': 'freethenipple',
             'duration': 12.922,
@@ -484,6 +489,7 @@ class TwitterIE(TwitterBaseIE):
             'age_limit': 18,
             '_old_archive_ids': ['twitter 643211948184596480'],
         },
+        'skip': 'Requires authentication',
     }, {
         'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
         'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
@@ -506,6 +512,7 @@ class TwitterIE(TwitterBaseIE):
             'ext': 'mp4',
             'title': r're:Star Wars.*A new beginning is coming December 18.*',
             'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
+            'channel_id': '20106852',
             'uploader_id': 'starwars',
             'uploader': r're:Star Wars.*',
             'timestamp': 1447395772,
@@ -551,6 +558,7 @@ class TwitterIE(TwitterBaseIE):
             'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
             'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
             'thumbnail': r're:^https?://.*\.jpg',
+            'channel_id': '1383165541',
             'uploader': 'jaydin donte geer',
             'uploader_id': 'jaydingeer',
             'duration': 30.0,
@@ -591,6 +599,7 @@ class TwitterIE(TwitterBaseIE):
             'ext': 'mp4',
             'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
             'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
+            'channel_id': '701615052',
             'uploader_id': 'CaptainAmerica',
             'uploader': 'Captain America',
             'duration': 3.17,
@@ -627,6 +636,7 @@ class TwitterIE(TwitterBaseIE):
             'ext': 'mp4',
             'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
             'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
+            'channel_id': '2526757026',
             'uploader': 'عالم الأخبار',
             'uploader_id': 'news_al3alm',
             'duration': 277.4,
@@ -651,6 +661,7 @@ class TwitterIE(TwitterBaseIE):
             'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
             'thumbnail': r're:^https?://.*\.jpg',
             'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
+            'channel_id': '2319432498',
             'uploader': 'Préfet de Guadeloupe',
             'uploader_id': 'Prefet971',
             'duration': 47.48,
@@ -677,6 +688,7 @@ class TwitterIE(TwitterBaseIE):
             'title': 're:.*?Shep is on a roll today.*?',
             'thumbnail': r're:^https?://.*\.jpg',
             'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
+            'channel_id': '255036353',
             'uploader': 'Lis Power',
             'uploader_id': 'LisPower1',
             'duration': 111.278,
@@ -741,6 +753,7 @@ class TwitterIE(TwitterBaseIE):
             'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
             'thumbnail': r're:^https?://.*\.jpg',
             'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
+            'channel_id': '18552281',
             'uploader': 'Brooklyn Nets',
             'uploader_id': 'BrooklynNets',
             'duration': 324.484,
@@ -763,10 +776,11 @@ class TwitterIE(TwitterBaseIE):
             'id': '1577855447914409984',
             'display_id': '1577855540407197696',
             'ext': 'mp4',
-            'title': 'md5:9d198efb93557b8f8d5b78c480407214',
+            'title': 'md5:466a3a8b049b5f5a13164ce915484b51',
             'description': 'md5:b9c3699335447391d11753ab21c70a74',
             'upload_date': '20221006',
-            'uploader': 'oshtru',
+            'channel_id': '143077138',
+            'uploader': 'Oshtru',
             'uploader_id': 'oshtru',
             'uploader_url': 'https://twitter.com/oshtru',
             'thumbnail': r're:^https?://.*\.jpg',
@@ -784,9 +798,10 @@ class TwitterIE(TwitterBaseIE):
         'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
         'info_dict': {
             'id': '1577719286659006464',
-            'title': 'Ultima - Test',
+            'title': 'Ultima Reload - Test',
             'description': 'Test https://t.co/Y3KEZD7Dad',
-            'uploader': 'Ultima',
+            'channel_id': '168922496',
+            'uploader': 'Ultima Reload',
             'uploader_id': 'UltimaShadowX',
             'uploader_url': 'https://twitter.com/UltimaShadowX',
             'upload_date': '20221005',
@@ -808,6 +823,7 @@ class TwitterIE(TwitterBaseIE):
             'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
             'thumbnail': r're:^https?://.*\.jpg',
             'description': 'md5:95aea692fda36a12081b9629b02daa92',
+            'channel_id': '1094109584',
             'uploader': 'Max Olson',
             'uploader_id': 'MesoMax919',
             'uploader_url': 'https://twitter.com/MesoMax919',
@@ -830,6 +846,7 @@ class TwitterIE(TwitterBaseIE):
             'ext': 'mp4',
             'title': str,
             'description': str,
+            'channel_id': '1217167793541480450',
             'uploader': str,
             'uploader_id': 'Rizdraws',
             'uploader_url': 'https://twitter.com/Rizdraws',
@@ -840,7 +857,8 @@ class TwitterIE(TwitterBaseIE):
             'repost_count': int,
             'comment_count': int,
             'age_limit': 18,
-            'tags': []
+            'tags': [],
+            '_old_archive_ids': ['twitter 1575199173472927762'],
         },
         'params': {'skip_download': 'The media could not be played'},
         'skip': 'Requires authentication',
@@ -852,6 +870,7 @@ class TwitterIE(TwitterBaseIE):
             'id': '1395079556562706435',
             'title': str,
             'tags': [],
+            'channel_id': '21539378',
             'uploader': str,
             'like_count': int,
             'upload_date': '20210519',
@@ -869,6 +888,7 @@ class TwitterIE(TwitterBaseIE):
         'info_dict': {
             'id': '1578353380363501568',
             'title': str,
+            'channel_id': '2195866214',
             'uploader_id': 'DavidToons_',
             'repost_count': int,
             'like_count': int,
@@ -888,6 +908,7 @@ class TwitterIE(TwitterBaseIE):
             'id': '1578401165338976258',
             'title': str,
             'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
+            'channel_id': '19338359',
             'uploader': str,
             'uploader_id': 'primevideouk',
             'timestamp': 1665155137,
@@ -929,6 +950,7 @@ class TwitterIE(TwitterBaseIE):
             'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
             'comment_count': int,
             'uploader_id': 'CTVJLaidlaw',
+            'channel_id': '80082014',
             'repost_count': int,
             'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
             'upload_date': '20221208',
@@ -946,6 +968,7 @@ class TwitterIE(TwitterBaseIE):
             'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
             'thumbnail': r're:^https?://.+\.jpg',
             'timestamp': 1670459604.0,
+            'channel_id': '80082014',
             'uploader_id': 'CTVJLaidlaw',
             'uploader': 'Jocelyn Laidlaw',
             'repost_count': int,
@@ -972,6 +995,7 @@ class TwitterIE(TwitterBaseIE):
             'title': '뽀 - 아 최우제 이동속도 봐',
             'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
             'duration': 24.598,
+            'channel_id': '1281839411068432384',
             'uploader': '',
             'uploader_id': 's2FAKER',
             'uploader_url': 'https://twitter.com/s2FAKER',
@@ -985,6 +1009,7 @@ class TwitterIE(TwitterBaseIE):
             'comment_count': int,
             '_old_archive_ids': ['twitter 1621117700482416640'],
         },
+        'skip': 'Requires authentication',
     }, {
         'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
         'info_dict': {
@@ -992,6 +1017,7 @@ class TwitterIE(TwitterBaseIE):
             'display_id': '1599108751385972737',
             'ext': 'mp4',
             'title': '\u06ea - \U0001F48B',
+            'channel_id': '1347791436809441283',
             'uploader_url': 'https://twitter.com/hlo_again',
             'like_count': int,
             'uploader_id': 'hlo_again',
@@ -1014,6 +1040,7 @@ class TwitterIE(TwitterBaseIE):
             'id': '1600009362759733248',
             'display_id': '1600009574919962625',
             'ext': 'mp4',
+            'channel_id': '211814412',
             'uploader_url': 'https://twitter.com/MunTheShinobi',
             'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
             'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
@@ -1061,6 +1088,7 @@ class TwitterIE(TwitterBaseIE):
             'display_id': '1695424220702888009',
             'title': 'md5:e8daa9527bc2b947121395494f786d9d',
             'description': 'md5:004f2d37fd58737724ec75bc7e679938',
+            'channel_id': '15212187',
             'uploader': 'Benny Johnson',
             'uploader_id': 'bennyjohnson',
             'uploader_url': 'https://twitter.com/bennyjohnson',
@@ -1084,6 +1112,7 @@ class TwitterIE(TwitterBaseIE):
             'display_id': '1695424220702888009',
             'title': 'md5:e8daa9527bc2b947121395494f786d9d',
             'description': 'md5:004f2d37fd58737724ec75bc7e679938',
+            'channel_id': '15212187',
             'uploader': 'Benny Johnson',
             'uploader_id': 'bennyjohnson',
             'uploader_url': 'https://twitter.com/bennyjohnson',
@@ -1117,7 +1146,7 @@ class TwitterIE(TwitterBaseIE):
         },
         'add_ie': ['TwitterBroadcast'],
     }, {
-        # Animated gif and quote tweet video, with syndication API
+        # Animated gif and quote tweet video
         'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
         'playlist_mincount': 2,
         'info_dict': {
@@ -1125,6 +1154,7 @@ class TwitterIE(TwitterBaseIE):
             'title': 'BAKOON - https://t.co/zom968d0a0',
             'description': 'https://t.co/zom968d0a0',
             'tags': [],
+            'channel_id': '1263540390',
             'uploader': 'BAKOON',
             'uploader_id': 'BAKKOOONN',
             'uploader_url': 'https://twitter.com/BAKKOOONN',
@@ -1132,19 +1162,21 @@ class TwitterIE(TwitterBaseIE):
             'timestamp': 1693254077.0,
             'upload_date': '20230828',
             'like_count': int,
+            'comment_count': int,
+            'repost_count': int,
         },
-        'params': {'extractor_args': {'twitter': {'api': ['syndication']}}},
-        'expected_warnings': ['Not all metadata'],
+        'skip': 'Requires authentication',
     }, {
         # "stale tweet" with typename "TweetWithVisibilityResults"
         'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
-        'md5': '62b1e11cdc2cdd0e527f83adb081f536',
+        'md5': '511377ff8dfa7545307084dca4dce319',
         'info_dict': {
             'id': '1724883339285544960',
             'ext': 'mp4',
             'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
             'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
             'display_id': '1724884212803834154',
+            'channel_id': '337808606',
             'uploader': 'Robert F. Kennedy Jr',
             'uploader_id': 'RobertKennedyJr',
             'uploader_url': 'https://twitter.com/RobertKennedyJr',
@@ -1386,6 +1418,7 @@ class TwitterIE(TwitterBaseIE):
             'description': description,
             'uploader': uploader,
             'timestamp': unified_timestamp(status.get('created_at')),
+            'channel_id': str_or_none(status.get('user_id_str')) or str_or_none(user.get('id_str')),
             'uploader_id': uploader_id,
             'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
             'like_count': int_or_none(status.get('favorite_count')),
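`str_or_none` keeps the `or`-fallback in the new `channel_id` line well defined when either ID is missing; a sketch of the behaviour:

```python
def str_or_none(v):
    # same contract as yt_dlp.utils.str_or_none
    return None if v is None else str(v)

status, user = {}, {'id_str': 549749560}
assert (str_or_none(status.get('user_id_str'))
        or str_or_none(user.get('id_str'))) == '549749560'
```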

diff --git a/yt_dlp/extractor/utreon.py b/yt_dlp/extractor/utreon.py

@@ -10,7 +10,8 @@ from ..utils import (
 
 
 class UtreonIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?utreon\.com/v/(?P<id>[\w-]+)'
+    IE_NAME = 'playeur'
+    _VALID_URL = r'https?://(?:www\.)?(?:utreon|playeur)\.com/v/(?P<id>[\w-]+)'
     _TESTS = [{
         'url': 'https://utreon.com/v/z_I7ikQbuDw',
         'info_dict': {
@@ -19,8 +20,9 @@ class UtreonIE(InfoExtractor):
             'title': 'Freedom Friday meditation - Rising in the wind',
             'description': 'md5:a9bf15a42434a062fe313b938343ad1b',
             'uploader': 'Heather Dawn Elemental Health',
-            'thumbnail': 'https://data-1.utreon.com/v/MG/M2/NT/z_I7ikQbuDw/z_I7ikQbuDw_preview.jpg',
+            'thumbnail': r're:^https?://.+\.jpg',
             'release_date': '20210723',
+            'duration': 586,
         }
     }, {
         'url': 'https://utreon.com/v/jerJw5EOOVU',
@@ -28,10 +30,11 @@ class UtreonIE(InfoExtractor):
             'id': 'jerJw5EOOVU',
             'ext': 'mp4',
             'title': 'When I\'m alone, I love to reflect in peace, to make my dreams come true... [Quotes and Poems]',
-            'description': 'md5:61ee6c2da98be51b04b969ca80273aaa',
+            'description': 'md5:4026aa3a2c10169c3649926ac8ef62b6',
             'uploader': 'Frases e Poemas Quotes and Poems',
-            'thumbnail': 'https://data-1.utreon.com/v/Mz/Zh/ND/jerJw5EOOVU/jerJw5EOOVU_89af85470a4b16eededde7f8674c96d9_cover.jpg',
+            'thumbnail': r're:^https?://.+\.jpg',
             'release_date': '20210723',
+            'duration': 60,
         }
     }, {
         'url': 'https://utreon.com/v/C4ZxXhYBBmE',
@@ -39,10 +42,11 @@ class UtreonIE(InfoExtractor):
             'id': 'C4ZxXhYBBmE',
             'ext': 'mp4',
             'title': 'Biden’s Capital Gains Tax Rate to Test World’s Highest',
-            'description': 'md5:fb5a6c2e506f013cc76f133f673bc5c8',
+            'description': 'md5:995aa9ad0733c0e5863ebdeff954f40e',
             'uploader': 'Nomad Capitalist',
-            'thumbnail': 'https://data-1.utreon.com/v/ZD/k1/Mj/C4ZxXhYBBmE/C4ZxXhYBBmE_628342076198c9c06dd6b2c665978584_cover.jpg',
+            'thumbnail': r're:^https?://.+\.jpg',
             'release_date': '20210723',
+            'duration': 884,
         }
     }, {
         'url': 'https://utreon.com/v/Y-stEH-FBm8',
@@ -52,15 +56,28 @@ class UtreonIE(InfoExtractor):
             'title': 'Creeper-Chan Pranks Steve! 💚 [MINECRAFT ANIME]',
             'description': 'md5:7a48450b0d761b96dec194be0c5ecb5f',
             'uploader': 'Merryweather Comics',
-            'thumbnail': 'https://data-1.utreon.com/v/MT/E4/Zj/Y-stEH-FBm8/Y-stEH-FBm8_5290676a41a4a1096db133b09f54f77b_cover.jpg',
+            'thumbnail': r're:^https?://.+\.jpg',
             'release_date': '20210718',
-        }},
-    ]
+            'duration': 151,
+        }
+    }, {
+        'url': 'https://playeur.com/v/Wzqp-UrxSeu',
+        'info_dict': {
+            'id': 'Wzqp-UrxSeu',
+            'ext': 'mp4',
+            'title': 'Update: Clockwork Basilisk Books on the Way!',
+            'description': 'md5:d9756b0b1884c904655b0e170d17cea5',
+            'uploader': 'Forgotten Weapons',
+            'release_date': '20240208',
+            'thumbnail': r're:^https?://.+\.jpg',
+            'duration': 262,
+        }
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         json_data = self._download_json(
-            'https://api.utreon.com/v1/videos/' + video_id,
+            'https://api.playeur.com/v1/videos/' + video_id,
             video_id)
         videos_json = json_data['videos']
         formats = [{
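The widened `_VALID_URL` keeps legacy utreon.com links working alongside the playeur.com rebrand:

```python
import re

_VALID_URL = r'https?://(?:www\.)?(?:utreon|playeur)\.com/v/(?P<id>[\w-]+)'
for url in ('https://utreon.com/v/z_I7ikQbuDw', 'https://playeur.com/v/Wzqp-UrxSeu'):
    print(re.match(_VALID_URL, url).group('id'))  # z_I7ikQbuDw, Wzqp-UrxSeu
```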

diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py

@@ -48,17 +48,15 @@ class VimeoBaseInfoExtractor(InfoExtractor):
         return url, data, headers
 
     def _perform_login(self, username, password):
-        webpage = self._download_webpage(
-            self._LOGIN_URL, None, 'Downloading login page')
-        token, vuid = self._extract_xsrft_and_vuid(webpage)
+        viewer = self._download_json('https://vimeo.com/_next/viewer', None, 'Downloading login token')
         data = {
             'action': 'login',
             'email': username,
             'password': password,
             'service': 'vimeo',
-            'token': token,
+            'token': viewer['xsrft'],
         }
-        self._set_vimeo_cookie('vuid', vuid)
+        self._set_vimeo_cookie('vuid', viewer['vuid'])
         try:
             self._download_webpage(
                 self._LOGIN_URL, None, 'Logging in',
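A standalone sketch of the new token fetch, assuming the endpoint keeps returning `xsrft` and `vuid` at the top level (as the patch relies on) and that it answers without extra browser-like headers:

```python
import json
import urllib.request

with urllib.request.urlopen('https://vimeo.com/_next/viewer') as resp:
    viewer = json.load(resp)
token, vuid = viewer['xsrft'], viewer['vuid']  # feeds the login POST above
```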

diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py

@@ -114,9 +114,9 @@ INNERTUBE_CLIENTS = {
         'INNERTUBE_CONTEXT': {
             'client': {
                 'clientName': 'ANDROID',
-                'clientVersion': '17.31.35',
+                'clientVersion': '18.11.34',
                 'androidSdkVersion': 30,
-                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
+                'userAgent': 'com.google.android.youtube/18.11.34 (Linux; U; Android 11) gzip'
             }
         },
         'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
@@ -127,9 +127,9 @@ INNERTUBE_CLIENTS = {
         'INNERTUBE_CONTEXT': {
             'client': {
                 'clientName': 'ANDROID_EMBEDDED_PLAYER',
-                'clientVersion': '17.31.35',
+                'clientVersion': '18.11.34',
                 'androidSdkVersion': 30,
-                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
+                'userAgent': 'com.google.android.youtube/18.11.34 (Linux; U; Android 11) gzip'
             },
         },
         'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
@@ -168,9 +168,9 @@ INNERTUBE_CLIENTS = {
         'INNERTUBE_CONTEXT': {
             'client': {
                 'clientName': 'IOS',
-                'clientVersion': '17.33.2',
+                'clientVersion': '18.11.34',
                 'deviceModel': 'iPhone14,3',
-                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
+                'userAgent': 'com.google.ios.youtube/18.11.34 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
             }
         },
         'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
@@ -180,9 +180,9 @@ INNERTUBE_CLIENTS = {
         'INNERTUBE_CONTEXT': {
             'client': {
                 'clientName': 'IOS_MESSAGES_EXTENSION',
-                'clientVersion': '17.33.2',
+                'clientVersion': '18.11.34',
                 'deviceModel': 'iPhone14,3',
-                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
+                'userAgent': 'com.google.ios.youtube/18.11.34 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
             },
         },
         'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
@@ -2068,7 +2068,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'title': 'Voyeur Girl',
             'description': 'md5:7ae382a65843d6df2685993e90a8628f',
             'upload_date': '20190312',
-            'artist': 'Stephen',
+            'artists': ['Stephen'],
+            'creators': ['Stephen'],
             'track': 'Voyeur Girl',
             'album': 'it\'s too much love to know my dear',
             'release_date': '20190313',
@@ -2081,7 +2082,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'channel': 'Stephen',  # TODO: should be "Stephen - Topic"
             'uploader': 'Stephen',
             'availability': 'public',
-            'creator': 'Stephen',
             'duration': 169,
             'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
             'age_limit': 0,
@@ -3640,15 +3640,28 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         return orderedSet(requested_clients)
 
+    def _invalid_player_response(self, pr, video_id):
+        # YouTube may return a different video player response than expected.
+        # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
+        if (pr_id := traverse_obj(pr, ('videoDetails', 'videoId'))) != video_id:
+            return pr_id
+
     def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
         initial_pr = None
         if webpage:
             initial_pr = self._search_json(
                 self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)
 
+        prs = []
+        if initial_pr and not self._invalid_player_response(initial_pr, video_id):
+            # Android player_response does not have microFormats which are needed for
+            # extraction of some data. So we return the initial_pr with formats
+            # stripped out even if not requested by the user
+            # See: https://github.com/yt-dlp/yt-dlp/issues/501
+            prs.append({**initial_pr, 'streamingData': None})
+
         all_clients = set(clients)
         clients = clients[::-1]
-        prs = []
 
         def append_client(*client_names):
             """ Append the first client name that exists but not already used """
@ -3660,18 +3673,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
all_clients.add(actual_client) all_clients.add(actual_client)
return return
# Android player_response does not have microFormats which are needed for
# extraction of some data. So we return the initial_pr with formats
# stripped out even if not requested by the user
# See: https://github.com/yt-dlp/yt-dlp/issues/501
if initial_pr:
pr = dict(initial_pr)
pr['streamingData'] = None
prs.append(pr)
last_error = None
tried_iframe_fallback = False tried_iframe_fallback = False
player_url = None player_url = None
skipped_clients = {}
while clients: while clients:
client, base_client, variant = _split_innertube_client(clients.pop()) client, base_client, variant = _split_innertube_client(clients.pop())
player_ytcfg = master_ytcfg if client == 'web' else {} player_ytcfg = master_ytcfg if client == 'web' else {}
@ -3692,26 +3696,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response( pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data) client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
except ExtractorError as e: except ExtractorError as e:
if last_error: self.report_warning(e)
self.report_warning(last_error)
last_error = e
continue continue
if pr: if pr_id := self._invalid_player_response(pr, video_id):
# YouTube may return a different video player response than expected. skipped_clients[client] = pr_id
# See: https://github.com/TeamNewPipe/NewPipe/issues/8713 elif pr:
pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId')) # Save client name for introspection later
if pr_video_id and pr_video_id != video_id: name = short_client_name(client)
self.report_warning( sd = traverse_obj(pr, ('streamingData', {dict})) or {}
f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message()) sd[STREAMING_DATA_CLIENT_NAME] = name
else: for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
# Save client name for introspection later f[STREAMING_DATA_CLIENT_NAME] = name
name = short_client_name(client) prs.append(pr)
sd = traverse_obj(pr, ('streamingData', {dict})) or {}
sd[STREAMING_DATA_CLIENT_NAME] = name
for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
f[STREAMING_DATA_CLIENT_NAME] = name
prs.append(pr)
# creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated: if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
@ -3722,10 +3719,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
elif not variant: elif not variant:
append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded') append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')
if last_error: if skipped_clients:
if not len(prs): self.report_warning(
raise last_error f'Skipping player responses from {"/".join(skipped_clients)} clients '
self.report_warning(last_error) f'(got player responses for video "{"/".join(set(skipped_clients.values()))}" instead of "{video_id}")')
if not prs:
raise ExtractorError(
'All player responses are invalid. Your IP is likely being blocked by Youtube', expected=True)
elif not prs:
raise ExtractorError('Failed to extract any player response')
return prs, player_url return prs, player_url
def _needs_live_processing(self, live_status, duration): def _needs_live_processing(self, live_status, duration):
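With the per-client dict, one aggregated warning replaces the old `last_error` juggling; for example:

```python
skipped_clients = {'android': 'dQw4w9WgXcQ', 'ios': 'dQw4w9WgXcQ'}
video_id = 'jNQXAC9IVRw'
print(f'Skipping player responses from {"/".join(skipped_clients)} clients '
      f'(got player responses for video "{"/".join(set(skipped_clients.values()))}" instead of "{video_id}")')
# -> Skipping player responses from android/ios clients
#    (got player responses for video "dQw4w9WgXcQ" instead of "jNQXAC9IVRw")
```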
@ -4386,7 +4388,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
release_year = release_date[:4] release_year = release_date[:4]
info.update({ info.update({
'album': mobj.group('album'.strip()), 'album': mobj.group('album'.strip()),
'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')), 'artists': ([a] if (a := mobj.group('clean_artist'))
else [a.strip() for a in mobj.group('artist').split('·')]),
'track': mobj.group('track').strip(), 'track': mobj.group('track').strip(),
'release_date': release_date, 'release_date': release_date,
'release_year': int_or_none(release_year), 'release_year': int_or_none(release_year),
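YouTube's music-metadata descriptions separate multiple artists with '·', and the new list-valued `artists` field preserves each name instead of re-joining them into one string:

```python
# '·' is the separator YouTube uses in music-metadata descriptions
artists = [a.strip() for a in 'Artist One · Artist Two'.split('·')]
assert artists == ['Artist One', 'Artist Two']
```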
@ -4532,7 +4535,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if mrr_title == 'Album': if mrr_title == 'Album':
info['album'] = mrr_contents_text info['album'] = mrr_contents_text
elif mrr_title == 'Artist': elif mrr_title == 'Artist':
info['artist'] = mrr_contents_text info['artists'] = [mrr_contents_text] if mrr_contents_text else None
elif mrr_title == 'Song': elif mrr_title == 'Song':
info['track'] = mrr_contents_text info['track'] = mrr_contents_text
owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges'))) owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))
@ -4566,7 +4569,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if fmt.get('protocol') == 'm3u8_native': if fmt.get('protocol') == 'm3u8_native':
fmt['__needs_testing'] = True fmt['__needs_testing'] = True
for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]: for s_k, d_k in [('artists', 'creators'), ('track', 'alt_title')]:
v = info.get(s_k) v = info.get(s_k)
if v: if v:
info[d_k] = v info[d_k] = v

diff --git a/yt_dlp/extractor/zenporn.py b/yt_dlp/extractor/zenporn.py
new file (118 lines)
@@ -0,0 +1,118 @@
+import base64
+import binascii
+
+from .common import InfoExtractor
+from ..utils import ExtractorError, determine_ext, unified_strdate, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class ZenPornIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?zenporn\.com/video/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://zenporn.com/video/15627016/desi-bhabi-ki-chudai',
+        'md5': '07bd576b5920714d74975c054ca28dee',
+        'info_dict': {
+            'id': '9563799',
+            'display_id': '15627016',
+            'ext': 'mp4',
+            'title': 'md5:669eafd3bbc688aa29770553b738ada2',
+            'description': '',
+            'thumbnail': 'md5:2fc044a19bab450fef8f1931e7920a18',
+            'upload_date': '20230925',
+            'uploader': 'md5:9fae59847f1f58d1da8f2772016c12f3',
+            'age_limit': 18,
+        }
+    }, {
+        'url': 'https://zenporn.com/video/15570701',
+        'md5': 'acba0d080d692664fcc8c4e5502b1a67',
+        'info_dict': {
+            'id': '2297875',
+            'display_id': '15570701',
+            'ext': 'mp4',
+            'title': 'md5:47aebdf87644ec91e8b1a844bc832451',
+            'description': '',
+            'thumbnail': 'https://mstn.nv7s.com/contents/videos_screenshots/2297000/2297875/480x270/1.jpg',
+            'upload_date': '20230921',
+            'uploader': 'Lois Clarke',
+            'age_limit': 18,
+        }
+    }, {
+        'url': 'https://zenporn.com/video/8531117/amateur-students-having-a-fuck-fest-at-club/',
+        'md5': '67411256aa9451449e4d29f3be525541',
+        'info_dict': {
+            'id': '12791908',
+            'display_id': '8531117',
+            'ext': 'mp4',
+            'title': 'Amateur students having a fuck fest at club',
+            'description': '',
+            'thumbnail': 'https://tn.txxx.tube/contents/videos_screenshots/12791000/12791908/288x162/1.jpg',
+            'upload_date': '20191005',
+            'uploader': 'Jackopenass',
+            'age_limit': 18,
+        }
+    }, {
+        'url': 'https://zenporn.com/video/15872038/glad-you-came/',
+        'md5': '296ccab437f5bac6099433768449d8e1',
+        'info_dict': {
+            'id': '111585',
+            'display_id': '15872038',
+            'ext': 'mp4',
+            'title': 'Glad You Came',
+            'description': '',
+            'thumbnail': 'https://vpim.m3pd.com/contents/videos_screenshots/111000/111585/480x270/1.jpg',
+            'upload_date': '20231024',
+            'uploader': 'Martin Rudenko',
+            'age_limit': 18,
+        }
+    }]
+
+    def _gen_info_url(self, ext_domain, extr_id, lifetime=86400):
+        """ This function is a reverse engineering from the website javascript """
+        result = '/'.join(str(int(extr_id) // i * i) for i in (1_000_000, 1_000, 1))
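+        # e.g. extr_id '9563799' -> '9000000/9563000/9563799'
+        #      (the ID floored to the nearest million, thousand and unit)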
+        return f'https://{ext_domain}/api/json/video/{lifetime}/{result}.json'
+
+    @staticmethod
+    def _decode_video_url(encoded_url):
+        """ This function is a reverse engineering from the website javascript """
+        # Replace lookalike characters and standardize map
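+        # ('АВСЕМ' are Cyrillic lookalikes of ASCII 'ABCEM'; '.', ',' and '~'
+        #  stand in for the base64 characters '+', '/' and '=')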
+        translation = str.maketrans('АВСЕМ.,~', 'ABCEM+/=')
+        try:
+            return base64.b64decode(encoded_url.translate(translation), validate=True).decode()
+        except (binascii.Error, ValueError):
+            return None
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        ext_domain, video_id = self._search_regex(
+            r'https://(?P<ext_domain>[\w.-]+\.\w{3})/embed/(?P<extr_id>\d+)/',
+            webpage, 'embed info', group=('ext_domain', 'extr_id'))
+
+        info_json = self._download_json(
+            self._gen_info_url(ext_domain, video_id), video_id, fatal=False)
+
+        video_json = self._download_json(
+            f'https://{ext_domain}/api/videofile.php', video_id, query={
+                'video_id': video_id,
+                'lifetime': 8640000,
+            }, note='Downloading video file JSON', errnote='Failed to download video file JSON')
+
+        decoded_url = self._decode_video_url(video_json[0]['video_url'])
+        if not decoded_url:
+            raise ExtractorError('Unable to decode the video url')
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'ext': traverse_obj(video_json, (0, 'format', {determine_ext})),
+            'url': f'https://{ext_domain}{decoded_url}',
+            'age_limit': 18,
+            **traverse_obj(info_json, ('video', {
+                'title': ('title', {str}),
+                'description': ('description', {str}),
+                'thumbnail': ('thumb', {url_or_none}),
+                'upload_date': ('post_date', {unified_strdate}),
+                'uploader': ('user', 'username', {str}),
+            })),
+        }

diff --git a/yt_dlp/networking/_requests.py b/yt_dlp/networking/_requests.py

@@ -258,10 +258,10 @@ class RequestsRH(RequestHandler, InstanceStoreMixin):
 
         # Forward urllib3 debug messages to our logger
         logger = logging.getLogger('urllib3')
-        handler = Urllib3LoggingHandler(logger=self._logger)
-        handler.setFormatter(logging.Formatter('requests: %(message)s'))
-        handler.addFilter(Urllib3LoggingFilter())
-        logger.addHandler(handler)
+        self.__logging_handler = Urllib3LoggingHandler(logger=self._logger)
+        self.__logging_handler.setFormatter(logging.Formatter('requests: %(message)s'))
+        self.__logging_handler.addFilter(Urllib3LoggingFilter())
+        logger.addHandler(self.__logging_handler)
 
         # TODO: Use a logger filter to suppress pool reuse warning instead
         logger.setLevel(logging.ERROR)
@@ -276,6 +276,9 @@ class RequestsRH(RequestHandler, InstanceStoreMixin):
 
     def close(self):
         self._clear_instances()
+        # Remove the logging handler that contains a reference to our logger
+        # See: https://github.com/yt-dlp/yt-dlp/issues/8922
+        logging.getLogger('urllib3').removeHandler(self.__logging_handler)
 
     def _check_extensions(self, extensions):
         super()._check_extensions(extensions)
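The same pattern in miniature, keeping a handler reference so teardown can detach it again (the websockets handler below does the multi-logger variant of this):

```python
import logging

logger = logging.getLogger('urllib3')
handler = logging.StreamHandler()
logger.addHandler(handler)
# ... handler lives for the lifetime of the request handler ...
logger.removeHandler(handler)  # what close() now does, breaking the reference cycle
```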

diff --git a/yt_dlp/networking/_websockets.py b/yt_dlp/networking/_websockets.py

@@ -90,10 +90,12 @@ class WebsocketsRH(WebSocketRequestHandler):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
+        self.__logging_handlers = {}
         for name in ('websockets.client', 'websockets.server'):
             logger = logging.getLogger(name)
             handler = logging.StreamHandler(stream=sys.stdout)
             handler.setFormatter(logging.Formatter(f'{self.RH_NAME}: %(message)s'))
+            self.__logging_handlers[name] = handler
             logger.addHandler(handler)
             if self.verbose:
                 logger.setLevel(logging.DEBUG)
@@ -103,6 +105,12 @@ class WebsocketsRH(WebSocketRequestHandler):
         extensions.pop('timeout', None)
         extensions.pop('cookiejar', None)
 
+    def close(self):
+        # Remove the logging handler that contains a reference to our logger
+        # See: https://github.com/yt-dlp/yt-dlp/issues/8922
+        for name, handler in self.__logging_handlers.items():
+            logging.getLogger(name).removeHandler(handler)
+
     def _send(self, request):
         timeout = float(request.extensions.get('timeout') or self.timeout)
         headers = self._merge_headers(request.headers)

diff --git a/yt_dlp/networking/common.py b/yt_dlp/networking/common.py

@@ -68,6 +68,7 @@ class RequestDirector:
     def close(self):
         for handler in self.handlers.values():
             handler.close()
+        self.handlers = {}
 
     def add_handler(self, handler: RequestHandler):
         """Add a handler. If a handler of the same RH_KEY exists, it will overwrite it"""

diff --git a/yt_dlp/options.py b/yt_dlp/options.py

@@ -196,9 +196,12 @@ class _YoutubeDLOptionParser(optparse.OptionParser):
                 raise
             return self.check_values(self.values, self.largs)
 
-    def error(self, msg):
+    def _generate_error_message(self, msg):
         msg = f'{self.get_prog_name()}: error: {str(msg).strip()}\n'
-        raise optparse.OptParseError(f'{self.get_usage()}\n{msg}' if self.usage else msg)
+        return f'{self.get_usage()}\n{msg}' if self.usage else msg
+
+    def error(self, msg):
+        raise optparse.OptParseError(self._generate_error_message(msg))
 
     def _get_args(self, args):
         return sys.argv[1:] if args is None else list(args)
@@ -476,8 +479,8 @@ def create_parser():
             'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx'],
             'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx'],
             '2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'],
-            '2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter'],
-            '2023': ['prefer-legacy-http-handler', 'manifest-filesize-approx'],
+            '2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter', 'prefer-legacy-http-handler', 'manifest-filesize-approx'],
+            '2023': [],
         }
     }, help=(
         'Options that can help keep compatibility with youtube-dl or youtube-dlc '

diff --git a/yt_dlp/plugins.py b/yt_dlp/plugins.py

@@ -86,11 +86,14 @@ class PluginFinder(importlib.abc.MetaPathFinder):
         parts = Path(*fullname.split('.'))
         for path in orderedSet(candidate_locations, lazy=True):
             candidate = path / parts
-            if candidate.is_dir():
-                yield candidate
-            elif path.suffix in ('.zip', '.egg', '.whl') and path.is_file():
-                if parts in dirs_in_zip(path):
+            try:
+                if candidate.is_dir():
                     yield candidate
+                elif path.suffix in ('.zip', '.egg', '.whl') and path.is_file():
+                    if parts in dirs_in_zip(path):
+                        yield candidate
+            except PermissionError as e:
+                write_string(f'Permission error while accessing modules in "{e.filename}"\n')
 
     def find_spec(self, fullname, path=None, target=None):
         if fullname not in self.packages:

diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py

@@ -738,9 +738,10 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
 
         def add(meta_list, info_list=None):
             value = next((
-                str(info[key]) for key in [f'{meta_prefix}_'] + list(variadic(info_list or meta_list))
+                info[key] for key in [f'{meta_prefix}_'] + list(variadic(info_list or meta_list))
                 if info.get(key) is not None), None)
             if value not in ('', None):
+                value = ', '.join(map(str, variadic(value)))
                 value = value.replace('\0', '')  # nul character cannot be passed in command line
                 metadata['common'].update({meta_f: value for meta_f in variadic(meta_list)})
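Because fields like `artists` are now lists, `add()` joins them before handing the value to ffmpeg; in effect:

```python
def join_metadata_value(value):
    # mirrors the ', '.join(map(str, variadic(value))) step above
    if not isinstance(value, (list, tuple)):
        value = [value]
    return ', '.join(map(str, value))

assert join_metadata_value(['Artist One', 'Artist Two']) == 'Artist One, Artist Two'
assert join_metadata_value('Artist One') == 'Artist One'
```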
@@ -754,10 +755,11 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
         add(('description', 'synopsis'), 'description')
         add(('purl', 'comment'), 'webpage_url')
         add('track', 'track_number')
-        add('artist', ('artist', 'creator', 'uploader', 'uploader_id'))
-        add('genre')
+        add('artist', ('artist', 'artists', 'creator', 'creators', 'uploader', 'uploader_id'))
+        add('composer', ('composer', 'composers'))
+        add('genre', ('genre', 'genres'))
         add('album')
-        add('album_artist')
+        add('album_artist', ('album_artist', 'album_artists'))
         add('disc', 'disc_number')
         add('show', 'series')
         add('season_number')