Merge branch 'yt-dlp:master' into viu-indonesia-fix-6482-partial

commit 66d3eb246a
HobbyistDev, 2024-02-26 20:15:03 +08:00, committed by GitHub
GPG key ID: B5690EEEBB952194 (no known key found for this signature in database)
46 changed files with 985 additions and 435 deletions

.github/workflows/build.yml

@@ -164,7 +164,7 @@ jobs:
       - name: Upload artifacts
         uses: actions/upload-artifact@v4
         with:
-          name: build-${{ github.job }}
+          name: build-bin-${{ github.job }}
           path: |
             yt-dlp
             yt-dlp.tar.gz
@@ -227,7 +227,7 @@ jobs:
       - name: Upload artifacts
         uses: actions/upload-artifact@v4
         with:
-          name: build-linux_${{ matrix.architecture }}
+          name: build-bin-linux_${{ matrix.architecture }}
           path: |  # run-on-arch-action designates armv7l as armv7
             repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}
           compression-level: 0
@@ -271,7 +271,7 @@ jobs:
       - name: Upload artifacts
         uses: actions/upload-artifact@v4
         with:
-          name: build-${{ github.job }}
+          name: build-bin-${{ github.job }}
           path: |
             dist/yt-dlp_macos
             dist/yt-dlp_macos.zip
@@ -324,7 +324,7 @@ jobs:
       - name: Upload artifacts
         uses: actions/upload-artifact@v4
         with:
-          name: build-${{ github.job }}
+          name: build-bin-${{ github.job }}
           path: |
             dist/yt-dlp_macos_legacy
           compression-level: 0
@@ -373,7 +373,7 @@ jobs:
       - name: Upload artifacts
         uses: actions/upload-artifact@v4
         with:
-          name: build-${{ github.job }}
+          name: build-bin-${{ github.job }}
           path: |
             dist/yt-dlp.exe
             dist/yt-dlp_min.exe
@@ -421,7 +421,7 @@ jobs:
       - name: Upload artifacts
         uses: actions/upload-artifact@v4
         with:
-          name: build-${{ github.job }}
+          name: build-bin-${{ github.job }}
           path: |
             dist/yt-dlp_x86.exe
           compression-level: 0
@@ -441,7 +441,7 @@ jobs:
       - uses: actions/download-artifact@v4
         with:
           path: artifact
-          pattern: build-*
+          pattern: build-bin-*
           merge-multiple: true
 
       - name: Make SHA2-SUMS files
@@ -484,3 +484,4 @@ jobs:
             _update_spec
             SHA*SUMS*
           compression-level: 0
+          overwrite: true

Makefile

@@ -37,12 +37,15 @@ BINDIR ?= $(PREFIX)/bin
 MANDIR ?= $(PREFIX)/man
 SHAREDIR ?= $(PREFIX)/share
 PYTHON ?= /usr/bin/env python3
+GNUTAR ?= tar
 
-# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
-SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi)
-
-# set markdown input format to "markdown-smart" for pandoc version 2 and to "markdown" for pandoc prior to version 2
-MARKDOWN = $(shell if [ `pandoc -v | head -n1 | cut -d" " -f2 | head -c1` = "2" ]; then echo markdown-smart; else echo markdown; fi)
+# set markdown input format to "markdown-smart" for pandoc version 2+ and to "markdown" for pandoc prior to version 2
+PANDOC_VERSION_CMD = pandoc -v 2>/dev/null | head -n1 | cut -d' ' -f2 | head -c1
+PANDOC_VERSION != $(PANDOC_VERSION_CMD)
+PANDOC_VERSION ?= $(shell $(PANDOC_VERSION_CMD))
+MARKDOWN_CMD = if [ "$(PANDOC_VERSION)" = "1" -o "$(PANDOC_VERSION)" = "0" ]; then echo markdown; else echo markdown-smart; fi
+MARKDOWN != $(MARKDOWN_CMD)
+MARKDOWN ?= $(shell $(MARKDOWN_CMD))
 
 install: lazy-extractors yt-dlp yt-dlp.1 completions
 	mkdir -p $(DESTDIR)$(BINDIR)
@@ -73,17 +76,21 @@ test:
 
 offlinetest: codetest
 	$(PYTHON) -m pytest -k "not download"
 
-CODE_FOLDERS := $(shell find yt_dlp -type d -not -name '__*' -exec sh -c 'test -e "$$1"/__init__.py' sh {} \; -print)
-CODE_FILES := $(shell for f in $(CODE_FOLDERS); do echo "$$f" | awk '{gsub(/\/[^\/]+/,"/*"); print $$1"/*.py"}'; done | sort -u)
+CODE_FOLDERS_CMD = find yt_dlp -type f -name '__init__.py' | sed 's,/__init__.py,,' | grep -v '/__' | sort
+CODE_FOLDERS != $(CODE_FOLDERS_CMD)
+CODE_FOLDERS ?= $(shell $(CODE_FOLDERS_CMD))
+CODE_FILES_CMD = for f in $(CODE_FOLDERS) ; do echo "$$f" | sed 's,$$,/*.py,' ; done
+CODE_FILES != $(CODE_FILES_CMD)
+CODE_FILES ?= $(shell $(CODE_FILES_CMD))
 
 yt-dlp: $(CODE_FILES)
 	mkdir -p zip
 	for d in $(CODE_FOLDERS) ; do \
 	  mkdir -p zip/$$d ;\
 	  cp -pPR $$d/*.py zip/$$d/ ;\
 	done
-	cd zip ; touch -t 200001010101 $(CODE_FILES)
+	(cd zip && touch -t 200001010101 $(CODE_FILES))
 	mv zip/yt_dlp/__main__.py zip/
-	cd zip ; zip -q ../yt-dlp $(CODE_FILES) __main__.py
+	(cd zip && zip -q ../yt-dlp $(CODE_FILES) __main__.py)
 	rm -rf zip
 	echo '#!$(PYTHON)' > yt-dlp
 	cat yt-dlp.zip >> yt-dlp
@@ -127,12 +134,14 @@ completions/fish/yt-dlp.fish: $(CODE_FILES) devscripts/fish-completion.in
 	mkdir -p completions/fish
 	$(PYTHON) devscripts/fish-completion.py
 
-_EXTRACTOR_FILES = $(shell find yt_dlp/extractor -name '*.py' -and -not -name 'lazy_extractors.py')
+_EXTRACTOR_FILES_CMD = find yt_dlp/extractor -name '*.py' -and -not -name 'lazy_extractors.py'
+_EXTRACTOR_FILES != $(_EXTRACTOR_FILES_CMD)
+_EXTRACTOR_FILES ?= $(shell $(_EXTRACTOR_FILES_CMD))
 yt_dlp/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
 	$(PYTHON) devscripts/make_lazy_extractors.py $@
 
 yt-dlp.tar.gz: all
-	@tar -czf yt-dlp.tar.gz --transform "s|^|yt-dlp/|" --owner 0 --group 0 \
+	@$(GNUTAR) -czf yt-dlp.tar.gz --transform "s|^|yt-dlp/|" --owner 0 --group 0 \
 		--exclude '*.DS_Store' \
 		--exclude '*.kate-swp' \
 		--exclude '*.pyc' \

README.md

@@ -167,8 +167,8 @@ ### Differences in default behavior
 * `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx`
 * `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx`
 * `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
-* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress`
-* `--compat-options 2023`: Same as `--compat-options prefer-legacy-http-handler,manifest-filesize-approx`. Use this to enable all future compat options
+* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`
+* `--compat-options 2023`: Currently does nothing. Use this to enable all future compat options
 
 # INSTALLATION
@@ -1311,7 +1311,8 @@ # OUTPUT TEMPLATE
 - `display_id` (string): An alternative identifier for the video
 - `uploader` (string): Full name of the video uploader
 - `license` (string): License name the video is licensed under
-- `creator` (string): The creator of the video
+- `creators` (list): The creators of the video
+- `creator` (string): The creators of the video; comma-separated
 - `timestamp` (numeric): UNIX timestamp of the moment the video became available
 - `upload_date` (string): Video upload date in UTC (YYYYMMDD)
 - `release_timestamp` (numeric): UNIX timestamp of the moment the video was released
@@ -1385,11 +1386,16 @@ # OUTPUT TEMPLATE
 - `track` (string): Title of the track
 - `track_number` (numeric): Number of the track within an album or a disc
 - `track_id` (string): Id of the track
-- `artist` (string): Artist(s) of the track
-- `genre` (string): Genre(s) of the track
+- `artists` (list): Artist(s) of the track
+- `artist` (string): Artist(s) of the track; comma-separated
+- `genres` (list): Genre(s) of the track
+- `genre` (string): Genre(s) of the track; comma-separated
+- `composers` (list): Composer(s) of the piece
+- `composer` (string): Composer(s) of the piece; comma-separated
 - `album` (string): Title of the album the track belongs to
 - `album_type` (string): Type of the album
-- `album_artist` (string): List of all artists appeared on the album
+- `album_artists` (list): All artists appeared on the album
+- `album_artist` (string): All artists appeared on the album; comma-separated
 - `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
 
 Available only when using `--download-sections` and for `chapter:` prefix when using `--split-chapters` for videos with internal chapters:
@@ -1767,10 +1773,11 @@ # MODIFYING METADATA
 `description`, `synopsis` | `description`
 `purl`, `comment` | `webpage_url`
 `track` | `track_number`
-`artist` | `artist`, `creator`, `uploader` or `uploader_id`
-`genre` | `genre`
+`artist` | `artist`, `artists`, `creator`, `creators`, `uploader` or `uploader_id`
+`composer` | `composer` or `composers`
+`genre` | `genre` or `genres`
 `album` | `album`
-`album_artist` | `album_artist`
+`album_artist` | `album_artist` or `album_artists`
 `disc` | `disc_number`
 `show` | `series`
 `season_number` | `season_number`

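The list/string field pairs documented above are kept in sync automatically: new extractors return the list form and yt-dlp derives the comma-separated twin (and vice versa for old extractors), so either spelling works in a template. A minimal sketch with invented metadata; `evaluate_outtmpl` is the helper yt-dlp's own test suite uses, so treat this as illustration rather than stable API:

    import yt_dlp

    # Invented stand-in for metadata an extractor would return
    info = {
        'id': 'demo', 'ext': 'mp3', 'title': 'Demo',
        'artists': ['Artist A', 'Artist B'],  # list-valued field
        'artist': 'Artist A, Artist B',       # derived comma-separated twin
    }
    with yt_dlp.YoutubeDL() as ydl:
        print(ydl.evaluate_outtmpl('%(artist)s - %(title)s.%(ext)s', info))     # Artist A, Artist B - Demo.mp3
        print(ydl.evaluate_outtmpl('%(artists.0)s - %(title)s.%(ext)s', info))  # Artist A - Demo.mp3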
pyproject.toml

@@ -94,7 +94,6 @@ include = [
     "/setup.cfg",
     "/supportedsites.md",
 ]
-exclude = ["/yt_dlp/__pyinstaller"]
 artifacts = [
     "/yt_dlp/extractor/lazy_extractors.py",
     "/completions",
@@ -105,7 +104,6 @@ artifacts = [
 
 [tool.hatch.build.targets.wheel]
 packages = ["yt_dlp"]
-exclude = ["/yt_dlp/__pyinstaller"]
 artifacts = ["/yt_dlp/extractor/lazy_extractors.py"]
 
 [tool.hatch.build.targets.wheel.shared-data]

test/helper.py

@@ -223,6 +223,10 @@ def sanitize(key, value):
     if test_info_dict.get('display_id') == test_info_dict.get('id'):
         test_info_dict.pop('display_id')
 
+    # Remove deprecated fields
+    for old in YoutubeDL._deprecated_multivalue_fields.keys():
+        test_info_dict.pop(old, None)
+
     # release_year may be generated from release_date
     if try_call(lambda: test_info_dict['release_year'] == int(test_info_dict['release_date'][:4])):
         test_info_dict.pop('release_year')

test/test_YoutubeDL.py

@@ -941,7 +941,7 @@ def test_match_filter(self):
         def get_videos(filter_=None):
             ydl = YDL({'match_filter': filter_, 'simulate': True})
             for v in videos:
-                ydl.process_ie_result(v, download=True)
+                ydl.process_ie_result(v.copy(), download=True)
             return [v['id'] for v in ydl.downloaded_info_dicts]
 
         res = get_videos()

test/test_networking.py

@@ -13,6 +13,7 @@
 import http.cookiejar
 import http.server
 import io
+import logging
 import pathlib
 import random
 import ssl
@@ -752,6 +753,25 @@ def test_certificate_nocombined_pass(self, handler):
         })
 
 
+class TestRequestHandlerMisc:
+    """Misc generic tests for request handlers, not related to request or validation testing"""
+    @pytest.mark.parametrize('handler,logger_name', [
+        ('Requests', 'urllib3'),
+        ('Websockets', 'websockets.client'),
+        ('Websockets', 'websockets.server')
+    ], indirect=['handler'])
+    def test_remove_logging_handler(self, handler, logger_name):
+        # Ensure any logging handlers, which may contain a YoutubeDL instance,
+        # are removed when we close the request handler
+        # See: https://github.com/yt-dlp/yt-dlp/issues/8922
+        logging_handlers = logging.getLogger(logger_name).handlers
+        before_count = len(logging_handlers)
+        rh = handler()
+        assert len(logging_handlers) == before_count + 1
+        rh.close()
+        assert len(logging_handlers) == before_count
+
+
 class TestUrllibRequestHandler(TestRequestHandlerBase):
     @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
     def test_file_urls(self, handler):
@@ -827,6 +847,7 @@ def test_httplib_validation_errors(self, handler, req, match, version_check):
         assert not isinstance(exc_info.value, TransportError)
 
 
+@pytest.mark.parametrize('handler', ['Requests'], indirect=True)
 class TestRequestsRequestHandler(TestRequestHandlerBase):
     @pytest.mark.parametrize('raised,expected', [
         (lambda: requests.exceptions.ConnectTimeout(), TransportError),
@@ -843,7 +864,6 @@ class TestRequestsRequestHandler(TestRequestHandlerBase):
         (lambda: requests.exceptions.RequestException(), RequestError)
         # (lambda: requests.exceptions.TooManyRedirects(), HTTPError) - Needs a response object
     ])
-    @pytest.mark.parametrize('handler', ['Requests'], indirect=True)
    def test_request_error_mapping(self, handler, monkeypatch, raised, expected):
         with handler() as rh:
             def mock_get_instance(*args, **kwargs):
@@ -877,7 +897,6 @@ def request(self, *args, **kwargs):
             '3 bytes read, 5 more expected'
         ),
     ])
-    @pytest.mark.parametrize('handler', ['Requests'], indirect=True)
     def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match):
         from requests.models import Response as RequestsResponse
         from urllib3.response import HTTPResponse as Urllib3Response
@@ -896,6 +915,21 @@ def mock_read(*args, **kwargs):
         assert exc_info.type is expected
 
+    def test_close(self, handler, monkeypatch):
+        rh = handler()
+        session = rh._get_instance(cookiejar=rh.cookiejar)
+        called = False
+        original_close = session.close
+
+        def mock_close(*args, **kwargs):
+            nonlocal called
+            called = True
+            return original_close(*args, **kwargs)
+
+        monkeypatch.setattr(session, 'close', mock_close)
+        rh.close()
+        assert called
+
 
 def run_validation(handler, error, req, **handler_kwargs):
     with handler(**handler_kwargs) as rh:
@@ -1205,6 +1239,19 @@ def some_preference(rh, request):
         assert director.send(Request('http://')).read() == b''
         assert director.send(Request('http://', headers={'prefer': '1'})).read() == b'supported'
 
+    def test_close(self, monkeypatch):
+        director = RequestDirector(logger=FakeLogger())
+        director.add_handler(FakeRH(logger=FakeLogger()))
+        called = False
+
+        def mock_close(*args, **kwargs):
+            nonlocal called
+            called = True
+
+        monkeypatch.setattr(director.handlers[FakeRH.RH_KEY], 'close', mock_close)
+        director.close()
+        assert called
+
 
 # XXX: do we want to move this to test_YoutubeDL.py?
 class TestYoutubeDLNetworking:

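The new test_remove_logging_handler above guards against a leak: a request handler may install a handler on a third-party logger, and anything that handler references (possibly a whole YoutubeDL instance, per issue 8922) stays alive until it is removed again. A standard-library-only sketch of the invariant being tested:

    import logging

    logger = logging.getLogger('urllib3')  # logger name as in the test
    before = len(logger.handlers)

    handler = logging.StreamHandler()      # stands in for what a request handler installs
    logger.addHandler(handler)
    assert len(logger.handlers) == before + 1

    logger.removeHandler(handler)          # what close() must do, or the handler
    assert len(logger.handlers) == before  # (and everything it references) leaks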
yt_dlp/YoutubeDL.py

@@ -580,6 +580,13 @@ class YoutubeDL:
         'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
         'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
     }
+    _deprecated_multivalue_fields = {
+        'album_artist': 'album_artists',
+        'artist': 'artists',
+        'composer': 'composers',
+        'creator': 'creators',
+        'genre': 'genres',
+    }
     _format_selection_exts = {
         'audio': set(MEDIA_EXTENSIONS.common_audio),
         'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
@@ -683,7 +690,6 @@ def process_color_policy(stream):
         self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
         self._load_cookies(self.params['http_headers'].get('Cookie'))  # compat
         self.params['http_headers'].pop('Cookie', None)
-        self._request_director = self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
 
         if auto_init and auto_init != 'no_verbose_header':
             self.print_debug_header()
@@ -957,6 +963,7 @@ def __exit__(self, *args):
     def close(self):
         self.save_cookies()
         self._request_director.close()
+        del self._request_director
 
     def trouble(self, message=None, tb=None, is_error=True):
         """Determine action to take when a download problem appears.
@@ -2640,6 +2647,14 @@ def _fill_common_fields(self, info_dict, final=True):
             if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
 
+        for old_key, new_key in self._deprecated_multivalue_fields.items():
+            if new_key in info_dict and old_key in info_dict:
+                self.deprecation_warning(f'Do not return {old_key!r} when {new_key!r} is present')
+            elif old_value := info_dict.get(old_key):
+                info_dict[new_key] = old_value.split(', ')
+            elif new_value := info_dict.get(new_key):
+                info_dict[old_key] = ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in new_value)
+
     def _raise_pending_errors(self, info):
         err = info.pop('__pending_error', None)
         if err:
@@ -3483,7 +3498,8 @@ def ffmpeg_fixup(cndn, msg, cls):
                     or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
                     'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                     FFmpegFixupM3u8PP)
-                ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
+                ffmpeg_fixup(downloader == 'dashsegments'
+                             and (info_dict.get('is_live') or info_dict.get('is_dash_periods')),
                              'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
 
             ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
@@ -4144,6 +4160,10 @@ def build_request_director(self, handlers, preferences=None):
         director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
         return director
 
+    @functools.cached_property
+    def _request_director(self):
+        return self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
+
     def encode(self, s):
         if isinstance(s, bytes):
             return s  # Already encoded

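The _fill_common_fields hunk round-trips between the deprecated comma-separated fields and their list-valued replacements; joining escapes any literal comma as U+FF0C so the result stays splittable on ', '. A standalone sketch of that logic (not the actual yt-dlp code path; note the escaping is one-way, the split does not restore the original comma):

    def join_multivalue(values):
        # escape literal commas so the result can still be split on ', '
        return ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in values)

    def split_multivalue(value):
        return value.split(', ')

    artists = ['Simon & Garfunkel', 'Quincy Jones, Jr.']
    joined = join_multivalue(artists)
    print(joined)                    # Simon & Garfunkel, Quincy Jones， Jr.
    print(split_multivalue(joined))  # ['Simon & Garfunkel', 'Quincy Jones， Jr.']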
yt_dlp/__init__.py

@@ -14,7 +14,7 @@
 import re
 import traceback
 
-from .compat import compat_shlex_quote
+from .compat import compat_os_name, compat_shlex_quote
 from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
 from .downloader.external import get_external_downloader
 from .extractor import list_extractor_classes
@@ -984,7 +984,28 @@ def _real_main(argv=None):
         if pre_process:
             return ydl._download_retcode
 
-        ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
+        args = sys.argv[1:] if argv is None else argv
+        ydl.warn_if_short_id(args)
+
+        # Show a useful error message and wait for keypress if not launched from shell on Windows
+        if not args and compat_os_name == 'nt' and getattr(sys, 'frozen', False):
+            import ctypes.wintypes
+            import msvcrt
+
+            kernel32 = ctypes.WinDLL('Kernel32')
+
+            buffer = (1 * ctypes.wintypes.DWORD)()
+            attached_processes = kernel32.GetConsoleProcessList(buffer, 1)
+            # If we only have a single process attached, then the executable was double clicked
+            # When using `pyinstaller` with `--onefile`, two processes get attached
+            is_onefile = hasattr(sys, '_MEIPASS') and os.path.basename(sys._MEIPASS).startswith('_MEI')
+            if attached_processes == 1 or is_onefile and attached_processes == 2:
+                print(parser._generate_error_message(
+                    'Do not double-click the executable, instead call it from a command line.\n'
+                    'Please read the README for further information on how to use yt-dlp: '
+                    'https://github.com/yt-dlp/yt-dlp#readme'))
+                msvcrt.getch()
+                _exit(2)
         parser.error(
             'You must provide at least one URL.\n'
             'Type yt-dlp --help to see a list of all options.')

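For reference, the console check above in a standalone, guarded form (Windows-only API; the _MEIPASS test here is simplified relative to the diff): GetConsoleProcessList reports how many processes share the console, so a single attached process means the binary was double-clicked, and a PyInstaller --onefile build adds its bootloader as a second.

    import sys

    def launched_by_double_click():
        if sys.platform != 'win32':
            return False  # the WinDLL below only exists on Windows
        import ctypes.wintypes
        kernel32 = ctypes.WinDLL('Kernel32')
        buffer = (1 * ctypes.wintypes.DWORD)()  # room for a single PID is enough here
        attached = kernel32.GetConsoleProcessList(buffer, 1)
        is_onefile = hasattr(sys, '_MEIPASS')   # simplified vs. the check in the diff
        return attached == 1 or (is_onefile and attached == 2)

    if __name__ == '__main__':
        print(launched_by_double_click())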
yt_dlp/__pyinstaller/hook-yt_dlp.py

@@ -31,4 +31,4 @@ def get_hidden_imports():
 hiddenimports = list(get_hidden_imports())
 print(f'Adding imports: {hiddenimports}')
 
-excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts']
+excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts', 'bundle']

yt_dlp/extractor/_extractors.py

@@ -379,7 +379,6 @@
 from .clyp import ClypIE
 from .cmt import CMTIE
 from .cnbc import (
-    CNBCIE,
     CNBCVideoIE,
 )
 from .cnn import (
@@ -618,6 +617,7 @@
 from .filmweb import FilmwebIE
 from .firsttv import FirstTVIE
 from .fivetv import FiveTVIE
+from .flextv import FlexTVIE
 from .flickr import FlickrIE
 from .floatplane import (
     FloatplaneIE,

yt_dlp/extractor/altcensored.py

@@ -22,7 +22,7 @@ class AltCensoredIE(InfoExtractor):
             'title': "QUELLES SONT LES CONSÉQUENCES DE L'HYPERSEXUALISATION DE LA SOCIÉTÉ ?",
             'display_id': 'k0srjLSkga8.webm',
             'release_date': '20180403',
-            'creator': 'Virginie Vota',
+            'creators': ['Virginie Vota'],
             'release_year': 2018,
             'upload_date': '20230318',
             'uploader': 'admin@altcensored.com',
@@ -32,7 +32,7 @@ class AltCensoredIE(InfoExtractor):
             'duration': 926.09,
             'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg',
             'view_count': int,
-            'categories': ['News & Politics'],
+            'categories': ['News & Politics'],  # FIXME
         }
     }]
 
@@ -62,14 +62,21 @@ class AltCensoredChannelIE(InfoExtractor):
             'title': 'Virginie Vota',
             'id': 'UCFPTO55xxHqFqkzRZHu4kcw',
         },
-        'playlist_count': 91
+        'playlist_count': 85,
     }, {
         'url': 'https://altcensored.com/channel/UC9CcJ96HKMWn0LZlcxlpFTw',
         'info_dict': {
            'title': 'yukikaze775',
            'id': 'UC9CcJ96HKMWn0LZlcxlpFTw',
         },
-        'playlist_count': 4
+        'playlist_count': 4,
+    }, {
+        'url': 'https://altcensored.com/channel/UCfYbb7nga6-icsFWWgS-kWw',
+        'info_dict': {
+            'title': 'Mister Metokur',
+            'id': 'UCfYbb7nga6-icsFWWgS-kWw',
+        },
+        'playlist_count': 121,
     }]
 
     def _real_extract(self, url):
@@ -78,7 +85,7 @@ def _real_extract(self, url):
             url, channel_id, 'Download channel webpage', 'Unable to get channel webpage')
         title = self._html_search_meta('altcen_title', webpage, 'title', fatal=False)
         page_count = int_or_none(self._html_search_regex(
-            r'<a[^>]+href="/channel/\w+/page/(\d+)">(?:\1)</a>',
+            r'<a[^>]+href="/channel/[\w-]+/page/(\d+)">(?:\1)</a>',
             webpage, 'page count', default='1'))
 
         def page_func(page_num):

yt_dlp/extractor/archiveorg.py

@@ -300,7 +300,7 @@ def _real_extract(self, url):
             is_logged_in = bool(self._get_cookies('https://archive.org').get('logged-in-sig'))
             if extension in KNOWN_EXTENSIONS and (not f.get('private') or is_logged_in):
                 entry['formats'].append({
-                    'url': 'https://archive.org/download/' + identifier + '/' + f['name'],
+                    'url': 'https://archive.org/download/' + identifier + '/' + urllib.parse.quote(f['name']),
                     'format': f.get('format'),
                     'width': int_or_none(f.get('width')),
                     'height': int_or_none(f.get('height')),

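The one-character archive.org change matters whenever a file name contains URL-hostile characters; a quick illustration with an invented file name:

    import urllib.parse

    identifier = 'youtube-k0srjLSkga8'
    name = 'some file #1 (remaster).mp4'  # invented

    broken = 'https://archive.org/download/' + identifier + '/' + name
    fixed = 'https://archive.org/download/' + identifier + '/' + urllib.parse.quote(name)
    print(broken)  # the space is invalid and '#' starts a URL fragment
    print(fixed)   # .../some%20file%20%231%20%28remaster%29.mp4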
yt_dlp/extractor/bilibili.py

@@ -1996,7 +1996,7 @@ def _extract_video_metadata(self, url, video_id, season_id):
             'title': get_element_by_class(
                 'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
             'description': get_element_by_class(
-                'bstar-meta__desc', webpage) or self._html_search_meta('og:description'),
+                'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
         }, self._search_json_ld(webpage, video_id, default={}))
 
     def _get_comments_reply(self, root_id, next_id=0, display_id=None):

yt_dlp/extractor/cloudflarestream.py

@@ -4,27 +4,25 @@
 
 
 class CloudflareStreamIE(InfoExtractor):
+    _SUBDOMAIN_RE = r'(?:(?:watch|iframe|customer-\w+)\.)?'
     _DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
-    _EMBED_RE = r'embed\.%s/embed/[^/]+\.js\?.*?\bvideo=' % _DOMAIN_RE
+    _EMBED_RE = rf'embed\.{_DOMAIN_RE}/embed/[^/]+\.js\?.*?\bvideo='
     _ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
-    _VALID_URL = r'''(?x)
-                    https?://
-                        (?:
-                            (?:watch\.)?%s/|
-                            %s
-                        )
-                        (?P<id>%s)
-                    ''' % (_DOMAIN_RE, _EMBED_RE, _ID_RE)
-    _EMBED_REGEX = [fr'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1']
+    _VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})'
+    _EMBED_REGEX = [
+        rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1',
+        rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
+    ]
     _TESTS = [{
         'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
         'info_dict': {
             'id': '31c9291ab41fac05471db4e73aa11717',
             'ext': 'mp4',
             'title': '31c9291ab41fac05471db4e73aa11717',
+            'thumbnail': 'https://videodelivery.net/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg',
         },
         'params': {
-            'skip_download': True,
+            'skip_download': 'm3u8',
         },
     }, {
         'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
@@ -35,6 +33,21 @@ class CloudflareStreamIE(InfoExtractor):
     }, {
         'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e',
         'only_matching': True,
+    }, {
+        'url': 'https://customer-aw5py76sw8wyqzmh.cloudflarestream.com/2463f6d3e06fa29710a337f5f5389fd8/iframe',
+        'only_matching': True,
+    }]
+    _WEBPAGE_TESTS = [{
+        'url': 'https://upride.cc/incident/shoulder-pass-at-light/',
+        'info_dict': {
+            'id': 'eaef9dea5159cf968be84241b5cedfe7',
+            'ext': 'mp4',
+            'title': 'eaef9dea5159cf968be84241b5cedfe7',
+            'thumbnail': 'https://videodelivery.net/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg',
+        },
+        'params': {
+            'skip_download': 'm3u8',
+        },
     }]
 
     def _real_extract(self, url):

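The rewritten _VALID_URL can be sanity-checked in isolation; the pattern pieces below are copied from the diff and run against two URLs taken from the tests:

    import re

    _SUBDOMAIN_RE = r'(?:(?:watch|iframe|customer-\w+)\.)?'
    _DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
    _EMBED_RE = rf'embed\.{_DOMAIN_RE}/embed/[^/]+\.js\?.*?\bvideo='
    _ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
    _VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})'

    for url in (
        'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
        'https://customer-aw5py76sw8wyqzmh.cloudflarestream.com/2463f6d3e06fa29710a337f5f5389fd8/iframe',
    ):
        print(re.match(_VALID_URL, url).group('id'))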
yt_dlp/extractor/cnbc.py

@@ -1,68 +1,97 @@
 from .common import InfoExtractor
-from ..utils import smuggle_url
-
-
-class CNBCIE(InfoExtractor):
-    _VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)'
-    _TEST = {
-        'url': 'http://video.cnbc.com/gallery/?video=3000503714',
-        'info_dict': {
-            'id': '3000503714',
-            'ext': 'mp4',
-            'title': 'Fighting zombies is big business',
-            'description': 'md5:0c100d8e1a7947bd2feec9a5550e519e',
-            'timestamp': 1459332000,
-            'upload_date': '20160330',
-            'uploader': 'NBCU-CNBC',
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-        'skip': 'Dead link',
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        return {
-            '_type': 'url_transparent',
-            'ie_key': 'ThePlatform',
-            'url': smuggle_url(
-                'http://link.theplatform.com/s/gZWlPC/media/guid/2408950221/%s?mbr=true&manifest=m3u' % video_id,
-                {'force_smil_url': True}),
-            'id': video_id,
-        }
+from ..utils import int_or_none, parse_iso8601, str_or_none, url_or_none
+from ..utils.traversal import traverse_obj
 
 
 class CNBCVideoIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?cnbc\.com(?P<path>/video/(?:[^/]+/)+(?P<id>[^./?#&]+)\.html)'
-    _TEST = {
-        'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
+    _VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/?#]+/)+(?P<id>[^./?#&]+)\.html'
+
+    _TESTS = [{
+        'url': 'https://www.cnbc.com/video/2023/12/07/mcdonalds-just-unveiled-cosmcsits-new-spinoff-brand.html',
         'info_dict': {
             'ext': 'mp4',
-            'id': '7000031301',
-            'title': "Trump: I don't necessarily agree with raising rates",
-            'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3',
-            'timestamp': 1531958400,
-            'upload_date': '20180719',
-            'uploader': 'NBCU-CNBC',
+            'id': '107344774',
+            'display_id': 'mcdonalds-just-unveiled-cosmcsits-new-spinoff-brand',
+            'modified_timestamp': 1702053483,
+            'timestamp': 1701977810,
+            'channel': 'News Videos',
+            'upload_date': '20231207',
+            'description': 'md5:882c001d85cb43d7579b514307b3e78b',
+            'release_timestamp': 1701977375,
+            'modified_date': '20231208',
+            'release_date': '20231207',
+            'duration': 65,
+            'author': 'Sean Conlon',
+            'title': 'Here\'s a first look at McDonald\'s new spinoff brand, CosMc\'s',
+            'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107344192-1701894812493-CosMcsskyHero_2336x1040_hero-desktop.jpg?v=1701894855',
         },
-        'params': {
-            'skip_download': True,
+        'expected_warnings': ['Unable to download f4m manifest'],
+    }, {
+        'url': 'https://www.cnbc.com/video/2023/12/08/jim-cramer-shares-his-take-on-seattles-tech-scene.html',
+        'info_dict': {
+            'author': 'Jim Cramer',
+            'channel': 'Mad Money with Jim Cramer',
+            'description': 'md5:72925be21b952e95eba51178dddf4e3e',
+            'duration': 299.0,
+            'ext': 'mp4',
+            'id': '107345451',
+            'display_id': 'jim-cramer-shares-his-take-on-seattles-tech-scene',
+            'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107345481-1702079431MM-B-120823.jpg?v=1702079430',
+            'timestamp': 1702080139,
+            'title': 'Jim Cramer shares his take on Seattle\'s tech scene',
+            'release_date': '20231208',
+            'upload_date': '20231209',
+            'modified_timestamp': 1702080139,
+            'modified_date': '20231209',
+            'release_timestamp': 1702073551,
         },
-        'skip': 'Dead link',
-    }
+        'expected_warnings': ['Unable to download f4m manifest'],
+    }, {
+        'url': 'https://www.cnbc.com/video/2023/12/08/the-epicenter-of-ai-is-in-seattle-says-jim-cramer.html',
+        'info_dict': {
+            'author': 'Jim Cramer',
+            'channel': 'Mad Money with Jim Cramer',
+            'description': 'md5:72925be21b952e95eba51178dddf4e3e',
+            'duration': 113.0,
+            'ext': 'mp4',
+            'id': '107345474',
+            'display_id': 'the-epicenter-of-ai-is-in-seattle-says-jim-cramer',
+            'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107345486-Screenshot_2023-12-08_at_70339_PM.png?v=1702080248',
+            'timestamp': 1702080535,
+            'title': 'The epicenter of AI is in Seattle, says Jim Cramer',
+            'release_timestamp': 1702077347,
+            'modified_timestamp': 1702080535,
+            'release_date': '20231208',
+            'upload_date': '20231209',
+            'modified_date': '20231209',
+        },
+        'expected_warnings': ['Unable to download f4m manifest'],
+    }]
 
     def _real_extract(self, url):
-        path, display_id = self._match_valid_url(url).groups()
-        video_id = self._download_json(
-            'https://webql-redesign.cnbcfm.com/graphql', display_id, query={
-                'query': '''{
-  page(path: "%s") {
-    vcpsId
-  }
-}''' % path,
-            })['data']['page']['vcpsId']
-        return self.url_result(
-            'http://video.cnbc.com/gallery/?video=%d' % video_id,
-            CNBCIE.ie_key())
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        data = self._search_json(r'window\.__s_data=', webpage, 'video data', display_id)
+
+        player_data = traverse_obj(data, (
+            'page', 'page', 'layout', ..., 'columns', ..., 'modules',
+            lambda _, v: v['name'] == 'clipPlayer', 'data', {dict}), get_all=False)
+
+        return {
+            'id': display_id,
+            'display_id': display_id,
+            'formats': self._extract_akamai_formats(player_data['playbackURL'], display_id),
+            **self._search_json_ld(webpage, display_id, fatal=False),
+            **traverse_obj(player_data, {
+                'id': ('id', {str_or_none}),
+                'title': ('title', {str}),
+                'description': ('description', {str}),
+                'author': ('author', ..., 'name', {str}),
+                'timestamp': ('datePublished', {parse_iso8601}),
+                'release_timestamp': ('uploadDate', {parse_iso8601}),
+                'modified_timestamp': ('dateLastPublished', {parse_iso8601}),
+                'thumbnail': ('thumbnail', {url_or_none}),
+                'duration': ('duration', {int_or_none}),
+                'channel': ('section', 'title', {str}),
+            }, get_all=False),
        }

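The new CNBC extractor leans on traverse_obj to pluck the clipPlayer module out of deeply nested page data; a toy version of that lookup, with a drastically reduced, invented payload (assumes a recent yt-dlp checkout for the import):

    from yt_dlp.utils.traversal import traverse_obj

    data = {'page': {'page': {'layout': [
        {'columns': [{'modules': [
            {'name': 'somethingElse', 'data': {}},
            {'name': 'clipPlayer', 'data': {'id': 107344774, 'title': 'demo'}},
        ]}]},
    ]}}}

    player_data = traverse_obj(data, (
        'page', 'page', 'layout', ..., 'columns', ..., 'modules',
        lambda _, v: v['name'] == 'clipPlayer', 'data', {dict}), get_all=False)
    print(player_data)  # {'id': 107344774, 'title': 'demo'}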
yt_dlp/extractor/common.py

@@ -247,6 +247,8 @@ class InfoExtractor:
                                  (For internal use only)
                     * http_chunk_size Chunk size for HTTP downloads
                     * ffmpeg_args     Extra arguments for ffmpeg downloader
+                    * is_dash_periods Whether the format is a result of merging
+                                      multiple DASH periods.
                     RTMP formats can also have the additional fields: page_url,
                     app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn,
                     rtmp_protocol, rtmp_real_time
@@ -278,7 +280,7 @@ class InfoExtractor:
     description:    Full video description.
     uploader:       Full name of the video uploader.
     license:        License name the video is licensed under.
-    creator:        The creator of the video.
+    creators:       List of creators of the video.
     timestamp:      UNIX timestamp of the moment the video was uploaded
     upload_date:    Video upload date in UTC (YYYYMMDD).
                     If not explicitly set, calculated from timestamp
@@ -422,16 +424,16 @@ class InfoExtractor:
     track_number:   Number of the track within an album or a disc, as an integer.
     track_id:       Id of the track (useful in case of custom indexing, e.g. 6.iii),
                     as a unicode string.
-    artist:         Artist(s) of the track.
-    genre:          Genre(s) of the track.
+    artists:        List of artists of the track.
+    composers:      List of composers of the piece.
+    genres:         List of genres of the track.
     album:          Title of the album the track belongs to.
     album_type:     Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc).
-    album_artist:   List of all artists appeared on the album (e.g.
-                    "Ash Borer / Fell Voices" or "Various Artists", useful for splits
-                    and compilations).
+    album_artists:  List of all artists appeared on the album.
+                    E.g. ["Ash Borer", "Fell Voices"] or ["Various Artists"].
+                    Useful for splits and compilations.
     disc_number:    Number of the disc or other physical medium the track belongs to,
                     as an integer.
-    composer:       Composer of the piece
 
     The following fields should only be set for clips that should be cut from the original video:
@@ -442,6 +444,18 @@ class InfoExtractor:
     rows:           Number of rows in each storyboard fragment, as an integer
     columns:        Number of columns in each storyboard fragment, as an integer
 
+    The following fields are deprecated and should not be set by new code:
+    composer:       Use "composers" instead.
+                    Composer(s) of the piece, comma-separated.
+    artist:         Use "artists" instead.
+                    Artist(s) of the track, comma-separated.
+    genre:          Use "genres" instead.
+                    Genre(s) of the track, comma-separated.
+    album_artist:   Use "album_artists" instead.
+                    All artists appeared on the album, comma-separated.
+    creator:        Use "creators" instead.
+                    The creator of the video.
+
     Unless mentioned otherwise, the fields should be Unicode strings.
 
     Unless mentioned otherwise, None is equivalent to absence of information.
@@ -2530,7 +2544,11 @@ def _extract_mpd_formats(self, *args, **kwargs):
             self._report_ignoring_subs('DASH')
         return fmts
 
-    def _extract_mpd_formats_and_subtitles(
+    def _extract_mpd_formats_and_subtitles(self, *args, **kwargs):
+        periods = self._extract_mpd_periods(*args, **kwargs)
+        return self._merge_mpd_periods(periods)
+
+    def _extract_mpd_periods(
             self, mpd_url, video_id, mpd_id=None, note=None, errnote=None,
             fatal=True, data=None, headers={}, query={}):
@@ -2543,17 +2561,16 @@ def _extract_mpd_formats_and_subtitles(
             errnote='Failed to download MPD manifest' if errnote is None else errnote,
             fatal=fatal, data=data, headers=headers, query=query)
         if res is False:
-            return [], {}
+            return []
         mpd_doc, urlh = res
         if mpd_doc is None:
-            return [], {}
+            return []
 
         # We could have been redirected to a new url when we retrieved our mpd file.
         mpd_url = urlh.url
         mpd_base_url = base_url(mpd_url)
 
-        return self._parse_mpd_formats_and_subtitles(
-            mpd_doc, mpd_id, mpd_base_url, mpd_url)
+        return self._parse_mpd_periods(mpd_doc, mpd_id, mpd_base_url, mpd_url)
 
     def _parse_mpd_formats(self, *args, **kwargs):
         fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs)
@@ -2561,8 +2578,39 @@ def _parse_mpd_formats(self, *args, **kwargs):
             self._report_ignoring_subs('DASH')
         return fmts
 
-    def _parse_mpd_formats_and_subtitles(
-            self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
+    def _parse_mpd_formats_and_subtitles(self, *args, **kwargs):
+        periods = self._parse_mpd_periods(*args, **kwargs)
+        return self._merge_mpd_periods(periods)
+
+    def _merge_mpd_periods(self, periods):
+        """
+        Combine all formats and subtitles from an MPD manifest into a single list,
+        by concatenating streams with similar formats.
+        """
+        formats, subtitles = {}, {}
+        for period in periods:
+            for f in period['formats']:
+                assert 'is_dash_periods' not in f, 'format already processed'
+                f['is_dash_periods'] = True
+                format_key = tuple(v for k, v in f.items() if k not in (
+                    ('format_id', 'fragments', 'manifest_stream_number')))
+                if format_key not in formats:
+                    formats[format_key] = f
+                elif 'fragments' in f:
+                    formats[format_key].setdefault('fragments', []).extend(f['fragments'])
+            if subtitles and period['subtitles']:
+                self.report_warning(bug_reports_message(
+                    'Found subtitles in multiple periods in the DASH manifest; '
+                    'if part of the subtitles are missing,'
+                ), only_once=True)
+
+            for sub_lang, sub_info in period['subtitles'].items():
+                subtitles.setdefault(sub_lang, []).extend(sub_info)
+
+        return list(formats.values()), subtitles
+
+    def _parse_mpd_periods(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
         """
         Parse formats from MPD manifest.
         References:
@@ -2641,9 +2689,13 @@ def extract_Initialization(source):
             return ms_info
 
         mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
-        formats, subtitles = [], {}
         stream_numbers = collections.defaultdict(int)
-        for period in mpd_doc.findall(_add_ns('Period')):
+        for period_idx, period in enumerate(mpd_doc.findall(_add_ns('Period'))):
+            period_entry = {
+                'id': period.get('id', f'period-{period_idx}'),
+                'formats': [],
+                'subtitles': collections.defaultdict(list),
+            }
             period_duration = parse_duration(period.get('duration')) or mpd_duration
             period_ms_info = extract_multisegment_info(period, {
                 'start_number': 1,
@@ -2893,11 +2945,10 @@ def add_segment_url():
                 if content_type in ('video', 'audio', 'image/jpeg'):
                     f['manifest_stream_number'] = stream_numbers[f['url']]
                     stream_numbers[f['url']] += 1
-                    formats.append(f)
+                    period_entry['formats'].append(f)
                 elif content_type == 'text':
-                    subtitles.setdefault(lang or 'und', []).append(f)
-
-        return formats, subtitles
+                    period_entry['subtitles'][lang or 'und'].append(f)
+            yield period_entry
 
     def _extract_ism_formats(self, *args, **kwargs):
         fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)

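A distilled version of what _merge_mpd_periods does: formats from different periods that agree on everything except their per-period identity are treated as one stream whose fragment lists are concatenated. This sketch uses plain dicts and a sorted key for hashability, slightly simpler than the real method:

    def merge_periods(periods):
        merged = {}
        for period in periods:
            for f in period['formats']:
                # key a format by everything except its per-period identity
                key = tuple(sorted((k, str(v)) for k, v in f.items()
                                   if k not in ('format_id', 'fragments')))
                if key not in merged:
                    merged[key] = f
                elif 'fragments' in f:
                    merged[key].setdefault('fragments', []).extend(f['fragments'])
        return list(merged.values())

    periods = [
        {'formats': [{'format_id': 'p0-video', 'height': 720, 'fragments': ['f1', 'f2']}]},
        {'formats': [{'format_id': 'p1-video', 'height': 720, 'fragments': ['f3']}]},
    ]
    print(merge_periods(periods))  # one 720p format carrying fragments f1, f2, f3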
yt_dlp/extractor/err.py

@@ -9,7 +9,7 @@
 
 
 class ERRJupiterIE(InfoExtractor):
-    _VALID_URL = r'https?://jupiter(?:pluss)?\.err\.ee/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:jupiter(?:pluss)?|lasteekraan)\.err\.ee/(?P<id>\d+)'
     _TESTS = [{
         'note': 'Jupiter: Movie: siin-me-oleme',
         'url': 'https://jupiter.err.ee/1211107/siin-me-oleme',
@@ -145,6 +145,31 @@ class ERRJupiterIE(InfoExtractor):
             'season_number': 0,
             'series': 'Лесные истории | Аисты',
             'series_id': '1037497',
-        }
+        },
+    }, {
+        'note': 'Lasteekraan: Pätu',
+        'url': 'https://lasteekraan.err.ee/1092243/patu',
+        'md5': 'a67eb9b9bcb3d201718c15d1638edf77',
+        'info_dict': {
+            'id': '1092243',
+            'ext': 'mp4',
+            'title': 'Pätu',
+            'alt_title': '',
+            'description': 'md5:64a7b5a80afd7042d3f8ec48c77befd9',
+            'release_date': '20230614',
+            'upload_date': '20200520',
+            'modified_date': '20200520',
+            'release_timestamp': 1686745800,
+            'timestamp': 1589975640,
+            'modified_timestamp': 1589975640,
+            'release_year': 1990,
+            'episode': 'Episode 1',
+            'episode_id': '1092243',
+            'episode_number': 1,
+            'season': 'Season 1',
+            'season_number': 1,
+            'series': 'Pätu',
+            'series_id': '1092236',
+        },
     }]

yt_dlp/extractor/facebook.py

@@ -500,6 +500,7 @@ def extract_metadata(webpage):
             webpage, 'description', default=None)
         uploader_data = (
             get_first(media, ('owner', {dict}))
+            or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name']))
             or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
             or get_first(post, ('node', 'actors', ..., {dict}))
             or get_first(post, ('event', 'event_creator', {dict})) or {})
@@ -583,8 +584,8 @@ def extract_relay_data(_filter):
     def extract_relay_prefetched_data(_filter):
         return traverse_obj(extract_relay_data(_filter), (
             'require', (None, (..., ..., ..., '__bbox', 'require')),
-            lambda _, v: 'RelayPrefetchedStreamCache' in v, ..., ...,
-            '__bbox', 'result', 'data', {dict}), get_all=False) or {}
+            lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v),
+            ..., ..., '__bbox', 'result', 'data', {dict}), get_all=False) or {}
 
     if not video_data:
         server_js_data = self._parse_json(self._search_regex([

yt_dlp/extractor/flextv.py (new file)

@@ -0,0 +1,62 @@
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+    ExtractorError,
+    UserNotLive,
+    parse_iso8601,
+    str_or_none,
+    traverse_obj,
+    url_or_none,
+)
+
+
+class FlexTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?flextv\.co\.kr/channels/(?P<id>\d+)/live'
+    _TESTS = [{
+        'url': 'https://www.flextv.co.kr/channels/231638/live',
+        'info_dict': {
+            'id': '231638',
+            'ext': 'mp4',
+            'title': r're:^214하나만\.\.\. ',
+            'thumbnail': r're:^https?://.+\.jpg',
+            'upload_date': r're:\d{8}',
+            'timestamp': int,
+            'live_status': 'is_live',
+            'channel': 'Hi별',
+            'channel_id': '244396',
+        },
+        'skip': 'The channel is offline',
+    }, {
+        'url': 'https://www.flextv.co.kr/channels/746/live',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        channel_id = self._match_id(url)
+
+        try:
+            stream_data = self._download_json(
+                f'https://api.flextv.co.kr/api/channels/{channel_id}/stream',
+                channel_id, query={'option': 'all'})
+        except ExtractorError as e:
+            if isinstance(e.cause, HTTPError) and e.cause.status == 400:
+                raise UserNotLive(video_id=channel_id)
+            raise
+
+        playlist_url = stream_data['sources'][0]['url']
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+            playlist_url, channel_id, 'mp4')
+
+        return {
+            'id': channel_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            'is_live': True,
+            **traverse_obj(stream_data, {
+                'title': ('stream', 'title', {str}),
+                'timestamp': ('stream', 'createdAt', {parse_iso8601}),
+                'thumbnail': ('thumbUrl', {url_or_none}),
+                'channel': ('owner', 'name', {str}),
+                'channel_id': ('owner', 'id', {str_or_none}),
+            }),
+        }

yt_dlp/extractor/goplay.py

@@ -40,6 +40,22 @@ class GoPlayIE(InfoExtractor):
             'title': 'A Family for the Holidays',
         },
         'skip': 'This video is only available for registered users'
+    }, {
+        'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay',
+        'info_dict': {
+            'id': '03eb8f2f-153e-41cb-9805-0d3a29dab656',
+            'ext': 'mp4',
+            'title': 'S11 - Aflevering 1',
+            'episode': 'Episode 1',
+            'series': 'De Mol',
+            'season_number': 11,
+            'episode_number': 1,
+            'season': 'Season 11'
+        },
+        'params': {
+            'skip_download': True
+        },
+        'skip': 'This video is only available for registered users'
     }]
 
     _id_token = None
@@ -77,16 +93,39 @@ def _real_extract(self, url):
 
         api = self._download_json(
             f'https://api.goplay.be/web/v1/videos/long-form/{video_id}',
-            video_id, headers={'Authorization': 'Bearer %s' % self._id_token})
+            video_id, headers={
+                'Authorization': 'Bearer %s' % self._id_token,
+                **self.geo_verification_headers(),
+            })
 
-        formats, subs = self._extract_m3u8_formats_and_subtitles(
-            api['manifestUrls']['hls'], video_id, ext='mp4', m3u8_id='HLS')
+        if 'manifestUrls' in api:
+            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+                api['manifestUrls']['hls'], video_id, ext='mp4', m3u8_id='HLS')
+
+        else:
+            if 'ssai' not in api:
+                raise ExtractorError('expecting Google SSAI stream')
+
+            ssai_content_source_id = api['ssai']['contentSourceID']
+            ssai_video_id = api['ssai']['videoID']
+
+            dai = self._download_json(
+                f'https://dai.google.com/ondemand/dash/content/{ssai_content_source_id}/vid/{ssai_video_id}/streams',
+                video_id, data=b'{"api-key":"null"}',
+                headers={'content-type': 'application/json'})
+
+            periods = self._extract_mpd_periods(dai['stream_manifest'], video_id)
+
+            # skip pre-roll and mid-roll ads
+            periods = [p for p in periods if '-ad-' not in p['id']]
+
+            formats, subtitles = self._merge_mpd_periods(periods)
 
         info_dict.update({
             'id': video_id,
             'formats': formats,
+            'subtitles': subtitles,
         })
 
         return info_dict

View file

@ -3,16 +3,15 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html,
extract_attributes,
ExtractorError, ExtractorError,
extract_attributes,
float_or_none, float_or_none,
get_element_by_class,
int_or_none, int_or_none,
srt_subtitles_timecode, srt_subtitles_timecode,
strip_or_none,
mimetype2ext, mimetype2ext,
traverse_obj,
try_get, try_get,
url_or_none,
urlencode_postdata, urlencode_postdata,
urljoin, urljoin,
) )
@@ -83,15 +82,29 @@ def _get_video_id(self, video_data, course_slug, video_slug):
 class LinkedInIE(LinkedInBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?linkedin\.com/posts/.+?(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?linkedin\.com/posts/[^/?#]+-(?P<id>\d+)-\w{4}/?(?:[?#]|$)'
     _TESTS = [{
         'url': 'https://www.linkedin.com/posts/mishalkhawaja_sendinblueviews-toronto-digitalmarketing-ugcPost-6850898786781339649-mM20',
         'info_dict': {
             'id': '6850898786781339649',
             'ext': 'mp4',
-            'title': 'Mishal K. on LinkedIn: #sendinblueviews #toronto #digitalmarketing',
-            'description': 'md5:be125430bab1c574f16aeb186a4d5b19',
-            'creator': 'Mishal K.'
+            'title': 'Mishal K. on LinkedIn: #sendinblueviews #toronto #digitalmarketing #nowhiring #sendinblue…',
+            'description': 'md5:2998a31f6f479376dd62831f53a80f71',
+            'uploader': 'Mishal K.',
+            'thumbnail': 're:^https?://media.licdn.com/dms/image/.*$',
+            'like_count': int
+        },
+    }, {
+        'url': 'https://www.linkedin.com/posts/the-mathworks_2_what-is-mathworks-cloud-center-activity-7151241570371948544-4Gu7',
+        'info_dict': {
+            'id': '7151241570371948544',
+            'ext': 'mp4',
+            'title': 'MathWorks on LinkedIn: What Is MathWorks Cloud Center?',
+            'description': 'md5:95f9d4eeb6337882fb47eefe13d7a40c',
+            'uploader': 'MathWorks',
+            'thumbnail': 're:^https?://media.licdn.com/dms/image/.*$',
+            'like_count': int,
+            'subtitles': 'mincount:1'
         },
     }]

@@ -99,26 +112,30 @@ def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)

-        title = self._html_extract_title(webpage)
-        description = clean_html(get_element_by_class('share-update-card__update-text', webpage))
-        like_count = int_or_none(get_element_by_class('social-counts-reactions__social-counts-numRections', webpage))
-        creator = strip_or_none(clean_html(get_element_by_class('comment__actor-name', webpage)))
-        sources = self._parse_json(extract_attributes(self._search_regex(r'(<video[^>]+>)', webpage, 'video'))['data-sources'], video_id)
+        video_attrs = extract_attributes(self._search_regex(r'(<video[^>]+>)', webpage, 'video'))
+        sources = self._parse_json(video_attrs['data-sources'], video_id)
         formats = [{
             'url': source['src'],
             'ext': mimetype2ext(source.get('type')),
             'tbr': float_or_none(source.get('data-bitrate'), scale=1000),
         } for source in sources]
+        subtitles = {'en': [{
+            'url': video_attrs['data-captions-url'],
+            'ext': 'vtt',
+        }]} if url_or_none(video_attrs.get('data-captions-url')) else {}

         return {
             'id': video_id,
             'formats': formats,
-            'title': title,
-            'like_count': like_count,
-            'creator': creator,
+            'title': self._og_search_title(webpage, default=None) or self._html_extract_title(webpage),
+            'like_count': int_or_none(self._search_regex(
+                r'\bdata-num-reactions="(\d+)"', webpage, 'reactions', default=None)),
+            'uploader': traverse_obj(
+                self._yield_json_ld(webpage, video_id),
+                (lambda _, v: v['@type'] == 'SocialMediaPosting', 'author', 'name', {str}), get_all=False),
             'thumbnail': self._og_search_thumbnail(webpage),
-            'description': description,
+            'description': self._og_search_description(webpage, default=None),
+            'subtitles': subtitles,
         }
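
The uploader lookup above leans on yt_dlp.utils.traverse_obj: a lambda path element filters list items, `{str}` coerces the result, and get_all=False returns the first hit. A minimal sketch of the same call, run against hypothetical JSON-LD objects (the sample data is illustrative, not real LinkedIn output):

    from yt_dlp.utils import traverse_obj

    ld_objects = [  # hypothetical parsed JSON-LD, shaped like _yield_json_ld output
        {'@type': 'VideoObject', 'name': 'clip'},
        {'@type': 'SocialMediaPosting', 'author': {'name': 'Mishal K.'}},
    ]
    uploader = traverse_obj(
        ld_objects,
        (lambda _, v: v['@type'] == 'SocialMediaPosting', 'author', 'name', {str}),
        get_all=False)
    assert uploader == 'Mishal K.'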

View file

@@ -1,6 +1,7 @@
 import itertools
 import json
+from .art19 import Art19IE
 from .common import InfoExtractor
 from ..networking.exceptions import HTTPError
 from ..utils import (

@@ -112,7 +113,8 @@ def _extract_video_metadata(self, episode):
 class NebulaIE(NebulaBaseIE):
-    _VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[-\w]+)'
+    IE_NAME = 'nebula:video'
+    _VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[\w-]+)'
     _TESTS = [{
         'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
         'info_dict': {

@@ -236,8 +238,8 @@ def _real_extract(self, url):
 class NebulaClassIE(NebulaBaseIE):
-    IE_NAME = 'nebula:class'
-    _VALID_URL = rf'{_BASE_URL_RE}/(?P<id>[-\w]+)/(?P<ep>\d+)'
+    IE_NAME = 'nebula:media'
+    _VALID_URL = rf'{_BASE_URL_RE}/(?!(?:myshows|library|videos)/)(?P<id>[\w-]+)/(?P<ep>[\w-]+)/?(?:$|[?#])'
     _TESTS = [{
         'url': 'https://nebula.tv/copyright-for-fun-and-profit/14',
         'info_dict': {

@@ -253,6 +255,46 @@ class NebulaClassIE(NebulaBaseIE):
             'title': 'Photos, Sculpture, and Video',
         },
         'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://nebula.tv/extremitiespodcast/pyramiden-the-high-arctic-soviet-ghost-town',
+        'info_dict': {
+            'ext': 'mp3',
+            'id': '018f65f0-0033-4021-8f87-2d132beb19aa',
+            'description': 'md5:05d2b23ab780c955e2511a2b9127acff',
+            'series_id': '335e8159-d663-491a-888f-1732285706ac',
+            'modified_timestamp': 1599091504,
+            'episode_id': '018f65f0-0033-4021-8f87-2d132beb19aa',
+            'series': 'Extremities',
+            'modified_date': '20200903',
+            'upload_date': '20200902',
+            'title': 'Pyramiden: The High-Arctic Soviet Ghost Town',
+            'release_timestamp': 1571237958,
+            'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
+            'duration': 1546.05714,
+            'timestamp': 1599085608,
+            'release_date': '20191016',
+        },
+    }, {
+        'url': 'https://nebula.tv/thelayover/the-layover-episode-1',
+        'info_dict': {
+            'ext': 'mp3',
+            'id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
+            'episode_number': 1,
+            'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
+            'release_date': '20230304',
+            'modified_date': '20230403',
+            'series': 'The Layover',
+            'episode_id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
+            'modified_timestamp': 1680554566,
+            'duration': 3130.46401,
+            'release_timestamp': 1677943800,
+            'title': 'The Layover — Episode 1',
+            'series_id': '874303a5-4900-4626-a4b6-2aacac34466a',
+            'upload_date': '20230303',
+            'episode': 'Episode 1',
+            'timestamp': 1677883672,
+            'description': 'md5:002cca89258e3bc7c268d5b8c24ba482',
+        },
     }]

     def _real_extract(self, url):

@@ -268,16 +310,38 @@ def _real_extract(self, url):
         metadata = self._call_api(
             f'https://content.api.nebula.app/content/{slug}/{episode}/?include=lessons',
-            slug, note='Fetching video metadata')
-        return {
-            **self._extract_video_metadata(metadata),
-            **self._extract_formats(metadata['id'], slug),
-        }
+            slug, note='Fetching class/podcast metadata')
+        content_type = metadata.get('type')
+        if content_type == 'lesson':
+            return {
+                **self._extract_video_metadata(metadata),
+                **self._extract_formats(metadata['id'], slug),
+            }
+        elif content_type == 'podcast_episode':
+            episode_url = metadata['episode_url']
+            if not episode_url and metadata.get('premium'):
+                self.raise_login_required()
+            if Art19IE.suitable(episode_url):
+                return self.url_result(episode_url, Art19IE)
+            return traverse_obj(metadata, {
+                'id': ('id', {str}),
+                'url': ('episode_url', {url_or_none}),
+                'title': ('title', {str}),
+                'description': ('description', {str}),
+                'timestamp': ('published_at', {parse_iso8601}),
+                'duration': ('duration', {int_or_none}),
+                'channel_id': ('channel_id', {str}),
+                'channel': ('channel_title', {str}),
+                'thumbnail': ('assets', 'regular', {url_or_none}),
+            })
+        raise ExtractorError(f'Unexpected content type {content_type!r}')

 class NebulaSubscriptionsIE(NebulaBaseIE):
     IE_NAME = 'nebula:subscriptions'
-    _VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)'
+    _VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)/?(?:$|[?#])'
     _TESTS = [{
         'url': 'https://nebula.tv/myshows',
         'playlist_mincount': 1,

@@ -310,7 +374,7 @@ def _real_extract(self, url):
 class NebulaChannelIE(NebulaBaseIE):
     IE_NAME = 'nebula:channel'
-    _VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|library|videos/)(?P<id>[-\w]+)/?(?:$|[?#])'
+    _VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|library|videos)(?P<id>[\w-]+)/?(?:$|[?#])'
     _TESTS = [{
         'url': 'https://nebula.tv/tom-scott-presents-money',
         'info_dict': {

@@ -343,6 +407,14 @@ class NebulaChannelIE(NebulaBaseIE):
             'description': 'md5:6690248223eed044a9f11cd5a24f9742',
         },
         'playlist_count': 23,
+    }, {
+        'url': 'https://nebula.tv/trussissuespodcast',
+        'info_dict': {
+            'id': 'trussissuespodcast',
+            'title': 'The TLDR News Podcast',
+            'description': 'md5:a08c4483bc0b705881d3e0199e721385',
+        },
+        'playlist_mincount': 80,
     }]

     def _generate_playlist_entries(self, collection_id, collection_slug):

@@ -365,6 +437,17 @@ def _generate_class_entries(self, channel):
                 lesson.get('share_url') or f'https://nebula.tv/{metadata["class_slug"]}/{metadata["slug"]}',
                 {'id': lesson['id']}), NebulaClassIE, url_transparent=True, **metadata)

+    def _generate_podcast_entries(self, collection_id, collection_slug):
+        next_url = f'https://content.api.nebula.app/podcast_channels/{collection_id}/podcast_episodes/?ordering=-published_at&premium=true'
+        for page_num in itertools.count(1):
+            episodes = self._call_api(next_url, collection_slug, note=f'Retrieving podcast page {page_num}')
+            for episode in traverse_obj(episodes, ('results', lambda _, v: url_or_none(v['share_url']))):
+                yield self.url_result(episode['share_url'], NebulaClassIE)
+            next_url = episodes.get('next')
+            if not next_url:
+                break
+
     def _real_extract(self, url):
         collection_slug = self._match_id(url)
         channel = self._call_api(

@@ -373,6 +456,8 @@ def _real_extract(self, url):
         if channel.get('type') == 'class':
             entries = self._generate_class_entries(channel)
+        elif channel.get('type') == 'podcast_channel':
+            entries = self._generate_podcast_entries(channel['id'], collection_slug)
         else:
             entries = self._generate_playlist_entries(channel['id'], collection_slug)
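
The new _generate_podcast_entries follows the API's next cursor until it runs out. The paging pattern, sketched generically (fetch_page stands in for _call_api; the 'results'/'next' keys mirror the response shape assumed above):

    import itertools

    def paginate(fetch_page, first_url):
        # Yield results page by page, following the 'next' cursor the API
        # returns, until no further page exists (same loop shape as
        # _generate_podcast_entries).
        next_url = first_url
        for page_num in itertools.count(1):
            page = fetch_page(next_url, page_num)
            yield from page.get('results', [])
            next_url = page.get('next')
            if not next_url:
                break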

View file

@@ -1,33 +1,38 @@
-import datetime
-
 from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..utils import parse_iso8601, url_or_none
+from ..utils.traversal import traverse_obj


 class NerdCubedFeedIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/feed\.json'
+    _VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/?(?:$|[#?])'
     _TEST = {
-        'url': 'http://www.nerdcubed.co.uk/feed.json',
+        'url': 'http://www.nerdcubed.co.uk/',
         'info_dict': {
             'id': 'nerdcubed-feed',
             'title': 'nerdcubed.co.uk feed',
         },
-        'playlist_mincount': 1300,
+        'playlist_mincount': 5500,
     }

+    def _extract_video(self, feed_entry):
+        return self.url_result(
+            f'https://www.youtube.com/watch?v={feed_entry["id"]}', YoutubeIE,
+            **traverse_obj(feed_entry, {
+                'id': ('id', {str}),
+                'title': ('title', {str}),
+                'description': ('description', {str}),
+                'timestamp': ('publishedAt', {parse_iso8601}),
+                'channel': ('source', 'name', {str}),
+                'channel_id': ('source', 'id', {str}),
+                'channel_url': ('source', 'url', {str}),
+                'thumbnail': ('thumbnail', 'source', {url_or_none}),
+            }), url_transparent=True)
+
     def _real_extract(self, url):
-        feed = self._download_json(url, url, 'Downloading NerdCubed JSON feed')
-
-        entries = [{
-            '_type': 'url',
-            'title': feed_entry['title'],
-            'uploader': feed_entry['source']['name'] if feed_entry['source'] else None,
-            'upload_date': datetime.datetime.strptime(feed_entry['date'], '%Y-%m-%d').strftime('%Y%m%d'),
-            'url': 'http://www.youtube.com/watch?v=' + feed_entry['youtube_id'],
-        } for feed_entry in feed]
-
-        return {
-            '_type': 'playlist',
-            'title': 'nerdcubed.co.uk feed',
-            'id': 'nerdcubed-feed',
-            'entries': entries,
-        }
+        video_id = 'nerdcubed-feed'
+        feed = self._download_json('https://www.nerdcubed.co.uk/_/cdn/videos.json', video_id)
+
+        return self.playlist_result(
+            map(self._extract_video, traverse_obj(feed, ('videos', lambda _, v: v['id']))),
+            video_id, 'nerdcubed.co.uk feed')
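
The rewrite swaps the manual datetime.strptime conversion for yt_dlp.utils.parse_iso8601, which returns a Unix timestamp that yt-dlp later renders as upload_date. For example:

    from yt_dlp.utils import parse_iso8601

    # 2020-09-02T12:00:00Z is 1599048000 seconds after the epoch
    assert parse_iso8601('2020-09-02T12:00:00Z') == 1599048000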

View file

@@ -9,6 +9,7 @@
     join_nonempty,
     parse_duration,
     traverse_obj,
+    try_call,
     unescapeHTML,
     unified_timestamp,
     url_or_none,

@@ -473,22 +474,21 @@ class NhkRadiruIE(InfoExtractor):
     IE_DESC = 'NHK らじる (Radiru/Rajiru)'
     _VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?'
     _TESTS = [{
-        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_3853544',
-        'skip': 'Episode expired on 2023-04-16',
+        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_3926210',
+        'skip': 'Episode expired on 2024-02-24',
         'info_dict': {
-            'channel': 'NHK-FM',
-            'uploader': 'NHK-FM',
-            'description': 'md5:94b08bdeadde81a97df4ec882acce3e9',
+            'title': 'ジャズ・トゥナイト シリーズJAZZジャイアンツ 56 ジョニー・ホッジス',
+            'id': '0449_01_3926210',
             'ext': 'm4a',
-            'id': '0449_01_3853544',
             'series': 'ジャズ・トゥナイト',
+            'uploader': 'NHK-FM',
+            'channel': 'NHK-FM',
             'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg',
-            'timestamp': 1680969600,
-            'title': 'ジャズ・トゥナイト NEWジャズ特集',
-            'upload_date': '20230408',
-            'release_timestamp': 1680962400,
-            'release_date': '20230408',
-            'was_live': True,
+            'release_date': '20240217',
+            'description': 'md5:a456ee8e5e59e6dd2a7d32e62386e811',
+            'timestamp': 1708185600,
+            'release_timestamp': 1708178400,
+            'upload_date': '20240217',
         },
     }, {
         # playlist, airs every weekday so it should _hopefully_ be okay forever

@@ -519,7 +519,8 @@ class NhkRadiruIE(InfoExtractor):
             'series': 'らじる文庫 by ラジオ深夜便 ',
             'release_timestamp': 1481126700,
             'upload_date': '20211101',
-        }
+        },
+        'expected_warnings': ['Unable to download JSON metadata', 'Failed to get extended description'],
     }, {
         # news
         'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_3855109',

@@ -539,9 +540,28 @@ class NhkRadiruIE(InfoExtractor):
         },
     }]

+    _API_URL_TMPL = None
+
+    def _extract_extended_description(self, episode_id, episode):
+        service, _, area = traverse_obj(episode, ('aa_vinfo2', {str}, {lambda x: (x or '').partition(',')}))
+        aa_vinfo3 = traverse_obj(episode, ('aa_vinfo3', {str}))
+        detail_url = try_call(
+            lambda: self._API_URL_TMPL.format(service=service, area=area, dateid=aa_vinfo3))
+        if not detail_url:
+            return
+        full_meta = traverse_obj(
+            self._download_json(detail_url, episode_id, 'Downloading extended metadata', fatal=False),
+            ('list', service, 0, {dict})) or {}
+        return join_nonempty('subtitle', 'content', 'act', 'music', delim='\n\n', from_dict=full_meta)
+
     def _extract_episode_info(self, headline, programme_id, series_meta):
         episode_id = f'{programme_id}_{headline["headline_id"]}'
         episode = traverse_obj(headline, ('file_list', 0, {dict}))
+        description = self._extract_extended_description(episode_id, episode)
+        if not description:
+            self.report_warning('Failed to get extended description, falling back to summary')
+            description = traverse_obj(episode, ('file_title_sub', {str}))

         return {
             **series_meta,

@@ -551,14 +571,21 @@ def _extract_episode_info(self, headline, programme_id, series_meta):
             'was_live': True,
             'series': series_meta.get('title'),
             'thumbnail': url_or_none(headline.get('headline_image')) or series_meta.get('thumbnail'),
+            'description': description,
             **traverse_obj(episode, {
                 'title': 'file_title',
-                'description': 'file_title_sub',
                 'timestamp': ('open_time', {unified_timestamp}),
                 'release_timestamp': ('aa_vinfo4', {lambda x: x.split('_')[0]}, {unified_timestamp}),
             }),
         }

+    def _real_initialize(self):
+        if self._API_URL_TMPL:
+            return
+        api_config = self._download_xml(
+            'https://www.nhk.or.jp/radio/config/config_web.xml', None, 'Downloading API config', fatal=False)
+        NhkRadiruIE._API_URL_TMPL = try_call(lambda: f'https:{api_config.find(".//url_program_detail").text}')
+
     def _real_extract(self, url):
         site_id, corner_id, headline_id = self._match_valid_url(url).group('site', 'corner', 'headline')
         programme_id = f'{site_id}_{corner_id}'
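
The extended description is assembled with yt_dlp.utils.join_nonempty, whose from_dict form looks the named fields up and silently drops empty ones. A small sketch with made-up metadata:

    from yt_dlp.utils import join_nonempty

    full_meta = {'subtitle': 'Series intro', 'content': 'Tracklist...', 'act': '', 'music': None}
    # only the populated fields survive, joined by blank lines
    description = join_nonempty('subtitle', 'content', 'act', 'music', delim='\n\n', from_dict=full_meta)
    assert description == 'Series intro\n\nTracklist...'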

View file

@@ -172,9 +172,6 @@ class NiconicoIE(InfoExtractor):
     _VALID_URL = r'https?://(?:(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch|nico\.ms)/(?P<id>(?:[a-z]{2})?[0-9]+)'
     _NETRC_MACHINE = 'niconico'
-    _COMMENT_API_ENDPOINTS = (
-        'https://nvcomment.nicovideo.jp/legacy/api.json',
-        'https://nmsg.nicovideo.jp/api.json',)
     _API_HEADERS = {
         'X-Frontend-ID': '6',
         'X-Frontend-Version': '0',

@@ -470,93 +467,16 @@ def get_video_info(*items, get_first=True, **kwargs):
                 parse_duration(self._html_search_meta('video:duration', webpage, 'video duration', default=None))
                 or get_video_info('duration')),
             'webpage_url': url_or_none(url) or f'https://www.nicovideo.jp/watch/{video_id}',
-            'subtitles': self.extract_subtitles(video_id, api_data, session_api_data),
+            'subtitles': self.extract_subtitles(video_id, api_data),
         }

-    def _get_subtitles(self, video_id, api_data, session_api_data):
-        comment_user_key = traverse_obj(api_data, ('comment', 'keys', 'userKey'))
-        user_id_str = session_api_data.get('serviceUserId')
-
-        thread_ids = traverse_obj(api_data, ('comment', 'threads', lambda _, v: v['isActive']))
-        legacy_danmaku = self._extract_legacy_comments(video_id, thread_ids, user_id_str, comment_user_key) or []
-
-        new_comments = traverse_obj(api_data, ('comment', 'nvComment'))
-        new_danmaku = self._extract_new_comments(
-            new_comments.get('server'), video_id,
-            new_comments.get('params'), new_comments.get('threadKey'))
-
-        if not legacy_danmaku and not new_danmaku:
-            self.report_warning(f'Failed to get comments. {bug_reports_message()}')
-            return
-
-        return {
-            'comments': [{
-                'ext': 'json',
-                'data': json.dumps(legacy_danmaku + new_danmaku),
-            }],
-        }
-
-    def _extract_legacy_comments(self, video_id, threads, user_id, user_key):
-        auth_data = {
-            'user_id': user_id,
-            'userkey': user_key,
-        } if user_id and user_key else {'user_id': ''}
-
-        api_url = traverse_obj(threads, (..., 'server'), get_all=False)
-
-        # Request Start
-        post_data = [{'ping': {'content': 'rs:0'}}]
-        for i, thread in enumerate(threads):
-            thread_id = thread['id']
-            thread_fork = thread['fork']
-            # Post Start (2N)
-            post_data.append({'ping': {'content': f'ps:{i * 2}'}})
-            post_data.append({'thread': {
-                'fork': thread_fork,
-                'language': 0,
-                'nicoru': 3,
-                'scores': 1,
-                'thread': thread_id,
-                'version': '20090904',
-                'with_global': 1,
-                **auth_data,
-            }})
-            # Post Final (2N)
-            post_data.append({'ping': {'content': f'pf:{i * 2}'}})
-
-            # Post Start (2N+1)
-            post_data.append({'ping': {'content': f'ps:{i * 2 + 1}'}})
-            post_data.append({'thread_leaves': {
-                # format is '<bottom of minute range>-<top of minute range>:<comments per minute>,<total last comments'
-                # unfortunately NND limits (deletes?) comment returns this way, so you're only able to grab the last 1000 per language
-                'content': '0-999999:999999,999999,nicoru:999999',
-                'fork': thread_fork,
-                'language': 0,
-                'nicoru': 3,
-                'scores': 1,
-                'thread': thread_id,
-                **auth_data,
-            }})
-            # Post Final (2N+1)
-            post_data.append({'ping': {'content': f'pf:{i * 2 + 1}'}})
-        # Request Final
-        post_data.append({'ping': {'content': 'rf:0'}})
-
-        return self._download_json(
-            f'{api_url}/api.json', video_id, data=json.dumps(post_data).encode(), fatal=False,
-            headers={
-                'Referer': f'https://www.nicovideo.jp/watch/{video_id}',
-                'Origin': 'https://www.nicovideo.jp',
-                'Content-Type': 'text/plain;charset=UTF-8',
-            },
-            note='Downloading comments', errnote=f'Failed to access endpoint {api_url}')
-
-    def _extract_new_comments(self, endpoint, video_id, params, thread_key):
-        comments = self._download_json(
-            f'{endpoint}/v1/threads', video_id, data=json.dumps({
+    def _get_subtitles(self, video_id, api_data):
+        comments_info = traverse_obj(api_data, ('comment', 'nvComment', {dict})) or {}
+        danmaku = traverse_obj(self._download_json(
+            f'{comments_info.get("server")}/v1/threads', video_id, data=json.dumps({
                 'additionals': {},
-                'params': params,
-                'threadKey': thread_key,
+                'params': comments_info.get('params'),
+                'threadKey': comments_info.get('threadKey'),
             }).encode(), fatal=False,
             headers={
                 'Referer': 'https://www.nicovideo.jp/',

@@ -566,8 +486,19 @@ def _extract_new_comments(self, endpoint, video_id, params, thread_key):
                 'x-frontend-id': '6',
                 'x-frontend-version': '0',
             },
-            note='Downloading comments (new)', errnote='Failed to download comments (new)')
-        return traverse_obj(comments, ('data', 'threads', ..., 'comments', ...))
+            note='Downloading comments', errnote='Failed to download comments'),
+            ('data', 'threads', ..., 'comments', ...))
+
+        if not danmaku:
+            self.report_warning(f'Failed to get comments. {bug_reports_message()}')
+            return
+
+        return {
+            'comments': [{
+                'ext': 'json',
+                'data': json.dumps(danmaku),
+            }],
+        }


 class NiconicoPlaylistBaseIE(InfoExtractor):
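
The simplified _get_subtitles sends a single POST to the nvComment server. The body it serializes looks like this (the server, params, and threadKey values below are illustrative placeholders; in practice they come straight out of api_data['comment']['nvComment']):

    import json

    comments_info = {  # placeholder values, shaped like api_data['comment']['nvComment']
        'server': 'https://public.nvcomment.nicovideo.jp',
        'params': {'language': 'ja-jp', 'targets': []},
        'threadKey': 'eyJ...',
    }
    body = json.dumps({
        'additionals': {},
        'params': comments_info.get('params'),
        'threadKey': comments_info.get('threadKey'),
    }).encode()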

View file

@@ -135,14 +135,15 @@ class NovaIE(InfoExtractor):
     _VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
     _TESTS = [{
         'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260',
-        'md5': '249baab7d0104e186e78b0899c7d5f28',
+        'md5': 'da8f3f1fcdaf9fb0f112a32a165760a3',
         'info_dict': {
-            'id': '1757139',
-            'display_id': 'tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci',
+            'id': '8OvQqEvV3MW',
+            'display_id': '8OvQqEvV3MW',
             'ext': 'mp4',
             'title': 'Podzemní nemocnice v pražské Krči',
             'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53',
             'thumbnail': r're:^https?://.*\.(?:jpg)',
+            'duration': 151,
         }
     }, {
         'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html',

@@ -210,7 +211,7 @@ def _real_extract(self, url):
         # novaplus
         embed_id = self._search_regex(
-            r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)',
+            r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media(?:tn)?\.cms\.nova\.cz/embed/([^/?#&"\']+)',
             webpage, 'embed url', default=None)
         if embed_id:
             return {
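
The widened embed regex now also matches the mediatn host and stops the id capture at quotes. A quick check against a hypothetical embed tag:

    import re

    NOVA_EMBED_RE = r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media(?:tn)?\.cms\.nova\.cz/embed/([^/?#&"\']+)'
    html = '<iframe src="https://mediatn.cms.nova.cz/embed/8OvQqEvV3MW?autoplay=1"></iframe>'
    assert re.search(NOVA_EMBED_RE, html).group(1) == '8OvQqEvV3MW'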

View file

@@ -35,6 +35,7 @@ class NTVRuIE(InfoExtractor):
             'duration': 172,
             'view_count': int,
         },
+        'skip': '404 Not Found',
     }, {
         'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
         'md5': '82dbd49b38e3af1d00df16acbeab260c',

@@ -78,7 +79,8 @@ class NTVRuIE(InfoExtractor):
     }]

     _VIDEO_ID_REGEXES = [
-        r'<meta property="og:url" content="http://www\.ntv\.ru/video/(\d+)',
+        r'<meta property="og:url" content="https?://www\.ntv\.ru/video/(\d+)',
+        r'<meta property="og:video:(?:url|iframe)" content="https?://www\.ntv\.ru/embed/(\d+)',
         r'<video embed=[^>]+><id>(\d+)</id>',
         r'<video restriction[^>]+><key>(\d+)</key>',
     ]

View file

@@ -1,4 +1,6 @@
 from .common import InfoExtractor
+from .jwplatform import JWPlatformIE
+from ..utils import make_archive_id


 class OneFootballIE(InfoExtractor):

@@ -7,41 +9,43 @@ class OneFootballIE(InfoExtractor):
     _TESTS = [{
         'url': 'https://onefootball.com/en/video/highlights-fc-zuerich-3-3-fc-basel-34012334',
         'info_dict': {
-            'id': '34012334',
+            'id': 'Y2VtcWAT',
             'ext': 'mp4',
             'title': 'Highlights: FC Zürich 3-3 FC Basel',
             'description': 'md5:33d9855cb790702c4fe42a513700aba8',
-            'thumbnail': 'https://photobooth-api.onefootball.com/api/screenshot/https:%2F%2Fperegrine-api.onefootball.com%2Fv2%2Fphotobooth%2Fcms%2Fen%2F34012334',
-            'timestamp': 1635874604,
-            'upload_date': '20211102'
+            'thumbnail': 'https://cdn.jwplayer.com/v2/media/Y2VtcWAT/poster.jpg?width=720',
+            'timestamp': 1635874895,
+            'upload_date': '20211102',
+            'duration': 375.0,
+            'tags': ['Football', 'Soccer', 'OneFootball'],
+            '_old_archive_ids': ['onefootball 34012334'],
         },
-        'params': {'skip_download': True}
+        'params': {'skip_download': True},
+        'expected_warnings': ['Failed to download m3u8 information'],
     }, {
         'url': 'https://onefootball.com/en/video/klopp-fumes-at-var-decisions-in-west-ham-defeat-34041020',
         'info_dict': {
-            'id': '34041020',
+            'id': 'leVJrMho',
             'ext': 'mp4',
             'title': 'Klopp fumes at VAR decisions in West Ham defeat',
             'description': 'md5:9c50371095a01ad3f63311c73d8f51a5',
-            'thumbnail': 'https://photobooth-api.onefootball.com/api/screenshot/https:%2F%2Fperegrine-api.onefootball.com%2Fv2%2Fphotobooth%2Fcms%2Fen%2F34041020',
-            'timestamp': 1636314103,
-            'upload_date': '20211107'
+            'thumbnail': 'https://cdn.jwplayer.com/v2/media/leVJrMho/poster.jpg?width=720',
+            'timestamp': 1636315232,
+            'upload_date': '20211107',
+            'duration': 93.0,
+            'tags': ['Football', 'Soccer', 'OneFootball'],
+            '_old_archive_ids': ['onefootball 34041020'],
         },
         'params': {'skip_download': True}
     }]

     def _real_extract(self, url):
-        id = self._match_id(url)
-        webpage = self._download_webpage(url, id)
-        data_json = self._search_json_ld(webpage, id)
-        m3u8_url = self._html_search_regex(r'(https://cdn\.jwplayer\.com/manifests/.+\.m3u8)', webpage, 'm3u8_url')
-        formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, id)
-        return {
-            'id': id,
-            'title': data_json.get('title'),
-            'description': data_json.get('description'),
-            'thumbnail': data_json.get('thumbnail'),
-            'timestamp': data_json.get('timestamp'),
-            'formats': formats,
-            'subtitles': subtitles,
-        }
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        data_json = self._search_json_ld(webpage, video_id, fatal=False)
+        data_json.pop('url', None)
+        m3u8_url = self._html_search_regex(r'(https://cdn\.jwplayer\.com/manifests/\w+\.m3u8)', webpage, 'm3u8_url')
+
+        return self.url_result(
+            m3u8_url, JWPlatformIE, video_id, _old_archive_ids=[make_archive_id(self, video_id)],
+            **data_json, url_transparent=True)
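
The return value above is a url_transparent result: JWPlatformIE resolves the manifest and supplies formats, while the JSON-LD fields scraped here override the matching fields it returns. In plain dict form such a result looks roughly like this (values illustrative):

    result = {
        '_type': 'url_transparent',
        'ie_key': 'JWPlatform',
        'url': 'https://cdn.jwplayer.com/manifests/Y2VtcWAT.m3u8',  # example manifest URL
        'id': '34012334',
        'title': 'Highlights: FC Zürich 3-3 FC Basel',  # overrides JWPlatform's title
        '_old_archive_ids': ['onefootball 34012334'],
    }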

View file

@@ -12,6 +12,8 @@
 class OpenRecBaseIE(InfoExtractor):
+    _M3U8_HEADERS = {'Referer': 'https://www.openrec.tv/'}
+
     def _extract_pagestore(self, webpage, video_id):
         return self._parse_json(
             self._search_regex(r'(?m)window\.pageStore\s*=\s*(\{.+?\});$', webpage, 'window.pageStore'), video_id)

@@ -21,7 +23,7 @@ def _expand_media(self, video_id, media):
             if not m3u8_url:
                 continue
             yield from self._extract_m3u8_formats(
-                m3u8_url, video_id, ext='mp4', m3u8_id=name)
+                m3u8_url, video_id, ext='mp4', m3u8_id=name, headers=self._M3U8_HEADERS)

     def _extract_movie(self, webpage, video_id, name, is_live):
         window_stores = self._extract_pagestore(webpage, video_id)

@@ -60,6 +62,7 @@ def _extract_movie(self, webpage, video_id, name, is_live):
             'uploader_id': get_first(movie_stores, ('channel', 'user', 'id')),
             'timestamp': int_or_none(get_first(movie_stores, ['publishedAt', 'time']), scale=1000) or unified_timestamp(get_first(movie_stores, 'publishedAt')),
             'is_live': is_live,
+            'http_headers': self._M3U8_HEADERS,
         }

@@ -110,7 +113,7 @@ def _real_extract(self, url):
             raise ExtractorError('Cannot extract title')

         formats = self._extract_m3u8_formats(
-            capture_data.get('source'), video_id, ext='mp4')
+            capture_data.get('source'), video_id, ext='mp4', headers=self._M3U8_HEADERS)

         return {
             'id': video_id,

@@ -121,6 +124,7 @@ def _real_extract(self, url):
             'uploader': traverse_obj(movie_store, ('channel', 'name'), expected_type=compat_str),
             'uploader_id': traverse_obj(movie_store, ('channel', 'id'), expected_type=compat_str),
             'upload_date': unified_strdate(capture_data.get('createdAt')),
+            'http_headers': self._M3U8_HEADERS,
         }

View file

@@ -87,8 +87,8 @@ def _login(self, host):
         def is_logged(webpage):
             return any(re.search(p, webpage) for p in (
-                r'class=["\']signOut',
-                r'>Sign\s+[Oo]ut\s*<'))
+                r'id="profileMenuDropdown"',
+                r'class="ph-icon-logout"'))

         if is_logged(login_page):
             self._logged_in = True

View file

@@ -1,6 +1,7 @@
 import re

 from .common import InfoExtractor
+from ..networking import HEADRequest
 from ..utils import (
     clean_html,
     determine_ext,

@@ -91,7 +92,7 @@ def fix_cdata(s):
             self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)

         if not audio_only and not is_live:
-            formats.extend(self._create_http_urls(media_url, relinker_url, formats))
+            formats.extend(self._create_http_urls(media_url, relinker_url, formats, video_id))

         return filter_dict({
             'is_live': is_live,

@@ -99,7 +100,7 @@ def fix_cdata(s):
             'formats': formats,
         })

-    def _create_http_urls(self, manifest_url, relinker_url, fmts):
+    def _create_http_urls(self, manifest_url, relinker_url, fmts, video_id):
         _MANIFEST_REG = r'/(?P<id>\w+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4)?(?:\.csmil)?/playlist\.m3u8'
         _MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s'
         _QUALITY = {

@@ -166,6 +167,14 @@ def get_format_info(tbr):
             'fps': 25,
         }

+        # Check if MP4 download is available
+        try:
+            self._request_webpage(
+                HEADRequest(_MP4_TMPL % (relinker_url, '*')), video_id, 'Checking MP4 availability')
+        except ExtractorError as e:
+            self.to_screen(f'{video_id}: MP4 direct download is not available: {e.cause}')
+            return []
+
         # filter out single-stream formats
         fmts = [f for f in fmts
                 if not f.get('vcodec') == 'none' and not f.get('acodec') == 'none']
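
The availability check issues a HEAD request before synthesizing MP4 formats. Outside yt-dlp's networking layer, the same probe could be sketched with the standard library:

    import urllib.request

    def mp4_available(url):
        # Mirror of the HEADRequest probe: if the relinker refuses the MP4
        # variant, _create_http_urls bails out instead of emitting dead formats.
        try:
            req = urllib.request.Request(url, method='HEAD')
            with urllib.request.urlopen(req, timeout=10):
                return True
        except OSError:  # urllib errors subclass OSError
            return False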

View file

@@ -9,7 +9,6 @@
     get_element_html_by_class,
     get_elements_by_class,
     int_or_none,
-    join_nonempty,
     parse_count,
     parse_duration,
     unescapeHTML,

@@ -57,7 +56,7 @@ class Rule34VideoIE(InfoExtractor):
             'comment_count': int,
             'timestamp': 1640131200,
             'description': '',
-            'creator': 'WildeerStudio',
+            'creators': ['WildeerStudio'],
             'upload_date': '20211222',
             'uploader': 'CerZule',
             'uploader_url': 'https://rule34video.com/members/36281/',

@@ -81,13 +80,13 @@ def _real_extract(self, url):
                 'quality': quality,
             })

-        categories, creator, uploader, uploader_url = [None] * 4
+        categories, creators, uploader, uploader_url = [None] * 4
         for col in get_elements_by_class('col', webpage):
             label = clean_html(get_element_by_class('label', col))
             if label == 'Categories:':
                 categories = list(map(clean_html, get_elements_by_class('item', col)))
             elif label == 'Artist:':
-                creator = join_nonempty(*map(clean_html, get_elements_by_class('item', col)), delim=', ')
+                creators = list(map(clean_html, get_elements_by_class('item', col)))
             elif label == 'Uploaded By:':
                 uploader = clean_html(get_element_by_class('name', col))
                 uploader_url = extract_attributes(get_element_html_by_class('name', col) or '').get('href')

@@ -115,7 +114,7 @@ def _real_extract(self, url):
             'comment_count': int_or_none(self._search_regex(
                 r'[^(]+\((\d+)\)', get_element_by_attribute('href', '#tab_comments', webpage), 'comment count', fatal=False)),
             'age_limit': 18,
-            'creator': creator,
+            'creators': creators,
             'uploader': uploader,
             'uploader_url': uploader_url,
             'categories': categories,
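
The change tracks yt-dlp's move from the scalar creator field to the plural creators list: the artist column now stays a list instead of being joined into one string. Side by side, with illustrative names:

    names = ['WildeerStudio', 'SecondArtist']
    old_creator = ', '.join(names)  # 'WildeerStudio, SecondArtist' (legacy scalar field)
    new_creators = list(names)      # ['WildeerStudio', 'SecondArtist'] (current list field)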

View file

@@ -5,7 +5,10 @@
 class ScreencastifyIE(InfoExtractor):
-    _VALID_URL = r'https?://watch\.screencastify\.com/v/(?P<id>[^/?#]+)'
+    _VALID_URL = [
+        r'https?://watch\.screencastify\.com/v/(?P<id>[^/?#]+)',
+        r'https?://app\.screencastify\.com/v[23]/watch/(?P<id>[^/?#]+)',
+    ]
     _TESTS = [{
         'url': 'https://watch.screencastify.com/v/sYVkZip3quLKhHw4Ybk8',
         'info_dict': {

@@ -19,6 +22,21 @@ class ScreencastifyIE(InfoExtractor):
         'params': {
             'skip_download': 'm3u8',
         },
+    }, {
+        'url': 'https://app.screencastify.com/v3/watch/J5N7H11wofDN1jZUCr3t',
+        'info_dict': {
+            'id': 'J5N7H11wofDN1jZUCr3t',
+            'ext': 'mp4',
+            'uploader': 'Scott Piesen',
+            'description': '',
+            'title': 'Lesson Recording 1-17 Burrr...',
+        },
+        'params': {
+            'skip_download': 'm3u8',
+        },
+    }, {
+        'url': 'https://app.screencastify.com/v2/watch/BQ26VbUdfbQLhKzkktOk',
+        'only_matching': True,
     }]

     def _real_extract(self, url):
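
yt-dlp's InfoExtractor accepts a list of patterns in _VALID_URL; matching is equivalent to trying each pattern in turn. A simplified stand-in for that behavior:

    import re

    _VALID_URL = [
        r'https?://watch\.screencastify\.com/v/(?P<id>[^/?#]+)',
        r'https?://app\.screencastify\.com/v[23]/watch/(?P<id>[^/?#]+)',
    ]

    def match_id(url):
        # simplified stand-in for InfoExtractor._match_id with a pattern list
        for pattern in _VALID_URL:
            if mobj := re.match(pattern, url):
                return mobj.group('id')

    assert match_id('https://app.screencastify.com/v3/watch/J5N7H11wofDN1jZUCr3t') == 'J5N7H11wofDN1jZUCr3t'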

View file

@@ -7,8 +7,6 @@
     determine_ext,
     dict_get,
     int_or_none,
-    str_or_none,
-    strip_or_none,
     traverse_obj,
     try_get,
     unified_timestamp,

@@ -388,15 +386,55 @@ def _real_extract(self, url):
             dict_get(series, ('longDescription', 'shortDescription')))


-class SVTPageIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?svt\.se/(?P<path>(?:[^/]+/)*(?P<id>[^/?&#]+))'
+class SVTPageIE(SVTBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?svt\.se/(?:[^/?#]+/)*(?P<id>[^/?&#]+)'
     _TESTS = [{
+        'url': 'https://www.svt.se/nyheter/lokalt/skane/viktor-18-forlorade-armar-och-ben-i-sepsis-vill-ateruppta-karaten-och-bli-svetsare',
+        'info_dict': {
+            'title': 'Viktor, 18, förlorade armar och ben i sepsis vill återuppta karaten och bli svetsare',
+            'id': 'viktor-18-forlorade-armar-och-ben-i-sepsis-vill-ateruppta-karaten-och-bli-svetsare',
+        },
+        'playlist_count': 2,
+    }, {
+        'url': 'https://www.svt.se/nyheter/lokalt/skane/forsvarsmakten-om-trafikkaoset-pa-e22-kunde-inte-varit-dar-snabbare',
+        'info_dict': {
+            'id': 'jXvk42E',
+            'title': 'Försvarsmakten om trafikkaoset på E22: Kunde inte varit där snabbare',
+            'ext': 'mp4',
+            'duration': 80,
+            'age_limit': 0,
+            'timestamp': 1704370009,
+            'episode': 'Försvarsmakten om trafikkaoset på E22: Kunde inte varit där snabbare',
+            'series': 'Lokala Nyheter Skåne',
+            'upload_date': '20240104'
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }, {
+        'url': 'https://www.svt.se/nyheter/svtforum/2023-tungt-ar-for-svensk-media',
+        'info_dict': {
+            'title': '2023 tungt år för svensk media',
+            'id': 'ewqAZv4',
+            'ext': 'mp4',
+            'duration': 3074,
+            'age_limit': 0,
+            'series': '',
+            'timestamp': 1702980479,
+            'upload_date': '20231219',
+            'episode': 'Mediestudier'
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }, {
         'url': 'https://www.svt.se/sport/ishockey/bakom-masken-lehners-kamp-mot-mental-ohalsa',
         'info_dict': {
             'id': '25298267',
             'title': 'Bakom masken Lehners kamp mot mental ohälsa',
         },
         'playlist_count': 4,
+        'skip': 'Video is gone'
     }, {
         'url': 'https://www.svt.se/nyheter/utrikes/svenska-andrea-ar-en-mil-fran-branderna-i-kalifornien',
         'info_dict': {

@@ -404,6 +442,7 @@ class SVTPageIE(InfoExtractor):
             'title': 'Svenska Andrea redo att fly sitt hem i Kalifornien',
         },
         'playlist_count': 2,
+        'skip': 'Video is gone'
     }, {
         # only programTitle
         'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',

@@ -414,6 +453,7 @@ class SVTPageIE(InfoExtractor):
             'duration': 27,
             'age_limit': 0,
         },
+        'skip': 'Video is gone'
     }, {
         'url': 'https://www.svt.se/nyheter/lokalt/vast/svt-testar-tar-nagon-upp-skrapet-1',
         'only_matching': True,

@@ -427,26 +467,23 @@ def suitable(cls, url):
         return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTPageIE, cls).suitable(url)

     def _real_extract(self, url):
-        path, display_id = self._match_valid_url(url).groups()
-
-        article = self._download_json(
-            'https://api.svt.se/nss-api/page/' + path, display_id,
-            query={'q': 'articles'})['articles']['content'][0]
-
-        entries = []
-
-        def _process_content(content):
-            if content.get('_type') in ('VIDEOCLIP', 'VIDEOEPISODE'):
-                video_id = compat_str(content['image']['svtId'])
-                entries.append(self.url_result(
-                    'svt:' + video_id, SVTPlayIE.ie_key(), video_id))
-
-        for media in article.get('media', []):
-            _process_content(media)
-
-        for obj in article.get('structuredBody', []):
-            _process_content(obj.get('content') or {})
-
-        return self.playlist_result(
-            entries, str_or_none(article.get('id')),
-            strip_or_none(article.get('title')))
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        title = self._og_search_title(webpage)
+
+        urql_state = self._search_json(
+            r'window\.svt\.nyh\.urqlState\s*=', webpage, 'json data', display_id)
+        data = traverse_obj(urql_state, (..., 'data', {str}, {json.loads}), get_all=False) or {}
+
+        def entries():
+            for video_id in set(traverse_obj(data, (
+                'page', (('topMedia', 'svtId'), ('body', ..., 'video', 'svtId')), {str}
+            ))):
+                info = self._extract_video(
+                    self._download_json(f'https://api.svt.se/video/{video_id}', video_id), video_id)
+                info['title'] = title
+                yield info
+
+        return self.playlist_result(entries(), display_id, title)
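
Each entry in window.svt.nyh.urqlState stores its GraphQL payload as a JSON string, which is why the traversal path coerces with {json.loads} before descending further. A sketch with hypothetical data:

    import json
    from yt_dlp.utils import traverse_obj

    urql_state = {  # hypothetical, shaped like the embedded urqlState object
        'q1': {'data': json.dumps({'page': {'topMedia': {'svtId': 'jXvk42E'}}})},
    }
    data = traverse_obj(urql_state, (..., 'data', {str}, {json.loads}), get_all=False) or {}
    video_ids = set(traverse_obj(data, (
        'page', (('topMedia', 'svtId'), ('body', ..., 'video', 'svtId')), {str})))
    assert video_ids == {'jXvk42E'}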

View file

@@ -1,5 +1,5 @@
 from .common import InfoExtractor
-from ..utils import int_or_none, traverse_obj
+from ..utils import ExtractorError, int_or_none, traverse_obj


 class SwearnetEpisodeIE(InfoExtractor):

@@ -51,7 +51,13 @@ def _real_extract(self, url):
         display_id, season_number, episode_number = self._match_valid_url(url).group('id', 'season_num', 'episode_num')
         webpage = self._download_webpage(url, display_id)

-        external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid')
+        try:
+            external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid')
+        except ExtractorError:
+            if 'Upgrade Now' in webpage:
+                self.raise_login_required()
+            raise
+
         json_data = self._download_json(
             f'https://play.vidyard.com/player/{external_id}.json', display_id)['payload']['chapters'][0]

View file

@ -100,9 +100,13 @@ def _extract_variant_formats(self, variant, video_id):
if not variant_url: if not variant_url:
return [], {} return [], {}
elif '.m3u8' in variant_url: elif '.m3u8' in variant_url:
return self._extract_m3u8_formats_and_subtitles( fmts, subs = self._extract_m3u8_formats_and_subtitles(
variant_url, video_id, 'mp4', 'm3u8_native', variant_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False) m3u8_id='hls', fatal=False)
for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None):
if mobj := re.match(r'hls-[Aa]udio-(?P<bitrate>\d{4,})', f['format_id']):
f['tbr'] = int_or_none(mobj.group('bitrate'), 1000)
return fmts, subs
else: else:
tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
f = { f = {
@ -471,6 +475,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!', 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ', 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
'channel_id': '549749560',
'uploader': 'FREE THE NIPPLE', 'uploader': 'FREE THE NIPPLE',
'uploader_id': 'freethenipple', 'uploader_id': 'freethenipple',
'duration': 12.922, 'duration': 12.922,
@ -484,6 +489,7 @@ class TwitterIE(TwitterBaseIE):
'age_limit': 18, 'age_limit': 18,
'_old_archive_ids': ['twitter 643211948184596480'], '_old_archive_ids': ['twitter 643211948184596480'],
}, },
'skip': 'Requires authentication',
}, { }, {
'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1', 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
'md5': 'f36dcd5fb92bf7057f155e7d927eeb42', 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
@ -506,6 +512,7 @@ class TwitterIE(TwitterBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': r're:Star Wars.*A new beginning is coming December 18.*', 'title': r're:Star Wars.*A new beginning is coming December 18.*',
'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ', 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
'channel_id': '20106852',
'uploader_id': 'starwars', 'uploader_id': 'starwars',
'uploader': r're:Star Wars.*', 'uploader': r're:Star Wars.*',
'timestamp': 1447395772, 'timestamp': 1447395772,
@ -551,6 +558,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel', 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ', 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'channel_id': '1383165541',
'uploader': 'jaydin donte geer', 'uploader': 'jaydin donte geer',
'uploader_id': 'jaydingeer', 'uploader_id': 'jaydingeer',
'duration': 30.0, 'duration': 30.0,
@ -591,6 +599,7 @@ class TwitterIE(TwitterBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.', 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI', 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
'channel_id': '701615052',
'uploader_id': 'CaptainAmerica', 'uploader_id': 'CaptainAmerica',
'uploader': 'Captain America', 'uploader': 'Captain America',
'duration': 3.17, 'duration': 3.17,
@ -627,6 +636,7 @@ class TwitterIE(TwitterBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة', 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN', 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
'channel_id': '2526757026',
'uploader': 'عالم الأخبار', 'uploader': 'عالم الأخبار',
'uploader_id': 'news_al3alm', 'uploader_id': 'news_al3alm',
'duration': 277.4, 'duration': 277.4,
@ -651,6 +661,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.', 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo', 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
'channel_id': '2319432498',
'uploader': 'Préfet de Guadeloupe', 'uploader': 'Préfet de Guadeloupe',
'uploader_id': 'Prefet971', 'uploader_id': 'Prefet971',
'duration': 47.48, 'duration': 47.48,
@ -677,6 +688,7 @@ class TwitterIE(TwitterBaseIE):
'title': 're:.*?Shep is on a roll today.*?', 'title': 're:.*?Shep is on a roll today.*?',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09', 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
'channel_id': '255036353',
'uploader': 'Lis Power', 'uploader': 'Lis Power',
'uploader_id': 'LisPower1', 'uploader_id': 'LisPower1',
'duration': 111.278, 'duration': 111.278,
@ -741,6 +753,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'md5:d1c4941658e4caaa6cb579260d85dcba', 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'description': 'md5:71ead15ec44cee55071547d6447c6a3e', 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
'channel_id': '18552281',
'uploader': 'Brooklyn Nets', 'uploader': 'Brooklyn Nets',
'uploader_id': 'BrooklynNets', 'uploader_id': 'BrooklynNets',
'duration': 324.484, 'duration': 324.484,
@ -763,10 +776,11 @@ class TwitterIE(TwitterBaseIE):
'id': '1577855447914409984', 'id': '1577855447914409984',
'display_id': '1577855540407197696', 'display_id': '1577855540407197696',
'ext': 'mp4', 'ext': 'mp4',
'title': 'md5:9d198efb93557b8f8d5b78c480407214', 'title': 'md5:466a3a8b049b5f5a13164ce915484b51',
'description': 'md5:b9c3699335447391d11753ab21c70a74', 'description': 'md5:b9c3699335447391d11753ab21c70a74',
'upload_date': '20221006', 'upload_date': '20221006',
'uploader': 'oshtru', 'channel_id': '143077138',
'uploader': 'Oshtru',
'uploader_id': 'oshtru', 'uploader_id': 'oshtru',
'uploader_url': 'https://twitter.com/oshtru', 'uploader_url': 'https://twitter.com/oshtru',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
@ -784,9 +798,10 @@ class TwitterIE(TwitterBaseIE):
'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464', 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
'info_dict': { 'info_dict': {
'id': '1577719286659006464', 'id': '1577719286659006464',
'title': 'Ultima - Test', 'title': 'Ultima Reload - Test',
'description': 'Test https://t.co/Y3KEZD7Dad', 'description': 'Test https://t.co/Y3KEZD7Dad',
'uploader': 'Ultima', 'channel_id': '168922496',
'uploader': 'Ultima Reload',
'uploader_id': 'UltimaShadowX', 'uploader_id': 'UltimaShadowX',
'uploader_url': 'https://twitter.com/UltimaShadowX', 'uploader_url': 'https://twitter.com/UltimaShadowX',
'upload_date': '20221005', 'upload_date': '20221005',
@ -808,6 +823,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9', 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'description': 'md5:95aea692fda36a12081b9629b02daa92', 'description': 'md5:95aea692fda36a12081b9629b02daa92',
'channel_id': '1094109584',
'uploader': 'Max Olson', 'uploader': 'Max Olson',
'uploader_id': 'MesoMax919', 'uploader_id': 'MesoMax919',
'uploader_url': 'https://twitter.com/MesoMax919', 'uploader_url': 'https://twitter.com/MesoMax919',
@ -830,6 +846,7 @@ class TwitterIE(TwitterBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': str, 'title': str,
'description': str, 'description': str,
'channel_id': '1217167793541480450',
'uploader': str, 'uploader': str,
'uploader_id': 'Rizdraws', 'uploader_id': 'Rizdraws',
'uploader_url': 'https://twitter.com/Rizdraws', 'uploader_url': 'https://twitter.com/Rizdraws',
@ -840,7 +857,8 @@ class TwitterIE(TwitterBaseIE):
'repost_count': int, 'repost_count': int,
'comment_count': int, 'comment_count': int,
'age_limit': 18, 'age_limit': 18,
'tags': [] 'tags': [],
'_old_archive_ids': ['twitter 1575199173472927762'],
}, },
'params': {'skip_download': 'The media could not be played'}, 'params': {'skip_download': 'The media could not be played'},
'skip': 'Requires authentication', 'skip': 'Requires authentication',
@ -852,6 +870,7 @@ class TwitterIE(TwitterBaseIE):
'id': '1395079556562706435', 'id': '1395079556562706435',
'title': str, 'title': str,
'tags': [], 'tags': [],
'channel_id': '21539378',
'uploader': str, 'uploader': str,
'like_count': int, 'like_count': int,
'upload_date': '20210519', 'upload_date': '20210519',
@ -869,6 +888,7 @@ class TwitterIE(TwitterBaseIE):
'info_dict': { 'info_dict': {
'id': '1578353380363501568', 'id': '1578353380363501568',
'title': str, 'title': str,
'channel_id': '2195866214',
'uploader_id': 'DavidToons_', 'uploader_id': 'DavidToons_',
'repost_count': int, 'repost_count': int,
'like_count': int, 'like_count': int,
@ -888,6 +908,7 @@ class TwitterIE(TwitterBaseIE):
'id': '1578401165338976258', 'id': '1578401165338976258',
'title': str, 'title': str,
'description': 'md5:659a6b517a034b4cee5d795381a2dc41', 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
'channel_id': '19338359',
'uploader': str, 'uploader': str,
'uploader_id': 'primevideouk', 'uploader_id': 'primevideouk',
'timestamp': 1665155137, 'timestamp': 1665155137,
@ -929,6 +950,7 @@ class TwitterIE(TwitterBaseIE):
'description': 'md5:591c19ce66fadc2359725d5cd0d1052c', 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
'comment_count': int, 'comment_count': int,
'uploader_id': 'CTVJLaidlaw', 'uploader_id': 'CTVJLaidlaw',
'channel_id': '80082014',
'repost_count': int, 'repost_count': int,
'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'], 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
'upload_date': '20221208', 'upload_date': '20221208',
@ -946,6 +968,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1', 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
'thumbnail': r're:^https?://.+\.jpg', 'thumbnail': r're:^https?://.+\.jpg',
'timestamp': 1670459604.0, 'timestamp': 1670459604.0,
'channel_id': '80082014',
'uploader_id': 'CTVJLaidlaw', 'uploader_id': 'CTVJLaidlaw',
'uploader': 'Jocelyn Laidlaw', 'uploader': 'Jocelyn Laidlaw',
'repost_count': int, 'repost_count': int,
@ -972,6 +995,7 @@ class TwitterIE(TwitterBaseIE):
'title': '뽀 - 아 최우제 이동속도 봐', 'title': '뽀 - 아 최우제 이동속도 봐',
'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB', 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
'duration': 24.598, 'duration': 24.598,
'channel_id': '1281839411068432384',
'uploader': '', 'uploader': '',
'uploader_id': 's2FAKER', 'uploader_id': 's2FAKER',
'uploader_url': 'https://twitter.com/s2FAKER', 'uploader_url': 'https://twitter.com/s2FAKER',
@ -985,6 +1009,7 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int, 'comment_count': int,
'_old_archive_ids': ['twitter 1621117700482416640'], '_old_archive_ids': ['twitter 1621117700482416640'],
}, },
'skip': 'Requires authentication',
}, { }, {
'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2', 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
'info_dict': { 'info_dict': {
@ -992,6 +1017,7 @@ class TwitterIE(TwitterBaseIE):
'display_id': '1599108751385972737', 'display_id': '1599108751385972737',
'ext': 'mp4', 'ext': 'mp4',
'title': '\u06ea - \U0001F48B', 'title': '\u06ea - \U0001F48B',
'channel_id': '1347791436809441283',
'uploader_url': 'https://twitter.com/hlo_again', 'uploader_url': 'https://twitter.com/hlo_again',
'like_count': int, 'like_count': int,
'uploader_id': 'hlo_again', 'uploader_id': 'hlo_again',
@ -1014,6 +1040,7 @@ class TwitterIE(TwitterBaseIE):
'id': '1600009362759733248', 'id': '1600009362759733248',
'display_id': '1600009574919962625', 'display_id': '1600009574919962625',
'ext': 'mp4', 'ext': 'mp4',
'channel_id': '211814412',
'uploader_url': 'https://twitter.com/MunTheShinobi', 'uploader_url': 'https://twitter.com/MunTheShinobi',
'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml', 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig', 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
@ -1061,6 +1088,7 @@ class TwitterIE(TwitterBaseIE):
'display_id': '1695424220702888009', 'display_id': '1695424220702888009',
'title': 'md5:e8daa9527bc2b947121395494f786d9d', 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
'description': 'md5:004f2d37fd58737724ec75bc7e679938', 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
'channel_id': '15212187',
'uploader': 'Benny Johnson', 'uploader': 'Benny Johnson',
'uploader_id': 'bennyjohnson', 'uploader_id': 'bennyjohnson',
'uploader_url': 'https://twitter.com/bennyjohnson', 'uploader_url': 'https://twitter.com/bennyjohnson',
@ -1084,6 +1112,7 @@ class TwitterIE(TwitterBaseIE):
'display_id': '1695424220702888009', 'display_id': '1695424220702888009',
'title': 'md5:e8daa9527bc2b947121395494f786d9d', 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
'description': 'md5:004f2d37fd58737724ec75bc7e679938', 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
'channel_id': '15212187',
'uploader': 'Benny Johnson', 'uploader': 'Benny Johnson',
'uploader_id': 'bennyjohnson', 'uploader_id': 'bennyjohnson',
'uploader_url': 'https://twitter.com/bennyjohnson', 'uploader_url': 'https://twitter.com/bennyjohnson',
@ -1117,7 +1146,7 @@ class TwitterIE(TwitterBaseIE):
}, },
'add_ie': ['TwitterBroadcast'], 'add_ie': ['TwitterBroadcast'],
}, { }, {
# Animated gif and quote tweet video, with syndication API # Animated gif and quote tweet video
'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950', 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
'playlist_mincount': 2, 'playlist_mincount': 2,
'info_dict': { 'info_dict': {
@ -1125,6 +1154,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'BAKOON - https://t.co/zom968d0a0', 'title': 'BAKOON - https://t.co/zom968d0a0',
'description': 'https://t.co/zom968d0a0', 'description': 'https://t.co/zom968d0a0',
'tags': [], 'tags': [],
'channel_id': '1263540390',
'uploader': 'BAKOON', 'uploader': 'BAKOON',
'uploader_id': 'BAKKOOONN', 'uploader_id': 'BAKKOOONN',
'uploader_url': 'https://twitter.com/BAKKOOONN', 'uploader_url': 'https://twitter.com/BAKKOOONN',
@ -1132,19 +1162,21 @@ class TwitterIE(TwitterBaseIE):
'timestamp': 1693254077.0, 'timestamp': 1693254077.0,
'upload_date': '20230828', 'upload_date': '20230828',
'like_count': int, 'like_count': int,
'comment_count': int,
'repost_count': int,
}, },
'params': {'extractor_args': {'twitter': {'api': ['syndication']}}}, 'skip': 'Requires authentication',
'expected_warnings': ['Not all metadata'],
}, { }, {
# "stale tweet" with typename "TweetWithVisibilityResults" # "stale tweet" with typename "TweetWithVisibilityResults"
'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154', 'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
'md5': '62b1e11cdc2cdd0e527f83adb081f536', 'md5': '511377ff8dfa7545307084dca4dce319',
'info_dict': { 'info_dict': {
'id': '1724883339285544960', 'id': '1724883339285544960',
'ext': 'mp4', 'ext': 'mp4',
'title': 'md5:cc56716f9ed0b368de2ba54c478e493c', 'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164', 'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
'display_id': '1724884212803834154', 'display_id': '1724884212803834154',
'channel_id': '337808606',
'uploader': 'Robert F. Kennedy Jr', 'uploader': 'Robert F. Kennedy Jr',
'uploader_id': 'RobertKennedyJr', 'uploader_id': 'RobertKennedyJr',
'uploader_url': 'https://twitter.com/RobertKennedyJr', 'uploader_url': 'https://twitter.com/RobertKennedyJr',
@ -1386,6 +1418,7 @@ def _real_extract(self, url):
'description': description, 'description': description,
'uploader': uploader, 'uploader': uploader,
'timestamp': unified_timestamp(status.get('created_at')), 'timestamp': unified_timestamp(status.get('created_at')),
'channel_id': str_or_none(status.get('user_id_str')) or str_or_none(user.get('id_str')),
'uploader_id': uploader_id, 'uploader_id': uploader_id,
'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'), 'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
'like_count': int_or_none(status.get('favorite_count')), 'like_count': int_or_none(status.get('favorite_count')),
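[Annotation] The new channel_id field prefers the tweet's own user_id_str and falls back to the user object's id_str, coerced to a string either way. A minimal standalone sketch of that fallback, with str_or_none re-implemented locally and made-up sample payloads:

    def str_or_none(v):
        # local stand-in for yt-dlp's str_or_none(): keep None as None so
        # that `or` can fall through to the next candidate
        return None if v is None else str(v)

    # hypothetical status/user payloads, for illustration only
    status = {'user_id_str': None, 'favorite_count': 3}
    user = {'id_str': 1347791436809441283}

    channel_id = str_or_none(status.get('user_id_str')) or str_or_none(user.get('id_str'))
    assert channel_id == '1347791436809441283'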

View file

@ -10,7 +10,8 @@
class UtreonIE(InfoExtractor): class UtreonIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?utreon\.com/v/(?P<id>[\w-]+)' IE_NAME = 'playeur'
_VALID_URL = r'https?://(?:www\.)?(?:utreon|playeur)\.com/v/(?P<id>[\w-]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://utreon.com/v/z_I7ikQbuDw', 'url': 'https://utreon.com/v/z_I7ikQbuDw',
'info_dict': { 'info_dict': {
@ -19,8 +20,9 @@ class UtreonIE(InfoExtractor):
'title': 'Freedom Friday meditation - Rising in the wind', 'title': 'Freedom Friday meditation - Rising in the wind',
'description': 'md5:a9bf15a42434a062fe313b938343ad1b', 'description': 'md5:a9bf15a42434a062fe313b938343ad1b',
'uploader': 'Heather Dawn Elemental Health', 'uploader': 'Heather Dawn Elemental Health',
'thumbnail': 'https://data-1.utreon.com/v/MG/M2/NT/z_I7ikQbuDw/z_I7ikQbuDw_preview.jpg', 'thumbnail': r're:^https?://.+\.jpg',
'release_date': '20210723', 'release_date': '20210723',
'duration': 586,
} }
}, { }, {
'url': 'https://utreon.com/v/jerJw5EOOVU', 'url': 'https://utreon.com/v/jerJw5EOOVU',
@ -28,10 +30,11 @@ class UtreonIE(InfoExtractor):
'id': 'jerJw5EOOVU', 'id': 'jerJw5EOOVU',
'ext': 'mp4', 'ext': 'mp4',
'title': 'When I\'m alone, I love to reflect in peace, to make my dreams come true... [Quotes and Poems]', 'title': 'When I\'m alone, I love to reflect in peace, to make my dreams come true... [Quotes and Poems]',
'description': 'md5:61ee6c2da98be51b04b969ca80273aaa', 'description': 'md5:4026aa3a2c10169c3649926ac8ef62b6',
'uploader': 'Frases e Poemas Quotes and Poems', 'uploader': 'Frases e Poemas Quotes and Poems',
'thumbnail': 'https://data-1.utreon.com/v/Mz/Zh/ND/jerJw5EOOVU/jerJw5EOOVU_89af85470a4b16eededde7f8674c96d9_cover.jpg', 'thumbnail': r're:^https?://.+\.jpg',
'release_date': '20210723', 'release_date': '20210723',
'duration': 60,
} }
}, { }, {
'url': 'https://utreon.com/v/C4ZxXhYBBmE', 'url': 'https://utreon.com/v/C4ZxXhYBBmE',
@ -39,10 +42,11 @@ class UtreonIE(InfoExtractor):
'id': 'C4ZxXhYBBmE', 'id': 'C4ZxXhYBBmE',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Bidens Capital Gains Tax Rate to Test Worlds Highest', 'title': 'Bidens Capital Gains Tax Rate to Test Worlds Highest',
'description': 'md5:fb5a6c2e506f013cc76f133f673bc5c8', 'description': 'md5:995aa9ad0733c0e5863ebdeff954f40e',
'uploader': 'Nomad Capitalist', 'uploader': 'Nomad Capitalist',
'thumbnail': 'https://data-1.utreon.com/v/ZD/k1/Mj/C4ZxXhYBBmE/C4ZxXhYBBmE_628342076198c9c06dd6b2c665978584_cover.jpg', 'thumbnail': r're:^https?://.+\.jpg',
'release_date': '20210723', 'release_date': '20210723',
'duration': 884,
} }
}, { }, {
'url': 'https://utreon.com/v/Y-stEH-FBm8', 'url': 'https://utreon.com/v/Y-stEH-FBm8',
@ -52,15 +56,28 @@ class UtreonIE(InfoExtractor):
'title': 'Creeper-Chan Pranks Steve! 💚 [MINECRAFT ANIME]', 'title': 'Creeper-Chan Pranks Steve! 💚 [MINECRAFT ANIME]',
'description': 'md5:7a48450b0d761b96dec194be0c5ecb5f', 'description': 'md5:7a48450b0d761b96dec194be0c5ecb5f',
'uploader': 'Merryweather Comics', 'uploader': 'Merryweather Comics',
'thumbnail': 'https://data-1.utreon.com/v/MT/E4/Zj/Y-stEH-FBm8/Y-stEH-FBm8_5290676a41a4a1096db133b09f54f77b_cover.jpg', 'thumbnail': r're:^https?://.+\.jpg',
'release_date': '20210718', 'release_date': '20210718',
}}, 'duration': 151,
] }
}, {
'url': 'https://playeur.com/v/Wzqp-UrxSeu',
'info_dict': {
'id': 'Wzqp-UrxSeu',
'ext': 'mp4',
'title': 'Update: Clockwork Basilisk Books on the Way!',
'description': 'md5:d9756b0b1884c904655b0e170d17cea5',
'uploader': 'Forgotten Weapons',
'release_date': '20240208',
'thumbnail': r're:^https?://.+\.jpg',
'duration': 262,
}
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
json_data = self._download_json( json_data = self._download_json(
'https://api.utreon.com/v1/videos/' + video_id, 'https://api.playeur.com/v1/videos/' + video_id,
video_id) video_id)
videos_json = json_data['videos'] videos_json = json_data['videos']
formats = [{ formats = [{
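[Annotation] The extractor now answers to both domains and calls the relocated API host. A quick check of the widened _VALID_URL against one old and one new test URL from the cases above:

    import re

    _VALID_URL = r'https?://(?:www\.)?(?:utreon|playeur)\.com/v/(?P<id>[\w-]+)'

    for url in ('https://utreon.com/v/z_I7ikQbuDw', 'https://playeur.com/v/Wzqp-UrxSeu'):
        mobj = re.match(_VALID_URL, url)
        assert mobj, url
        print(mobj.group('id'))  # -> z_I7ikQbuDw, then Wzqp-UrxSeu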

View file

@ -48,17 +48,15 @@ def _unsmuggle_headers(self, url):
return url, data, headers return url, data, headers
def _perform_login(self, username, password): def _perform_login(self, username, password):
webpage = self._download_webpage( viewer = self._download_json('https://vimeo.com/_next/viewer', None, 'Downloading login token')
self._LOGIN_URL, None, 'Downloading login page')
token, vuid = self._extract_xsrft_and_vuid(webpage)
data = { data = {
'action': 'login', 'action': 'login',
'email': username, 'email': username,
'password': password, 'password': password,
'service': 'vimeo', 'service': 'vimeo',
'token': token, 'token': viewer['xsrft'],
} }
self._set_vimeo_cookie('vuid', vuid) self._set_vimeo_cookie('vuid', viewer['vuid'])
try: try:
self._download_webpage( self._download_webpage(
self._LOGIN_URL, None, 'Logging in', self._LOGIN_URL, None, 'Logging in',
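[Annotation] Login now bootstraps from the JSON at https://vimeo.com/_next/viewer instead of scraping the login page for the XSRF token and vuid. A rough sketch of the flow, using requests in place of yt-dlp's networking layer; the credentials are placeholders, and the real extractor then POSTs the form to its _LOGIN_URL:

    import requests  # stand-in for yt-dlp's internal HTTP layer

    session = requests.Session()
    # one JSON request now yields both the XSRF token and the vuid cookie value
    viewer = session.get('https://vimeo.com/_next/viewer').json()

    data = {
        'action': 'login',
        'email': 'user@example.com',   # placeholder credentials
        'password': 'hunter2',
        'service': 'vimeo',
        'token': viewer['xsrft'],
    }
    session.cookies.set('vuid', viewer['vuid'], domain='.vimeo.com')
    # sketch ends here; the extractor submits `data` to the login endpoint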

View file

@ -2068,7 +2068,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': 'Voyeur Girl', 'title': 'Voyeur Girl',
'description': 'md5:7ae382a65843d6df2685993e90a8628f', 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
'upload_date': '20190312', 'upload_date': '20190312',
'artist': 'Stephen', 'artists': ['Stephen'],
'creators': ['Stephen'],
'track': 'Voyeur Girl', 'track': 'Voyeur Girl',
'album': 'it\'s too much love to know my dear', 'album': 'it\'s too much love to know my dear',
'release_date': '20190313', 'release_date': '20190313',
@ -2081,7 +2082,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel': 'Stephen', # TODO: should be "Stephen - Topic" 'channel': 'Stephen', # TODO: should be "Stephen - Topic"
'uploader': 'Stephen', 'uploader': 'Stephen',
'availability': 'public', 'availability': 'public',
'creator': 'Stephen',
'duration': 169, 'duration': 169,
'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp', 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
'age_limit': 0, 'age_limit': 0,
@ -3640,15 +3640,28 @@ def _get_requested_clients(self, url, smuggled_data):
return orderedSet(requested_clients) return orderedSet(requested_clients)
def _invalid_player_response(self, pr, video_id):
# YouTube may return a different video player response than expected.
# See: https://github.com/TeamNewPipe/NewPipe/issues/8713
if (pr_id := traverse_obj(pr, ('videoDetails', 'videoId'))) != video_id:
return pr_id
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data): def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
initial_pr = None initial_pr = None
if webpage: if webpage:
initial_pr = self._search_json( initial_pr = self._search_json(
self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False) self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)
prs = []
if initial_pr and not self._invalid_player_response(initial_pr, video_id):
# Android player_response does not have microFormats which are needed for
# extraction of some data. So we return the initial_pr with formats
# stripped out even if not requested by the user
# See: https://github.com/yt-dlp/yt-dlp/issues/501
prs.append({**initial_pr, 'streamingData': None})
all_clients = set(clients) all_clients = set(clients)
clients = clients[::-1] clients = clients[::-1]
prs = []
def append_client(*client_names): def append_client(*client_names):
""" Append the first client name that exists but not already used """ """ Append the first client name that exists but not already used """
@ -3660,18 +3673,9 @@ def append_client(*client_names):
all_clients.add(actual_client) all_clients.add(actual_client)
return return
# Android player_response does not have microFormats which are needed for
# extraction of some data. So we return the initial_pr with formats
# stripped out even if not requested by the user
# See: https://github.com/yt-dlp/yt-dlp/issues/501
if initial_pr:
pr = dict(initial_pr)
pr['streamingData'] = None
prs.append(pr)
last_error = None
tried_iframe_fallback = False tried_iframe_fallback = False
player_url = None player_url = None
skipped_clients = {}
while clients: while clients:
client, base_client, variant = _split_innertube_client(clients.pop()) client, base_client, variant = _split_innertube_client(clients.pop())
player_ytcfg = master_ytcfg if client == 'web' else {} player_ytcfg = master_ytcfg if client == 'web' else {}
@ -3692,26 +3696,19 @@ def append_client(*client_names):
pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response( pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data) client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
except ExtractorError as e: except ExtractorError as e:
if last_error: self.report_warning(e)
self.report_warning(last_error)
last_error = e
continue continue
if pr: if pr_id := self._invalid_player_response(pr, video_id):
# YouTube may return a different video player response than expected. skipped_clients[client] = pr_id
# See: https://github.com/TeamNewPipe/NewPipe/issues/8713 elif pr:
pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId')) # Save client name for introspection later
if pr_video_id and pr_video_id != video_id: name = short_client_name(client)
self.report_warning( sd = traverse_obj(pr, ('streamingData', {dict})) or {}
f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message()) sd[STREAMING_DATA_CLIENT_NAME] = name
else: for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
# Save client name for introspection later f[STREAMING_DATA_CLIENT_NAME] = name
name = short_client_name(client) prs.append(pr)
sd = traverse_obj(pr, ('streamingData', {dict})) or {}
sd[STREAMING_DATA_CLIENT_NAME] = name
for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
f[STREAMING_DATA_CLIENT_NAME] = name
prs.append(pr)
# creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated: if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
@ -3722,10 +3719,15 @@ def append_client(*client_names):
elif not variant: elif not variant:
append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded') append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')
if last_error: if skipped_clients:
if not len(prs): self.report_warning(
raise last_error f'Skipping player responses from {"/".join(skipped_clients)} clients '
self.report_warning(last_error) f'(got player responses for video "{"/".join(set(skipped_clients.values()))}" instead of "{video_id}")')
if not prs:
raise ExtractorError(
'All player responses are invalid. Your IP is likely being blocked by Youtube', expected=True)
elif not prs:
raise ExtractorError('Failed to extract any player response')
return prs, player_url return prs, player_url
def _needs_live_processing(self, live_status, duration): def _needs_live_processing(self, live_status, duration):
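[Annotation] The refactor above centralizes the video-ID sanity check in _invalid_player_response and batches the warnings: mismatched responses are collected per client and reported once, and an all-invalid result now raises a clear error. A self-contained sketch of that control flow, with traverse_obj approximated by a plain .get() chain and hypothetical per-client responses:

    def invalid_player_response(pr, video_id):
        # returns the offending ID when the response belongs to another video
        # (approximates traverse_obj(pr, ('videoDetails', 'videoId')))
        pr_id = (pr.get('videoDetails') or {}).get('videoId')
        if pr_id != video_id:
            return pr_id

    video_id = 'abc123'
    responses = {  # hypothetical player responses keyed by client
        'web': {'videoDetails': {'videoId': 'abc123'}},
        'android': {'videoDetails': {'videoId': 'xyz789'}},  # e.g. a blocked-IP stub
    }

    prs, skipped_clients = [], {}
    for client, pr in responses.items():
        if pr_id := invalid_player_response(pr, video_id):
            skipped_clients[client] = pr_id
        else:
            prs.append(pr)

    if skipped_clients:
        print(f'Skipping player responses from {"/".join(skipped_clients)} clients '
              f'(got video "{"/".join(set(skipped_clients.values()))}" instead of "{video_id}")')
    if not prs:
        raise RuntimeError('All player responses are invalid')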
@ -4386,7 +4388,8 @@ def process_language(container, base_url, lang_code, sub_name, query):
release_year = release_date[:4] release_year = release_date[:4]
info.update({ info.update({
'album': mobj.group('album'.strip()), 'album': mobj.group('album'.strip()),
'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')), 'artists': ([a] if (a := mobj.group('clean_artist'))
else [a.strip() for a in mobj.group('artist').split('·')]),
'track': mobj.group('track').strip(), 'track': mobj.group('track').strip(),
'release_date': release_date, 'release_date': release_date,
'release_year': int_or_none(release_year), 'release_year': int_or_none(release_year),
@ -4532,7 +4535,7 @@ def process_language(container, base_url, lang_code, sub_name, query):
if mrr_title == 'Album': if mrr_title == 'Album':
info['album'] = mrr_contents_text info['album'] = mrr_contents_text
elif mrr_title == 'Artist': elif mrr_title == 'Artist':
info['artist'] = mrr_contents_text info['artists'] = [mrr_contents_text] if mrr_contents_text else None
elif mrr_title == 'Song': elif mrr_title == 'Song':
info['track'] = mrr_contents_text info['track'] = mrr_contents_text
owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges'))) owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))
@ -4566,7 +4569,7 @@ def process_language(container, base_url, lang_code, sub_name, query):
if fmt.get('protocol') == 'm3u8_native': if fmt.get('protocol') == 'm3u8_native':
fmt['__needs_testing'] = True fmt['__needs_testing'] = True
for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]: for s_k, d_k in [('artists', 'creators'), ('track', 'alt_title')]:
v = info.get(s_k) v = info.get(s_k)
if v: if v:
info[d_k] = v info[d_k] = v
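[Annotation] Above, the music metadata migrates from singular strings to plural lists ('artist' becomes 'artists', mirrored into 'creators'), and the "·"-separated byline is split into entries rather than re-joined. A small sketch of both steps with invented sample values:

    raw = 'Stephen · Someone Else'
    artists = [a.strip() for a in raw.split('·')]
    assert artists == ['Stephen', 'Someone Else']

    # 'creators' and 'alt_title' are mirrored from the new source fields
    info = {'artists': artists, 'track': 'Voyeur Girl'}
    for s_k, d_k in [('artists', 'creators'), ('track', 'alt_title')]:
        if v := info.get(s_k):
            info[d_k] = v
    assert info['creators'] == ['Stephen', 'Someone Else']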

View file

@ -258,10 +258,10 @@ def __init__(self, *args, **kwargs):
# Forward urllib3 debug messages to our logger # Forward urllib3 debug messages to our logger
logger = logging.getLogger('urllib3') logger = logging.getLogger('urllib3')
handler = Urllib3LoggingHandler(logger=self._logger) self.__logging_handler = Urllib3LoggingHandler(logger=self._logger)
handler.setFormatter(logging.Formatter('requests: %(message)s')) self.__logging_handler.setFormatter(logging.Formatter('requests: %(message)s'))
handler.addFilter(Urllib3LoggingFilter()) self.__logging_handler.addFilter(Urllib3LoggingFilter())
logger.addHandler(handler) logger.addHandler(self.__logging_handler)
# TODO: Use a logger filter to suppress pool reuse warning instead # TODO: Use a logger filter to suppress pool reuse warning instead
logger.setLevel(logging.ERROR) logger.setLevel(logging.ERROR)
@ -276,6 +276,9 @@ def __init__(self, *args, **kwargs):
def close(self): def close(self):
self._clear_instances() self._clear_instances()
# Remove the logging handler that contains a reference to our logger
# See: https://github.com/yt-dlp/yt-dlp/issues/8922
logging.getLogger('urllib3').removeHandler(self.__logging_handler)
def _check_extensions(self, extensions): def _check_extensions(self, extensions):
super()._check_extensions(extensions) super()._check_extensions(extensions)
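[Annotation] Keeping the handler on self lets close() detach it from the module-level urllib3 logger; otherwise that logger would hold the handler, and through it yt-dlp's logger, alive for the life of the process (issue #8922). A standalone demonstration of the pattern with a hypothetical owner class:

    import gc
    import logging
    import weakref

    class Owner:  # hypothetical stand-in for the requests handler
        def __init__(self):
            self._handler = logging.StreamHandler()
            self._handler.setFormatter(logging.Formatter('requests: %(message)s'))
            logging.getLogger('urllib3').addHandler(self._handler)

        def close(self):
            # without this, logging.getLogger('urllib3').handlers keeps a
            # reference to the handler indefinitely
            logging.getLogger('urllib3').removeHandler(self._handler)

    owner = Owner()
    ref = weakref.ref(owner._handler)
    owner.close()
    del owner
    gc.collect()
    assert ref() is None  # the handler is collectable once detached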

View file

@ -90,10 +90,12 @@ class WebsocketsRH(WebSocketRequestHandler):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
self.__logging_handlers = {}
for name in ('websockets.client', 'websockets.server'): for name in ('websockets.client', 'websockets.server'):
logger = logging.getLogger(name) logger = logging.getLogger(name)
handler = logging.StreamHandler(stream=sys.stdout) handler = logging.StreamHandler(stream=sys.stdout)
handler.setFormatter(logging.Formatter(f'{self.RH_NAME}: %(message)s')) handler.setFormatter(logging.Formatter(f'{self.RH_NAME}: %(message)s'))
self.__logging_handlers[name] = handler
logger.addHandler(handler) logger.addHandler(handler)
if self.verbose: if self.verbose:
logger.setLevel(logging.DEBUG) logger.setLevel(logging.DEBUG)
@ -103,6 +105,12 @@ def _check_extensions(self, extensions):
extensions.pop('timeout', None) extensions.pop('timeout', None)
extensions.pop('cookiejar', None) extensions.pop('cookiejar', None)
def close(self):
# Remove the logging handler that contains a reference to our logger
# See: https://github.com/yt-dlp/yt-dlp/issues/8922
for name, handler in self.__logging_handlers.items():
logging.getLogger(name).removeHandler(handler)
def _send(self, request): def _send(self, request):
timeout = float(request.extensions.get('timeout') or self.timeout) timeout = float(request.extensions.get('timeout') or self.timeout)
headers = self._merge_headers(request.headers) headers = self._merge_headers(request.headers)
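[Annotation] The websockets handler needs the same cleanup, but it attaches one handler per logger name, so it keeps a name-to-handler mapping that close() walks. A compact sketch of that bookkeeping outside the class:

    import logging
    import sys

    handlers = {}  # name -> handler, mirroring self.__logging_handlers
    for name in ('websockets.client', 'websockets.server'):
        handler = logging.StreamHandler(stream=sys.stdout)
        handlers[name] = handler
        logging.getLogger(name).addHandler(handler)

    def close():
        # detach each handler from its logger so neither side is kept alive
        for name, handler in handlers.items():
            logging.getLogger(name).removeHandler(handler)

    close()
    assert not any(logging.getLogger(n).handlers for n in handlers)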

View file

@ -68,6 +68,7 @@ def __init__(self, logger, verbose=False):
def close(self): def close(self):
for handler in self.handlers.values(): for handler in self.handlers.values():
handler.close() handler.close()
self.handlers = {}
def add_handler(self, handler: RequestHandler): def add_handler(self, handler: RequestHandler):
"""Add a handler. If a handler of the same RH_KEY exists, it will overwrite it""" """Add a handler. If a handler of the same RH_KEY exists, it will overwrite it"""

View file

@ -196,9 +196,12 @@ def parse_known_args(self, args=None, values=None, strict=True):
raise raise
return self.check_values(self.values, self.largs) return self.check_values(self.values, self.largs)
def error(self, msg): def _generate_error_message(self, msg):
msg = f'{self.get_prog_name()}: error: {str(msg).strip()}\n' msg = f'{self.get_prog_name()}: error: {str(msg).strip()}\n'
raise optparse.OptParseError(f'{self.get_usage()}\n{msg}' if self.usage else msg) return f'{self.get_usage()}\n{msg}' if self.usage else msg
def error(self, msg):
raise optparse.OptParseError(self._generate_error_message(msg))
def _get_args(self, args): def _get_args(self, args):
return sys.argv[1:] if args is None else list(args) return sys.argv[1:] if args is None else list(args)
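[Annotation] Splitting message construction out of error() means the formatted text can be produced without raising. The sketch below exercises that directly; the subclass simply mirrors the change above:

    import optparse

    class Parser(optparse.OptionParser):
        def _generate_error_message(self, msg):
            msg = f'{self.get_prog_name()}: error: {str(msg).strip()}\n'
            return f'{self.get_usage()}\n{msg}' if self.usage else msg

        def error(self, msg):
            raise optparse.OptParseError(self._generate_error_message(msg))

    parser = Parser(prog='yt-dlp')
    # prints the usage line followed by "yt-dlp: error: ...", no exception raised
    print(parser._generate_error_message('no such option: --frobnicate'))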
@ -476,8 +479,8 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx'], 'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx'],
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx'], 'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx'],
'2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'], '2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'],
'2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter'], '2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter', 'prefer-legacy-http-handler', 'manifest-filesize-approx'],
'2023': ['prefer-legacy-http-handler', 'manifest-filesize-approx'], '2023': [],
} }
}, help=( }, help=(
'Options that can help keep compatibility with youtube-dl or youtube-dlc ' 'Options that can help keep compatibility with youtube-dl or youtube-dlc '
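[Annotation] With the regrouping above, the '2022' alias now carries the former '2023' options directly and '2023' expands to nothing. A hypothetical expansion helper (yt-dlp's real handling lives in its option callbacks) shows how the year aliases chain:

    aliases = {
        '2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'],
        '2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter',
                 'prefer-legacy-http-handler', 'manifest-filesize-approx'],
        '2023': [],
    }

    def expand(name):
        # recursively flatten year aliases into concrete option names
        result = []
        for opt in aliases.get(name, []):
            result += expand(opt) if opt in aliases else [opt]
        return result

    print(expand('2023'))  # [] -- selecting 2023 behaviour disables nothing
    print(expand('2022'))  # the four concrete options now owned by 2022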

View file

@ -86,11 +86,14 @@ def _get_package_paths(*root_paths, containing_folder='plugins'):
parts = Path(*fullname.split('.')) parts = Path(*fullname.split('.'))
for path in orderedSet(candidate_locations, lazy=True): for path in orderedSet(candidate_locations, lazy=True):
candidate = path / parts candidate = path / parts
if candidate.is_dir(): try:
yield candidate if candidate.is_dir():
elif path.suffix in ('.zip', '.egg', '.whl') and path.is_file():
if parts in dirs_in_zip(path):
yield candidate yield candidate
elif path.suffix in ('.zip', '.egg', '.whl') and path.is_file():
if parts in dirs_in_zip(path):
yield candidate
except PermissionError as e:
write_string(f'Permission error while accessing modules in "{e.filename}"\n')
def find_spec(self, fullname, path=None, target=None): def find_spec(self, fullname, path=None, target=None):
if fullname not in self.packages: if fullname not in self.packages:
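[Annotation] Probing a candidate plugin directory can raise PermissionError on locked-down paths (e.g. under a parent directory without execute permission); the guard above reports and skips instead of aborting the import machinery. A standalone sketch of the guarded probe, with invented example paths:

    from pathlib import Path

    def iter_module_dirs(candidate_locations, parts):
        # yield plugin package directories, skipping unreadable candidates
        for path in candidate_locations:
            candidate = Path(path) / parts
            try:
                if candidate.is_dir():
                    yield candidate
            except PermissionError as e:
                print(f'Permission error while accessing modules in "{e.filename}"')

    # '/root/secret' stands in for any path the current user cannot stat
    print(list(iter_module_dirs(['/tmp', '/root/secret'], 'yt_dlp_plugins/extractor')))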

View file

@ -738,9 +738,10 @@ def _get_metadata_opts(self, info):
def add(meta_list, info_list=None): def add(meta_list, info_list=None):
value = next(( value = next((
str(info[key]) for key in [f'{meta_prefix}_'] + list(variadic(info_list or meta_list)) info[key] for key in [f'{meta_prefix}_'] + list(variadic(info_list or meta_list))
if info.get(key) is not None), None) if info.get(key) is not None), None)
if value not in ('', None): if value not in ('', None):
value = ', '.join(map(str, variadic(value)))
value = value.replace('\0', '') # nul character cannot be passed in command line value = value.replace('\0', '') # nul character cannot be passed in command line
metadata['common'].update({meta_f: value for meta_f in variadic(meta_list)}) metadata['common'].update({meta_f: value for meta_f in variadic(meta_list)})
@ -754,10 +755,11 @@ def add(meta_list, info_list=None):
add(('description', 'synopsis'), 'description') add(('description', 'synopsis'), 'description')
add(('purl', 'comment'), 'webpage_url') add(('purl', 'comment'), 'webpage_url')
add('track', 'track_number') add('track', 'track_number')
add('artist', ('artist', 'creator', 'uploader', 'uploader_id')) add('artist', ('artist', 'artists', 'creator', 'creators', 'uploader', 'uploader_id'))
add('genre') add('composer', ('composer', 'composers'))
add('genre', ('genre', 'genres'))
add('album') add('album')
add('album_artist') add('album_artist', ('album_artist', 'album_artists'))
add('disc', 'disc_number') add('disc', 'disc_number')
add('show', 'series') add('show', 'series')
add('season_number') add('season_number')
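[Annotation] The add() helper above now accepts the new list-valued fields (artists, creators, composers, genres, album_artists) alongside their singular forms, keeping the raw value until the end and joining list entries with ', ' for ffmpeg. A standalone sketch of that selection-and-join logic, with variadic() simplified:

    def variadic(x):
        # simplified stand-in for yt-dlp's variadic(): wrap scalars in a tuple
        return x if isinstance(x, (list, tuple)) else (x,)

    def pick(info, keys):
        value = next((info[key] for key in keys if info.get(key) is not None), None)
        if value in ('', None):
            return None
        # join list-valued fields; nul bytes cannot be passed on the command line
        return ', '.join(map(str, variadic(value))).replace('\0', '')

    info = {'artists': ['Stephen', 'Someone Else'], 'uploader': 'ignored'}
    assert pick(info, ('artist', 'artists', 'creator', 'creators', 'uploader')) == 'Stephen, Someone Else'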