Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2024-11-02 14:37:21 +00:00)

Commit 6ef07d4ec9: Merge branch 'master' into fb_parsedata_error
@@ -167,7 +167,8 @@ ### Differences in default behavior
 * `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx`
 * `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx`
 * `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
-* `--compat-options 2022`: Same as `--compat-options playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`. Use this to enable all future compat options
+* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress`
+* `--compat-options 2023`: Same as `--compat-options prefer-legacy-http-handler,manifest-filesize-approx`. Use this to enable all future compat options
 
 
 # INSTALLATION
@@ -1888,6 +1889,9 @@ #### nhkradirulive (NHK らじる★らじる LIVE)
 #### nflplusreplay
 * `type`: Type(s) of game replays to extract. Valid types are: `full_game`, `full_game_spanish`, `condensed_game` and `all_22`. You can use `all` to extract all available replay types, which is the default
 
+#### jiosaavn
+* `bitrate`: Audio bitrates to request. One or more of `16`, `32`, `64`, `128`, `320`. Default is `128,320`
+
 **Note**: These options may be changed/removed in the future without concern for backward compatibility
 
 <!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
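Usage note (added for illustration, not part of the diff): extractor arguments such as the new `jiosaavn` bitrate option are passed with `--extractor-args "jiosaavn:bitrate=16,320"` on the command line, or through the embedding API's `extractor_args` parameter. A minimal sketch assuming yt-dlp's documented dictionary format, where keys are extractor names and values are lists of strings; the URL is a placeholder:

```python
from yt_dlp import YoutubeDL

# Equivalent CLI: yt-dlp --extractor-args "jiosaavn:bitrate=16,320" <url>
opts = {'extractor_args': {'jiosaavn': {'bitrate': ['16', '320']}}}

with YoutubeDL(opts) as ydl:
    ydl.download(['<jiosaavn song url>'])  # placeholder URL, supply a real one
```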
@@ -10,7 +10,7 @@
 import yt_dlp.extractor
 from yt_dlp import YoutubeDL
 from yt_dlp.compat import compat_os_name
-from yt_dlp.utils import preferredencoding, try_call, write_string
+from yt_dlp.utils import preferredencoding, try_call, write_string, find_available_port
 
 if 'pytest' in sys.modules:
     import pytest
@@ -329,3 +329,8 @@ def http_server_port(httpd):
     else:
         sock = httpd.socket
     return sock.getsockname()[1]
+
+
+def verify_address_availability(address):
+    if find_available_port(address) is None:
+        pytest.skip(f'Unable to bind to source address {address} (address may not exist)')
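The new `verify_address_availability` helper skips a test when the requested loopback source address cannot actually be bound on the host, which is what "address may not exist" means in practice. A standard-library sketch of the same check, for background only; the helper itself delegates to yt-dlp's `find_available_port`:

```python
import socket

def can_bind(address):
    """Return True if the OS allows binding a TCP socket to this local address."""
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.bind((address, 0))  # port 0 lets the OS pick any free port
        return True
    except OSError:
        return False

# On a default macOS setup only 127.0.0.1 is configured, so an alias such as
# 127.0.0.53 is typically not bindable and the related tests get skipped.
print(can_bind('127.0.0.1'), can_bind('127.0.0.53'))
```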
@@ -26,7 +26,7 @@
 from email.message import Message
 from http.cookiejar import CookieJar
 
-from test.helper import FakeYDL, http_server_port
+from test.helper import FakeYDL, http_server_port, verify_address_availability
 from yt_dlp.cookies import YoutubeDLCookieJar
 from yt_dlp.dependencies import brotli, requests, urllib3
 from yt_dlp.networking import (
@@ -180,6 +180,12 @@ def do_GET(self):
             self.send_header('Location', '/a/b/./../../headers')
             self.send_header('Content-Length', '0')
             self.end_headers()
+        elif self.path == '/redirect_dotsegments_absolute':
+            self.send_response(301)
+            # redirect to /headers but with dot segments before - absolute url
+            self.send_header('Location', f'http://127.0.0.1:{http_server_port(self.server)}/a/b/./../../headers')
+            self.send_header('Content-Length', '0')
+            self.end_headers()
         elif self.path.startswith('/redirect_'):
            self._redirect()
         elif self.path.startswith('/method'):
@@ -345,16 +351,17 @@ def test_percent_encode(self, handler):
         res.close()
 
     @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
-    def test_remove_dot_segments(self, handler):
-        with handler() as rh:
+    @pytest.mark.parametrize('path', [
+        '/a/b/./../../headers',
+        '/redirect_dotsegments',
+        # https://github.com/yt-dlp/yt-dlp/issues/9020
+        '/redirect_dotsegments_absolute',
+    ])
+    def test_remove_dot_segments(self, handler, path):
+        with handler(verbose=True) as rh:
             # This isn't a comprehensive test,
-            # but it should be enough to check whether the handler is removing dot segments
-            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/a/b/./../../headers'))
-            assert res.status == 200
-            assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
-            res.close()
-
-            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_dotsegments'))
+            # but it should be enough to check whether the handler is removing dot segments in required scenarios
+            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}{path}'))
             assert res.status == 200
             assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
             res.close()
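For context, "removing dot segments" is the path normalization defined in RFC 3986 section 5.2.4; the parametrized cases above check that handlers apply it both to the request path and to redirect `Location` headers, including absolute redirect URLs (the linked issue 9020). The expected result can be sanity-checked with the standard library; this is illustrative only, not how the handlers implement it, and the port number is a placeholder:

```python
from urllib.parse import urljoin

# the dot-segment path used by the test server, resolved against an arbitrary base URL
print(urljoin('http://127.0.0.1:8000/', '/a/b/./../../headers'))
# -> http://127.0.0.1:8000/headers
```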
@@ -538,6 +545,9 @@ def test_timeout(self, handler):
     @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
     def test_source_address(self, handler):
         source_address = f'127.0.0.{random.randint(5, 255)}'
+        # on some systems these loopback addresses we need for testing may not be available
+        # see: https://github.com/yt-dlp/yt-dlp/issues/8890
+        verify_address_availability(source_address)
         with handler(source_address=source_address) as rh:
             data = validate_and_send(
                 rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode()
@ -8,13 +8,9 @@
|
|||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import contextlib
|
||||
import io
|
||||
import platform
|
||||
import random
|
||||
import ssl
|
||||
import urllib.error
|
||||
import warnings
|
||||
|
||||
from yt_dlp.cookies import YoutubeDLCookieJar
|
||||
from yt_dlp.dependencies import certifi
|
||||
|
@ -30,7 +26,6 @@
|
|||
from yt_dlp.networking.exceptions import (
|
||||
HTTPError,
|
||||
IncompleteRead,
|
||||
_CompatHTTPError,
|
||||
)
|
||||
from yt_dlp.socks import ProxyType
|
||||
from yt_dlp.utils.networking import HTTPHeaderDict
|
||||
|
@ -179,11 +174,10 @@ class TestNetworkingExceptions:
|
|||
def create_response(status):
|
||||
return Response(fp=io.BytesIO(b'test'), url='http://example.com', headers={'tesT': 'test'}, status=status)
|
||||
|
||||
@pytest.mark.parametrize('http_error_class', [HTTPError, lambda r: _CompatHTTPError(HTTPError(r))])
|
||||
def test_http_error(self, http_error_class):
|
||||
def test_http_error(self):
|
||||
|
||||
response = self.create_response(403)
|
||||
error = http_error_class(response)
|
||||
error = HTTPError(response)
|
||||
|
||||
assert error.status == 403
|
||||
assert str(error) == error.msg == 'HTTP Error 403: Forbidden'
|
||||
|
@ -194,80 +188,12 @@ def test_http_error(self, http_error_class):
|
|||
assert data == b'test'
|
||||
assert repr(error) == '<HTTPError 403: Forbidden>'
|
||||
|
||||
@pytest.mark.parametrize('http_error_class', [HTTPError, lambda *args, **kwargs: _CompatHTTPError(HTTPError(*args, **kwargs))])
|
||||
def test_redirect_http_error(self, http_error_class):
|
||||
def test_redirect_http_error(self):
|
||||
response = self.create_response(301)
|
||||
error = http_error_class(response, redirect_loop=True)
|
||||
error = HTTPError(response, redirect_loop=True)
|
||||
assert str(error) == error.msg == 'HTTP Error 301: Moved Permanently (redirect loop detected)'
|
||||
assert error.reason == 'Moved Permanently'
|
||||
|
||||
def test_compat_http_error(self):
|
||||
response = self.create_response(403)
|
||||
error = _CompatHTTPError(HTTPError(response))
|
||||
assert isinstance(error, HTTPError)
|
||||
assert isinstance(error, urllib.error.HTTPError)
|
||||
|
||||
@contextlib.contextmanager
|
||||
def raises_deprecation_warning():
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
warnings.simplefilter('always')
|
||||
yield
|
||||
|
||||
if len(w) == 0:
|
||||
pytest.fail('Did not raise DeprecationWarning')
|
||||
if len(w) > 1:
|
||||
pytest.fail(f'Raised multiple warnings: {w}')
|
||||
|
||||
if not issubclass(w[-1].category, DeprecationWarning):
|
||||
pytest.fail(f'Expected DeprecationWarning, got {w[-1].category}')
|
||||
w.clear()
|
||||
|
||||
with raises_deprecation_warning():
|
||||
assert error.code == 403
|
||||
|
||||
with raises_deprecation_warning():
|
||||
assert error.getcode() == 403
|
||||
|
||||
with raises_deprecation_warning():
|
||||
assert error.hdrs is error.response.headers
|
||||
|
||||
with raises_deprecation_warning():
|
||||
assert error.info() is error.response.headers
|
||||
|
||||
with raises_deprecation_warning():
|
||||
assert error.headers is error.response.headers
|
||||
|
||||
with raises_deprecation_warning():
|
||||
assert error.filename == error.response.url
|
||||
|
||||
with raises_deprecation_warning():
|
||||
assert error.url == error.response.url
|
||||
|
||||
with raises_deprecation_warning():
|
||||
assert error.geturl() == error.response.url
|
||||
|
||||
# Passthrough file operations
|
||||
with raises_deprecation_warning():
|
||||
assert error.read() == b'test'
|
||||
|
||||
with raises_deprecation_warning():
|
||||
assert not error.closed
|
||||
|
||||
with raises_deprecation_warning():
|
||||
# Technically Response operations are also passed through, which should not be used.
|
||||
assert error.get_header('test') == 'test'
|
||||
|
||||
# Should not raise a warning
|
||||
error.close()
|
||||
|
||||
@pytest.mark.skipif(
|
||||
platform.python_implementation() == 'PyPy', reason='garbage collector works differently in pypy')
|
||||
def test_compat_http_error_autoclose(self):
|
||||
# Compat HTTPError should not autoclose response
|
||||
response = self.create_response(403)
|
||||
_CompatHTTPError(HTTPError(response))
|
||||
assert not response.closed
|
||||
|
||||
def test_incomplete_read_error(self):
|
||||
error = IncompleteRead(4, 3, cause='test')
|
||||
assert isinstance(error, IncompleteRead)
|
||||
|
|
|
@@ -25,7 +25,7 @@
     ThreadingTCPServer,
 )
 
-from test.helper import http_server_port
+from test.helper import http_server_port, verify_address_availability
 from yt_dlp.networking import Request
 from yt_dlp.networking.exceptions import ProxyError, TransportError
 from yt_dlp.socks import (
@@ -326,6 +326,7 @@ def test_socks4a_domain_target(self, handler, ctx):
     def test_ipv4_client_source_address(self, handler, ctx):
         with ctx.socks_server(Socks4ProxyHandler) as server_address:
             source_address = f'127.0.0.{random.randint(5, 255)}'
+            verify_address_availability(source_address)
             with handler(proxies={'all': f'socks4://{server_address}'},
                          source_address=source_address) as rh:
                 response = ctx.socks_info_request(rh)
@@ -441,6 +442,7 @@ def test_ipv6_socks5_proxy(self, handler, ctx):
     def test_ipv4_client_source_address(self, handler, ctx):
         with ctx.socks_server(Socks5ProxyHandler) as server_address:
             source_address = f'127.0.0.{random.randint(5, 255)}'
+            verify_address_availability(source_address)
             with handler(proxies={'all': f'socks5://{server_address}'}, source_address=source_address) as rh:
                 response = ctx.socks_info_request(rh)
                 assert response['client_address'][0] == source_address
@@ -6,6 +6,8 @@
 
 import pytest
 
+from test.helper import verify_address_availability
+
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 import http.client
@@ -227,6 +229,7 @@ def test_cookies(self, handler):
     @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
     def test_source_address(self, handler):
         source_address = f'127.0.0.{random.randint(5, 255)}'
+        verify_address_availability(source_address)
         with handler(source_address=source_address) as rh:
             ws = validate_and_send(rh, Request(self.ws_base_url))
             ws.send('source_address')
@@ -40,7 +40,6 @@
     NoSupportingHandlers,
     RequestError,
     SSLError,
-    _CompatHTTPError,
     network_exceptions,
 )
 from .plugins import directories as plugin_directories
@@ -2452,7 +2451,7 @@ def selector_function(ctx):
                 # for extractors with incomplete formats (audio only (soundcloud)
                 # or video only (imgur)) best/worst will fallback to
                 # best/worst {video,audio}-only format
-                matches = formats
+                matches = list(filter(lambda f: f.get('vcodec') != 'none' or f.get('acodec') != 'none', formats))
             elif seperate_fallback and not ctx['has_merged_format']:
                 # for compatibility with youtube-dl when there is no pre-merged format
                 matches = list(filter(seperate_fallback, formats))
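The changed line above replaces the bare `matches = formats` fallback with a filter that keeps only formats carrying at least audio or video, so metadata-only entries where both codecs are `'none'` (e.g. storyboards) no longer participate in `best`/`worst` selection. A self-contained sketch of that predicate on made-up format dicts:

```python
formats = [
    {'format_id': 'sb0', 'vcodec': 'none', 'acodec': 'none'},         # storyboard, no media
    {'format_id': '140', 'vcodec': 'none', 'acodec': 'mp4a.40.2'},    # audio-only
    {'format_id': '137', 'vcodec': 'avc1.640028', 'acodec': 'none'},  # video-only
]
matches = list(filter(lambda f: f.get('vcodec') != 'none' or f.get('acodec') != 'none', formats))
print([f['format_id'] for f in matches])  # ['140', '137'] - the storyboard is excluded
```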
@@ -4110,8 +4109,6 @@ def urlopen(self, req):
                         'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
                         'Try using --legacy-server-connect', cause=e) from e
                 raise
-            except HTTPError as e:  # TODO: Remove in a future release
-                raise _CompatHTTPError(e) from e
 
     def build_request_director(self, handlers, preferences=None):
         logger = _YDLLogger(self)
@@ -35,6 +35,7 @@
 from ..dependencies import brotli as compat_brotli  # noqa: F401
 from ..dependencies import websockets as compat_websockets  # noqa: F401
 from ..dependencies.Cryptodome import AES as compat_pycrypto_AES  # noqa: F401
+from ..networking.exceptions import HTTPError as compat_HTTPError  # noqa: F401
 
 passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode'))
 
@@ -70,7 +71,6 @@ def compat_setenv(key, value, env=os.environ):
 compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser
 compat_http_client = http.client
 compat_http_server = http.server
-compat_HTTPError = urllib.error.HTTPError
 compat_input = input
 compat_integer_types = (int, )
 compat_itertools_count = itertools.count
@@ -88,7 +88,7 @@ def compat_setenv(key, value, env=os.environ):
 compat_subprocess_get_DEVNULL = lambda: subprocess.DEVNULL
 compat_tokenize_tokenize = tokenize.tokenize
 compat_urllib_error = urllib.error
-compat_urllib_HTTPError = urllib.error.HTTPError
+compat_urllib_HTTPError = compat_HTTPError
 compat_urllib_parse = urllib.parse
 compat_urllib_parse_parse_qs = urllib.parse.parse_qs
 compat_urllib_parse_quote = urllib.parse.quote
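With `_CompatHTTPError` removed, the legacy `compat_HTTPError` alias now points directly at `yt_dlp.networking.exceptions.HTTPError`. A short sketch of that error's interface, mirroring the assertions in the updated tests; constructing a `Response` by hand like this is for illustration only:

```python
import io

from yt_dlp.networking import Response
from yt_dlp.networking.exceptions import HTTPError

response = Response(fp=io.BytesIO(b'test'), url='http://example.com', headers={'tesT': 'test'}, status=403)
error = HTTPError(response)

assert error.status == 403
assert error.reason == 'Forbidden'
assert str(error) == error.msg == 'HTTP Error 403: Forbidden'
```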
@ -1,6 +1,7 @@
|
|||
import base64
|
||||
import collections
|
||||
import contextlib
|
||||
import glob
|
||||
import http.cookiejar
|
||||
import http.cookies
|
||||
import io
|
||||
|
@ -23,7 +24,8 @@
|
|||
aes_gcm_decrypt_and_verify_bytes,
|
||||
unpad_pkcs7,
|
||||
)
|
||||
from .compat import functools
|
||||
from .compat import functools # isort: split
|
||||
from .compat import compat_os_name
|
||||
from .dependencies import (
|
||||
_SECRETSTORAGE_UNAVAILABLE_REASON,
|
||||
secretstorage,
|
||||
|
@ -31,6 +33,7 @@
|
|||
)
|
||||
from .minicurses import MultilinePrinter, QuietMultilinePrinter
|
||||
from .utils import (
|
||||
DownloadError,
|
||||
Popen,
|
||||
error_to_str,
|
||||
expand_path,
|
||||
|
@ -122,13 +125,14 @@ def _extract_firefox_cookies(profile, container, logger):
|
|||
return YoutubeDLCookieJar()
|
||||
|
||||
if profile is None:
|
||||
search_root = _firefox_browser_dir()
|
||||
search_roots = list(_firefox_browser_dirs())
|
||||
elif _is_path(profile):
|
||||
search_root = profile
|
||||
search_roots = [profile]
|
||||
else:
|
||||
search_root = os.path.join(_firefox_browser_dir(), profile)
|
||||
search_roots = [os.path.join(path, profile) for path in _firefox_browser_dirs()]
|
||||
search_root = ', '.join(map(repr, search_roots))
|
||||
|
||||
cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
|
||||
cookie_database_path = _newest(_firefox_cookie_dbs(search_roots))
|
||||
if cookie_database_path is None:
|
||||
raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
|
||||
logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
|
||||
|
@ -182,12 +186,21 @@ def _extract_firefox_cookies(profile, container, logger):
|
|||
cursor.connection.close()
|
||||
|
||||
|
||||
def _firefox_browser_dir():
|
||||
def _firefox_browser_dirs():
|
||||
if sys.platform in ('cygwin', 'win32'):
|
||||
return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
|
||||
yield os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
|
||||
|
||||
elif sys.platform == 'darwin':
|
||||
return os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
|
||||
return os.path.expanduser('~/.mozilla/firefox')
|
||||
yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
|
||||
|
||||
else:
|
||||
yield from map(os.path.expanduser, ('~/.mozilla/firefox', '~/snap/firefox/common/.mozilla/firefox'))
|
||||
|
||||
|
||||
def _firefox_cookie_dbs(roots):
|
||||
for root in map(os.path.abspath, roots):
|
||||
for pattern in ('', '*/', 'Profiles/*/'):
|
||||
yield from glob.iglob(os.path.join(root, pattern, 'cookies.sqlite'))
|
||||
|
||||
|
||||
def _get_chromium_based_browser_settings(browser_name):
|
||||
|
@ -268,7 +281,7 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
|
|||
logger.error(f'{browser_name} does not support profiles')
|
||||
search_root = config['browser_dir']
|
||||
|
||||
cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
|
||||
cookie_database_path = _newest(_find_files(search_root, 'Cookies', logger))
|
||||
if cookie_database_path is None:
|
||||
raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
|
||||
logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
|
||||
|
@ -307,6 +320,12 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
|
|||
counts['unencrypted'] = unencrypted_cookies
|
||||
logger.debug(f'cookie version breakdown: {counts}')
|
||||
return jar
|
||||
except PermissionError as error:
|
||||
if compat_os_name == 'nt' and error.errno == 13:
|
||||
message = 'Could not copy Chrome cookie database. See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info'
|
||||
logger.error(message)
|
||||
raise DownloadError(message) # force exit
|
||||
raise
|
||||
finally:
|
||||
if cursor is not None:
|
||||
cursor.connection.close()
|
||||
|
@ -947,7 +966,7 @@ def _get_windows_v10_key(browser_root, logger):
|
|||
References:
|
||||
- [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
|
||||
"""
|
||||
path = _find_most_recently_used_file(browser_root, 'Local State', logger)
|
||||
path = _newest(_find_files(browser_root, 'Local State', logger))
|
||||
if path is None:
|
||||
logger.error('could not find local state file')
|
||||
return None
|
||||
|
@ -1049,17 +1068,20 @@ def _get_column_names(cursor, table_name):
|
|||
return [row[1].decode() for row in table_info]
|
||||
|
||||
|
||||
def _find_most_recently_used_file(root, filename, logger):
|
||||
def _newest(files):
|
||||
return max(files, key=lambda path: os.lstat(path).st_mtime, default=None)
|
||||
|
||||
|
||||
def _find_files(root, filename, logger):
|
||||
# if there are multiple browser profiles, take the most recently used one
|
||||
i, paths = 0, []
|
||||
i = 0
|
||||
with _create_progress_bar(logger) as progress_bar:
|
||||
for curr_root, dirs, files in os.walk(root):
|
||||
for curr_root, _, files in os.walk(root):
|
||||
for file in files:
|
||||
i += 1
|
||||
progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched')
|
||||
if file == filename:
|
||||
paths.append(os.path.join(curr_root, file))
|
||||
return None if not paths else max(paths, key=lambda path: os.lstat(path).st_mtime)
|
||||
yield os.path.join(curr_root, file)
|
||||
|
||||
|
||||
def _merge_cookie_jars(jars):
|
||||
|
@ -1073,7 +1095,7 @@ def _merge_cookie_jars(jars):
|
|||
|
||||
|
||||
def _is_path(value):
|
||||
return os.path.sep in value
|
||||
return any(sep in value for sep in (os.path.sep, os.path.altsep) if sep)
|
||||
|
||||
|
||||
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
|
||||
|
|
|
@@ -369,7 +369,10 @@ def fin_fragments():
 
                 return output.getvalue().encode()
 
-            self.download_and_append_fragments(
-                ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
+            if len(fragments) == 1:
+                self.download_and_append_fragments(ctx, fragments, info_dict)
+            else:
+                self.download_and_append_fragments(
+                    ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
         else:
             return self.download_and_append_fragments(ctx, fragments, info_dict)
@ -47,7 +47,7 @@
|
|||
ACastChannelIE,
|
||||
)
|
||||
from .acfun import AcFunVideoIE, AcFunBangumiIE
|
||||
from .adn import ADNIE
|
||||
from .adn import ADNIE, ADNSeasonIE
|
||||
from .adobeconnect import AdobeConnectIE
|
||||
from .adobetv import (
|
||||
AdobeTVEmbedIE,
|
||||
|
@ -93,6 +93,7 @@
|
|||
AluraIE,
|
||||
AluraCourseIE
|
||||
)
|
||||
from .amadeustv import AmadeusTVIE
|
||||
from .amara import AmaraIE
|
||||
from .amcnetworks import AMCNetworksIE
|
||||
from .amazon import (
|
||||
|
@ -137,6 +138,10 @@
|
|||
ARDMediathekCollectionIE,
|
||||
ARDIE,
|
||||
)
|
||||
from .art19 import (
|
||||
Art19IE,
|
||||
Art19ShowIE,
|
||||
)
|
||||
from .arte import (
|
||||
ArteTVIE,
|
||||
ArteTVEmbedIE,
|
||||
|
@ -144,6 +149,7 @@
|
|||
ArteTVCategoryIE,
|
||||
)
|
||||
from .arnes import ArnesIE
|
||||
from .asobichannel import AsobiChannelIE, AsobiChannelTagURLIE
|
||||
from .atresplayer import AtresPlayerIE
|
||||
from .atscaleconf import AtScaleConfEventIE
|
||||
from .atvat import ATVAtIE
|
||||
|
@ -345,6 +351,10 @@
|
|||
ChingariIE,
|
||||
ChingariUserIE,
|
||||
)
|
||||
from .chzzk import (
|
||||
CHZZKLiveIE,
|
||||
CHZZKVideoIE,
|
||||
)
|
||||
from .cinemax import CinemaxIE
|
||||
from .cinetecamilano import CinetecaMilanoIE
|
||||
from .cineverse import (
|
||||
|
@ -363,6 +373,7 @@
|
|||
from .cliprs import ClipRsIE
|
||||
from .closertotruth import CloserToTruthIE
|
||||
from .cloudflarestream import CloudflareStreamIE
|
||||
from .cloudycdn import CloudyCDNIE
|
||||
from .clubic import ClubicIE
|
||||
from .clyp import ClypIE
|
||||
from .cmt import CMTIE
|
||||
|
@ -540,6 +551,7 @@
|
|||
from .eighttracks import EightTracksIE
|
||||
from .einthusan import EinthusanIE
|
||||
from .eitb import EitbIE
|
||||
from .elementorembed import ElementorEmbedIE
|
||||
from .elonet import ElonetIE
|
||||
from .elpais import ElPaisIE
|
||||
from .eltrecetv import ElTreceTVIE
|
||||
|
@ -557,6 +569,7 @@
|
|||
EroProfileIE,
|
||||
EroProfileAlbumIE,
|
||||
)
|
||||
from .err import ERRJupiterIE
|
||||
from .ertgr import (
|
||||
ERTFlixCodenameIE,
|
||||
ERTFlixIE,
|
||||
|
@ -581,6 +594,7 @@
|
|||
FacebookPluginsVideoIE,
|
||||
FacebookRedirectURLIE,
|
||||
FacebookReelIE,
|
||||
FacebookAdsIE,
|
||||
)
|
||||
from .fancode import (
|
||||
FancodeVodIE,
|
||||
|
@ -680,6 +694,10 @@
|
|||
GeniusIE,
|
||||
GeniusLyricsIE,
|
||||
)
|
||||
from .getcourseru import (
|
||||
GetCourseRuPlayerIE,
|
||||
GetCourseRuIE
|
||||
)
|
||||
from .gettr import (
|
||||
GettrIE,
|
||||
GettrStreamingIE,
|
||||
|
@ -787,6 +805,7 @@
|
|||
IHeartRadioIE,
|
||||
IHeartRadioPodcastIE,
|
||||
)
|
||||
from .ilpost import IlPostIE
|
||||
from .iltalehti import IltalehtiIE
|
||||
from .imdb import (
|
||||
ImdbIE,
|
||||
|
@ -899,6 +918,7 @@
|
|||
from .kth import KTHIE
|
||||
from .krasview import KrasViewIE
|
||||
from .ku6 import Ku6IE
|
||||
from .kukululive import KukuluLiveIE
|
||||
from .kusi import KUSIIE
|
||||
from .kuwo import (
|
||||
KuwoIE,
|
||||
|
@ -987,6 +1007,11 @@
|
|||
LRTVODIE,
|
||||
LRTStreamIE
|
||||
)
|
||||
from .lsm import (
|
||||
LSMLREmbedIE,
|
||||
LSMLTVEmbedIE,
|
||||
LSMReplayIE
|
||||
)
|
||||
from .lumni import (
|
||||
LumniIE
|
||||
)
|
||||
|
@ -996,7 +1021,7 @@
|
|||
)
|
||||
from .maariv import MaarivIE
|
||||
from .magellantv import MagellanTVIE
|
||||
from .magentamusik360 import MagentaMusik360IE
|
||||
from .magentamusik import MagentaMusikIE
|
||||
from .mailru import (
|
||||
MailRuIE,
|
||||
MailRuMusicIE,
|
||||
|
@ -1098,6 +1123,7 @@
|
|||
MotherlessIE,
|
||||
MotherlessGroupIE,
|
||||
MotherlessGalleryIE,
|
||||
MotherlessUploaderIE,
|
||||
)
|
||||
from .motorsport import MotorsportIE
|
||||
from .moviepilot import MoviepilotIE
|
||||
|
@ -1124,6 +1150,11 @@
|
|||
MusicdexArtistIE,
|
||||
MusicdexPlaylistIE,
|
||||
)
|
||||
from .mx3 import (
|
||||
Mx3IE,
|
||||
Mx3NeoIE,
|
||||
Mx3VolksmusikIE,
|
||||
)
|
||||
from .mxplayer import (
|
||||
MxplayerIE,
|
||||
MxplayerShowIE,
|
||||
|
@ -1263,6 +1294,7 @@
|
|||
NiconicoChannelPlusChannelLivesIE,
|
||||
)
|
||||
from .ninegag import NineGagIE
|
||||
from .ninenews import NineNewsIE
|
||||
from .ninenow import NineNowIE
|
||||
from .nintendo import NintendoIE
|
||||
from .nitter import NitterIE
|
||||
|
@ -1579,6 +1611,7 @@
|
|||
RedBullIE,
|
||||
)
|
||||
from .reddit import RedditIE
|
||||
from .redge import RedCDNLivxIE
|
||||
from .redgifs import (
|
||||
RedGifsIE,
|
||||
RedGifsSearchIE,
|
||||
|
@ -1594,7 +1627,10 @@
|
|||
from .reuters import ReutersIE
|
||||
from .reverbnation import ReverbNationIE
|
||||
from .rheinmaintv import RheinMainTVIE
|
||||
from .rinsefm import RinseFMIE
|
||||
from .rinsefm import (
|
||||
RinseFMIE,
|
||||
RinseFMArtistPlaylistIE,
|
||||
)
|
||||
from .rmcdecouverte import RMCDecouverteIE
|
||||
from .rockstargames import RockstarGamesIE
|
||||
from .rokfin import (
|
||||
|
@ -1710,6 +1746,7 @@
|
|||
)
|
||||
from .scrolller import ScrolllerIE
|
||||
from .seeker import SeekerIE
|
||||
from .sejmpl import SejmIE
|
||||
from .senalcolombia import SenalColombiaLiveIE
|
||||
from .senategov import SenateISVPIE, SenateGovIE
|
||||
from .sendtonews import SendtoNewsIE
|
||||
|
@ -2002,6 +2039,7 @@
|
|||
TrovoChannelClipIE,
|
||||
)
|
||||
from .trtcocuk import TrtCocukVideoIE
|
||||
from .trtworld import TrtWorldIE
|
||||
from .trueid import TrueIDIE
|
||||
from .trunews import TruNewsIE
|
||||
from .truth import TruthIE
|
||||
|
|
|
@ -92,6 +92,8 @@ def abematv_license_open(self, url):
|
|||
|
||||
|
||||
class AbemaTVBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'abematv'
|
||||
|
||||
_USERTOKEN = None
|
||||
_DEVICE_ID = None
|
||||
_MEDIATOKEN = None
|
||||
|
@ -136,11 +138,15 @@ def _get_device_token(self):
|
|||
if self._USERTOKEN:
|
||||
return self._USERTOKEN
|
||||
|
||||
add_opener(self._downloader, AbemaLicenseHandler(self))
|
||||
|
||||
username, _ = self._get_login_info()
|
||||
AbemaTVBaseIE._USERTOKEN = username and self.cache.load(self._NETRC_MACHINE, username)
|
||||
auth_cache = username and self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19')
|
||||
AbemaTVBaseIE._USERTOKEN = auth_cache and auth_cache.get('usertoken')
|
||||
if AbemaTVBaseIE._USERTOKEN:
|
||||
# try authentication with locally stored token
|
||||
try:
|
||||
AbemaTVBaseIE._DEVICE_ID = auth_cache.get('device_id')
|
||||
self._get_media_token(True)
|
||||
return
|
||||
except ExtractorError as e:
|
||||
|
@ -159,7 +165,6 @@ def _get_device_token(self):
|
|||
})
|
||||
AbemaTVBaseIE._USERTOKEN = user_data['token']
|
||||
|
||||
add_opener(self._downloader, AbemaLicenseHandler(self))
|
||||
return self._USERTOKEN
|
||||
|
||||
def _get_media_token(self, invalidate=False, to_show=True):
|
||||
|
@ -181,6 +186,37 @@ def _get_media_token(self, invalidate=False, to_show=True):
|
|||
|
||||
return self._MEDIATOKEN
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
self._get_device_token()
|
||||
if self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19') and self._get_media_token():
|
||||
self.write_debug('Skipping logging in')
|
||||
return
|
||||
|
||||
if '@' in username: # don't strictly check if it's email address or not
|
||||
ep, method = 'user/email', 'email'
|
||||
else:
|
||||
ep, method = 'oneTimePassword', 'userId'
|
||||
|
||||
login_response = self._download_json(
|
||||
f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
|
||||
data=json.dumps({
|
||||
method: username,
|
||||
'password': password
|
||||
}).encode('utf-8'), headers={
|
||||
'Authorization': f'bearer {self._get_device_token()}',
|
||||
'Origin': 'https://abema.tv',
|
||||
'Referer': 'https://abema.tv/',
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
|
||||
AbemaTVBaseIE._USERTOKEN = login_response['token']
|
||||
self._get_media_token(True)
|
||||
auth_cache = {
|
||||
'device_id': AbemaTVBaseIE._DEVICE_ID,
|
||||
'usertoken': AbemaTVBaseIE._USERTOKEN,
|
||||
}
|
||||
self.cache.store(self._NETRC_MACHINE, username, auth_cache)
|
||||
|
||||
def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'):
|
||||
return self._download_json(
|
||||
f'https://api.abema.io/{endpoint}', video_id, query=query or {},
|
||||
|
@ -204,7 +240,6 @@ def _extract_breadcrumb_list(self, webpage, video_id):
|
|||
|
||||
class AbemaTVIE(AbemaTVBaseIE):
|
||||
_VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
|
||||
_NETRC_MACHINE = 'abematv'
|
||||
_TESTS = [{
|
||||
'url': 'https://abema.tv/video/episode/194-25_s2_p1',
|
||||
'info_dict': {
|
||||
|
@ -253,33 +288,6 @@ class AbemaTVIE(AbemaTVBaseIE):
|
|||
}]
|
||||
_TIMETABLE = None
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
self._get_device_token()
|
||||
if self.cache.load(self._NETRC_MACHINE, username) and self._get_media_token():
|
||||
self.write_debug('Skipping logging in')
|
||||
return
|
||||
|
||||
if '@' in username: # don't strictly check if it's email address or not
|
||||
ep, method = 'user/email', 'email'
|
||||
else:
|
||||
ep, method = 'oneTimePassword', 'userId'
|
||||
|
||||
login_response = self._download_json(
|
||||
f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
|
||||
data=json.dumps({
|
||||
method: username,
|
||||
'password': password
|
||||
}).encode('utf-8'), headers={
|
||||
'Authorization': f'bearer {self._get_device_token()}',
|
||||
'Origin': 'https://abema.tv',
|
||||
'Referer': 'https://abema.tv/',
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
|
||||
AbemaTVBaseIE._USERTOKEN = login_response['token']
|
||||
self._get_media_token(True)
|
||||
self.cache.store(self._NETRC_MACHINE, username, AbemaTVBaseIE._USERTOKEN)
|
||||
|
||||
def _real_extract(self, url):
|
||||
# starting download using infojson from this extractor is undefined behavior,
|
||||
# and never be fixed in the future; you must trigger downloads by directly specifying URL.
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
import json
|
||||
import os
|
||||
import random
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
|
||||
|
@ -17,17 +18,38 @@
|
|||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
long_to_bytes,
|
||||
parse_iso8601,
|
||||
pkcs1pad,
|
||||
strip_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ADNIE(InfoExtractor):
|
||||
class ADNBaseIE(InfoExtractor):
|
||||
IE_DESC = 'Animation Digital Network'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
|
||||
_NETRC_MACHINE = 'animationdigitalnetwork'
|
||||
_BASE = 'animationdigitalnetwork.fr'
|
||||
_API_BASE_URL = f'https://gw.api.{_BASE}/'
|
||||
_PLAYER_BASE_URL = f'{_API_BASE_URL}player/'
|
||||
_HEADERS = {}
|
||||
_LOGIN_ERR_MESSAGE = 'Unable to log in'
|
||||
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
|
||||
_POS_ALIGN_MAP = {
|
||||
'start': 1,
|
||||
'end': 3,
|
||||
}
|
||||
_LINE_ALIGN_MAP = {
|
||||
'middle': 8,
|
||||
'end': 4,
|
||||
}
|
||||
|
||||
|
||||
class ADNIE(ADNBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/[^/?#]+/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
|
||||
'md5': '1c9ef066ceb302c86f80c2b371615261',
|
||||
|
@ -44,29 +66,35 @@ class ADNIE(InfoExtractor):
|
|||
'season_number': 1,
|
||||
'episode': 'À ce soir !',
|
||||
'episode_number': 1,
|
||||
'thumbnail': str,
|
||||
'season': 'Season 1',
|
||||
},
|
||||
'skip': 'Only available in region (FR, ...)',
|
||||
'skip': 'Only available in French and German speaking Europe',
|
||||
}, {
|
||||
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://animationdigitalnetwork.de/video/the-eminence-in-shadow/23550-folge-1',
|
||||
'md5': '5c5651bf5791fa6fcd7906012b9d94e8',
|
||||
'info_dict': {
|
||||
'id': '23550',
|
||||
'ext': 'mp4',
|
||||
'episode_number': 1,
|
||||
'duration': 1417,
|
||||
'release_date': '20231004',
|
||||
'series': 'The Eminence in Shadow',
|
||||
'season_number': 2,
|
||||
'episode': str,
|
||||
'title': str,
|
||||
'thumbnail': str,
|
||||
'season': 'Season 2',
|
||||
'comment_count': int,
|
||||
'average_rating': float,
|
||||
'description': str,
|
||||
},
|
||||
# 'skip': 'Only available in French and German speaking Europe',
|
||||
}]
|
||||
|
||||
_NETRC_MACHINE = 'animationdigitalnetwork'
|
||||
_BASE = 'animationdigitalnetwork.fr'
|
||||
_API_BASE_URL = 'https://gw.api.' + _BASE + '/'
|
||||
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
|
||||
_HEADERS = {}
|
||||
_LOGIN_ERR_MESSAGE = 'Unable to log in'
|
||||
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
|
||||
_POS_ALIGN_MAP = {
|
||||
'start': 1,
|
||||
'end': 3,
|
||||
}
|
||||
_LINE_ALIGN_MAP = {
|
||||
'middle': 8,
|
||||
'end': 4,
|
||||
}
|
||||
|
||||
def _get_subtitles(self, sub_url, video_id):
|
||||
if not sub_url:
|
||||
return None
|
||||
|
@ -116,6 +144,8 @@ def _get_subtitles(self, sub_url, video_id):
|
|||
|
||||
if sub_lang == 'vostf':
|
||||
sub_lang = 'fr'
|
||||
elif sub_lang == 'vostde':
|
||||
sub_lang = 'de'
|
||||
subtitles.setdefault(sub_lang, []).extend([{
|
||||
'ext': 'json',
|
||||
'data': json.dumps(sub),
|
||||
|
@ -147,7 +177,7 @@ def _perform_login(self, username, password):
|
|||
self.report_warning(message or self._LOGIN_ERR_MESSAGE)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
lang, video_id = self._match_valid_url(url).group('lang', 'id')
|
||||
video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
|
||||
player = self._download_json(
|
||||
video_base_url + 'configuration', video_id,
|
||||
|
@ -157,12 +187,15 @@ def _real_extract(self, url):
|
|||
|
||||
user = options['user']
|
||||
if not user.get('hasAccess'):
|
||||
self.raise_login_required()
|
||||
start_date = traverse_obj(options, ('video', 'startDate', {str}))
|
||||
if (parse_iso8601(start_date) or 0) > time.time():
|
||||
raise ExtractorError(f'This video is not available yet. Release date: {start_date}', expected=True)
|
||||
self.raise_login_required('This video requires a subscription', method='password')
|
||||
|
||||
token = self._download_json(
|
||||
user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
|
||||
video_id, 'Downloading access token', headers={
|
||||
'x-player-refresh-token': user['refreshToken']
|
||||
'X-Player-Refresh-Token': user['refreshToken'],
|
||||
}, data=b'')['token']
|
||||
|
||||
links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
|
||||
|
@ -184,7 +217,9 @@ def _real_extract(self, url):
|
|||
try:
|
||||
links_data = self._download_json(
|
||||
links_url, video_id, 'Downloading links JSON metadata', headers={
|
||||
'X-Player-Token': authorization
|
||||
'X-Player-Token': authorization,
|
||||
'X-Target-Distribution': lang,
|
||||
**self._HEADERS
|
||||
}, query={
|
||||
'freeWithAds': 'true',
|
||||
'adaptive': 'false',
|
||||
|
@ -232,8 +267,14 @@ def _real_extract(self, url):
|
|||
if format_id == 'vf':
|
||||
for f in m3u8_formats:
|
||||
f['language'] = 'fr'
|
||||
elif format_id == 'vde':
|
||||
for f in m3u8_formats:
|
||||
f['language'] = 'de'
|
||||
formats.extend(m3u8_formats)
|
||||
|
||||
if not formats:
|
||||
self.raise_login_required('This video requires a subscription', method='password')
|
||||
|
||||
video = (self._download_json(
|
||||
self._API_BASE_URL + 'video/%s' % video_id, video_id,
|
||||
'Downloading additional video metadata', fatal=False) or {}).get('video') or {}
|
||||
|
@ -255,3 +296,40 @@ def _real_extract(self, url):
|
|||
'average_rating': float_or_none(video.get('rating') or metas.get('rating')),
|
||||
'comment_count': int_or_none(video.get('commentsCount')),
|
||||
}
|
||||
|
||||
|
||||
class ADNSeasonIE(ADNBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/(?P<id>[^/?#]+)/?(?:$|[#?])'
|
||||
_TESTS = [{
|
||||
'url': 'https://animationdigitalnetwork.fr/video/tokyo-mew-mew-new',
|
||||
'playlist_count': 12,
|
||||
'info_dict': {
|
||||
'id': '911',
|
||||
'title': 'Tokyo Mew Mew New',
|
||||
},
|
||||
# 'skip': 'Only available in French and German speaking Europe',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, video_show_slug = self._match_valid_url(url).group('lang', 'id')
|
||||
show = self._download_json(
|
||||
f'{self._API_BASE_URL}show/{video_show_slug}/', video_show_slug,
|
||||
'Downloading show JSON metadata', headers=self._HEADERS)['show']
|
||||
show_id = str(show['id'])
|
||||
episodes = self._download_json(
|
||||
f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug,
|
||||
'Downloading episode list', headers={
|
||||
'X-Target-Distribution': lang,
|
||||
**self._HEADERS
|
||||
}, query={
|
||||
'order': 'asc',
|
||||
'limit': '-1',
|
||||
})
|
||||
|
||||
def entries():
|
||||
for episode_id in traverse_obj(episodes, ('videos', ..., 'id', {str_or_none})):
|
||||
yield self.url_result(
|
||||
f'https://animationdigitalnetwork.{lang}/video/{video_show_slug}/{episode_id}',
|
||||
ADNIE, episode_id)
|
||||
|
||||
return self.playlist_result(entries(), show_id, show.get('title'))
|
||||
|
|
|
@@ -93,7 +93,7 @@ def _extract_aetn_info(self, domain, filter_key, filter_value, url):
         resource = self._get_mvpd_resource(
             requestor_id, theplatform_metadata['title'],
             theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
-            theplatform_metadata['ratings'][0]['rating'])
+            traverse_obj(theplatform_metadata, ('ratings', 0, 'rating')))
         auth = self._extract_mvpd_auth(
             url, video_id, requestor_id, resource)
         info.update(self._extract_aen_smil(media_url, video_id, auth))
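Swapping the direct `['ratings'][0]['rating']` lookup for `traverse_obj` makes the rating optional: metadata without ratings now yields `None` instead of raising. An illustrative sketch with a made-up payload:

```python
from yt_dlp.utils.traversal import traverse_obj

theplatform_metadata = {'title': 'Some Program', 'ratings': []}  # hypothetical payload

# theplatform_metadata['ratings'][0]['rating']  # old code path: raises IndexError here
print(traverse_obj(theplatform_metadata, ('ratings', 0, 'rating')))  # new code path: None
```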
yt_dlp/extractor/amadeustv.py (new file, 77 lines)
@ -0,0 +1,77 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class AmadeusTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?amadeus\.tv/library/(?P<id>[\da-f]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.amadeus.tv/library/65091a87ff85af59d9fc54c3',
|
||||
'info_dict': {
|
||||
'id': '5576678021301411311',
|
||||
'ext': 'mp4',
|
||||
'title': 'Jieon Park - 第五届珠海莫扎特国际青少年音乐周小提琴C组第三轮',
|
||||
'thumbnail': 'http://1253584441.vod2.myqcloud.com/a0046a27vodtransbj1253584441/7db4af535576678021301411311/coverBySnapshot_10_0.jpg',
|
||||
'duration': 1264.8,
|
||||
'upload_date': '20230918',
|
||||
'timestamp': 1695034800,
|
||||
'display_id': '65091a87ff85af59d9fc54c3',
|
||||
'view_count': int,
|
||||
'description': 'md5:a0357b9c215489e2067cbae0b777bb95',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
nuxt_data = self._search_nuxt_data(webpage, display_id, traverse=('fetch', '0'))
|
||||
video_id = traverse_obj(nuxt_data, ('item', 'video', {str}))
|
||||
|
||||
if not video_id:
|
||||
raise ExtractorError('Unable to extract actual video ID')
|
||||
|
||||
video_data = self._download_json(
|
||||
f'http://playvideo.qcloud.com/getplayinfo/v2/1253584441/{video_id}',
|
||||
video_id, headers={'Referer': 'http://www.amadeus.tv/'})
|
||||
|
||||
formats = []
|
||||
for video in traverse_obj(video_data, ('videoInfo', ('sourceVideo', ('transcodeList', ...)), {dict})):
|
||||
if not url_or_none(video.get('url')):
|
||||
continue
|
||||
formats.append({
|
||||
**traverse_obj(video, {
|
||||
'url': 'url',
|
||||
'format_id': ('definition', {lambda x: f'http-{x or "0"}'}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
'filesize': (('totalSize', 'size'), {int_or_none}),
|
||||
'vcodec': ('videoStreamList', 0, 'codec'),
|
||||
'acodec': ('audioStreamList', 0, 'codec'),
|
||||
'fps': ('videoStreamList', 0, 'fps', {float_or_none}),
|
||||
}, get_all=False),
|
||||
'http_headers': {'Referer': 'http://www.amadeus.tv/'},
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(video_data, {
|
||||
'title': ('videoInfo', 'basicInfo', 'name', {str}),
|
||||
'thumbnail': ('coverInfo', 'coverUrl', {url_or_none}),
|
||||
'duration': ('videoInfo', 'sourceVideo', ('floatDuration', 'duration'), {float_or_none}),
|
||||
}, get_all=False),
|
||||
**traverse_obj(nuxt_data, ('item', {
|
||||
'title': (('title', 'title_en', 'title_cn'), {str}),
|
||||
'description': (('description', 'description_en', 'description_cn'), {str}),
|
||||
'timestamp': ('date', {parse_iso8601}),
|
||||
'view_count': ('view', {int_or_none}),
|
||||
}), get_all=False),
|
||||
}
|
|
@ -8,6 +8,7 @@
|
|||
determine_ext,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
jwt_decode_hs256,
|
||||
make_archive_id,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
|
@ -238,6 +239,7 @@ class ARDBetaMediathekIE(InfoExtractor):
|
|||
(?P<id>[a-zA-Z0-9]+)
|
||||
/?(?:[?#]|$)'''
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
_TOKEN_URL = 'https://sso.ardmediathek.de/sso/token'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
|
||||
|
@ -359,12 +361,27 @@ def _extract_episode_info(self, title):
|
|||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
query = {'embedded': 'false', 'mcV6': 'true'}
|
||||
headers = {}
|
||||
|
||||
if self._get_cookies(self._TOKEN_URL).get('ams'):
|
||||
token = self._download_json(
|
||||
self._TOKEN_URL, display_id, 'Fetching token for age verification',
|
||||
'Unable to fetch age verification token', fatal=False)
|
||||
id_token = traverse_obj(token, ('idToken', {str}))
|
||||
decoded_token = traverse_obj(id_token, ({jwt_decode_hs256}, {dict}))
|
||||
user_id = traverse_obj(decoded_token, (('user_id', 'sub'), {str}), get_all=False)
|
||||
if not user_id:
|
||||
self.report_warning('Unable to extract token, continuing without authentication')
|
||||
else:
|
||||
headers['x-authorization'] = f'Bearer {id_token}'
|
||||
query['userId'] = user_id
|
||||
if decoded_token.get('age_rating') != 18:
|
||||
self.report_warning('Account is not verified as 18+; video may be unavailable')
|
||||
|
||||
page_data = self._download_json(
|
||||
f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{display_id}', display_id, query={
|
||||
'embedded': 'false',
|
||||
'mcV6': 'true',
|
||||
})
|
||||
f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{display_id}',
|
||||
display_id, query=query, headers=headers)
|
||||
|
||||
# For user convenience we use the old contentId instead of the longer crid
|
||||
# Ref: https://github.com/yt-dlp/yt-dlp/issues/8731#issuecomment-1874398283
|
||||
|
@ -383,7 +400,7 @@ def _real_extract(self, url):
|
|||
media_data = traverse_obj(player_data, ('mediaCollection', 'embedded', {dict}))
|
||||
|
||||
if player_data.get('blockedByFsk'):
|
||||
self.raise_no_formats('This video is only available after 22:00', expected=True)
|
||||
self.raise_login_required('This video is only available for age verified users or after 22:00')
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
|
|
yt_dlp/extractor/art19.py (new file, 303 lines)
@ -0,0 +1,303 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none, int_or_none, parse_iso8601, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class Art19IE(InfoExtractor):
|
||||
_UUID_REGEX = r'[\da-f]{8}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{12}'
|
||||
_VALID_URL = [
|
||||
rf'https?://(?:www\.)?art19\.com/shows/[^/#?]+/episodes/(?P<id>{_UUID_REGEX})',
|
||||
rf'https?://rss\.art19\.com/episodes/(?P<id>{_UUID_REGEX})\.mp3',
|
||||
]
|
||||
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL[0]})']
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://rss.art19.com/episodes/5ba1413c-48b8-472b-9cc3-cfd952340bdb.mp3',
|
||||
'info_dict': {
|
||||
'id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb',
|
||||
'ext': 'mp3',
|
||||
'title': 'Why Did DeSantis Drop Out?',
|
||||
'series': 'The Daily Briefing',
|
||||
'release_timestamp': 1705941275,
|
||||
'description': 'md5:da38961da4a3f7e419471365e3c6b49f',
|
||||
'episode': 'Episode 582',
|
||||
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
|
||||
'series_id': 'ed52a0ab-08b1-4def-8afc-549e4d93296d',
|
||||
'upload_date': '20240122',
|
||||
'timestamp': 1705940815,
|
||||
'episode_number': 582,
|
||||
'modified_date': '20240122',
|
||||
'episode_id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb',
|
||||
'modified_timestamp': 1705941275,
|
||||
'release_date': '20240122',
|
||||
'duration': 527.4,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://art19.com/shows/scamfluencers/episodes/8319b776-4153-4d22-8630-631f204a03dd',
|
||||
'info_dict': {
|
||||
'id': '8319b776-4153-4d22-8630-631f204a03dd',
|
||||
'ext': 'mp3',
|
||||
'title': 'Martha Stewart: The Homemaker Hustler Part 2',
|
||||
'modified_date': '20240116',
|
||||
'upload_date': '20240105',
|
||||
'modified_timestamp': 1705435802,
|
||||
'episode_id': '8319b776-4153-4d22-8630-631f204a03dd',
|
||||
'series_id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75',
|
||||
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
|
||||
'description': 'md5:4aa7cfd1358dc57e729835bc208d7893',
|
||||
'release_timestamp': 1705305660,
|
||||
'release_date': '20240115',
|
||||
'timestamp': 1704481536,
|
||||
'episode_number': 88,
|
||||
'series': 'Scamfluencers',
|
||||
'duration': 2588.37501,
|
||||
'episode': 'Episode 88',
|
||||
},
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://www.nu.nl/formule-1/6291456/verstappen-wordt-een-synoniem-voor-formule-1.html',
|
||||
'info_dict': {
|
||||
'id': '7d42626a-7301-47db-bb8a-3b6f054d77d7',
|
||||
'ext': 'mp3',
|
||||
'title': "'Verstappen wordt een synoniem voor Formule 1'",
|
||||
'season': 'Seizoen 6',
|
||||
'description': 'md5:39a7159a31c4cda312b2e893bdd5c071',
|
||||
'episode_id': '7d42626a-7301-47db-bb8a-3b6f054d77d7',
|
||||
'duration': 3061.82111,
|
||||
'series_id': '93f4e113-2a60-4609-a564-755058fa40d8',
|
||||
'release_date': '20231126',
|
||||
'modified_timestamp': 1701156004,
|
||||
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
|
||||
'season_number': 6,
|
||||
'episode_number': 52,
|
||||
'modified_date': '20231128',
|
||||
'upload_date': '20231126',
|
||||
'timestamp': 1701025981,
|
||||
'season_id': '36097c1e-7455-490d-a2fe-e2f10b4d5f26',
|
||||
'series': 'De Boordradio',
|
||||
'release_timestamp': 1701026308,
|
||||
'episode': 'Episode 52',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.wishtv.com/podcast-episode/larry-bucshon-announces-retirement-from-congress/',
|
||||
'info_dict': {
|
||||
'id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0',
|
||||
'ext': 'mp3',
|
||||
'title': 'Larry Bucshon announces retirement from congress',
|
||||
'upload_date': '20240115',
|
||||
'episode_number': 148,
|
||||
'episode': 'Episode 148',
|
||||
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
|
||||
'release_date': '20240115',
|
||||
'timestamp': 1705328205,
|
||||
'release_timestamp': 1705329275,
|
||||
'series': 'All INdiana Politics',
|
||||
'modified_date': '20240117',
|
||||
'modified_timestamp': 1705458901,
|
||||
'series_id': 'c4af6c27-b10f-4ff2-9f84-0f407df86ff1',
|
||||
'episode_id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0',
|
||||
'description': 'md5:53b5239e4d14973a87125c217c255b2a',
|
||||
'duration': 1256.18848,
|
||||
},
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def _extract_embed_urls(cls, url, webpage):
|
||||
yield from super()._extract_embed_urls(url, webpage)
|
||||
for episode_id in re.findall(
|
||||
rf'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-episode-id=[\'"]({cls._UUID_REGEX})[\'"]', webpage):
|
||||
yield f'https://rss.art19.com/episodes/{episode_id}.mp3'
|
||||
|
||||
def _real_extract(self, url):
|
||||
episode_id = self._match_id(url)
|
||||
|
||||
player_metadata = self._download_json(
|
||||
f'https://art19.com/episodes/{episode_id}', episode_id,
|
||||
note='Downloading player metadata', fatal=False,
|
||||
headers={'Accept': 'application/vnd.art19.v0+json'})
|
||||
rss_metadata = self._download_json(
|
||||
f'https://rss.art19.com/episodes/{episode_id}.json', episode_id, fatal=False,
|
||||
note='Downloading RSS metadata')
|
||||
|
||||
formats = [{
|
||||
'format_id': 'direct',
|
||||
'url': f'https://rss.art19.com/episodes/{episode_id}.mp3',
|
||||
'vcodec': 'none',
|
||||
'acodec': 'mp3',
|
||||
}]
|
||||
for fmt_id, fmt_data in traverse_obj(rss_metadata, ('content', 'media', {dict.items}, ...)):
|
||||
if fmt_id == 'waveform_bin':
|
||||
continue
|
||||
fmt_url = traverse_obj(fmt_data, ('url', {url_or_none}))
|
||||
if not fmt_url:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': fmt_id,
|
||||
'url': fmt_url,
|
||||
'vcodec': 'none',
|
||||
'acodec': fmt_id,
|
||||
'quality': -2 if fmt_id == 'ogg' else -1,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': episode_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(player_metadata, ('episode', {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description_plain', {str}),
|
||||
'episode_id': ('id', {str}),
|
||||
'episode_number': ('episode_number', {int_or_none}),
|
||||
'season_id': ('season_id', {str}),
|
||||
'series_id': ('series_id', {str}),
|
||||
'timestamp': ('created_at', {parse_iso8601}),
|
||||
'release_timestamp': ('released_at', {parse_iso8601}),
|
||||
'modified_timestamp': ('updated_at', {parse_iso8601})
|
||||
})),
|
||||
**traverse_obj(rss_metadata, ('content', {
|
||||
'title': ('episode_title', {str}),
|
||||
'description': ('episode_description_plain', {str}),
|
||||
'episode_id': ('episode_id', {str}),
|
||||
'episode_number': ('episode_number', {int_or_none}),
|
||||
'season': ('season_title', {str}),
|
||||
'season_id': ('season_id', {str}),
|
||||
'season_number': ('season_number', {int_or_none}),
|
||||
'series': ('series_title', {str}),
|
||||
'series_id': ('series_id', {str}),
|
||||
'thumbnail': ('cover_image', {url_or_none}),
|
||||
'duration': ('duration', {float_or_none}),
|
||||
})),
|
||||
}
|
||||
|
||||
|
||||
class Art19ShowIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?art19\.com/shows/(?P<id>[\w-]+)(?:/embed)?/?'
|
||||
_VALID_URL = [
|
||||
rf'{_VALID_URL_BASE}(?:$|[#?])',
|
||||
r'https?://rss\.art19\.com/(?P<id>[\w-]+)/?(?:$|[#?])',
|
||||
]
|
||||
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL_BASE}[^\'"])']
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.art19.com/shows/5898c087-a14f-48dc-b6fc-a2280a1ff6e0/',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0',
|
||||
'display_id': 'echt-gebeurd',
|
||||
'title': 'Echt Gebeurd',
|
||||
'description': 'md5:5fd11dc80b76e51ffd34b6067fd5e560',
|
||||
'timestamp': 1492642167,
|
||||
'upload_date': '20170419',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'tags': 'count:7',
|
||||
},
|
||||
'playlist_mincount': 425,
|
||||
}, {
|
||||
'url': 'https://www.art19.com/shows/echt-gebeurd',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0',
|
||||
'display_id': 'echt-gebeurd',
|
||||
'title': 'Echt Gebeurd',
|
||||
'description': 'md5:5fd11dc80b76e51ffd34b6067fd5e560',
|
||||
'timestamp': 1492642167,
|
||||
'upload_date': '20170419',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'tags': 'count:7',
|
||||
},
|
||||
'playlist_mincount': 425,
|
||||
}, {
|
||||
'url': 'https://rss.art19.com/scamfluencers',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75',
|
||||
'display_id': 'scamfluencers',
|
||||
'title': 'Scamfluencers',
|
||||
'description': 'md5:7d239d670c0ced6dadbf71c4caf764b7',
|
||||
'timestamp': 1647368573,
|
||||
'upload_date': '20220315',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'tags': [],
|
||||
},
|
||||
'playlist_mincount': 90,
|
||||
}, {
|
||||
'url': 'https://art19.com/shows/enthuellt/embed',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'e2cacf57-bb8a-4263-aa81-719bcdd4f80c',
|
||||
'display_id': 'enthuellt',
|
||||
'title': 'Enthüllt',
|
||||
'description': 'md5:17752246643414a2fd51744fc9a1c08e',
|
||||
'timestamp': 1601645860,
|
||||
'upload_date': '20201002',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'tags': 'count:10',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://deconstructingyourself.com/deconstructing-yourself-podcast',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'cfbb9b01-c295-4adb-8726-adde7c03cf21',
|
||||
'display_id': 'deconstructing-yourself',
|
||||
'title': 'Deconstructing Yourself',
|
||||
'description': 'md5:dab5082b28b248a35476abf64768854d',
|
||||
'timestamp': 1570581181,
|
||||
'upload_date': '20191009',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'tags': 'count:5',
|
||||
},
|
||||
'playlist_mincount': 80,
|
||||
}, {
|
||||
'url': 'https://chicagoreader.com/columns-opinion/podcasts/ben-joravsky-show-podcast-episodes/',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': '9dfa2c37-ab87-4c13-8388-4897914313ec',
|
||||
'display_id': 'the-ben-joravsky-show',
|
||||
'title': 'The Ben Joravsky Show',
|
||||
'description': 'md5:c0f3ec0ee0dbea764390e521adc8780a',
|
||||
'timestamp': 1550875095,
|
||||
'upload_date': '20190222',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'tags': ['Chicago Politics', 'chicago', 'Ben Joravsky'],
|
||||
},
|
||||
'playlist_mincount': 1900,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def _extract_embed_urls(cls, url, webpage):
|
||||
yield from super()._extract_embed_urls(url, webpage)
|
||||
for series_id in re.findall(
|
||||
r'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-series-id=[\'"]([\w-]+)[\'"]', webpage):
|
||||
yield f'https://art19.com/shows/{series_id}'
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id = self._match_id(url)
|
||||
series_metadata = self._download_json(
|
||||
f'https://art19.com/series/{series_id}', series_id, note='Downloading series metadata',
|
||||
headers={'Accept': 'application/vnd.art19.v0+json'})
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': [
|
||||
self.url_result(f'https://rss.art19.com/episodes/{episode_id}.mp3', Art19IE)
|
||||
for episode_id in traverse_obj(series_metadata, ('series', 'episode_ids', ..., {str}))
|
||||
],
|
||||
**traverse_obj(series_metadata, ('series', {
|
||||
'id': ('id', {str}),
|
||||
'display_id': ('slug', {str}),
|
||||
'title': ('title', {str}),
|
||||
'description': ('description_plain', {str}),
|
||||
'timestamp': ('created_at', {parse_iso8601}),
|
||||
'modified_timestamp': ('updated_at', {parse_iso8601}),
|
||||
})),
|
||||
'tags': traverse_obj(series_metadata, ('tags', ..., 'name', {str})),
|
||||
}
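A minimal sketch (not part of the patch) of how the traversal above maps the series payload. The dict below is a trimmed, hypothetical response shaped like the Art19 API; only the keys the code touches are shown, and the timestamp matches the 'Echt Gebeurd' test above:

from yt_dlp.utils import parse_iso8601
from yt_dlp.utils.traversal import traverse_obj

sample = {'series': {
    'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0',
    'slug': 'echt-gebeurd',
    'title': 'Echt Gebeurd',
    'created_at': '2017-04-19T22:49:27Z',
    'episode_ids': ['episode-1', 'episode-2'],
}}

print(traverse_obj(sample, ('series', 'episode_ids', ..., {str})))
# ['episode-1', 'episode-2']
print(traverse_obj(sample, ('series', {
    'display_id': ('slug', {str}),
    'timestamp': ('created_at', {parse_iso8601}),
})))
# {'display_id': 'echt-gebeurd', 'timestamp': 1492642167}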
|
|
@ -70,7 +70,24 @@ class ArteTVIE(ArteTVBaseIE):
|
|||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/q82dTTfyuCXupPsGxXsd7B/940x530',
|
||||
'upload_date': '20230930',
|
||||
'ext': 'mp4',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/de/videos/085374-003-A/im-hohen-norden-geboren/',
|
||||
'info_dict': {
|
||||
'id': '085374-003-A',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:ab79ec7cc472a93164415b4e4916abf9',
|
||||
'timestamp': 1702872000,
|
||||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/TnyHBfPxv3v2GEY3suXGZP/940x530',
|
||||
'duration': 2594,
|
||||
'title': 'Die kurze Zeit der Jugend',
|
||||
'alt_title': 'Im hohen Norden geboren',
|
||||
'upload_date': '20231218',
|
||||
'subtitles': {
|
||||
'fr': 'mincount:1',
|
||||
'fr-acc': 'mincount:1',
|
||||
},
|
||||
},
|
||||
}]
|
||||
|
||||
_GEO_BYPASS = True
|
||||
|
@ -121,6 +138,16 @@ class ArteTVIE(ArteTVBaseIE):
|
|||
),
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _fix_accessible_subs_locale(subs):
|
||||
updated_subs = {}
|
||||
for lang, sub_formats in subs.items():
    for fmt in sub_formats:
        # accessible (audio-description) tracks are served as *-MAL.m3u8; key them
        # under '<lang>-acc' without mutating `lang` for the language's other formats
        key = f'{lang}-acc' if fmt.get('url', '').endswith('-MAL.m3u8') else lang
        updated_subs.setdefault(key, []).append(fmt)
return updated_subs
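# Illustration (made-up URLs) of the fix above: an accessible track served as
# *-MAL.m3u8 is re-keyed under '<lang>-acc' while regular tracks keep their locale:
#   {'fr': [{'url': 'https://example/fr-MAL.m3u8'}, {'url': 'https://example/fr.m3u8'}]}
#   -> {'fr-acc': [{'url': 'https://example/fr-MAL.m3u8'}],
#       'fr': [{'url': 'https://example/fr.m3u8'}]}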
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
|
@ -174,6 +201,7 @@ def _real_extract(self, url):
|
|||
secondary_formats.extend(fmts)
|
||||
else:
|
||||
formats.extend(fmts)
|
||||
subs = self._fix_accessible_subs_locale(subs)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
elif stream['protocol'] in ('HTTPS', 'RTMP'):
|
||||
|
|
168
yt_dlp/extractor/asobichannel.py
Normal file
|
@ -0,0 +1,168 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class AsobiChannelBaseIE(InfoExtractor):
|
||||
_MICROCMS_HEADER = {'X-MICROCMS-API-KEY': 'qRaKehul9AHU8KtL0dnq1OCLKnFec6yrbcz3'}
|
||||
|
||||
def _extract_info(self, metadata):
|
||||
return traverse_obj(metadata, {
|
||||
'id': ('id', {str}),
|
||||
'title': ('title', {str}),
|
||||
'description': ('body', {clean_html}),
|
||||
'thumbnail': ('contents', 'video_thumb', 'url', {url_or_none}),
|
||||
'timestamp': ('publishedAt', {parse_iso8601}),
|
||||
'modified_timestamp': ('updatedAt', {parse_iso8601}),
|
||||
'channel': ('channel', 'name', {str}),
|
||||
'channel_id': ('channel', 'id', {str}),
|
||||
})
|
||||
|
||||
|
||||
class AsobiChannelIE(AsobiChannelBaseIE):
|
||||
IE_NAME = 'asobichannel'
|
||||
IE_DESC = 'ASOBI CHANNEL'
|
||||
|
||||
_VALID_URL = r'https?://asobichannel\.asobistore\.jp/watch/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://asobichannel.asobistore.jp/watch/1ypp48qd32p',
|
||||
'md5': '39df74e872afe032c4eb27b89144fc92',
|
||||
'info_dict': {
|
||||
'id': '1ypp48qd32p',
|
||||
'ext': 'mp4',
|
||||
'title': 'アイドルマスター ミリオンライブ! 765プロch 原っぱ通信 #1',
|
||||
'description': 'md5:b930bd2199c9b2fd75951ce4aaa7efd2',
|
||||
'thumbnail': 'https://images.microcms-assets.io/assets/d2420de4b9194e11beb164f99edb1f95/a8e6f84119f54eb9ab4ce16729239905/%E3%82%B5%E3%83%A0%E3%83%8D%20(1).png',
|
||||
'timestamp': 1697098247,
|
||||
'upload_date': '20231012',
|
||||
'modified_timestamp': 1698381162,
|
||||
'modified_date': '20231027',
|
||||
'channel': 'アイドルマスター',
|
||||
'channel_id': 'idolmaster',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://asobichannel.asobistore.jp/watch/redigiwnjzqj',
|
||||
'md5': '229fa8fb5c591c75ce8c37a497f113f6',
|
||||
'info_dict': {
|
||||
'id': 'redigiwnjzqj',
|
||||
'ext': 'mp4',
|
||||
'title': '【おまけ放送】アイドルマスター ミリオンライブ! 765プロch 原っぱ通信 #1',
|
||||
'description': 'md5:7d9cd35fb54425a6967822bd564ea2d9',
|
||||
'thumbnail': 'https://images.microcms-assets.io/assets/d2420de4b9194e11beb164f99edb1f95/20e5c1d6184242eebc2512a5dec59bf0/P1_%E5%8E%9F%E3%81%A3%E3%81%B1%E3%82%B5%E3%83%A0%E3%83%8D.png',
|
||||
'modified_timestamp': 1697797125,
|
||||
'modified_date': '20231020',
|
||||
'timestamp': 1697261769,
|
||||
'upload_date': '20231014',
|
||||
'channel': 'アイドルマスター',
|
||||
'channel_id': 'idolmaster',
|
||||
},
|
||||
}]
|
||||
|
||||
_survapi_header = None
|
||||
|
||||
def _real_initialize(self):
|
||||
token = self._download_json(
|
||||
'https://asobichannel-api.asobistore.jp/api/v1/vspf/token', None,
|
||||
note='Retrieving API token')
|
||||
self._survapi_header = {'Authorization': f'Bearer {token}'}
|
||||
|
||||
def _process_vod(self, video_id, metadata):
|
||||
content_id = metadata['contents']['video_id']
|
||||
|
||||
vod_data = self._download_json(
|
||||
f'https://survapi.channel.or.jp/proxy/v1/contents/{content_id}/get_by_cuid', video_id,
|
||||
headers=self._survapi_header, note='Downloading vod data')
|
||||
|
||||
return {
|
||||
'formats': self._extract_m3u8_formats(vod_data['ex_content']['streaming_url'], video_id),
|
||||
}
|
||||
|
||||
def _process_live(self, video_id, metadata):
|
||||
content_id = metadata['contents']['video_id']
|
||||
event_data = self._download_json(
|
||||
f'https://survapi.channel.or.jp/ex/events/{content_id}?embed=channel', video_id,
|
||||
headers=self._survapi_header, note='Downloading event data')
|
||||
|
||||
player_type = traverse_obj(event_data, ('data', 'Player_type', {str}))
|
||||
if player_type == 'poster':
|
||||
self.raise_no_formats('Live event has not yet started', expected=True)
|
||||
live_status = 'is_upcoming'
|
||||
formats = []
|
||||
elif player_type == 'player':
|
||||
live_status = 'is_live'
|
||||
formats = self._extract_m3u8_formats(
|
||||
event_data['data']['Channel']['Custom_live_url'], video_id, live=True)
|
||||
else:
|
||||
raise ExtractorError(f'Unsupported player type {player_type!r}')
|
||||
|
||||
return {
|
||||
'release_timestamp': traverse_obj(metadata, ('period', 'start', {parse_iso8601})),
|
||||
'live_status': live_status,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
metadata = self._download_json(
|
||||
f'https://channel.microcms.io/api/v1/media/{video_id}', video_id,
|
||||
headers=self._MICROCMS_HEADER)
|
||||
|
||||
info = self._extract_info(metadata)
|
||||
|
||||
video_type = traverse_obj(metadata, ('contents', 'video_type', 0, {str}))
|
||||
if video_type == 'VOD':
|
||||
return merge_dicts(info, self._process_vod(video_id, metadata))
|
||||
if video_type == 'LIVE':
|
||||
return merge_dicts(info, self._process_live(video_id, metadata))
|
||||
|
||||
raise ExtractorError(f'Unexpected video type {video_type!r}')
|
||||
|
||||
|
||||
class AsobiChannelTagURLIE(AsobiChannelBaseIE):
|
||||
IE_NAME = 'asobichannel:tag'
|
||||
IE_DESC = 'ASOBI CHANNEL'
|
||||
|
||||
_VALID_URL = r'https?://asobichannel\.asobistore\.jp/tag/(?P<id>[a-z0-9-_]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://asobichannel.asobistore.jp/tag/bjhh-nbcja',
|
||||
'info_dict': {
|
||||
'id': 'bjhh-nbcja',
|
||||
'title': 'アイドルマスター ミリオンライブ! 765プロch 原っぱ通信',
|
||||
},
|
||||
'playlist_mincount': 16,
|
||||
}, {
|
||||
'url': 'https://asobichannel.asobistore.jp/tag/hvm5qw3c6od',
|
||||
'info_dict': {
|
||||
'id': 'hvm5qw3c6od',
|
||||
'title': 'アイマスMOIW2023ラジオ',
|
||||
},
|
||||
'playlist_mincount': 13,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
tag_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, tag_id)
|
||||
title = traverse_obj(self._search_nextjs_data(
|
||||
webpage, tag_id, fatal=False), ('props', 'pageProps', 'data', 'name', {str}))
|
||||
|
||||
media = self._download_json(
|
||||
f'https://channel.microcms.io/api/v1/media?limit=999&filters=(tag[contains]{tag_id})',
|
||||
tag_id, headers=self._MICROCMS_HEADER)
|
||||
|
||||
def entries():
|
||||
for metadata in traverse_obj(media, ('contents', lambda _, v: v['id'])):
|
||||
yield {
|
||||
'_type': 'url',
|
||||
'url': f'https://asobichannel.asobistore.jp/watch/{metadata["id"]}',
|
||||
'ie_key': AsobiChannelIE.ie_key(),
|
||||
**self._extract_info(metadata),
|
||||
}
|
||||
|
||||
return self.playlist_result(entries(), tag_id, title)
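A quick way to exercise the new extractors from Python, assuming yt-dlp is installed; the tag URL is taken from the tests above and its continued availability is not guaranteed:

import yt_dlp

with yt_dlp.YoutubeDL({'extract_flat': 'in_playlist'}) as ydl:
    playlist = ydl.extract_info('https://asobichannel.asobistore.jp/tag/bjhh-nbcja', download=False)
    print(playlist.get('title'))
    for entry in playlist.get('entries') or []:
        print(entry.get('id'), entry.get('title'))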
|
|
@ -18,6 +18,7 @@
|
|||
OnDemandPagedList,
|
||||
bool_or_none,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
format_field,
|
||||
|
@ -1621,6 +1622,7 @@ def _real_extract(self, url):
|
|||
class BiliIntlBaseIE(InfoExtractor):
|
||||
_API_URL = 'https://api.bilibili.tv/intl/gateway'
|
||||
_NETRC_MACHINE = 'biliintl'
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
|
||||
|
||||
def _call_api(self, endpoint, *args, **kwargs):
|
||||
json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
|
||||
|
@ -1658,19 +1660,34 @@ def _get_subtitles(self, *, ep_id=None, aid=None):
|
|||
'aid': aid,
|
||||
})) or {}
|
||||
subtitles = {}
|
||||
for sub in sub_json.get('subtitles') or []:
|
||||
sub_url = sub.get('url')
|
||||
if not sub_url:
|
||||
continue
|
||||
sub_data = self._download_json(
|
||||
sub_url, ep_id or aid, errnote='Unable to download subtitles', fatal=False,
|
||||
note='Downloading subtitles%s' % f' for {sub["lang"]}' if sub.get('lang') else '')
|
||||
if not sub_data:
|
||||
continue
|
||||
subtitles.setdefault(sub.get('lang_key', 'en'), []).append({
|
||||
'ext': 'srt',
|
||||
'data': self.json2srt(sub_data)
|
||||
})
|
||||
fetched_urls = set()
|
||||
for sub in traverse_obj(sub_json, (('subtitles', 'video_subtitle'), ..., {dict})):
|
||||
for url in traverse_obj(sub, ((None, 'ass', 'srt'), 'url', {url_or_none})):
|
||||
if url in fetched_urls:
|
||||
continue
|
||||
fetched_urls.add(url)
|
||||
sub_ext = determine_ext(url)
|
||||
sub_lang = sub.get('lang_key') or 'en'
|
||||
|
||||
if sub_ext == 'ass':
|
||||
subtitles.setdefault(sub_lang, []).append({
|
||||
'ext': 'ass',
|
||||
'url': url,
|
||||
})
|
||||
elif sub_ext == 'json':
|
||||
sub_data = self._download_json(
|
||||
url, ep_id or aid, fatal=False,
|
||||
note=f'Downloading subtitles{format_field(sub, "lang", " for %s")} ({sub_lang})',
|
||||
errnote='Unable to download subtitles')
|
||||
|
||||
if sub_data:
|
||||
subtitles.setdefault(sub_lang, []).append({
|
||||
'ext': 'srt',
|
||||
'data': self.json2srt(sub_data),
|
||||
})
|
||||
else:
|
||||
self.report_warning('Unexpected subtitle extension', ep_id or aid)
|
||||
|
||||
return subtitles
|
||||
|
||||
def _get_formats(self, *, ep_id=None, aid=None):
|
||||
|
@ -1716,7 +1733,9 @@ def _get_formats(self, *, ep_id=None, aid=None):
|
|||
def _parse_video_metadata(self, video_data):
|
||||
return {
|
||||
'title': video_data.get('title_display') or video_data.get('title'),
|
||||
'description': video_data.get('desc'),
|
||||
'thumbnail': video_data.get('cover'),
|
||||
'timestamp': unified_timestamp(video_data.get('formatted_pub_date')),
|
||||
'episode_number': int_or_none(self._search_regex(
|
||||
r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
|
||||
}
|
||||
|
@ -1813,17 +1832,6 @@ class BiliIntlIE(BiliIntlBaseIE):
|
|||
'episode_number': 140,
|
||||
},
|
||||
'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
|
||||
}, {
|
||||
'url': 'https://www.bilibili.tv/en/video/2041863208',
|
||||
'info_dict': {
|
||||
'id': '2041863208',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1670874843,
|
||||
'description': 'Scheduled for April 2023.\nStudio: ufotable',
|
||||
'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
|
||||
'upload_date': '20221212',
|
||||
'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
|
||||
},
|
||||
}, {
|
||||
# episode comment extraction
|
||||
'url': 'https://www.bilibili.tv/en/play/34580/340317',
|
||||
|
@ -1864,9 +1872,9 @@ class BiliIntlIE(BiliIntlBaseIE):
|
|||
'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
|
||||
'timestamp': 1667891924,
|
||||
'upload_date': '20221108',
|
||||
'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan - Bstation',
|
||||
'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan',
|
||||
'comment_count': int,
|
||||
'thumbnail': 'https://pic.bstarstatic.com/ugc/f6c363659efd2eabe5683fbb906b1582.jpg',
|
||||
'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
|
||||
},
|
||||
'params': {
|
||||
'getcomments': True
|
||||
|
@ -1929,10 +1937,12 @@ def _extract_video_metadata(self, url, video_id, season_id):
|
|||
|
||||
# XXX: webpage metadata may not be accurate; it is only used to avoid crashing when video_data is not found
|
||||
return merge_dicts(
|
||||
self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id, fatal=False), {
|
||||
'title': self._html_search_meta('og:title', webpage),
|
||||
'description': self._html_search_meta('og:description', webpage)
|
||||
})
|
||||
self._parse_video_metadata(video_data), {
|
||||
'title': get_element_by_class(
|
||||
'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
|
||||
'description': get_element_by_class(
|
||||
'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
|
||||
}, self._search_json_ld(webpage, video_id, default={}))
|
||||
|
||||
def _get_comments_reply(self, root_id, next_id=0, display_id=None):
|
||||
comment_api_raw_data = self._download_json(
|
||||
|
@ -2020,7 +2030,8 @@ def _real_extract(self, url):
|
|||
'formats': self._get_formats(ep_id=ep_id, aid=aid),
|
||||
'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
|
||||
'chapters': chapters,
|
||||
'__post_extractor': self.extract_comments(video_id, ep_id)
|
||||
'__post_extractor': self.extract_comments(video_id, ep_id),
|
||||
'http_headers': self._HEADERS,
|
||||
}
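For reference, a small sketch (made-up URLs) of the extension check the rewritten subtitle loop relies on: .ass tracks are kept as direct URLs, while .json payloads are downloaded and converted to SRT:

from yt_dlp.utils import determine_ext

for sub_url in ('https://example.com/subs/en.ass', 'https://example.com/subs/en.json'):
    print(sub_url, '->', determine_ext(sub_url))
# https://example.com/subs/en.ass -> ass
# https://example.com/subs/en.json -> json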
|
||||
|
||||
|
||||
|
|
139
yt_dlp/extractor/chzzk.py
Normal file
|
@ -0,0 +1,139 @@
|
|||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class CHZZKLiveIE(InfoExtractor):
|
||||
IE_NAME = 'chzzk:live'
|
||||
_VALID_URL = r'https?://chzzk\.naver\.com/live/(?P<id>[\da-f]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://chzzk.naver.com/live/c68b8ef525fb3d2fa146344d84991753',
|
||||
'info_dict': {
|
||||
'id': 'c68b8ef525fb3d2fa146344d84991753',
|
||||
'ext': 'mp4',
|
||||
'title': str,
|
||||
'channel': '진짜도현',
|
||||
'channel_id': 'c68b8ef525fb3d2fa146344d84991753',
|
||||
'channel_is_verified': False,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 1705510344,
|
||||
'upload_date': '20240117',
|
||||
'live_status': 'is_live',
|
||||
'view_count': int,
|
||||
'concurrent_view_count': int,
|
||||
},
|
||||
'skip': 'The channel is not currently live',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
live_detail = self._download_json(
|
||||
f'https://api.chzzk.naver.com/service/v2/channels/{channel_id}/live-detail', channel_id,
|
||||
note='Downloading channel info', errnote='Unable to download channel info')['content']
|
||||
|
||||
if live_detail.get('status') == 'CLOSE':
|
||||
raise ExtractorError('The channel is not currently live', expected=True)
|
||||
|
||||
live_playback = self._parse_json(live_detail['livePlaybackJson'], channel_id)
|
||||
|
||||
thumbnails = []
|
||||
thumbnail_template = traverse_obj(
|
||||
live_playback, ('thumbnail', 'snapshotThumbnailTemplate', {url_or_none}))
|
||||
if thumbnail_template and '{type}' in thumbnail_template:
|
||||
for width in traverse_obj(live_playback, ('thumbnail', 'types', ..., {str})):
|
||||
thumbnails.append({
|
||||
'id': width,
|
||||
'url': thumbnail_template.replace('{type}', width),
|
||||
'width': int_or_none(width),
|
||||
})
|
||||
|
||||
formats, subtitles = [], {}
|
||||
for media in traverse_obj(live_playback, ('media', lambda _, v: url_or_none(v['path']))):
|
||||
is_low_latency = media.get('mediaId') == 'LLHLS'
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
media['path'], channel_id, 'mp4', fatal=False, live=True,
|
||||
m3u8_id='hls-ll' if is_low_latency else 'hls')
|
||||
for f in fmts:
|
||||
if is_low_latency:
|
||||
f['source_preference'] = -2
|
||||
if '-afragalow.stream-audio.stream' in f['format_id']:
|
||||
f['quality'] = -2
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
return {
|
||||
'id': channel_id,
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnails': thumbnails,
|
||||
**traverse_obj(live_detail, {
|
||||
'title': ('liveTitle', {str}),
|
||||
'timestamp': ('openDate', {functools.partial(parse_iso8601, delimiter=' ')}),
|
||||
'concurrent_view_count': ('concurrentUserCount', {int_or_none}),
|
||||
'view_count': ('accumulateCount', {int_or_none}),
|
||||
'channel': ('channel', 'channelName', {str}),
|
||||
'channel_id': ('channel', 'channelId', {str}),
|
||||
'channel_is_verified': ('channel', 'verifiedMark', {bool}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class CHZZKVideoIE(InfoExtractor):
|
||||
IE_NAME = 'chzzk:video'
|
||||
_VALID_URL = r'https?://chzzk\.naver\.com/video/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://chzzk.naver.com/video/1754',
|
||||
'md5': 'b0c0c1bb888d913b93d702b1512c7f06',
|
||||
'info_dict': {
|
||||
'id': '1754',
|
||||
'ext': 'mp4',
|
||||
'title': '치지직 테스트 방송',
|
||||
'channel': '침착맨',
|
||||
'channel_id': 'bb382c2c0cc9fa7c86ab3b037fb5799c',
|
||||
'channel_is_verified': False,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 15577,
|
||||
'timestamp': 1702970505.417,
|
||||
'upload_date': '20231219',
|
||||
'view_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_meta = self._download_json(
|
||||
f'https://api.chzzk.naver.com/service/v2/videos/{video_id}', video_id,
|
||||
note='Downloading video info', errnote='Unable to download video info')['content']
|
||||
formats, subtitles = self._extract_mpd_formats_and_subtitles(
|
||||
f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}', video_id,
|
||||
query={
|
||||
'key': video_meta['inKey'],
|
||||
'env': 'real',
|
||||
'lc': 'en_US',
|
||||
'cpl': 'en_US',
|
||||
}, note='Downloading video playback', errnote='Unable to download video playback')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(video_meta, {
|
||||
'title': ('videoTitle', {str}),
|
||||
'thumbnail': ('thumbnailImageUrl', {url_or_none}),
|
||||
'timestamp': ('publishDateAt', {functools.partial(float_or_none, scale=1000)}),
|
||||
'view_count': ('readCount', {int_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'channel': ('channel', 'channelName', {str}),
|
||||
'channel_id': ('channel', 'channelId', {str}),
|
||||
'channel_is_verified': ('channel', 'verifiedMark', {bool}),
|
||||
}),
|
||||
}
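A minimal sketch of the millisecond-to-second conversion used for 'publishDateAt' above; the sample value is the one from the chzzk:video test:

from functools import partial
from yt_dlp.utils import float_or_none
from yt_dlp.utils.traversal import traverse_obj

meta = {'publishDateAt': 1702970505417}
print(traverse_obj(meta, ('publishDateAt', {partial(float_or_none, scale=1000)})))
# 1702970505.417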
|
|
@ -46,15 +46,18 @@ def _real_extract(self, url):
|
|||
video_id.split('.')[1] + '==='), video_id)['sub']
|
||||
manifest_base_url = base_url + 'manifest/video.'
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
manifest_base_url + 'm3u8', video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
manifest_base_url + 'mpd', video_id, mpd_id='dash', fatal=False))
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
manifest_base_url + 'mpd', video_id, mpd_id='dash', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_id,
|
||||
'thumbnail': base_url + 'thumbnails/thumbnail.jpg',
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
|
79
yt_dlp/extractor/cloudycdn.py
Normal file
|
@ -0,0 +1,79 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class CloudyCDNIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?:)?//embed\.cloudycdn\.services/(?P<site_id>[^/?#]+)/media/(?P<id>[\w-]+)'
|
||||
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
|
||||
_TESTS = [{
|
||||
'url': 'https://embed.cloudycdn.services/ltv/media/46k_d23-6000-105?',
|
||||
'md5': '64f72a360ca530d5ed89c77646c9eee5',
|
||||
'info_dict': {
|
||||
'id': '46k_d23-6000-105',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1700589151,
|
||||
'duration': 1442,
|
||||
'upload_date': '20231121',
|
||||
'title': 'D23-6000-105_cetstud',
|
||||
'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://embed.cloudycdn.services/izm/media/26e_lv-8-5-1',
|
||||
'md5': '798828a479151e2444d8dcfbec76e482',
|
||||
'info_dict': {
|
||||
'id': '26e_lv-8-5-1',
|
||||
'ext': 'mp4',
|
||||
'title': 'LV-8-5-1',
|
||||
'timestamp': 1669767167,
|
||||
'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/488306/placeholder1679423604.jpg',
|
||||
'duration': 1205,
|
||||
'upload_date': '20221130',
|
||||
}
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://www.tavaklase.lv/video/es-esmu-mina-um-2/',
|
||||
'md5': '63074e8e6c84ac2a01f2fb8bf03b8f43',
|
||||
'info_dict': {
|
||||
'id': 'cqd_lib-2',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20230223',
|
||||
'duration': 629,
|
||||
'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/518407/placeholder1678748124.jpg',
|
||||
'timestamp': 1677181513,
|
||||
'title': 'LIB-2',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
site_id, video_id = self._match_valid_url(url).group('site_id', 'id')
|
||||
|
||||
data = self._download_json(
|
||||
f'https://player.cloudycdn.services/player/{site_id}/media/{video_id}/',
|
||||
video_id, data=urlencode_postdata({
|
||||
'version': '6.4.0',
|
||||
'referer': url,
|
||||
}))
|
||||
|
||||
formats, subtitles = [], {}
|
||||
for m3u8_url in traverse_obj(data, ('source', 'sources', ..., 'src', {url_or_none})):
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(data, {
|
||||
'title': ('name', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'timestamp': ('upload_date', {parse_iso8601}),
|
||||
'thumbnail': ('source', 'poster', {url_or_none}),
|
||||
}),
|
||||
}
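For reference, the player endpoint above is sent a url-encoded POST body; a minimal sketch using the embed URL from the first test as the referer:

from yt_dlp.utils import urlencode_postdata

print(urlencode_postdata({
    'version': '6.4.0',
    'referer': 'https://embed.cloudycdn.services/ltv/media/46k_d23-6000-105',
}))
# b'version=6.4.0&referer=https%3A%2F%2Fembed.cloudycdn.services%2Fltv%2Fmedia%2F46k_d23-6000-105'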
|
72
yt_dlp/extractor/elementorembed.py
Normal file
|
@ -0,0 +1,72 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .vimeo import VimeoIE
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import unescapeHTML, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ElementorEmbedIE(InfoExtractor):
|
||||
_VALID_URL = False
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://capitaltv.cy/2023/12/14/υγεια-και-ζωη-14-12-2023-δρ-ξενια-κωσταντινιδο/',
|
||||
'info_dict': {
|
||||
'id': 'KgzuxwuQwM4',
|
||||
'ext': 'mp4',
|
||||
'title': 'ΥΓΕΙΑ ΚΑΙ ΖΩΗ 14 12 2023 ΔΡ ΞΕΝΙΑ ΚΩΣΤΑΝΤΙΝΙΔΟΥ',
|
||||
'thumbnail': 'https://i.ytimg.com/vi/KgzuxwuQwM4/maxresdefault.jpg',
|
||||
'playable_in_embed': True,
|
||||
'tags': 'count:16',
|
||||
'like_count': int,
|
||||
'channel': 'Capital TV Cyprus',
|
||||
'channel_id': 'UCR8LwVKTLGEXt4ZAErpCMrg',
|
||||
'availability': 'public',
|
||||
'description': 'md5:7a3308a22881aea4612358c4ba121f77',
|
||||
'duration': 2891,
|
||||
'upload_date': '20231214',
|
||||
'uploader_id': '@capitaltvcyprus6389',
|
||||
'live_status': 'not_live',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCR8LwVKTLGEXt4ZAErpCMrg',
|
||||
'uploader_url': 'https://www.youtube.com/@capitaltvcyprus6389',
|
||||
'uploader': 'Capital TV Cyprus',
|
||||
'age_limit': 0,
|
||||
'categories': ['News & Politics'],
|
||||
'view_count': int,
|
||||
'channel_follower_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://elementor.com/academy/theme-builder-collection/?playlist=76011151&video=9e59909',
|
||||
'info_dict': {
|
||||
'id': '?playlist=76011151&video=9e59909',
|
||||
'title': 'Theme Builder Collection - Academy',
|
||||
'age_limit': 0,
|
||||
'timestamp': 1702196984.0,
|
||||
'upload_date': '20231210',
|
||||
'description': 'md5:7f52c52715ee9e54fd7f82210511673d',
|
||||
'thumbnail': 'https://elementor.com/academy/wp-content/uploads/2021/07/Theme-Builder-1.png',
|
||||
},
|
||||
'playlist_count': 11,
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
_WIDGET_REGEX = r'<div[^>]+class="[^"]*elementor-widget-video(?:-playlist)?[^"]*"[^>]*data-settings="([^"]*)"'
|
||||
|
||||
def _extract_from_webpage(self, url, webpage):
|
||||
for data_settings in re.findall(self._WIDGET_REGEX, webpage):
|
||||
data = self._parse_json(data_settings, None, fatal=False, transform_source=unescapeHTML)
|
||||
if youtube_url := traverse_obj(data, ('youtube_url', {url_or_none})):
|
||||
yield self.url_result(youtube_url, ie=YoutubeIE)
|
||||
|
||||
for video in traverse_obj(data, ('tabs', lambda _, v: v['_id'], {dict})):
|
||||
if youtube_url := traverse_obj(video, ('youtube_url', {url_or_none})):
|
||||
yield self.url_result(youtube_url, ie=YoutubeIE)
|
||||
if vimeo_url := traverse_obj(video, ('vimeo_url', {url_or_none})):
|
||||
yield self.url_result(vimeo_url, ie=VimeoIE)
|
||||
for direct_url in traverse_obj(video, (('hosted_url', 'external_url'), 'url', {url_or_none})):
|
||||
yield {
|
||||
'id': video['_id'],
|
||||
'url': direct_url,
|
||||
'title': video.get('title'),
|
||||
}
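A self-contained sketch of how _WIDGET_REGEX pulls the HTML-escaped data-settings attribute out of an Elementor video widget; the markup below is a hypothetical minimal example, not taken from a real page:

import json
import re

from yt_dlp.utils import unescapeHTML

_WIDGET_REGEX = r'<div[^>]+class="[^"]*elementor-widget-video(?:-playlist)?[^"]*"[^>]*data-settings="([^"]*)"'
sample = ('<div class="elementor-widget elementor-widget-video" '
          'data-settings="{&quot;youtube_url&quot;:&quot;https://www.youtube.com/watch?v=KgzuxwuQwM4&quot;}">')
for data_settings in re.findall(_WIDGET_REGEX, sample):
    print(json.loads(unescapeHTML(data_settings))['youtube_url'])
# https://www.youtube.com/watch?v=KgzuxwuQwM4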
|
199
yt_dlp/extractor/err.py
Normal file
|
@ -0,0 +1,199 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ERRJupiterIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://jupiter(?:pluss)?\.err\.ee/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'note': 'Jupiter: Movie: siin-me-oleme',
|
||||
'url': 'https://jupiter.err.ee/1211107/siin-me-oleme',
|
||||
'md5': '9b45d1682a98853acaa1e1b0c791f425',
|
||||
'info_dict': {
|
||||
'id': '1211107',
|
||||
'ext': 'mp4',
|
||||
'title': 'Siin me oleme!',
|
||||
'alt_title': '',
|
||||
'description': 'md5:1825b795f5f7584241aeb59e5bbb4f70',
|
||||
'release_date': '20231226',
|
||||
'upload_date': '20201217',
|
||||
'modified_date': '20201217',
|
||||
'release_timestamp': 1703577600,
|
||||
'timestamp': 1608210000,
|
||||
'modified_timestamp': 1608220800,
|
||||
'release_year': 1978,
|
||||
},
|
||||
}, {
|
||||
'note': 'Jupiter: Series: Impulss',
|
||||
'url': 'https://jupiter.err.ee/1609145945/impulss',
|
||||
'md5': 'a378486df07ed1ba74e46cc861886243',
|
||||
'info_dict': {
|
||||
'id': '1609145945',
|
||||
'ext': 'mp4',
|
||||
'title': 'Impulss',
|
||||
'alt_title': 'Loteriipilet hooldekodusse',
|
||||
'description': 'md5:fa8a2ed0cdccb130211513443ee4d571',
|
||||
'release_date': '20231107',
|
||||
'upload_date': '20231026',
|
||||
'modified_date': '20231118',
|
||||
'release_timestamp': 1699380000,
|
||||
'timestamp': 1698327601,
|
||||
'modified_timestamp': 1700311802,
|
||||
'series': 'Impulss',
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Loteriipilet hooldekodusse',
|
||||
'episode_number': 6,
|
||||
'series_id': '1609108187',
|
||||
'release_year': 2023,
|
||||
'episode_id': '1609145945',
|
||||
},
|
||||
}, {
|
||||
'note': 'Jupiter: Radio Show: mnemoturniir episode',
|
||||
'url': 'https://jupiter.err.ee/1037919/mnemoturniir',
|
||||
'md5': 'f1eb95fe66f9620ff84e81bbac37076a',
|
||||
'info_dict': {
|
||||
'id': '1037919',
|
||||
'ext': 'm4a',
|
||||
'title': 'Mnemoturniir',
|
||||
'alt_title': '',
|
||||
'description': 'md5:626db52394e7583c26ab74d6a34d9982',
|
||||
'release_date': '20240121',
|
||||
'upload_date': '20240108',
|
||||
'modified_date': '20240121',
|
||||
'release_timestamp': 1705827900,
|
||||
'timestamp': 1704675602,
|
||||
'modified_timestamp': 1705827601,
|
||||
'series': 'Mnemoturniir',
|
||||
'season': 'Season 0',
|
||||
'season_number': 0,
|
||||
'episode': 'Episode 0',
|
||||
'episode_number': 0,
|
||||
'series_id': '1037919',
|
||||
'release_year': 2024,
|
||||
'episode_id': '1609215101',
|
||||
},
|
||||
}, {
|
||||
'note': 'Jupiter+: Clip: bolee-zelenyj-tallinn',
|
||||
'url': 'https://jupiterpluss.err.ee/1609180445/bolee-zelenyj-tallinn',
|
||||
'md5': '1b812270c4daf6ce51c06bfeaf33ed95',
|
||||
'info_dict': {
|
||||
'id': '1609180445',
|
||||
'ext': 'mp4',
|
||||
'title': 'Более зеленый Таллинн',
|
||||
'alt_title': '',
|
||||
'description': 'md5:fd34d9bf939c28c4a725b19a7f0d6320',
|
||||
'release_date': '20231224',
|
||||
'upload_date': '20231130',
|
||||
'modified_date': '20231207',
|
||||
'release_timestamp': 1703423400,
|
||||
'timestamp': 1701338400,
|
||||
'modified_timestamp': 1701967200,
|
||||
'release_year': 2023,
|
||||
},
|
||||
}, {
|
||||
'note': 'Jupiter+: Series: The Sniffer',
|
||||
'url': 'https://jupiterpluss.err.ee/1608311387/njuhach',
|
||||
'md5': '2abdeb7131ce551bce49e8d0cea08536',
|
||||
'info_dict': {
|
||||
'id': '1608311387',
|
||||
'ext': 'mp4',
|
||||
'title': 'Нюхач',
|
||||
'alt_title': '',
|
||||
'description': 'md5:8c5c7d8f32ec6e54cd498c9e59ca83bc',
|
||||
'release_date': '20230601',
|
||||
'upload_date': '20210818',
|
||||
'modified_date': '20210903',
|
||||
'release_timestamp': 1685633400,
|
||||
'timestamp': 1629318000,
|
||||
'modified_timestamp': 1630686000,
|
||||
'release_year': 2013,
|
||||
'episode': 'Episode 1',
|
||||
'episode_id': '1608311390',
|
||||
'episode_number': 1,
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'series': 'Нюхач',
|
||||
'series_id': '1608311387',
|
||||
},
|
||||
}, {
|
||||
'note': 'Jupiter+: Podcast: lesnye-istorii-aisty',
|
||||
'url': 'https://jupiterpluss.err.ee/1608990335/lesnye-istorii-aisty',
|
||||
'md5': '8b46d7e4510b254a14b7a52211b5bf96',
|
||||
'info_dict': {
|
||||
'id': '1608990335',
|
||||
'ext': 'm4a',
|
||||
'title': 'Лесные истории | Аисты',
|
||||
'alt_title': '',
|
||||
'description': 'md5:065e721623e271e7a63e6540d409ca6b',
|
||||
'release_date': '20230609',
|
||||
'upload_date': '20230527',
|
||||
'modified_date': '20230608',
|
||||
'release_timestamp': 1686308700,
|
||||
'timestamp': 1685145600,
|
||||
'modified_timestamp': 1686252600,
|
||||
'release_year': 2023,
|
||||
'episode': 'Episode 0',
|
||||
'episode_id': '1608990335',
|
||||
'episode_number': 0,
|
||||
'season': 'Season 0',
|
||||
'season_number': 0,
|
||||
'series': 'Лесные истории | Аисты',
|
||||
'series_id': '1037497',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
data = self._download_json(
|
||||
'https://services.err.ee/api/v2/vodContent/getContentPageData', video_id,
|
||||
query={'contentId': video_id})['data']['mainContent']
|
||||
|
||||
media_data = traverse_obj(data, ('medias', ..., {dict}), get_all=False)
|
||||
if traverse_obj(media_data, ('restrictions', 'drm', {bool})):
|
||||
self.report_drm(video_id)
|
||||
|
||||
formats, subtitles = [], {}
|
||||
for format_url in set(traverse_obj(media_data, ('src', ('hls', 'hls2', 'hlsNew'), {url_or_none}))):
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
format_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
for format_url in set(traverse_obj(media_data, ('src', ('dash', 'dashNew'), {url_or_none}))):
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
format_url, video_id, mpd_id='dash', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
if format_url := traverse_obj(media_data, ('src', 'file', {url_or_none})):
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': 'http',
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(data, {
|
||||
'title': ('heading', {str}),
|
||||
'alt_title': ('subHeading', {str}),
|
||||
'description': (('lead', 'body'), {clean_html}, {lambda x: x or None}),
|
||||
'timestamp': ('created', {int_or_none}),
|
||||
'modified_timestamp': ('updated', {int_or_none}),
|
||||
'release_timestamp': (('scheduleStart', 'publicStart'), {int_or_none}),
|
||||
'release_year': ('year', {int_or_none}),
|
||||
}, get_all=False),
|
||||
**(traverse_obj(data, {
|
||||
'series': ('heading', {str}),
|
||||
'series_id': ('rootContentId', {str_or_none}),
|
||||
'episode': ('subHeading', {str}),
|
||||
'season_number': ('season', {int_or_none}),
|
||||
'episode_number': ('episode', {int_or_none}),
|
||||
'episode_id': ('id', {str_or_none}),
|
||||
}) if data.get('type') == 'episode' else {}),
|
||||
}
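A small sketch of the get_all=False fallback used above: for branching keys, the first value present wins. The scheduleStart and year values mirror the 'Siin me oleme!' test; publicStart is a made-up second value added only to show the fallback order:

from yt_dlp.utils import int_or_none
from yt_dlp.utils.traversal import traverse_obj

data = {'scheduleStart': 1703577600, 'publicStart': 1703577000, 'year': 1978}
print(traverse_obj(data, {
    'release_timestamp': (('scheduleStart', 'publicStart'), {int_or_none}),
    'release_year': ('year', {int_or_none}),
}, get_all=False))
# {'release_timestamp': 1703577600, 'release_year': 1978}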
|
|
@ -20,6 +20,7 @@
|
|||
get_element_by_id,
|
||||
get_first,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
merge_dicts,
|
||||
parse_count,
|
||||
|
@ -43,6 +44,7 @@ class FacebookIE(InfoExtractor):
|
|||
(?:[^#]*?\#!/)?
|
||||
(?:
|
||||
(?:
|
||||
permalink\.php|
|
||||
video/video\.php|
|
||||
photo\.php|
|
||||
video\.php|
|
||||
|
@ -52,12 +54,13 @@ class FacebookIE(InfoExtractor):
|
|||
)\?(?:.*?)(?:v|video_id|story_fbid)=|
|
||||
[^/]+/videos/(?:[^/]+/)?|
|
||||
[^/]+/posts/|
|
||||
events/(?:[^/]+/)?|
|
||||
groups/[^/]+/(?:permalink|posts)/|
|
||||
watchparty/
|
||||
)|
|
||||
facebook:
|
||||
)
|
||||
(?P<id>[0-9]+)
|
||||
(?P<id>pfbid[A-Za-z0-9]+|\d+)
|
||||
'''
|
||||
_EMBED_REGEX = [
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1',
|
||||
|
@ -247,6 +250,41 @@ class FacebookIE(InfoExtractor):
|
|||
'thumbnail': r're:^https?://.*',
|
||||
'duration': 148.435,
|
||||
},
|
||||
}, {
|
||||
# data.node.comet_sections.content.story.attachments[].styles.attachment.media
|
||||
'url': 'https://www.facebook.com/attn/posts/pfbid0j1Czf2gGDVqeQ8KiMLFm3pWN8GxsQmeRrVhimWDzMuKQoR8r4b1knNsejELmUgyhl',
|
||||
'info_dict': {
|
||||
'id': '6968553779868435',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:2f2fcf93e97ac00244fe64521bbdb0cb',
|
||||
'uploader': 'ATTN:',
|
||||
'upload_date': '20231207',
|
||||
'title': 'ATTN:',
|
||||
'duration': 132.675,
|
||||
'uploader_id': '100064451419378',
|
||||
'view_count': int,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'timestamp': 1701975646,
|
||||
},
|
||||
}, {
|
||||
# data.node.comet_sections.content.story.attachments[].styles.attachment.media
|
||||
'url': 'https://www.facebook.com/permalink.php?story_fbid=pfbid0fqQuVEQyXRa9Dp4RcaTR14KHU3uULHV1EK7eckNXSH63JMuoALsAvVCJ97zAGitil&id=100068861234290',
|
||||
'info_dict': {
|
||||
'id': '270103405756416',
|
||||
'ext': 'mp4',
|
||||
'title': 'Lela Evans',
|
||||
'description': 'Today Makkovik\'s own Pilot Mandy Smith made her inaugural landing on the airstrip in her hometown. What a proud moment as we all cheered and...',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader': 'Lela Evans',
|
||||
'uploader_id': 'pfbid0shZJipuigyy5mqrUJn9ub5LJFWNHvan5prtyi3LrDuuuJ4NwrURgnQHYR9fywBepl',
|
||||
'upload_date': '20231228',
|
||||
'timestamp': 1703804085,
|
||||
'duration': 394.347,
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/story.php?story_fbid=pfbid0Fnzhm8UuzjBYpPMNFzaSpFE9UmLdU4fJN8qTANi1Dmtj5q7DNrL5NERXfsAzDEV7l&id=100073071055552',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
||||
'only_matching': True,
|
||||
|
@ -362,6 +400,18 @@ class FacebookIE(InfoExtractor):
|
|||
},
|
||||
'playlist_count': 1,
|
||||
'skip': 'Requires logging in',
|
||||
}, {
|
||||
# data.event.cover_media_renderer.cover_video
|
||||
'url': 'https://m.facebook.com/events/1509582499515440',
|
||||
'info_dict': {
|
||||
'id': '637246984455045',
|
||||
'ext': 'mp4',
|
||||
'title': 'ANALISI IN CAMPO OSCURO " Coaguli nel sangue dei vaccinati"',
|
||||
'description': 'Other event by Comitato Liberi Pensatori on Tuesday, October 18 2022',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader': 'Comitato Liberi Pensatori',
|
||||
'uploader_id': '100065709540881',
|
||||
},
|
||||
}]
|
||||
_SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
|
||||
_api_config = {
|
||||
|
@ -467,39 +517,10 @@ def extract_metadata(webpage):
|
|||
r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage)]
|
||||
post = traverse_obj(post_data, (
|
||||
..., 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
|
||||
|
||||
automatic_captions, subtitles = {}, {}
|
||||
subs_data = traverse_obj(post, (..., 'video', ..., 'attachments', ..., lambda k, v: (
|
||||
k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')))
|
||||
is_video_broadcast = get_first(subs_data, 'is_video_broadcast', expected_type=bool)
|
||||
captions = get_first(subs_data, 'video_available_captions_locales', 'captions_url')
|
||||
if url_or_none(captions): # if subs_data only had a 'captions_url'
|
||||
locale = self._html_search_meta(['og:locale', 'twitter:locale'], webpage, 'locale', default='en_US')
|
||||
subtitles[locale] = [{'url': captions}]
|
||||
# or else subs_data had 'video_available_captions_locales', a list of dicts
|
||||
for caption in traverse_obj(captions, (
|
||||
{lambda x: sorted(x, key=lambda c: c['locale'])}, lambda _, v: v['captions_url'])
|
||||
):
|
||||
lang = caption.get('localized_language') or ''
|
||||
subs = {
|
||||
'url': caption['captions_url'],
|
||||
'name': format_field(caption, 'localized_country', f'{lang} (%s)', default=lang),
|
||||
}
|
||||
if caption.get('localized_creation_method') or is_video_broadcast:
|
||||
automatic_captions.setdefault(caption['locale'], []).append(subs)
|
||||
else:
|
||||
subtitles.setdefault(caption['locale'], []).append(subs)
|
||||
|
||||
media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: (
|
||||
k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
|
||||
title = get_first(media, ('title', 'text'))
|
||||
description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text'))
|
||||
uploader_data = (
|
||||
get_first(media, ('owner', {dict}))
|
||||
or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name']))
|
||||
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
|
||||
or get_first(post, ('node', 'actors', ..., {dict})) or {})
|
||||
|
||||
page_title = title or self._html_search_regex((
|
||||
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>(?P<content>[^<]*)</h2>',
|
||||
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(?P<content>.*?)</span>',
|
||||
|
@ -508,11 +529,16 @@ def extract_metadata(webpage):
|
|||
description = description or self._html_search_meta(
|
||||
['description', 'og:description', 'twitter:description'],
|
||||
webpage, 'description', default=None)
|
||||
uploader_data = (
|
||||
get_first(media, ('owner', {dict}))
|
||||
or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name']))
|
||||
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
|
||||
or get_first(post, ('node', 'actors', ..., {dict}))
|
||||
or get_first(post, ('event', 'event_creator', {dict})) or {})
|
||||
uploader = uploader_data.get('name') or (
|
||||
clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
|
||||
or self._search_regex(
|
||||
(r'ownerName\s*:\s*"([^"]+)"', *self._og_regexes('title')), webpage, 'uploader', fatal=False))
|
||||
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
||||
'timestamp', default=None))
|
||||
|
@ -534,8 +560,6 @@ def extract_metadata(webpage):
|
|||
webpage, 'view count', default=None)),
|
||||
'concurrent_view_count': get_first(post, (
|
||||
('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
|
||||
'automatic_captions': automatic_captions,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
info_json_ld = self._search_json_ld(webpage, video_id, default={})
|
||||
|
@ -577,7 +601,11 @@ def process_formats(info):
|
|||
# Downloads with browser's User-Agent are rate limited. Working around
|
||||
# with non-browser User-Agent.
|
||||
for f in info['formats']:
|
||||
f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
|
||||
# Formats larger than ~500MB will return error 403 unless chunk size is regulated
|
||||
f.setdefault('downloader_options', {})['http_chunk_size'] = 250 << 20
|
||||
|
||||
def extract_relay_data(_filter):
|
||||
return self._parse_json(self._search_regex(
|
||||
|
@ -629,6 +657,29 @@ def parse_graphql_video(video):
|
|||
'url': playable_url,
|
||||
})
|
||||
extract_dash_manifest(video, formats)
|
||||
|
||||
automatic_captions, subtitles = {}, {}
|
||||
is_broadcast = traverse_obj(video, ('is_video_broadcast', {bool}))
|
||||
for caption in traverse_obj(video, (
|
||||
'video_available_captions_locales',
|
||||
{lambda x: sorted(x, key=lambda c: c['locale'])},
|
||||
lambda _, v: url_or_none(v['captions_url'])
|
||||
)):
|
||||
lang = caption.get('localized_language') or 'und'
|
||||
subs = {
|
||||
'url': caption['captions_url'],
|
||||
'name': format_field(caption, 'localized_country', f'{lang} (%s)', default=lang),
|
||||
}
|
||||
if caption.get('localized_creation_method') or is_broadcast:
|
||||
automatic_captions.setdefault(caption['locale'], []).append(subs)
|
||||
else:
|
||||
subtitles.setdefault(caption['locale'], []).append(subs)
|
||||
captions_url = traverse_obj(video, ('captions_url', {url_or_none}))
|
||||
if captions_url and not automatic_captions and not subtitles:
|
||||
locale = self._html_search_meta(
|
||||
['og:locale', 'twitter:locale'], webpage, 'locale', default='en_US')
|
||||
(automatic_captions if is_broadcast else subtitles)[locale] = [{'url': captions_url}]
|
||||
|
||||
info = {
|
||||
'id': v_id,
|
||||
'formats': formats,
|
||||
|
@ -638,6 +689,8 @@ def parse_graphql_video(video):
|
|||
'timestamp': traverse_obj(video, 'publish_time', 'creation_time', expected_type=int_or_none),
|
||||
'duration': (float_or_none(video.get('playable_duration_in_ms'), 1000)
|
||||
or float_or_none(video.get('length_in_second'))),
|
||||
'automatic_captions': automatic_captions,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
process_formats(info)
|
||||
description = try_get(video, lambda x: x['savable_description']['text'])
|
||||
|
@ -672,7 +725,8 @@ def parse_attachment(attachment, key='media'):
|
|||
for edge in edges:
|
||||
parse_attachment(edge, key='node')
|
||||
|
||||
video = data.get('video') or {}
|
||||
video = traverse_obj(data, (
|
||||
'event', 'cover_media_renderer', 'cover_video'), 'video', expected_type=dict) or {}
|
||||
if video:
|
||||
attachments = try_get(video, [
|
||||
lambda x: x['story']['attachments'],
|
||||
|
@ -691,6 +745,9 @@ def parse_attachment(attachment, key='media'):
|
|||
# honor precise duration in video info
|
||||
if video_info.get('duration'):
|
||||
webpage_info['duration'] = video_info['duration']
|
||||
# preserve preferred_thumbnail in video info
|
||||
if video_info.get('thumbnail'):
|
||||
webpage_info['thumbnail'] = video_info['thumbnail']
|
||||
return merge_dicts(webpage_info, video_info)
|
||||
|
||||
if not video_data:
|
||||
|
@ -921,3 +978,114 @@ def _real_extract(self, url):
|
|||
video_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
f'https://m.facebook.com/watch/?v={video_id}&_rdr', FacebookIE, video_id)
|
||||
|
||||
|
||||
class FacebookAdsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/ads/library/?\?(?:[^#]+&)?id=(?P<id>\d+)'
|
||||
IE_NAME = 'facebook:ads'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.facebook.com/ads/library/?id=899206155126718',
|
||||
'info_dict': {
|
||||
'id': '899206155126718',
|
||||
'ext': 'mp4',
|
||||
'title': 'video by Kandao',
|
||||
'uploader': 'Kandao',
|
||||
'uploader_id': '774114102743284',
|
||||
'uploader_url': r're:^https?://.*',
|
||||
'timestamp': 1702548330,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'upload_date': '20231214',
|
||||
'like_count': int,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/ads/library/?id=893637265423481',
|
||||
'info_dict': {
|
||||
'id': '893637265423481',
|
||||
'title': 'Jusqu\u2019\u00e0 -25% sur une s\u00e9lection de vins p\u00e9tillants italiens ',
|
||||
'uploader': 'Eataly Paris Marais',
|
||||
'uploader_id': '2086668958314152',
|
||||
'uploader_url': r're:^https?://.*',
|
||||
'timestamp': 1703571529,
|
||||
'upload_date': '20231226',
|
||||
'like_count': int,
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
'url': 'https://es-la.facebook.com/ads/library/?id=901230958115569',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://m.facebook.com/ads/library/?id=901230958115569',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FORMATS_MAP = {
|
||||
'watermarked_video_sd_url': ('sd-wmk', 'SD, watermarked'),
|
||||
'video_sd_url': ('sd', None),
|
||||
'watermarked_video_hd_url': ('hd-wmk', 'HD, watermarked'),
|
||||
'video_hd_url': ('hd', None),
|
||||
}
|
||||
|
||||
def _extract_formats(self, video_dict):
|
||||
formats = []
|
||||
for format_key, format_url in traverse_obj(video_dict, (
|
||||
{dict.items}, lambda _, v: v[0] in self._FORMATS_MAP and url_or_none(v[1])
|
||||
)):
|
||||
formats.append({
|
||||
'format_id': self._FORMATS_MAP[format_key][0],
|
||||
'format_note': self._FORMATS_MAP[format_key][1],
|
||||
'url': format_url,
|
||||
'ext': 'mp4',
|
||||
'quality': qualities(tuple(self._FORMATS_MAP))(format_key),
|
||||
})
|
||||
return formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
post_data = [self._parse_json(j, video_id, fatal=False)
|
||||
for j in re.findall(r's\.handle\(({.*})\);requireLazy\(', webpage)]
|
||||
data = traverse_obj(post_data, (
|
||||
..., 'require', ..., ..., ..., 'props', 'deeplinkAdCard', 'snapshot', {dict}), get_all=False)
|
||||
if not data:
|
||||
raise ExtractorError('Unable to extract ad data')
|
||||
|
||||
title = data.get('title')
|
||||
if not title or title == '{{product.name}}':
|
||||
title = join_nonempty('display_format', 'page_name', delim=' by ', from_dict=data)
|
||||
|
||||
info_dict = traverse_obj(data, {
|
||||
'description': ('link_description', {str}, {lambda x: x if x != '{{product.description}}' else None}),
|
||||
'uploader': ('page_name', {str}),
|
||||
'uploader_id': ('page_id', {str_or_none}),
|
||||
'uploader_url': ('page_profile_uri', {url_or_none}),
|
||||
'timestamp': ('creation_time', {int_or_none}),
|
||||
'like_count': ('page_like_count', {int_or_none}),
|
||||
})
|
||||
|
||||
entries = []
|
||||
for idx, entry in enumerate(traverse_obj(
|
||||
data, (('videos', 'cards'), lambda _, v: any([url_or_none(v[f]) for f in self._FORMATS_MAP]))), 1
|
||||
):
|
||||
entries.append({
|
||||
'id': f'{video_id}_{idx}',
|
||||
'title': entry.get('title') or title,
|
||||
'description': entry.get('link_description') or info_dict.get('description'),
|
||||
'thumbnail': url_or_none(entry.get('video_preview_image_url')),
|
||||
'formats': self._extract_formats(entry),
|
||||
})
|
||||
|
||||
if len(entries) == 1:
|
||||
info_dict.update(entries[0])
|
||||
|
||||
elif len(entries) > 1:
|
||||
info_dict.update({
|
||||
'title': entries[0]['title'],
|
||||
'entries': entries,
|
||||
'_type': 'playlist',
|
||||
})
|
||||
|
||||
info_dict['id'] = video_id
|
||||
|
||||
return info_dict
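For reference, a minimal sketch of how qualities() orders the _FORMATS_MAP keys used by FacebookAdsIE, so the un-watermarked HD variant sorts above watermarked SD:

from yt_dlp.utils import qualities

rank = qualities(('watermarked_video_sd_url', 'video_sd_url', 'watermarked_video_hd_url', 'video_hd_url'))
print(rank('video_hd_url'), rank('watermarked_video_sd_url'))
# 3 0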
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
join_nonempty,
|
||||
parse_codecs,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
@ -108,6 +109,64 @@ class FloatplaneIE(InfoExtractor):
|
|||
'availability': 'subscriber_only',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.floatplane.com/post/65B5PNoBtf',
|
||||
'info_dict': {
|
||||
'id': '65B5PNoBtf',
|
||||
'description': 'I recorded the inbuilt demo mode for your 90\'s enjoyment, thanks for being Floaties!',
|
||||
'display_id': '65B5PNoBtf',
|
||||
'like_count': int,
|
||||
'release_timestamp': 1701249480,
|
||||
'uploader': 'The Trash Network',
|
||||
'availability': 'subscriber_only',
|
||||
'uploader_id': '61bc20c9a131fb692bf2a513',
|
||||
'uploader_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home',
|
||||
'channel_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home/thedrumthing',
|
||||
'comment_count': int,
|
||||
'title': 'The $50 electronic drum kit.',
|
||||
'channel_id': '64424fe73cd58cbcf8d8e131',
|
||||
'thumbnail': 'https://pbs.floatplane.com/blogPost_thumbnails/65B5PNoBtf/725555379422705_1701247052743.jpeg',
|
||||
'dislike_count': int,
|
||||
'channel': 'The Drum Thing',
|
||||
'release_date': '20231129',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'ISPJjexylS',
|
||||
'ext': 'mp4',
|
||||
'release_date': '20231129',
|
||||
'release_timestamp': 1701249480,
|
||||
'title': 'The $50 electronic drum kit. .mov',
|
||||
'channel_id': '64424fe73cd58cbcf8d8e131',
|
||||
'thumbnail': 'https://pbs.floatplane.com/video_thumbnails/ISPJjexylS/335202812134041_1701249383392.jpeg',
|
||||
'availability': 'subscriber_only',
|
||||
'uploader': 'The Trash Network',
|
||||
'duration': 622,
|
||||
'channel': 'The Drum Thing',
|
||||
'uploader_id': '61bc20c9a131fb692bf2a513',
|
||||
'channel_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home/thedrumthing',
|
||||
'uploader_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'qKfxu6fEpu',
|
||||
'ext': 'aac',
|
||||
'release_date': '20231129',
|
||||
'release_timestamp': 1701249480,
|
||||
'title': 'Roland TD-7 Demo.m4a',
|
||||
'channel_id': '64424fe73cd58cbcf8d8e131',
|
||||
'availability': 'subscriber_only',
|
||||
'uploader': 'The Trash Network',
|
||||
'duration': 114,
|
||||
'channel': 'The Drum Thing',
|
||||
'uploader_id': '61bc20c9a131fb692bf2a513',
|
||||
'channel_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home/thedrumthing',
|
||||
'uploader_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home',
|
||||
},
|
||||
}],
|
||||
'skip': 'requires subscription: "The Trash Network"',
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
|
@ -124,6 +183,22 @@ def _real_extract(self, url):
|
|||
if not any(traverse_obj(post_data, ('metadata', ('hasVideo', 'hasAudio')))):
|
||||
raise ExtractorError('Post does not contain a video or audio track', expected=True)
|
||||
|
||||
uploader_url = format_field(
|
||||
post_data, [('creator', 'urlname')], 'https://www.floatplane.com/channel/%s/home') or None
|
||||
|
||||
common_info = {
|
||||
'uploader_url': uploader_url,
|
||||
'channel_url': urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname'))),
|
||||
'availability': self._availability(needs_subscription=True),
|
||||
**traverse_obj(post_data, {
|
||||
'uploader': ('creator', 'title', {str}),
|
||||
'uploader_id': ('creator', 'id', {str}),
|
||||
'channel': ('channel', 'title', {str}),
|
||||
'channel_id': ('channel', 'id', {str}),
|
||||
'release_timestamp': ('releaseDate', {parse_iso8601}),
|
||||
}),
|
||||
}
|
||||
|
||||
items = []
|
||||
for media in traverse_obj(post_data, (('videoAttachments', 'audioAttachments'), ...)):
|
||||
media_id = media['id']
|
||||
|
@ -150,11 +225,11 @@ def format_path(params):
|
|||
formats = []
|
||||
for quality in traverse_obj(stream, ('resource', 'data', 'qualityLevels', ...)):
|
||||
url = urljoin(stream['cdn'], format_path(traverse_obj(
|
||||
stream, ('resource', 'data', 'qualityLevelParams', quality['name']))))
|
||||
stream, ('resource', 'data', 'qualityLevelParams', quality['name'], {dict}))))
|
||||
formats.append({
|
||||
**traverse_obj(quality, {
|
||||
'format_id': 'name',
|
||||
'format_note': 'label',
|
||||
'format_id': ('name', {str}),
|
||||
'format_note': ('label', {str}),
|
||||
'width': ('width', {int}),
|
||||
'height': ('height', {int}),
|
||||
}),
|
||||
|
@ -164,38 +239,28 @@ def format_path(params):
|
|||
})
|
||||
|
||||
items.append({
|
||||
**common_info,
|
||||
'id': media_id,
|
||||
**traverse_obj(metadata, {
|
||||
'title': 'title',
|
||||
'title': ('title', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'thumbnail': ('thumbnail', 'path'),
|
||||
'thumbnail': ('thumbnail', 'path', {url_or_none}),
|
||||
}),
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
uploader_url = format_field(
|
||||
post_data, [('creator', 'urlname')], 'https://www.floatplane.com/channel/%s/home') or None
|
||||
channel_url = urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname')))
|
||||
|
||||
post_info = {
|
||||
**common_info,
|
||||
'id': post_id,
|
||||
'display_id': post_id,
|
||||
**traverse_obj(post_data, {
|
||||
'title': 'title',
|
||||
'title': ('title', {str}),
|
||||
'description': ('text', {clean_html}),
|
||||
'uploader': ('creator', 'title'),
|
||||
'uploader_id': ('creator', 'id'),
|
||||
'channel': ('channel', 'title'),
|
||||
'channel_id': ('channel', 'id'),
|
||||
'like_count': ('likes', {int_or_none}),
|
||||
'dislike_count': ('dislikes', {int_or_none}),
|
||||
'comment_count': ('comments', {int_or_none}),
|
||||
'release_timestamp': ('releaseDate', {parse_iso8601}),
|
||||
'thumbnail': ('thumbnail', 'path'),
|
||||
'thumbnail': ('thumbnail', 'path', {url_or_none}),
|
||||
}),
|
||||
'uploader_url': uploader_url,
|
||||
'channel_url': channel_url,
|
||||
'availability': self._availability(needs_subscription=True),
|
||||
}
|
||||
|
||||
if len(items) > 1:
|
||||
|
|
179
yt_dlp/extractor/getcourseru.py
Normal file
|
@ -0,0 +1,179 @@
|
|||
import re
|
||||
import time
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, int_or_none, url_or_none, urlencode_postdata
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class GetCourseRuPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://player02\.getcourse\.ru/sign-player/?\?(?:[^#]+&)?json=[^#&]+'
|
||||
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL}[^\'"]*)']
|
||||
_TESTS = [{
|
||||
'url': 'http://player02.getcourse.ru/sign-player/?json=eyJ2aWRlb19oYXNoIjoiMTkwYmRmOTNmMWIyOTczNTMwOTg1M2E3YTE5ZTI0YjMiLCJ1c2VyX2lkIjozNTk1MjUxODMsInN1Yl9sb2dpbl91c2VyX2lkIjpudWxsLCJsZXNzb25faWQiOm51bGwsImlwIjoiNDYuMTQyLjE4Mi4yNDciLCJnY19ob3N0IjoiYWNhZGVteW1lbC5vbmxpbmUiLCJ0aW1lIjoxNzA1NDQ5NjQyLCJwYXlsb2FkIjoidV8zNTk1MjUxODMiLCJ1aV9sYW5ndWFnZSI6InJ1IiwiaXNfaGF2ZV9jdXN0b21fc3R5bGUiOnRydWV9&s=354ad2c993d95d5ac629e3133d6cefea&vh-static-feature=zigzag',
|
||||
'info_dict': {
|
||||
'id': '513573381',
|
||||
'title': '190bdf93f1b29735309853a7a19e24b3',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80',
|
||||
'duration': 1693
|
||||
},
|
||||
'skip': 'JWT expired',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, None, 'Downloading player page')
|
||||
window_configs = self._search_json(
|
||||
r'window\.configs\s*=', webpage, 'config', None)
|
||||
video_id = str(window_configs['gcFileId'])
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
window_configs['masterPlaylistUrl'], video_id)
|
||||
|
||||
return {
|
||||
**traverse_obj(window_configs, {
|
||||
'title': ('videoHash', {str}),
|
||||
'thumbnail': ('previewUrl', {url_or_none}),
|
||||
'duration': ('videoDuration', {int_or_none}),
|
||||
}),
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles
|
||||
}
|
||||
|
||||
|
||||
class GetCourseRuIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'getcourseru'
|
||||
_DOMAINS = [
|
||||
'academymel.online',
|
||||
'marafon.mani-beauty.com',
|
||||
'on.psbook.ru'
|
||||
]
|
||||
_BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})'
|
||||
_VALID_URL = [
|
||||
rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P<id>[^?#]+)',
|
||||
rf'{_BASE_URL_RE}/(:?pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'http://academymel.online/3video_1',
|
||||
'info_dict': {
|
||||
'id': '3059742',
|
||||
'display_id': '3video_1',
|
||||
'title': 'Промоуроки Академии МЕЛ',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '513573381',
|
||||
'ext': 'mp4',
|
||||
'title': 'Промоуроки Академии МЕЛ',
|
||||
'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80',
|
||||
'duration': 1693
|
||||
},
|
||||
}]
|
||||
}, {
|
||||
'url': 'https://academymel.getcourse.ru/3video_1',
|
||||
'info_dict': {
|
||||
'id': '3059742',
|
||||
'display_id': '3video_1',
|
||||
'title': 'Промоуроки Академии МЕЛ',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '513573381',
|
||||
'ext': 'mp4',
|
||||
'title': 'Промоуроки Академии МЕЛ',
|
||||
'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80',
|
||||
'duration': 1693
|
||||
},
|
||||
}]
|
||||
}, {
|
||||
'url': 'https://academymel.getcourse.ru/pl/teach/control/lesson/view?id=319141781&editMode=0',
|
||||
'info_dict': {
|
||||
'id': '319141781',
|
||||
'title': '1. Разминка у стены',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '4919601',
|
||||
'ext': 'mp4',
|
||||
'title': '1. Разминка у стены',
|
||||
'thumbnail': 'https://preview-htz.vhcdn.com/preview/5a521788e7dc25b4f70c3dff6512d90e/preview.jpg?version=1703223532&host=vh-81',
|
||||
'duration': 704
|
||||
},
|
||||
}],
|
||||
'skip': 'paid lesson'
|
||||
}, {
|
||||
'url': 'https://manibeauty.getcourse.ru/pl/teach/control/lesson/view?id=272499894',
|
||||
'info_dict': {
|
||||
'id': '272499894',
|
||||
'title': 'Мотивация к тренировкам',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '447479687',
|
||||
'ext': 'mp4',
|
||||
'title': 'Мотивация к тренировкам',
|
||||
'thumbnail': 'https://preview-htz.vhcdn.com/preview/70ed5b9f489dd03b4aff55bfdff71a26/preview.jpg?version=1685115787&host=vh-71',
|
||||
'duration': 30
|
||||
},
|
||||
}],
|
||||
'skip': 'paid lesson'
|
||||
}, {
|
||||
'url': 'https://gaismasmandalas.getcourse.io/ATLAUTSEVBUT',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_LOGIN_URL_PATH = '/cms/system/login'
|
||||
|
||||
def _login(self, hostname, username, password):
|
||||
if self._get_cookies(f'https://{hostname}').get('PHPSESSID5'):
|
||||
return
|
||||
login_url = f'https://{hostname}{self._LOGIN_URL_PATH}'
|
||||
webpage = self._download_webpage(login_url, None)
|
||||
|
||||
self._request_webpage(
|
||||
login_url, None, 'Logging in', 'Failed to log in',
|
||||
data=urlencode_postdata({
|
||||
'action': 'processXdget',
|
||||
'xdgetId': self._html_search_regex(
|
||||
r'<form[^>]+\bclass="[^"]*\bstate-login[^"]*"[^>]+\bdata-xdget-id="([^"]+)"',
|
||||
webpage, 'xdgetId'),
|
||||
'params[action]': 'login',
|
||||
'params[url]': login_url,
|
||||
'params[object_type]': 'cms_page',
|
||||
'params[object_id]': -1,
|
||||
'params[email]': username,
|
||||
'params[password]': password,
|
||||
'requestTime': int(time.time()),
|
||||
'requestSimpleSign': self._html_search_regex(
|
||||
r'window.requestSimpleSign\s*=\s*"([\da-f]+)"', webpage, 'simple sign'),
|
||||
}))
|
||||
|
||||
def _real_extract(self, url):
|
||||
hostname = urllib.parse.urlparse(url).hostname
|
||||
username, password = self._get_login_info(netrc_machine=hostname)
|
||||
if username:
|
||||
self._login(hostname, username, password)
|
||||
|
||||
display_id = self._match_id(url)
|
||||
# NB: 404 is returned due to yt-dlp not properly following redirects #9020
|
||||
webpage, urlh = self._download_webpage_handle(url, display_id, expected_status=404)
|
||||
if self._LOGIN_URL_PATH in urlh.url or urlh.status == 404:
|
||||
raise ExtractorError(
|
||||
f'This video is only available for registered users. {self._login_hint("any", netrc=hostname)}',
|
||||
expected=True)
|
||||
|
||||
playlist_id = self._search_regex(
|
||||
r'window\.(?:lessonId|gcsObjectId)\s*=\s*(\d+)', webpage, 'playlist id', default=display_id)
|
||||
title = self._og_search_title(webpage) or self._html_extract_title(webpage)
|
||||
|
||||
return self.playlist_from_matches(
|
||||
re.findall(GetCourseRuPlayerIE._EMBED_REGEX[0], webpage),
|
||||
playlist_id, title, display_id=display_id, ie=GetCourseRuPlayerIE, video_kwargs={
|
||||
'url_transparent': True,
|
||||
'title': title,
|
||||
})
|
|
@ -66,7 +66,7 @@ def _entries(self, file_id):
        query_params = {
            'contentId': file_id,
            'token': self._TOKEN,
            'websiteToken': '7fd94ds12fds4',  # From https://gofile.io/dist/js/alljs.js
            'wt': '4fd6sg89d7s6',  # From https://gofile.io/dist/js/alljs.js
        }
        password = self.get_param('videopassword')
        if password:
@ -19,9 +19,9 @@ class GoogleDriveIE(InfoExtractor):
    _VALID_URL = r'''(?x)
                        https?://
                            (?:
                                (?:docs|drive)\.google\.com/
                                (?:docs|drive|drive\.usercontent)\.google\.com/
                                (?:
                                    (?:uc|open)\?.*?id=|
                                    (?:uc|open|download)\?.*?id=|
                                    file/d/
                                )|
                                video\.google\.com/get_player\?.*?docid=
@ -53,6 +53,9 @@ class GoogleDriveIE(InfoExtractor):
    }, {
        'url': 'https://drive.google.com/uc?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
        'only_matching': True,
    }, {
        'url': 'https://drive.usercontent.google.com/download?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
        'only_matching': True,
    }]
    _FORMATS_EXT = {
        '5': 'flv',
@ -205,9 +208,10 @@ def get_value(key):
            formats.append(f)

        source_url = update_url_query(
            'https://drive.google.com/uc', {
            'https://drive.usercontent.google.com/download', {
                'id': video_id,
                'export': 'download',
                'confirm': 't',
            })
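        # Annotation (not part of the patch): source_url now points at the
        # drive.usercontent.google.com direct-download endpoint (export=download, confirm=t)
        # instead of the legacy drive.google.com/uc URL.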

        def request_source_file(source_url, kind, data=None):
@ -57,8 +57,8 @@ def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        metadata = self._parse_json(
            self._html_search_regex(r'window\.__reflectData\s*=\s*([^;]+)', webpage, 'metadata'), video_id)
        metadata = self._search_json(
            r'window\.__reflectData\s*=', webpage, 'metadata', video_id)

        video_info = metadata['collectionMedia'][0]
        media_data = self._download_json(

@ -99,7 +99,7 @@ def _real_extract(self, url):
            'duration': int_or_none(
                video_info.get('source_duration')),
            'artist': str_or_none(
                video_info.get('music_track_artist')),
                video_info.get('music_track_artist')) or None,
            'track': str_or_none(
                video_info.get('music_track_name')),
                video_info.get('music_track_name')) or None,
        }

69 yt_dlp/extractor/ilpost.py Normal file
@ -0,0 +1,69 @@
import functools

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    float_or_none,
    int_or_none,
    url_or_none,
    urlencode_postdata,
)
from ..utils.traversal import traverse_obj


class IlPostIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?ilpost\.it/episodes/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.ilpost.it/episodes/1-avis-akvasas-ka/',
        'md5': '43649f002d85e1c2f319bb478d479c40',
        'info_dict': {
            'id': '2972047',
            'ext': 'mp3',
            'display_id': '1-avis-akvasas-ka',
            'title': '1. Avis akvasas ka',
            'url': 'https://www.ilpost.it/wp-content/uploads/2023/12/28/1703781217-l-invasione-pt1-v6.mp3',
            'timestamp': 1703835014,
            'upload_date': '20231229',
            'duration': 2495.0,
            'availability': 'public',
            'series_id': '235598',
            'description': '',
        }
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        endpoint_metadata = self._search_json(
            r'var\s+ilpostpodcast\s*=', webpage, 'metadata', display_id)
        episode_id = endpoint_metadata['post_id']
        podcast_id = endpoint_metadata['podcast_id']
        podcast_metadata = self._download_json(
            endpoint_metadata['ajax_url'], display_id, data=urlencode_postdata({
                'action': 'checkpodcast',
                'cookie': endpoint_metadata['cookie'],
                'post_id': episode_id,
                'podcast_id': podcast_id,
            }))

        episode = traverse_obj(podcast_metadata, (
            'data', 'postcastList', lambda _, v: str(v['id']) == episode_id, {dict}), get_all=False)
        if not episode:
            raise ExtractorError('Episode could not be extracted')

        return {
            'id': episode_id,
            'display_id': display_id,
            'series_id': podcast_id,
            'vcodec': 'none',
            **traverse_obj(episode, {
                'title': ('title', {str}),
                'description': ('description', {str}),
                'url': ('podcast_raw_url', {url_or_none}),
                'thumbnail': ('image', {url_or_none}),
                'timestamp': ('timestamp', {int_or_none}),
                'duration': ('milliseconds', {functools.partial(float_or_none, scale=1000)}),
                'availability': ('free', {lambda v: 'public' if v else 'subscriber_only'}),
            }),
        }
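# Annotation (not part of the patch): the episode is located by matching its id against the
# 'postcastList' entries returned by the site's 'checkpodcast' AJAX action; extraction fails
# with an ExtractorError if no matching entry is found.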
@ -1,5 +1,6 @@
from .common import InfoExtractor
from ..utils import (
    int_or_none,
    js_to_json,
    url_or_none,
    urlencode_postdata,

@ -20,39 +21,64 @@ class JioSaavnSongIE(JioSaavnBaseIE):
    _VALID_URL = r'https?://(?:www\.)?(?:jiosaavn\.com/song/[^/?#]+/|saavn\.com/s/song/(?:[^/?#]+/){3})(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.jiosaavn.com/song/leja-re/OQsEfQFVUXk',
        'md5': '7b1f70de088ede3a152ea34aece4df42',
        'md5': '3b84396d15ed9e083c3106f1fa589c04',
        'info_dict': {
            'id': 'OQsEfQFVUXk',
            'ext': 'mp3',
            'ext': 'mp4',
            'title': 'Leja Re',
            'album': 'Leja Re',
            'thumbnail': 'https://c.saavncdn.com/258/Leja-Re-Hindi-2018-20181124024539-500x500.jpg',
            'duration': 205,
            'view_count': int,
            'release_year': 2018,
        },
    }, {
        'url': 'https://www.saavn.com/s/song/hindi/Saathiya/O-Humdum-Suniyo-Re/KAMiazoCblU',
        'only_matching': True,
    }]

    _VALID_BITRATES = ('16', '32', '64', '128', '320')

    def _real_extract(self, url):
        audio_id = self._match_id(url)
        extract_bitrates = self._configuration_arg('bitrate', ['128', '320'], ie_key='JioSaavn')
        if invalid_bitrates := [br for br in extract_bitrates if br not in self._VALID_BITRATES]:
            raise ValueError(
                f'Invalid bitrate(s): {", ".join(invalid_bitrates)}. '
                + f'Valid bitrates are: {", ".join(self._VALID_BITRATES)}')

        song_data = self._extract_initial_data(url, audio_id)['song']['song']
        media_data = self._download_json(
            'https://www.jiosaavn.com/api.php', audio_id, data=urlencode_postdata({
                '__call': 'song.generateAuthToken',
                '_format': 'json',
                'bitrate': '128',
                'url': song_data['encrypted_media_url'],
            }))
        formats = []
        for bitrate in extract_bitrates:
            media_data = self._download_json(
                'https://www.jiosaavn.com/api.php', audio_id, f'Downloading format info for {bitrate}',
                fatal=False, data=urlencode_postdata({
                    '__call': 'song.generateAuthToken',
                    '_format': 'json',
                    'bitrate': bitrate,
                    'url': song_data['encrypted_media_url'],
                }))
            if not media_data.get('auth_url'):
                self.report_warning(f'Unable to extract format info for {bitrate}')
                continue
            formats.append({
                'url': media_data['auth_url'],
                'ext': media_data.get('type'),
                'format_id': bitrate,
                'abr': int(bitrate),
                'vcodec': 'none',
            })

        return {
            'id': audio_id,
            'url': media_data['auth_url'],
            'ext': media_data.get('type'),
            'vcodec': 'none',
            'formats': formats,
            **traverse_obj(song_data, {
                'title': ('title', 'text'),
                'album': ('album', 'text'),
                'thumbnail': ('image', 0, {url_or_none}),
                'duration': ('duration', {int_or_none}),
                'view_count': ('play_count', {int_or_none}),
                'release_year': ('year', {int_or_none}),
            }),
        }
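# Annotation (not part of the patch): one song.generateAuthToken call is made per bitrate
# requested via the 'bitrate' extractor-arg (default 128,320); bitrates whose response has
# no 'auth_url' are skipped with a warning rather than failing the whole extraction.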

140 yt_dlp/extractor/kukululive.py Normal file
@ -0,0 +1,140 @@
|
|||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
filter_dict,
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
qualities,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class KukuluLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://live\.erinn\.biz/live\.php\?h(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://live.erinn.biz/live.php?h675134569',
|
||||
'md5': 'e380fa6a47fc703d91cea913ab44ec2e',
|
||||
'info_dict': {
|
||||
'id': '675134569',
|
||||
'ext': 'mp4',
|
||||
'title': 'プロセカ',
|
||||
'description': 'テストも兼ねたプロセカ配信。',
|
||||
'timestamp': 1702689148,
|
||||
'upload_date': '20231216',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://live.erinn.biz/live.php?h102338092',
|
||||
'md5': 'dcf5167a934b1c60333461e13a81a6e2',
|
||||
'info_dict': {
|
||||
'id': '102338092',
|
||||
'ext': 'mp4',
|
||||
'title': 'Among Usで遊びます!!',
|
||||
'description': 'VTuberになりましたねんねこ㌨ですよろしくお願いします',
|
||||
'timestamp': 1704603118,
|
||||
'upload_date': '20240107',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://live.erinn.biz/live.php?h878049531',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _get_quality_meta(self, video_id, desc, code, force_h264=None):
|
||||
desc += ' (force_h264)' if force_h264 else ''
|
||||
qs = self._download_webpage(
|
||||
'https://live.erinn.biz/live.player.fplayer.php', video_id,
|
||||
f'Downloading {desc} quality metadata', f'Unable to download {desc} quality metadata',
|
||||
query=filter_dict({
|
||||
'hash': video_id,
|
||||
'action': f'get{code}liveByAjax',
|
||||
'force_h264': force_h264,
|
||||
}))
|
||||
return urllib.parse.parse_qs(qs)
|
||||
|
||||
def _add_quality_formats(self, formats, quality_meta):
|
||||
vcodec = traverse_obj(quality_meta, ('vcodec', 0, {str}))
|
||||
quality = traverse_obj(quality_meta, ('now_quality', 0, {str}))
|
||||
quality_priority = qualities(('low', 'h264', 'high'))(quality)
|
||||
if traverse_obj(quality_meta, ('hlsaddr', 0, {url_or_none})):
|
||||
formats.append({
|
||||
'format_id': quality,
|
||||
'url': quality_meta['hlsaddr'][0],
|
||||
'ext': 'mp4',
|
||||
'vcodec': vcodec,
|
||||
'quality': quality_priority,
|
||||
})
|
||||
if traverse_obj(quality_meta, ('hlsaddr_audioonly', 0, {url_or_none})):
|
||||
formats.append({
|
||||
'format_id': join_nonempty(quality, 'audioonly'),
|
||||
'url': quality_meta['hlsaddr_audioonly'][0],
|
||||
'ext': 'm4a',
|
||||
'vcodec': 'none',
|
||||
'quality': quality_priority,
|
||||
})
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
html = self._download_webpage(url, video_id)
|
||||
|
||||
if '>タイムシフトが見つかりませんでした。<' in html:
|
||||
raise ExtractorError('This stream has expired', expected=True)
|
||||
|
||||
title = clean_html(
|
||||
get_element_by_id('livetitle', html.replace('<SPAN', '<span').replace('SPAN>', 'span>')))
|
||||
description = self._html_search_meta('Description', html)
|
||||
thumbnail = self._html_search_meta(['og:image', 'twitter:image'], html)
|
||||
|
||||
if self._search_regex(r'(var\s+timeshift\s*=\s*false)', html, 'is livestream', default=False):
|
||||
formats = []
|
||||
for (desc, code) in [('high', 'Z'), ('low', 'ForceLow')]:
|
||||
quality_meta = self._get_quality_meta(video_id, desc, code)
|
||||
self._add_quality_formats(formats, quality_meta)
|
||||
if desc == 'high' and traverse_obj(quality_meta, ('vcodec', 0)) == 'HEVC':
|
||||
self._add_quality_formats(
|
||||
formats, self._get_quality_meta(video_id, desc, code, force_h264='1'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
# VOD extraction
|
||||
player_html = self._download_webpage(
|
||||
'https://live.erinn.biz/live.timeshift.fplayer.php', video_id,
|
||||
'Downloading player html', 'Unable to download player html', query={'hash': video_id})
|
||||
|
||||
sources = traverse_obj(self._search_json(
|
||||
r'var\s+fplayer_source\s*=', player_html, 'stream data', video_id,
|
||||
contains_pattern=r'\[(?s:.+)\]', transform_source=js_to_json), lambda _, v: v['file'])
|
||||
|
||||
def entries(segments, playlist=True):
|
||||
for i, segment in enumerate(segments, 1):
|
||||
yield {
|
||||
'id': f'{video_id}_{i}' if playlist else video_id,
|
||||
'title': f'{title} (Part {i})' if playlist else title,
|
||||
'description': description,
|
||||
'timestamp': traverse_obj(segment, ('time_start', {int_or_none})),
|
||||
'thumbnail': thumbnail,
|
||||
'formats': [{
|
||||
'url': urljoin('https://live.erinn.biz', segment['file']),
|
||||
'ext': 'mp4',
|
||||
'protocol': 'm3u8_native',
|
||||
}],
|
||||
}
|
||||
|
||||
if len(sources) == 1:
|
||||
return next(entries(sources, playlist=False))
|
||||
|
||||
return self.playlist_result(entries(sources), video_id, title, description, multi_video=True)
|
282 yt_dlp/extractor/lsm.py Normal file
@ -0,0 +1,282 @@
|
|||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class LSMLREmbedIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:
|
||||
(?:latvijasradio|lr1|lr2|klasika|lr4|naba|radioteatris)\.lsm|
|
||||
pieci
|
||||
)\.lv/[^/?#]+/(?:
|
||||
pleijeris|embed
|
||||
)/?\?(?:[^#]+&)?(?:show|id)=(?P<id>\d+)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://latvijasradio.lsm.lv/lv/embed/?theme=black&size=16x9&showCaptions=0&id=183522',
|
||||
'md5': '719b33875cd1429846eeeaeec6df2830',
|
||||
'info_dict': {
|
||||
'id': 'a342781',
|
||||
'ext': 'mp3',
|
||||
'duration': 1823,
|
||||
'title': '#138 Nepilnīgā kompensējamo zāļu sistēma pat mēnešiem dzenā pacientus pa aptiekām',
|
||||
'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/9/d/gallery_fd4675ac.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://radioteatris.lsm.lv/lv/embed/?id=&show=1270&theme=white&size=16x9',
|
||||
'info_dict': {
|
||||
'id': '1270',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
'playlist': [{
|
||||
'md5': '2e61b6eceff00d14d57fdbbe6ab24cac',
|
||||
'info_dict': {
|
||||
'id': 'a297397',
|
||||
'ext': 'mp3',
|
||||
'title': 'Eriks Emanuels Šmits "Pilāta evaņģēlijs". 1. daļa',
|
||||
'thumbnail': 'https://radioteatris.lsm.lv/public/assets/shows/62f131ae81e3c.jpg',
|
||||
'duration': 3300,
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
'url': 'https://radioteatris.lsm.lv/lv/embed/?id=&show=1269&theme=white&size=16x9',
|
||||
'md5': '24810d4a961da2295d9860afdcaf4f5a',
|
||||
'info_dict': {
|
||||
'id': 'a230690',
|
||||
'ext': 'mp3',
|
||||
'title': 'Jens Ahlboms "Spārni". Radioizrāde ar Mārtiņa Freimaņa mūziku',
|
||||
'thumbnail': 'https://radioteatris.lsm.lv/public/assets/shows/62f13023a457c.jpg',
|
||||
'duration': 1788,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://lr1.lsm.lv/lv/embed/?id=166557&show=0&theme=white&size=16x9',
|
||||
'info_dict': {
|
||||
'id': '166557',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist': [{
|
||||
'md5': '6a8b0927572f443f09c6e50a3ad65f2d',
|
||||
'info_dict': {
|
||||
'id': 'a303104',
|
||||
'ext': 'mp3',
|
||||
'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/c/5/gallery_a83ad2c2.jpg',
|
||||
'title': 'Krustpunktā Lielā intervija: Valsts prezidents Egils Levits',
|
||||
'duration': 3222,
|
||||
},
|
||||
}, {
|
||||
'md5': '5d5e191e718b7644e5118b7b4e093a6d',
|
||||
'info_dict': {
|
||||
'id': 'v303104',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/c/5/gallery_a83ad2c2.jpg',
|
||||
'title': 'Krustpunktā Lielā intervija: Valsts prezidents Egils Levits - Video Version',
|
||||
'duration': 3222,
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
'url': 'https://lr1.lsm.lv/lv/embed/?id=183522&show=0&theme=white&size=16x9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://lr2.lsm.lv/lv/embed/?id=182126&show=0&theme=white&size=16x9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://klasika.lsm.lv/lv/embed/?id=110806&show=0&theme=white&size=16x9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://lr4.lsm.lv/lv/embed/?id=184282&show=0&theme=white&size=16x9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://pieci.lv/lv/embed/?id=168896&show=0&theme=white&size=16x9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://naba.lsm.lv/lv/embed/?id=182901&show=0&theme=white&size=16x9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://radioteatris.lsm.lv/lv/embed/?id=176439&show=0&theme=white&size=16x9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://lr1.lsm.lv/lv/pleijeris/?embed=0&id=48205&time=00%3A00&idx=0',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
query = parse_qs(url)
|
||||
video_id = traverse_obj(query, (
|
||||
('show', 'id'), 0, {int_or_none}, {lambda x: x or None}, {str_or_none}), get_all=False)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
player_data, media_data = self._search_regex(
|
||||
r'LR\.audio\.Player\s*\([^{]*(?P<player>\{.*?\}),(?P<media>\{.*\})\);',
|
||||
webpage, 'player json', group=('player', 'media'))
|
||||
|
||||
player_json = self._parse_json(
|
||||
player_data, video_id, transform_source=js_to_json, fatal=False) or {}
|
||||
media_json = self._parse_json(media_data, video_id, transform_source=js_to_json)
|
||||
|
||||
entries = []
|
||||
for item in traverse_obj(media_json, (('audio', 'video'), lambda _, v: v['id'])):
|
||||
formats = []
|
||||
for source_url in traverse_obj(item, ('sources', ..., 'file', {url_or_none})):
|
||||
if determine_ext(source_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(source_url, video_id, fatal=False))
|
||||
else:
|
||||
formats.append({'url': source_url})
|
||||
|
||||
id_ = item['id']
|
||||
title = item.get('title')
|
||||
if id_.startswith('v') and not title:
|
||||
title = traverse_obj(
|
||||
media_json, ('audio', lambda _, v: v['id'][1:] == id_[1:], 'title',
|
||||
{lambda x: x and f'{x} - Video Version'}), get_all=False)
|
||||
|
||||
entries.append({
|
||||
'formats': formats,
|
||||
'thumbnail': urljoin(url, player_json.get('poster')),
|
||||
'id': id_,
|
||||
'title': title,
|
||||
'duration': traverse_obj(item, ('duration', {int_or_none})),
|
||||
})
|
||||
|
||||
if len(entries) == 1:
|
||||
return entries[0]
|
||||
|
||||
return self.playlist_result(entries, video_id)
|
||||
|
||||
|
||||
class LSMLTVEmbedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://ltv\.lsm\.lv/embed\?(?:[^#]+&)?c=(?P<id>[^#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://ltv.lsm.lv/embed?c=eyJpdiI6IjQzbHVUeHAyaDJiamFjcjdSUUFKdnc9PSIsInZhbHVlIjoiMHl3SnJNRmd2TmFIdnZwOGtGUUpzODFzUEZ4SVVsN2xoRjliSW9vckUyMWZIWG8vbWVzaFFkY0lhNmRjbjRpaCIsIm1hYyI6ImMzNjdhMzFhNTFhZmY1ZmE0NWI5YmFjZGI1YmJiNGEyNjgzNDM4MjUzMWEwM2FmMDMyZDMwYWM1MDFjZmM5MGIiLCJ0YWciOiIifQ==',
|
||||
'md5': '64f72a360ca530d5ed89c77646c9eee5',
|
||||
'info_dict': {
|
||||
'id': '46k_d23-6000-105',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1700589151,
|
||||
'duration': 1442,
|
||||
'upload_date': '20231121',
|
||||
'title': 'D23-6000-105_cetstud',
|
||||
'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://ltv.lsm.lv/embed?enablesdkjs=1&c=eyJpdiI6IncwVzZmUFk2MU12enVWK1I3SUcwQ1E9PSIsInZhbHVlIjoid3FhV29vamc3T2sxL1RaRmJ5Rm1GTXozU0o2dVczdUtLK0cwZEZJMDQ2a3ZIRG5DK2pneGlnbktBQy9uazVleHN6VXhxdWIweWNvcHRDSnlISlNYOHlVZ1lpcTUrcWZSTUZPQW14TVdkMW9aOUtRWVNDcFF4eWpHNGcrT0VZbUNFQStKQk91cGpndW9FVjJIa0lpbkh3PT0iLCJtYWMiOiIyZGI1NDJlMWRlM2QyMGNhOGEwYTM2MmNlN2JlOGRhY2QyYjdkMmEzN2RlOTEzYTVkNzI1ODlhZDlhZjU4MjQ2IiwidGFnIjoiIn0=',
|
||||
'md5': 'a1711e190fe680fdb68fd8413b378e87',
|
||||
'info_dict': {
|
||||
'id': 'wUnFArIPDSY',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'LTV_16plus',
|
||||
'release_date': '20220514',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCNMrnafwXD2XKeeQOyfkFCw',
|
||||
'view_count': int,
|
||||
'availability': 'public',
|
||||
'thumbnail': 'https://i.ytimg.com/vi/wUnFArIPDSY/maxresdefault.jpg',
|
||||
'release_timestamp': 1652544074,
|
||||
'title': 'EIROVĪZIJA SALĀTOS',
|
||||
'live_status': 'was_live',
|
||||
'uploader_id': '@LTV16plus',
|
||||
'comment_count': int,
|
||||
'channel_id': 'UCNMrnafwXD2XKeeQOyfkFCw',
|
||||
'channel_follower_count': int,
|
||||
'categories': ['Entertainment'],
|
||||
'duration': 5269,
|
||||
'upload_date': '20220514',
|
||||
'age_limit': 0,
|
||||
'channel': 'LTV_16plus',
|
||||
'playable_in_embed': True,
|
||||
'tags': [],
|
||||
'uploader_url': 'https://www.youtube.com/@LTV16plus',
|
||||
'like_count': int,
|
||||
'description': 'md5:7ff0c42ba971e3c13e4b8a2ff03b70b5',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = urllib.parse.unquote(self._match_id(url))
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
data = self._search_json(
|
||||
r'window\.ltvEmbedPayload\s*=', webpage, 'embed json', video_id)
|
||||
embed_type = traverse_obj(data, ('source', 'name', {str}))
|
||||
|
||||
if embed_type == 'telia':
|
||||
ie_key = 'CloudyCDN'
|
||||
embed_url = traverse_obj(data, ('source', 'embed_url', {url_or_none}))
|
||||
elif embed_type == 'youtube':
|
||||
ie_key = 'Youtube'
|
||||
embed_url = traverse_obj(data, ('source', 'id', {str}))
|
||||
else:
|
||||
raise ExtractorError(f'Unsupported embed type {embed_type!r}')
|
||||
|
||||
return self.url_result(
|
||||
embed_url, ie_key, video_id, **traverse_obj(data, {
|
||||
'title': ('parentInfo', 'title'),
|
||||
'duration': ('parentInfo', 'duration', {int_or_none}),
|
||||
'thumbnail': ('source', 'poster', {url_or_none}),
|
||||
}))
|
||||
|
||||
|
||||
class LSMReplayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://replay\.lsm\.lv/[^/?#]+/(?:ieraksts|statja)/[^/?#]+/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://replay.lsm.lv/lv/ieraksts/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
|
||||
'md5': '64f72a360ca530d5ed89c77646c9eee5',
|
||||
'info_dict': {
|
||||
'id': '46k_d23-6000-105',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1700586300,
|
||||
'description': 'md5:0f1b14798cc39e1ae578bd0eb268f759',
|
||||
'duration': 1442,
|
||||
'upload_date': '20231121',
|
||||
'title': '4. studija. Zolitūdes traģēdija un Inčupes stacija',
|
||||
'thumbnail': 'https://ltv.lsm.lv/storage/media/8/7/large/5/1f9604e1.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://replay.lsm.lv/lv/ieraksts/lr/183522/138-nepilniga-kompensejamo-zalu-sistema-pat-menesiem-dzena-pacientus-pa-aptiekam',
|
||||
'md5': '719b33875cd1429846eeeaeec6df2830',
|
||||
'info_dict': {
|
||||
'id': 'a342781',
|
||||
'ext': 'mp3',
|
||||
'duration': 1823,
|
||||
'title': '#138 Nepilnīgā kompensējamo zāļu sistēma pat mēnešiem dzenā pacientus pa aptiekām',
|
||||
'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/9/d/large_fd4675ac.jpg',
|
||||
'upload_date': '20231102',
|
||||
'timestamp': 1698921060,
|
||||
'description': 'md5:7bac3b2dd41e44325032943251c357b1',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://replay.lsm.lv/ru/statja/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _fix_nuxt_data(self, webpage):
|
||||
return re.sub(r'Object\.create\(null(?:,(\{.+\}))?\)', lambda m: m.group(1) or 'null', webpage)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
data = self._search_nuxt_data(
|
||||
self._fix_nuxt_data(webpage), video_id, context_name='__REPLAY__')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
**traverse_obj(data, {
|
||||
'url': ('playback', 'service', 'url', {url_or_none}),
|
||||
'title': ('mediaItem', 'title'),
|
||||
'description': ('mediaItem', ('lead', 'body')),
|
||||
'duration': ('mediaItem', 'duration', {int_or_none}),
|
||||
'timestamp': ('mediaItem', 'aired_at', {parse_iso8601}),
|
||||
'thumbnail': ('mediaItem', 'largeThumbnail', {url_or_none}),
|
||||
}, get_all=False),
|
||||
}

62 yt_dlp/extractor/magentamusik.py Normal file
@ -0,0 +1,62 @@
from .common import InfoExtractor
from ..utils import ExtractorError, int_or_none, join_nonempty, url_or_none
from ..utils.traversal import traverse_obj


class MagentaMusikIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?magentamusik\.de/(?P<id>[^/?#]+)'

    _TESTS = [{
        'url': 'https://www.magentamusik.de/marty-friedman-woa-2023-9208205928595409235',
        'md5': 'd82dd4748f55fc91957094546aaf8584',
        'info_dict': {
            'id': '9208205928595409235',
            'display_id': 'marty-friedman-woa-2023-9208205928595409235',
            'ext': 'mp4',
            'title': 'Marty Friedman: W:O:A 2023',
            'alt_title': 'Konzert vom: 05.08.2023 13:00',
            'duration': 2760,
            'categories': ['Musikkonzert'],
            'release_year': 2023,
            'location': 'Deutschland',
        }
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        player_config = self._search_json(
            r'data-js-element="o-video-player__config">', webpage, 'player config', display_id, fatal=False)
        if not player_config:
            raise ExtractorError('No video found', expected=True)

        asset_id = player_config['assetId']
        asset_details = self._download_json(
            f'https://wcps.t-online.de/cvss/magentamusic/vodclient/v2/assetdetails/58938/{asset_id}',
            display_id, note='Downloading asset details')

        video_id = traverse_obj(
            asset_details, ('content', 'partnerInformation', ..., 'reference', {str}), get_all=False)
        if not video_id:
            raise ExtractorError('Unable to extract video id')

        vod_data = self._download_json(
            f'https://wcps.t-online.de/cvss/magentamusic/vodclient/v2/player/58935/{video_id}/Main%20Movie', video_id)
        smil_url = traverse_obj(
            vod_data, ('content', 'feature', 'representations', ...,
                       'contentPackages', ..., 'media', 'href', {url_or_none}), get_all=False)

        return {
            'id': video_id,
            'display_id': display_id,
            'formats': self._extract_smil_formats(smil_url, video_id),
            **traverse_obj(vod_data, ('content', 'feature', 'metadata', {
                'title': 'title',
                'alt_title': 'originalTitle',
                'description': 'longDescription',
                'duration': ('runtimeInSeconds', {int_or_none}),
                'location': ('countriesOfProduction', {list}, {lambda x: join_nonempty(*x, delim=', ')}),
                'release_year': ('yearOfProduction', {int_or_none}),
                'categories': ('mainGenre', {str}, {lambda x: x and [x]}),
            })),
        }
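# Annotation (not part of the patch): extraction is a two-step API flow - the assetdetails
# endpoint maps the page's assetId to the partner video id, and the player endpoint for that
# id yields the SMIL manifest URL from which formats are extracted.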
|
@ -1,58 +0,0 @@
|
|||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class MagentaMusik360IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?magenta-musik-360\.de/([a-z0-9-]+-(?P<id>[0-9]+)|festivals/.+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.magenta-musik-360.de/within-temptation-wacken-2019-1-9208205928595185932',
|
||||
'md5': '65b6f060b40d90276ec6fb9b992c1216',
|
||||
'info_dict': {
|
||||
'id': '9208205928595185932',
|
||||
'ext': 'm3u8',
|
||||
'title': 'WITHIN TEMPTATION',
|
||||
'description': 'Robert Westerholt und Sharon Janny den Adel gründeten die Symphonic Metal-Band. Privat sind die Niederländer ein Paar und haben zwei Kinder. Die Single Ice Queen brachte ihnen Platin und Gold und verhalf 2002 zum internationalen Durchbruch. Charakteristisch für die Band war Anfangs der hohe Gesang von Frontfrau Sharon. Stilistisch fing die Band im Gothic Metal an. Mit neuem Sound, schnellen Gitarrenriffs und Gitarrensoli, avancierte Within Temptation zur erfolgreichen Rockband. Auch dieses Jahr wird die Band ihre Fangemeinde wieder mitreißen.',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.magenta-musik-360.de/festivals/wacken-world-wide-2020-body-count-feat-ice-t',
|
||||
'md5': '81010d27d7cab3f7da0b0f681b983b7e',
|
||||
'info_dict': {
|
||||
'id': '9208205928595231363',
|
||||
'ext': 'm3u8',
|
||||
'title': 'Body Count feat. Ice-T',
|
||||
'description': 'Body Count feat. Ice-T konnten bereits im vergangenen Jahr auf dem „Holy Ground“ in Wacken überzeugen. 2020 gehen die Crossover-Metaller aus einem Club in Los Angeles auf Sendung und bringen mit ihrer Mischung aus Metal und Hip-Hop Abwechslung und ordentlich Alarm zum WWW. Bereits seit 1990 stehen die beiden Gründer Ice-T (Gesang) und Ernie C (Gitarre) auf der Bühne. Sieben Studioalben hat die Gruppe bis jetzt veröffentlicht, darunter das Debüt „Body Count“ (1992) mit dem kontroversen Track „Cop Killer“.',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
# _match_id casts to string, but since "None" is not a valid video_id for magenta
|
||||
# there is no risk for confusion
|
||||
if video_id == "None":
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_id = self._html_search_regex(r'data-asset-id="([^"]+)"', webpage, 'video_id')
|
||||
json = self._download_json("https://wcps.t-online.de/cvss/magentamusic/vodplayer/v3/player/58935/%s/Main%%20Movie" % video_id, video_id)
|
||||
xml_url = json['content']['feature']['representations'][0]['contentPackages'][0]['media']['href']
|
||||
metadata = json['content']['feature'].get('metadata')
|
||||
title = None
|
||||
description = None
|
||||
duration = None
|
||||
thumbnails = []
|
||||
if metadata:
|
||||
title = metadata.get('title')
|
||||
description = metadata.get('fullDescription')
|
||||
duration = metadata.get('runtimeInSeconds')
|
||||
for img_key in ('teaserImageWide', 'smallCoverImage'):
|
||||
if img_key in metadata:
|
||||
thumbnails.append({'url': metadata[img_key].get('href')})
|
||||
|
||||
xml = self._download_xml(xml_url, video_id)
|
||||
final_url = xml[0][0][0].attrib['src']
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'url': final_url,
|
||||
'duration': duration,
|
||||
'thumbnails': thumbnails
|
||||
}
|
|
@ -8,7 +8,8 @@
    float_or_none,
    int_or_none,
    str_or_none,
    traverse_obj
    traverse_obj,
    update_url_query,
)


@ -16,7 +17,7 @@ class MedalTVIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?medal\.tv/games/[^/?#&]+/clips/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://medal.tv/games/valorant/clips/jTBFnLKdLy15K',
        'md5': '6930f8972914b6b9fdc2bb3918098ba0',
        'md5': '03e4911fdcf7fce563090705c2e79267',
        'info_dict': {
            'id': 'jTBFnLKdLy15K',
            'ext': 'mp4',

@ -33,8 +34,8 @@ class MedalTVIE(InfoExtractor):
            'duration': 13,
        }
    }, {
        'url': 'https://medal.tv/games/cod%20cold%20war/clips/2mA60jWAGQCBH',
        'md5': '3d19d426fe0b2d91c26e412684e66a06',
        'url': 'https://medal.tv/games/cod-cold-war/clips/2mA60jWAGQCBH',
        'md5': 'fc7a3e4552ae8993c1c4006db46be447',
        'info_dict': {
            'id': '2mA60jWAGQCBH',
            'ext': 'mp4',

@ -52,7 +53,7 @@ class MedalTVIE(InfoExtractor):
            'duration': 23,
        }
    }, {
        'url': 'https://medal.tv/games/cod%20cold%20war/clips/2um24TWdty0NA',
        'url': 'https://medal.tv/games/cod-cold-war/clips/2um24TWdty0NA',
        'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
        'info_dict': {
            'id': '2um24TWdty0NA',

@ -81,7 +82,7 @@ class MedalTVIE(InfoExtractor):
    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        webpage = self._download_webpage(update_url_query(url, {'mobilebypass': 'true'}), video_id)
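        # Annotation (not part of the patch): the clip page is now requested with
        # mobilebypass=true appended to the URL before the hydrationData JSON is searched for.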

        hydration_data = self._search_json(
            r'<script[^>]*>[^<]*\bhydrationData\s*=', webpage,

@ -355,11 +355,11 @@ class MLBArticleIE(InfoExtractor):
        'info_dict': {
            'id': '36db7394-343c-4ea3-b8ca-ead2e61bca9a',
            'title': 'Machado\'s grab draws hilarious irate reaction',
            'modified_timestamp': 1650130737,
            'modified_timestamp': 1675888370,
            'description': 'md5:a19d4eb0487b2cb304e9a176f6b67676',
            'modified_date': '20220416',
            'modified_date': '20230208',
        },
        'playlist_count': 2,
        'playlist_mincount': 2,
    }]

    def _real_extract(self, url):

@ -367,15 +367,13 @@ def _real_extract(self, url):
        webpage = self._download_webpage(url, display_id)
        apollo_cache_json = self._search_json(r'window\.initState\s*=', webpage, 'window.initState', display_id)['apolloCache']

        content_data_id = traverse_obj(
            apollo_cache_json, ('ROOT_QUERY', lambda k, _: k.startswith('getForgeContent'), 'id'), get_all=False)

        content_real_info = apollo_cache_json[content_data_id]
        content_real_info = traverse_obj(
            apollo_cache_json, ('ROOT_QUERY', lambda k, _: k.startswith('getArticle')), get_all=False)

        return self.playlist_from_matches(
            traverse_obj(content_real_info, ('parts', lambda _, v: v['typename'] == 'Video', 'id')),
            getter=lambda x: f'https://www.mlb.com/video/{apollo_cache_json[x]["slug"]}',
            ie=MLBVideoIE, playlist_id=content_real_info.get('_translationId'),
            traverse_obj(content_real_info, ('parts', lambda _, v: v['__typename'] == 'Video' or v['type'] == 'video')),
            getter=lambda x: f'https://www.mlb.com/video/{x["slug"]}',
            ie=MLBVideoIE, playlist_id=content_real_info.get('translationId'),
            title=self._html_search_meta('og:title', webpage),
            description=content_real_info.get('summary'),
            modified_timestamp=parse_iso8601(content_real_info.get('lastUpdatedDate')))
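# Annotation (not part of the patch): the article's content entry is now read straight from
# the Apollo cache ROOT_QUERY key starting with 'getArticle', replacing the old two-step
# lookup via a 'getForgeContent' id.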
@ -177,6 +177,7 @@ def _real_extract(self, url):


class MotherlessPaginatedIE(InfoExtractor):
    _EXTRA_QUERY = {}
    _PAGE_SIZE = 60

    def _correct_path(self, url, item_id):

@ -199,7 +200,7 @@ def _real_extract(self, url):
        def get_page(idx):
            page = idx + 1
            current_page = webpage if not idx else self._download_webpage(
                real_url, item_id, note=f'Downloading page {page}', query={'page': page})
                real_url, item_id, note=f'Downloading page {page}', query={'page': page, **self._EXTRA_QUERY})
            yield from self._extract_entries(current_page, real_url)

        return self.playlist_result(

@ -213,7 +214,7 @@ class MotherlessGroupIE(MotherlessPaginatedIE):
        'url': 'http://motherless.com/gv/movie_scenes',
        'info_dict': {
            'id': 'movie_scenes',
            'title': 'Movie Scenes',
            'title': 'Movie Scenes - Videos - Hot and sexy scenes from "regular" movies... Beautiful actresses fully',
        },
        'playlist_mincount': 540,
    }, {

@ -244,7 +245,7 @@ class MotherlessGalleryIE(MotherlessPaginatedIE):
            'id': '338999F',
            'title': 'Random',
        },
        'playlist_mincount': 190,
        'playlist_mincount': 171,
    }, {
        'url': 'https://motherless.com/GVABD6213',
        'info_dict': {

@ -270,3 +271,27 @@ class MotherlessGalleryIE(MotherlessPaginatedIE):

    def _correct_path(self, url, item_id):
        return urllib.parse.urljoin(url, f'/GV{item_id}')


class MotherlessUploaderIE(MotherlessPaginatedIE):
    _VALID_URL = r'https?://(?:www\.)?motherless\.com/u/(?P<id>\w+)/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://motherless.com/u/Mrgo4hrs2023',
        'info_dict': {
            'id': 'Mrgo4hrs2023',
            'title': "Mrgo4hrs2023's Uploads - Videos",
        },
        'playlist_mincount': 32,
    }, {
        'url': 'https://motherless.com/u/Happy_couple?t=v',
        'info_dict': {
            'id': 'Happy_couple',
            'title': "Happy_couple's Uploads - Videos",
        },
        'playlist_mincount': 8,
    }]

    _EXTRA_QUERY = {'t': 'v'}

    def _correct_path(self, url, item_id):
        return urllib.parse.urljoin(url, f'/u/{item_id}?t=v')
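# Annotation (not part of the patch): _EXTRA_QUERY is merged into every pagination request by
# MotherlessPaginatedIE's get_page, so MotherlessUploaderIE can force the videos tab ('t': 'v')
# on each page it fetches.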

171 yt_dlp/extractor/mx3.py Normal file
@ -0,0 +1,171 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
try_call,
|
||||
url_or_none,
|
||||
urlhandle_detect_ext,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class Mx3BaseIE(InfoExtractor):
|
||||
_VALID_URL_TMPL = r'https?://(?:www\.)?%s/t/(?P<id>\w+)'
|
||||
_FORMATS = [{
|
||||
'url': 'player_asset',
|
||||
'format_id': 'default',
|
||||
'quality': 0,
|
||||
}, {
|
||||
'url': 'player_asset?quality=hd',
|
||||
'format_id': 'hd',
|
||||
'quality': 1,
|
||||
}, {
|
||||
'url': 'download',
|
||||
'format_id': 'download',
|
||||
'quality': 2,
|
||||
}, {
|
||||
'url': 'player_asset?quality=source',
|
||||
'format_id': 'source',
|
||||
'quality': 2,
|
||||
}]
|
||||
|
||||
def _extract_formats(self, track_id):
|
||||
formats = []
|
||||
for fmt in self._FORMATS:
|
||||
format_url = f'https://{self._DOMAIN}/tracks/{track_id}/{fmt["url"]}'
|
||||
urlh = self._request_webpage(
|
||||
HEADRequest(format_url), track_id, fatal=False, expected_status=404,
|
||||
note=f'Checking for format {fmt["format_id"]}')
|
||||
if urlh and urlh.status == 200:
|
||||
formats.append({
|
||||
**fmt,
|
||||
'url': format_url,
|
||||
'ext': urlhandle_detect_ext(urlh),
|
||||
'filesize': int_or_none(urlh.headers.get('Content-Length')),
|
||||
})
|
||||
return formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
track_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, track_id)
|
||||
more_info = get_element_by_class('single-more-info', webpage)
|
||||
data = self._download_json(f'https://{self._DOMAIN}/t/{track_id}.json', track_id, fatal=False)
|
||||
|
||||
def get_info_field(name):
|
||||
return self._html_search_regex(
|
||||
rf'<dt[^>]*>\s*{name}\s*</dt>\s*<dd[^>]*>(.*?)</dd>',
|
||||
more_info, name, default=None, flags=re.DOTALL)
|
||||
|
||||
return {
|
||||
'id': track_id,
|
||||
'formats': self._extract_formats(track_id),
|
||||
'genre': self._html_search_regex(
|
||||
r'<div\b[^>]+class="single-band-genre"[^>]*>([^<]+)</div>', webpage, 'genre', default=None),
|
||||
'release_year': int_or_none(get_info_field('Year of creation')),
|
||||
'description': get_info_field('Description'),
|
||||
'tags': try_call(lambda: get_info_field('Tag').split(', '), list),
|
||||
**traverse_obj(data, {
|
||||
'title': ('title', {str}),
|
||||
'artist': (('performer_name', 'artist'), {str}),
|
||||
'album_artist': ('artist', {str}),
|
||||
'composer': ('composer_name', {str}),
|
||||
'thumbnail': (('picture_url_xlarge', 'picture_url'), {url_or_none}),
|
||||
}, get_all=False),
|
||||
}
|
||||
|
||||
|
||||
class Mx3IE(Mx3BaseIE):
|
||||
_DOMAIN = 'mx3.ch'
|
||||
_VALID_URL = Mx3BaseIE._VALID_URL_TMPL % re.escape(_DOMAIN)
|
||||
_TESTS = [{
|
||||
'url': 'https://mx3.ch/t/1Cru',
|
||||
'md5': '7ba09e9826b4447d4e1ce9d69e0e295f',
|
||||
'info_dict': {
|
||||
'id': '1Cru',
|
||||
'ext': 'wav',
|
||||
'artist': 'Godina',
|
||||
'album_artist': 'Tortue Tortue',
|
||||
'composer': 'Olivier Godinat',
|
||||
'genre': 'Rock',
|
||||
'thumbnail': 'https://mx3.ch/pictures/mx3/file/0101/4643/square_xlarge/1-s-envoler-1.jpg?1630272813',
|
||||
'title': "S'envoler",
|
||||
'release_year': 2021,
|
||||
'tags': [],
|
||||
}
|
||||
}, {
|
||||
'url': 'https://mx3.ch/t/1LIY',
|
||||
'md5': '48293cb908342547827f963a5a2e9118',
|
||||
'info_dict': {
|
||||
'id': '1LIY',
|
||||
'ext': 'mov',
|
||||
'artist': 'Tania Kimfumu',
|
||||
'album_artist': 'The Broots',
|
||||
'composer': 'Emmanuel Diserens',
|
||||
'genre': 'Electro',
|
||||
'thumbnail': 'https://mx3.ch/pictures/mx3/file/0110/0003/video_xlarge/frame_0000.png?1686963670',
|
||||
'title': 'The Broots-Larytta remix "Begging For Help"',
|
||||
'release_year': 2023,
|
||||
'tags': ['the broots', 'cassata records', 'larytta'],
|
||||
'description': '"Begging for Help" Larytta Remix Official Video\nRealized By Kali Donkilie in 2023',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://mx3.ch/t/1C6E',
|
||||
'md5': '1afcd578493ddb8e5008e94bb6d97e25',
|
||||
'info_dict': {
|
||||
'id': '1C6E',
|
||||
'ext': 'wav',
|
||||
'artist': 'Alien Bubblegum',
|
||||
'album_artist': 'Alien Bubblegum',
|
||||
'composer': 'Alien Bubblegum',
|
||||
'genre': 'Punk',
|
||||
'thumbnail': 'https://mx3.ch/pictures/mx3/file/0101/1551/square_xlarge/pandora-s-box-cover-with-title.png?1627054733',
|
||||
'title': 'Wide Awake',
|
||||
'release_year': 2021,
|
||||
'tags': ['alien bubblegum', 'bubblegum', 'alien', 'pop punk', 'poppunk'],
|
||||
}
|
||||
}]
|
||||
|
||||
|
||||
class Mx3NeoIE(Mx3BaseIE):
|
||||
_DOMAIN = 'neo.mx3.ch'
|
||||
_VALID_URL = Mx3BaseIE._VALID_URL_TMPL % re.escape(_DOMAIN)
|
||||
_TESTS = [{
|
||||
'url': 'https://neo.mx3.ch/t/1hpd',
|
||||
'md5': '6d9986bbae5cac3296ec8813bf965eb2',
|
||||
'info_dict': {
|
||||
'id': '1hpd',
|
||||
'ext': 'wav',
|
||||
'artist': 'Baptiste Lopez',
|
||||
'album_artist': 'Kammerorchester Basel',
|
||||
'composer': 'Jannik Giger',
|
||||
'genre': 'Composition, Orchestra',
|
||||
'title': 'Troisième œil. Für Kammerorchester (2023)',
|
||||
'thumbnail': 'https://neo.mx3.ch/pictures/neo/file/0000/0241/square_xlarge/kammerorchester-basel-group-photo-2_c_-lukasz-rajchert.jpg?1560341252',
|
||||
'release_year': 2023,
|
||||
'tags': [],
|
||||
}
|
||||
}]
|
||||
|
||||
|
||||
class Mx3VolksmusikIE(Mx3BaseIE):
|
||||
_DOMAIN = 'volksmusik.mx3.ch'
|
||||
_VALID_URL = Mx3BaseIE._VALID_URL_TMPL % re.escape(_DOMAIN)
|
||||
_TESTS = [{
|
||||
'url': 'https://volksmusik.mx3.ch/t/Zx',
|
||||
'md5': 'dd967a7b0c1ef898f3e072cf9c2eae3c',
|
||||
'info_dict': {
|
||||
'id': 'Zx',
|
||||
'ext': 'mp3',
|
||||
'artist': 'Ländlerkapelle GrischArt',
|
||||
'album_artist': 'Ländlerkapelle GrischArt',
|
||||
'composer': 'Urs Glauser',
|
||||
'genre': 'Instrumental, Graubünden',
|
||||
'title': 'Chämilouf',
|
||||
'thumbnail': 'https://volksmusik.mx3.ch/pictures/vxm/file/0000/3815/square_xlarge/grischart1.jpg?1450530120',
|
||||
'release_year': 2012,
|
||||
'tags': [],
|
||||
}
|
||||
}]
|
|
@ -1,20 +1,25 @@
import base64
import hashlib
import hmac
import itertools
import json
import re
from urllib.parse import urlparse, parse_qs
import time
from urllib.parse import parse_qs, urlparse

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    clean_html,
    dict_get,
    int_or_none,
    join_nonempty,
    merge_dicts,
    parse_duration,
    parse_iso8601,
    traverse_obj,
    try_get,
    unified_timestamp,
    update_url_query,
    url_or_none,
)
@ -110,6 +115,18 @@ def get_subs(caption_url):
            **self.process_subtitles(video_data, get_subs),
        }

    def _call_api(self, path, video_id):
        api_endpoint = f'https://apis.naver.com/now_web2/now_web_api/v1{path}'
        key = b'nbxvs5nwNG9QKEWK0ADjYA4JZoujF4gHcIwvoCxFTPAeamq5eemvt5IWAYXxrbYM'
        msgpad = int(time.time() * 1000)
        md = base64.b64encode(hmac.HMAC(
            key, f'{api_endpoint[:255]}{msgpad}'.encode(), digestmod=hashlib.sha1).digest()).decode()

        return self._download_json(api_endpoint, video_id=video_id, headers=self.geo_verification_headers(), query={
            'msgpad': msgpad,
            'md': md,
        })['result']
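    # Annotation (not part of the patch): requests to the now_web_api are signed with an
    # HMAC-SHA1 over the first 255 characters of the request URL concatenated with a
    # millisecond timestamp (msgpad), using the static key above; the base64-encoded digest
    # is passed as the 'md' query parameter.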


class NaverIE(NaverBaseIE):
    _VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/(?:v|embed)/(?P<id>\d+)'
@ -125,21 +142,32 @@ class NaverIE(NaverBaseIE):
|
|||
'upload_date': '20130903',
|
||||
'uploader': '메가스터디, 합격불변의 법칙',
|
||||
'uploader_id': 'megastudy',
|
||||
'uploader_url': 'https://tv.naver.com/megastudy',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'duration': 2118,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://tv.naver.com/v/395837',
|
||||
'md5': '8a38e35354d26a17f73f4e90094febd3',
|
||||
'md5': '7791205fa89dbed2f5e3eb16d287ff05',
|
||||
'info_dict': {
|
||||
'id': '395837',
|
||||
'ext': 'mp4',
|
||||
'title': '9년이 지나도 아픈 기억, 전효성의 아버지',
|
||||
'description': 'md5:eb6aca9d457b922e43860a2a2b1984d3',
|
||||
'description': 'md5:c76be23e21403a6473d8119678cdb5cb',
|
||||
'timestamp': 1432030253,
|
||||
'upload_date': '20150519',
|
||||
'uploader': '4가지쇼 시즌2',
|
||||
'uploader_id': 'wrappinguser29',
|
||||
'uploader': '4가지쇼',
|
||||
'uploader_id': '4show',
|
||||
'uploader_url': 'https://tv.naver.com/4show',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'duration': 277,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
},
|
||||
'skip': 'Georestricted',
|
||||
}, {
|
||||
'url': 'http://tvcast.naver.com/v/81652',
|
||||
'only_matching': True,
|
||||
|
@ -147,56 +175,63 @@ class NaverIE(NaverBaseIE):
|
|||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
content = self._download_json(
|
||||
'https://tv.naver.com/api/json/v/' + video_id,
|
||||
video_id, headers=self.geo_verification_headers())
|
||||
player_info_json = content.get('playerInfoJson') or {}
|
||||
current_clip = player_info_json.get('currentClip') or {}
|
||||
data = self._call_api(f'/clips/{video_id}/play-info', video_id)
|
||||
|
||||
vid = current_clip.get('videoId')
|
||||
in_key = current_clip.get('inKey')
|
||||
vid = traverse_obj(data, ('clip', 'videoId', {str}))
|
||||
in_key = traverse_obj(data, ('play', 'inKey', {str}))
|
||||
|
||||
if not vid or not in_key:
|
||||
player_auth = try_get(player_info_json, lambda x: x['playerOption']['auth'])
|
||||
if player_auth == 'notCountry':
|
||||
self.raise_geo_restricted(countries=['KR'])
|
||||
elif player_auth == 'notLogin':
|
||||
self.raise_login_required()
|
||||
raise ExtractorError('couldn\'t extract vid and key')
|
||||
raise ExtractorError('Unable to extract video info')
|
||||
|
||||
info = self._extract_video_info(video_id, vid, in_key)
|
||||
info.update({
|
||||
'description': clean_html(current_clip.get('description')),
|
||||
'timestamp': int_or_none(current_clip.get('firstExposureTime'), 1000),
|
||||
'duration': parse_duration(current_clip.get('displayPlayTime')),
|
||||
'like_count': int_or_none(current_clip.get('recommendPoint')),
|
||||
'age_limit': 19 if current_clip.get('adult') else None,
|
||||
})
|
||||
info.update(traverse_obj(data, ('clip', {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
'timestamp': ('firstExposureDatetime', {parse_iso8601}),
|
||||
'duration': ('playTime', {int_or_none}),
|
||||
'like_count': ('likeItCount', {int_or_none}),
|
||||
'view_count': ('playCount', {int_or_none}),
|
||||
'comment_count': ('commentCount', {int_or_none}),
|
||||
'thumbnail': ('thumbnailImageUrl', {url_or_none}),
|
||||
'uploader': 'channelName',
|
||||
'uploader_id': 'channelId',
|
||||
'uploader_url': ('channelUrl', {url_or_none}),
|
||||
'age_limit': ('adultVideo', {lambda x: 19 if x else None}),
|
||||
})))
|
||||
return info
|
||||
|
||||
|
||||
class NaverLiveIE(InfoExtractor):
|
||||
class NaverLiveIE(NaverBaseIE):
|
||||
IE_NAME = 'Naver:live'
|
||||
_VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/l/(?P<id>\d+)'
|
||||
_GEO_BYPASS = False
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.naver.com/l/52010',
|
||||
'url': 'https://tv.naver.com/l/127062',
|
||||
'info_dict': {
|
||||
'id': '52010',
|
||||
'id': '127062',
|
||||
'ext': 'mp4',
|
||||
'title': '[LIVE] 뉴스특보 : "수도권 거리두기, 2주간 2단계로 조정"',
|
||||
'description': 'md5:df7f0c237a5ed5e786ce5c91efbeaab3',
|
||||
'channel_id': 'NTV-ytnnews24-0',
|
||||
'start_time': 1597026780000,
|
||||
'live_status': 'is_live',
|
||||
'channel': '뉴스는 YTN',
|
||||
'channel_id': 'ytnnews24',
|
||||
'title': 're:^대한민국 24시간 뉴스 채널 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': 'md5:f938b5956711beab6f882314ffadf4d5',
|
||||
'start_time': 1677752280,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.naver.com/l/51549',
|
||||
'url': 'https://tv.naver.com/l/140535',
|
||||
'info_dict': {
|
||||
'id': '51549',
|
||||
'id': '140535',
|
||||
'ext': 'mp4',
|
||||
'title': '연합뉴스TV - 코로나19 뉴스특보',
|
||||
'description': 'md5:c655e82091bc21e413f549c0eaccc481',
|
||||
'channel_id': 'NTV-yonhapnewstv-0',
|
||||
'start_time': 1596406380000,
|
||||
'live_status': 'is_live',
|
||||
'channel': 'KBS뉴스',
|
||||
'channel_id': 'kbsnews',
|
||||
'start_time': 1696867320,
|
||||
'title': 're:^언제 어디서나! KBS 뉴스 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': 'md5:6ad419c0bf2f332829bda3f79c295284',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.naver.com/l/54887',
|
||||
|
@ -205,55 +240,27 @@ class NaverLiveIE(InfoExtractor):
|
|||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
page = self._download_webpage(url, video_id, 'Downloading Page', 'Unable to download Page')
|
||||
secure_url = self._search_regex(r'sApiF:\s+(?:"|\')([^"\']+)', page, 'secureurl')
|
||||
|
||||
info = self._extract_video_info(video_id, secure_url)
|
||||
info.update({
|
||||
'description': self._og_search_description(page)
|
||||
})
|
||||
|
||||
return info
|
||||
|
||||
def _extract_video_info(self, video_id, url):
|
||||
video_data = self._download_json(url, video_id, headers=self.geo_verification_headers())
|
||||
meta = video_data.get('meta')
|
||||
status = meta.get('status')
|
||||
data = self._call_api(f'/live-end/normal/{video_id}/play-info?renewLastPlayDate=true', video_id)
|
||||
|
||||
status = traverse_obj(data, ('live', 'liveStatus'))
|
||||
if status == 'CLOSED':
|
||||
raise ExtractorError('Stream is offline.', expected=True)
|
||||
elif status != 'OPENED':
|
||||
raise ExtractorError('Unknown status %s' % status)
|
||||
|
||||
title = meta.get('title')
|
||||
stream_list = video_data.get('streams')
|
||||
|
||||
if stream_list is None:
|
||||
raise ExtractorError('Could not get stream data.', expected=True)
|
||||
|
||||
formats = []
|
||||
for quality in stream_list:
|
||||
if not quality.get('url'):
|
||||
continue
|
||||
|
||||
prop = quality.get('property')
|
||||
if prop.get('abr'): # This abr doesn't mean Average audio bitrate.
|
||||
continue
|
||||
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
quality.get('url'), video_id, 'mp4',
|
||||
m3u8_id=quality.get('qualityId'), live=True
|
||||
))
|
||||
raise ExtractorError(f'Unknown status {status!r}')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'channel_id': meta.get('channelId'),
|
||||
'channel_url': meta.get('channelUrl'),
|
||||
'thumbnail': meta.get('imgUrl'),
|
||||
'start_time': meta.get('startTime'),
|
||||
'categories': [meta.get('categoryId')],
|
||||
'formats': self._extract_m3u8_formats(
|
||||
traverse_obj(data, ('playbackBody', {json.loads}, 'media', 0, 'path')), video_id, live=True),
|
||||
**traverse_obj(data, ('live', {
|
||||
'title': 'title',
|
||||
'channel': 'channelName',
|
||||
'channel_id': 'channelId',
|
||||
'description': 'description',
|
||||
'like_count': (('likeCount', 'likeItCount'), {int_or_none}),
|
||||
'thumbnail': ('thumbnailImageUrl', {url_or_none}),
|
||||
'start_time': (('startTime', 'startDateTime', 'startYmdt'), {parse_iso8601}),
|
||||
}), get_all=False),
|
||||
'is_live': True
|
||||
}
|
||||
|
||||
|
|
|
@ -3,15 +3,15 @@
|
|||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
OnDemandPagedList,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
parse_count,
|
||||
parse_duration,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
OnDemandPagedList,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
|
@ -263,19 +263,16 @@ class NewgroundsUserIE(InfoExtractor):
|
|||
def _fetch_page(self, channel_id, url, page):
|
||||
page += 1
|
||||
posts_info = self._download_json(
|
||||
f'{url}/page/{page}', channel_id,
|
||||
f'{url}?page={page}', channel_id,
|
||||
note=f'Downloading page {page}', headers={
|
||||
'Accept': 'application/json, text/javascript, */*; q = 0.01',
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
})
|
||||
sequence = posts_info.get('sequence', [])
|
||||
for year in sequence:
|
||||
posts = try_get(posts_info, lambda x: x['years'][str(year)]['items'])
|
||||
for post in posts:
|
||||
path, media_id = self._search_regex(
|
||||
r'<a[^>]+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>',
|
||||
post, 'url', group=(1, 2))
|
||||
yield self.url_result(f'https://www.newgrounds.com/{path}', NewgroundsIE.ie_key(), media_id)
|
||||
for post in traverse_obj(posts_info, ('items', ..., ..., {str})):
|
||||
path, media_id = self._search_regex(
|
||||
r'<a[^>]+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>',
|
||||
post, 'url', group=(1, 2))
|
||||
yield self.url_result(f'https://www.newgrounds.com/{path}', NewgroundsIE.ie_key(), media_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
|
|
72
yt_dlp/extractor/ninenews.py
Normal file
|
@ -0,0 +1,72 @@
|
|||
from .common import InfoExtractor
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from ..utils import ExtractorError
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class NineNewsIE(InfoExtractor):
|
||||
IE_NAME = '9News'
|
||||
_VALID_URL = r'https?://(?:www\.)?9news\.com\.au/(?:[\w-]+/){2,3}(?P<id>[\w-]+)/?(?:$|[?#])'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.9news.com.au/videos/national/fair-trading-pulls-dozens-of-toys-from-shelves/clqgc7dvj000y0jnvfism0w5m',
|
||||
'md5': 'd1a65b2e9d126e5feb9bc5cb96e62c80',
|
||||
'info_dict': {
|
||||
'id': '6343717246112',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fair Trading pulls dozens of toys from shelves',
|
||||
'description': 'Fair Trading Australia have been forced to pull dozens of toys from shelves over hazard fears.',
|
||||
'thumbnail': 'md5:bdbe44294e2323b762d97acf8843f66c',
|
||||
'duration': 93.44,
|
||||
'timestamp': 1703231748,
|
||||
'upload_date': '20231222',
|
||||
'uploader_id': '664969388001',
|
||||
'tags': ['networkclip', 'aunews_aunationalninenews', 'christmas presents', 'toys', 'fair trading', 'au_news'],
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.9news.com.au/world/tape-reveals-donald-trump-pressured-michigan-officials-not-to-certify-2020-vote-a-new-report-says/0b8b880e-7d3c-41b9-b2bd-55bc7e492259',
|
||||
'md5': 'a885c44d20898c3e70e9a53e8188cea1',
|
||||
'info_dict': {
|
||||
'id': '6343587450112',
|
||||
'ext': 'mp4',
|
||||
'title': 'Trump found ineligible to run for president by state court',
|
||||
'description': 'md5:40e6e7db7a4ac6be0e960569a5af6066',
|
||||
'thumbnail': 'md5:3e132c48c186039fd06c10787de9bff2',
|
||||
'duration': 104.64,
|
||||
'timestamp': 1703058034,
|
||||
'upload_date': '20231220',
|
||||
'uploader_id': '664969388001',
|
||||
'tags': ['networkclip', 'aunews_aunationalninenews', 'ineligible', 'presidential candidate', 'donald trump', 'au_news'],
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.9news.com.au/national/outrage-as-parents-banned-from-giving-gifts-to-kindergarten-teachers/e19b49d4-a1a4-4533-9089-6e10e2d9386a',
|
||||
'info_dict': {
|
||||
'id': '6343716797112',
|
||||
'ext': 'mp4',
|
||||
'title': 'Outrage as parents banned from giving gifts to kindergarten teachers',
|
||||
'description': 'md5:7a8b0ed2f9e08875fd9a3e86e462bc46',
|
||||
'thumbnail': 'md5:5ee4d66717bdd0dee9fc9a705ef041b8',
|
||||
'duration': 91.307,
|
||||
'timestamp': 1703229584,
|
||||
'upload_date': '20231222',
|
||||
'uploader_id': '664969388001',
|
||||
'tags': ['networkclip', 'aunews_aunationalninenews', 'presents', 'teachers', 'kindergarten', 'au_news'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
article_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, article_id)
|
||||
initial_state = self._search_json(
|
||||
r'var\s+__INITIAL_STATE__\s*=', webpage, 'initial state', article_id)
|
||||
video_id = traverse_obj(
|
||||
initial_state, ('videoIndex', 'currentVideo', 'brightcoveId', {str}),
|
||||
('article', ..., 'media', lambda _, v: v['type'] == 'video', 'urn', {str}), get_all=False)
|
||||
account = traverse_obj(initial_state, (
|
||||
'videoIndex', 'config', (None, 'video'), 'account', {str}), get_all=False)
|
||||
|
||||
if not video_id or not account:
|
||||
raise ExtractorError('Unable to get the required video data')
|
||||
|
||||
return self.url_result(
|
||||
f'https://players.brightcove.net/{account}/default_default/index.html?videoId={video_id}',
|
||||
BrightcoveNewIE, video_id)
|
|
@ -275,7 +275,7 @@ def _real_extract(self, url):
|
|||
'ext': ext,
|
||||
'url': post_file['url'],
|
||||
}
|
||||
elif name == 'video':
|
||||
elif name == 'video' or determine_ext(post_file.get('url')) == 'm3u8':
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(post_file['url'], video_id)
|
||||
return {
|
||||
**info,
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
|
||||
class PiaproIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'piapro'
|
||||
_VALID_URL = r'https?://piapro\.jp/(?:t|content)/(?P<id>\w+)/?'
|
||||
_VALID_URL = r'https?://piapro\.jp/(?:t|content)/(?P<id>[\w-]+)/?'
|
||||
_TESTS = [{
|
||||
'url': 'https://piapro.jp/t/NXYR',
|
||||
'md5': 'f7c0f760913fb1d44a1c45a4af793909',
|
||||
|
@ -49,6 +49,9 @@ class PiaproIE(InfoExtractor):
|
|||
}, {
|
||||
'url': 'https://piapro.jp/content/hcw0z3a169wtemz6',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://piapro.jp/t/-SO-',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
_login_status = False
|
||||
|
|
|
@ -1,10 +1,18 @@
|
|||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, traverse_obj
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_qs,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class PlaySuisseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'playsuisse'
|
||||
_VALID_URL = r'https?://(?:www\.)?playsuisse\.ch/(?:watch|detail)/(?:[^#]*[?&]episodeId=)?(?P<id>[0-9]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
|
@ -134,12 +142,47 @@ class PlaySuisseIE(InfoExtractor):
|
|||
id
|
||||
url
|
||||
}'''
|
||||
_LOGIN_BASE_URL = 'https://login.srgssr.ch/srgssrlogin.onmicrosoft.com'
|
||||
_LOGIN_PATH = 'B2C_1A__SignInV2'
|
||||
_ID_TOKEN = None
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
login_page = self._download_webpage(
|
||||
'https://www.playsuisse.ch/api/sso/login', None, note='Downloading login page',
|
||||
query={'x': 'x', 'locale': 'de', 'redirectUrl': 'https://www.playsuisse.ch/'})
|
||||
settings = self._search_json(r'var\s+SETTINGS\s*=', login_page, 'settings', None)
|
||||
|
||||
csrf_token = settings['csrf']
|
||||
query = {'tx': settings['transId'], 'p': self._LOGIN_PATH}
|
||||
|
||||
status = traverse_obj(self._download_json(
|
||||
f'{self._LOGIN_BASE_URL}/{self._LOGIN_PATH}/SelfAsserted', None, 'Logging in',
|
||||
query=query, headers={'X-CSRF-TOKEN': csrf_token}, data=urlencode_postdata({
|
||||
'request_type': 'RESPONSE',
|
||||
'signInName': username,
|
||||
'password': password
|
||||
}), expected_status=400), ('status', {int_or_none}))
|
||||
if status == 400:
|
||||
raise ExtractorError('Invalid username or password', expected=True)
|
||||
|
||||
urlh = self._request_webpage(
|
||||
f'{self._LOGIN_BASE_URL}/{self._LOGIN_PATH}/api/CombinedSigninAndSignup/confirmed',
|
||||
None, 'Downloading ID token', query={
|
||||
'rememberMe': 'false',
|
||||
'csrf_token': csrf_token,
|
||||
**query,
|
||||
'diags': '',
|
||||
})
|
||||
|
||||
self._ID_TOKEN = traverse_obj(parse_qs(urlh.url), ('id_token', 0))
|
||||
if not self._ID_TOKEN:
|
||||
raise ExtractorError('Login failed')
|
||||
|
||||
def _get_media_data(self, media_id):
|
||||
# NOTE In the web app, the "locale" header is used to switch between languages,
|
||||
# However this doesn't seem to take effect when passing the header here.
|
||||
response = self._download_json(
|
||||
'https://4bbepzm4ef.execute-api.eu-central-1.amazonaws.com/prod/graphql',
|
||||
'https://www.playsuisse.ch/api/graphql',
|
||||
media_id, data=json.dumps({
|
||||
'operationName': 'AssetWatch',
|
||||
'query': self._GRAPHQL_QUERY,
|
||||
|
@ -150,6 +193,9 @@ def _get_media_data(self, media_id):
|
|||
return response['data']['assetV2']
|
||||
|
||||
def _real_extract(self, url):
|
||||
if not self._ID_TOKEN:
|
||||
self.raise_login_required(method='password')
|
||||
|
||||
media_id = self._match_id(url)
|
||||
media_data = self._get_media_data(media_id)
|
||||
info = self._extract_single(media_data)
|
||||
|
@ -168,7 +214,8 @@ def _extract_single(self, media_data):
|
|||
if not media.get('url') or media.get('type') != 'HLS':
|
||||
continue
|
||||
f, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
media['url'], media_data['id'], 'mp4', m3u8_id='HLS', fatal=False)
|
||||
update_url_query(media['url'], {'id_token': self._ID_TOKEN}),
|
||||
media_data['id'], 'mp4', m3u8_id='HLS', fatal=False)
|
||||
formats.extend(f)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
|
|
|
@ -18,7 +18,6 @@
|
|||
class Pr0grammIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://pr0gramm\.com\/(?:[^/?#]+/)+(?P<id>[\d]+)(?:[/?#:]|$)'
|
||||
_TESTS = [{
|
||||
# Tags require account
|
||||
'url': 'https://pr0gramm.com/new/video/5466437',
|
||||
'info_dict': {
|
||||
'id': '5466437',
|
||||
|
@ -36,7 +35,6 @@ class Pr0grammIE(InfoExtractor):
|
|||
'_old_archive_ids': ['pr0grammstatic 5466437'],
|
||||
},
|
||||
}, {
|
||||
# Tags require account
|
||||
'url': 'https://pr0gramm.com/new/3052805:comment28391322',
|
||||
'info_dict': {
|
||||
'id': '3052805',
|
||||
|
@ -71,6 +69,23 @@ class Pr0grammIE(InfoExtractor):
|
|||
'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
|
||||
'_old_archive_ids': ['pr0grammstatic 5848332'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://pr0gramm.com/top/5895149',
|
||||
'info_dict': {
|
||||
'id': '5895149',
|
||||
'ext': 'mp4',
|
||||
'title': 'pr0gramm-5895149 by algoholigSeeManThrower',
|
||||
'tags': 'count:19',
|
||||
'uploader': 'algoholigSeeManThrower',
|
||||
'uploader_id': 457556,
|
||||
'upload_timestamp': 1697580902,
|
||||
'upload_date': '20231018',
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 0,
|
||||
'thumbnail': 'https://thumb.pr0gramm.com/2023/10/18/db47bb3db5e1a1b3.jpg',
|
||||
'_old_archive_ids': ['pr0grammstatic 5895149'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://pr0gramm.com/static/5466437',
|
||||
'only_matching': True,
|
||||
|
@ -92,15 +107,15 @@ def _is_logged_in(self):
|
|||
def _maximum_flags(self):
|
||||
# We need to guess the flags for the content otherwise the api will raise an error
|
||||
# We can guess the maximum allowed flags for the account from the cookies
|
||||
# Bitflags are (msbf): nsfp, nsfl, nsfw, sfw
|
||||
flags = 0b0001
|
||||
# Bitflags are (msbf): pol, nsfp, nsfl, nsfw, sfw
|
||||
flags = 0b10001
|
||||
if self._is_logged_in:
|
||||
flags |= 0b1000
|
||||
flags |= 0b01000
|
||||
cookies = self._get_cookies(self.BASE_URL)
|
||||
if 'me' not in cookies:
|
||||
self._download_webpage(self.BASE_URL, None, 'Refreshing verification information')
|
||||
if traverse_obj(cookies, ('me', {lambda x: x.value}, {unquote}, {json.loads}, 'verified')):
|
||||
flags |= 0b0110
|
||||
flags |= 0b00110
|
||||
|
||||
return flags
|
||||
|
||||
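For reference, a quick sketch (illustrative only, not part of the commit) of how the new 5-bit flag scheme composes, assuming the bit order given in the comment above (msbf: pol, nsfp, nsfl, nsfw, sfw):

# anonymous requests always carry sfw + pol
anonymous = 0b10001
logged_in = 0b01000   # nsfp is added once logged in
verified = 0b00110    # nsfl + nsfw require a verified account
assert anonymous | logged_in | verified == 0b11111  # a verified login may request every content class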
|
@ -134,14 +149,12 @@ def _real_extract(self, url):
|
|||
if not source or not source.endswith('mp4'):
|
||||
self.raise_no_formats('Could not extract a video', expected=bool(source), video_id=video_id)
|
||||
|
||||
tags = None
|
||||
if self._is_logged_in:
|
||||
metadata = self._call_api('info', video_id, {'itemId': video_id}, note='Downloading tags')
|
||||
tags = traverse_obj(metadata, ('tags', ..., 'tag', {str}))
|
||||
# Sorted by "confidence", higher confidence = earlier in list
|
||||
confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float})))
|
||||
if confidences:
|
||||
tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)]
|
||||
metadata = self._call_api('info', video_id, {'itemId': video_id}, note='Downloading tags')
|
||||
tags = traverse_obj(metadata, ('tags', ..., 'tag', {str}))
|
||||
# Sorted by "confidence", higher confidence = earlier in list
|
||||
confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float})))
|
||||
if confidences:
|
||||
tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)]
|
||||
|
||||
formats = traverse_obj(video_info, ('variants', ..., {
|
||||
'format_id': ('name', {str}),
|
||||
|
|
135
yt_dlp/extractor/redge.py
Normal file
|
@ -0,0 +1,135 @@
|
|||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_qs,
|
||||
update_url_query,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class RedCDNLivxIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://[^.]+\.(?:dcs\.redcdn|atmcdn)\.pl/(?:live(?:dash|hls|ss)|nvr)/o2/(?P<tenant>[^/?#]+)/(?P<id>[^?#]+)\.livx'
|
||||
IE_NAME = 'redcdnlivx'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://r.dcs.redcdn.pl/livedash/o2/senat/ENC02/channel.livx?indexMode=true&startTime=638272860000&stopTime=638292544000',
|
||||
'info_dict': {
|
||||
'id': 'ENC02-638272860000-638292544000',
|
||||
'ext': 'mp4',
|
||||
'title': 'ENC02',
|
||||
'duration': 19683.982,
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://r.dcs.redcdn.pl/livedash/o2/sejm/ENC18/live.livx?indexMode=true&startTime=722333096000&stopTime=722335562000',
|
||||
'info_dict': {
|
||||
'id': 'ENC18-722333096000-722335562000',
|
||||
'ext': 'mp4',
|
||||
'title': 'ENC18',
|
||||
'duration': 2463.995,
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://r.dcs.redcdn.pl/livehls/o2/sportevolution/live/triathlon2018/warsaw.livx/playlist.m3u8?startTime=550305000000&stopTime=550327620000',
|
||||
'info_dict': {
|
||||
'id': 'triathlon2018-warsaw-550305000000-550327620000',
|
||||
'ext': 'mp4',
|
||||
'title': 'triathlon2018/warsaw',
|
||||
'duration': 22619.98,
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://n-25-12.dcs.redcdn.pl/nvr/o2/sejm/Migacz-ENC01/1.livx?startTime=722347200000&stopTime=722367345000',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://redir.atmcdn.pl/nvr/o2/sejm/ENC08/1.livx?startTime=503831270000&stopTime=503840040000',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
"""
|
||||
Known methods (first in url path):
|
||||
- `livedash` - DASH MPD
|
||||
- `livehls` - HTTP Live Streaming
|
||||
- `livess` - IIS Smooth Streaming
|
||||
- `nvr` - CCTV mode, directly returns a file, typically flv, avc1, aac
|
||||
- `sc` - shoutcast/icecast (audio streams, like radio)
|
||||
"""
|
||||
|
||||
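To make the method list above concrete, here is an illustration (values borrowed from the test cases above, not new behaviour) of the playback URLs that the livx_mode() helper in _real_extract() below derives for a single recording:

# Illustrative only: tenant 'sejm', path 'ENC18/live', startTime/stopTime taken from the second test URL
#   livess  -> https://r.dcs.redcdn.pl/livess/o2/sejm/ENC18/live.livx/manifest?startTime=722333096000&stopTime=722335562000&indexMode=true
#   livehls -> https://r.dcs.redcdn.pl/livehls/o2/sejm/ENC18/live.livx/playlist.m3u8?startTime=722333096000&stopTime=722335562000&indexMode=true
#   nvr     -> https://r.dcs.redcdn.pl/nvr/o2/sejm/ENC18/live.livx?startTime=722333096000&stopTime=722335562000&nolimit=1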
def _real_extract(self, url):
|
||||
tenant, path = self._match_valid_url(url).group('tenant', 'id')
|
||||
qs = parse_qs(url)
|
||||
start_time = traverse_obj(qs, ('startTime', 0, {int_or_none}))
|
||||
stop_time = traverse_obj(qs, ('stopTime', 0, {int_or_none}))
|
||||
|
||||
def livx_mode(mode):
|
||||
suffix = ''
|
||||
if mode == 'livess':
|
||||
suffix = '/manifest'
|
||||
elif mode == 'livehls':
|
||||
suffix = '/playlist.m3u8'
|
||||
file_qs = {}
|
||||
if start_time:
|
||||
file_qs['startTime'] = start_time
|
||||
if stop_time:
|
||||
file_qs['stopTime'] = stop_time
|
||||
if mode == 'nvr':
|
||||
file_qs['nolimit'] = 1
|
||||
elif mode != 'sc':
|
||||
file_qs['indexMode'] = 'true'
|
||||
return update_url_query(f'https://r.dcs.redcdn.pl/{mode}/o2/{tenant}/{path}.livx{suffix}', file_qs)
|
||||
|
||||
# no id or title for a transmission. making ones up.
|
||||
title = path \
|
||||
.replace('/live', '').replace('live/', '') \
|
||||
.replace('/channel', '').replace('channel/', '') \
|
||||
.strip('/')
|
||||
video_id = join_nonempty(title.replace('/', '-'), start_time, stop_time)
|
||||
|
||||
formats = []
|
||||
# downloading the manifest separately here instead of _extract_ism_formats to also get some stream metadata
|
||||
ism_res = self._download_xml_handle(
|
||||
livx_mode('livess'), video_id,
|
||||
note='Downloading ISM manifest',
|
||||
errnote='Failed to download ISM manifest',
|
||||
fatal=False)
|
||||
ism_doc = None
|
||||
if ism_res is not False:
|
||||
ism_doc, ism_urlh = ism_res
|
||||
formats, _ = self._parse_ism_formats_and_subtitles(ism_doc, ism_urlh.url, 'ss')
|
||||
|
||||
nvr_urlh = self._request_webpage(
|
||||
HEADRequest(livx_mode('nvr')), video_id, 'Follow flv file redirect', fatal=False,
|
||||
expected_status=lambda _: True)
|
||||
if nvr_urlh and nvr_urlh.status == 200:
|
||||
formats.append({
|
||||
'url': nvr_urlh.url,
|
||||
'ext': 'flv',
|
||||
'format_id': 'direct-0',
|
||||
'preference': -1, # might be slow
|
||||
})
|
||||
formats.extend(self._extract_mpd_formats(livx_mode('livedash'), video_id, mpd_id='dash', fatal=False))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
livx_mode('livehls'), video_id, m3u8_id='hls', ext='mp4', fatal=False))
|
||||
|
||||
time_scale = traverse_obj(ism_doc, ('@TimeScale', {int_or_none})) or 10000000
|
||||
duration = traverse_obj(
|
||||
ism_doc, ('@Duration', {functools.partial(float_or_none, scale=time_scale)})) or None
|
||||
|
||||
live_status = None
|
||||
if traverse_obj(ism_doc, '@IsLive') == 'TRUE':
|
||||
live_status = 'is_live'
|
||||
elif duration:
|
||||
live_status = 'was_live'
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'duration': duration,
|
||||
'live_status': live_status,
|
||||
}
|
|
@ -7,6 +7,7 @@
|
|||
str_to_int,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
|
@ -79,7 +80,7 @@ def _real_extract(self, url):
|
|||
'media definitions', default='{}'),
|
||||
video_id, fatal=False)
|
||||
for media in medias if isinstance(medias, list) else []:
|
||||
format_url = url_or_none(media.get('videoUrl'))
|
||||
format_url = urljoin('https://www.redtube.com', media.get('videoUrl'))
|
||||
if not format_url:
|
||||
continue
|
||||
format_id = media.get('format')
|
||||
|
|
|
@ -1,8 +1,34 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import format_field, parse_iso8601
|
||||
from ..utils import (
|
||||
MEDIA_EXTENSIONS,
|
||||
determine_ext,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class RinseFMIE(InfoExtractor):
|
||||
class RinseFMBaseIE(InfoExtractor):
|
||||
@staticmethod
|
||||
def _parse_entry(entry):
|
||||
return {
|
||||
**traverse_obj(entry, {
|
||||
'id': ('id', {str}),
|
||||
'title': ('title', {str}),
|
||||
'url': ('fileUrl', {url_or_none}),
|
||||
'release_timestamp': ('episodeDate', {parse_iso8601}),
|
||||
'thumbnail': ('featuredImage', 0, 'filename', {str},
|
||||
{lambda x: x and f'https://rinse.imgix.net/media/{x}'}),
|
||||
'webpage_url': ('slug', {str},
|
||||
{lambda x: x and f'https://rinse.fm/episodes/{x}'}),
|
||||
}),
|
||||
'vcodec': 'none',
|
||||
'extractor_key': RinseFMIE.ie_key(),
|
||||
'extractor': RinseFMIE.IE_NAME,
|
||||
}
|
||||
|
||||
|
||||
class RinseFMIE(RinseFMBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?rinse\.fm/episodes/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://rinse.fm/episodes/club-glow-15-12-2023-2000/',
|
||||
|
@ -22,12 +48,42 @@ def _real_extract(self, url):
|
|||
webpage = self._download_webpage(url, display_id)
|
||||
entry = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['entry']
|
||||
|
||||
return {
|
||||
'id': entry['id'],
|
||||
'title': entry.get('title'),
|
||||
'url': entry['fileUrl'],
|
||||
'vcodec': 'none',
|
||||
'release_timestamp': parse_iso8601(entry.get('episodeDate')),
|
||||
'thumbnail': format_field(
|
||||
entry, [('featuredImage', 0, 'filename')], 'https://rinse.imgix.net/media/%s', default=None),
|
||||
}
|
||||
return self._parse_entry(entry)
|
||||
|
||||
|
||||
class RinseFMArtistPlaylistIE(RinseFMBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?rinse\.fm/shows/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://rinse.fm/shows/resources/',
|
||||
'info_dict': {
|
||||
'id': 'resources',
|
||||
'title': '[re]sources',
|
||||
'description': '[re]sources est un label parisien piloté par le DJ et producteur Tommy Kid.'
|
||||
},
|
||||
'playlist_mincount': 40
|
||||
}, {
|
||||
'url': 'https://rinse.fm/shows/ivy/',
|
||||
'info_dict': {
|
||||
'id': 'ivy',
|
||||
'title': '[IVY]',
|
||||
'description': 'A dedicated space for DNB/Turbo House and 4x4.'
|
||||
},
|
||||
'playlist_mincount': 7
|
||||
}]
|
||||
|
||||
def _entries(self, data):
|
||||
for episode in traverse_obj(data, (
|
||||
'props', 'pageProps', 'episodes', lambda _, v: determine_ext(v['fileUrl']) in MEDIA_EXTENSIONS.audio)
|
||||
):
|
||||
yield self._parse_entry(episode)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
title = self._og_search_title(webpage) or self._html_search_meta('title', webpage)
|
||||
description = self._og_search_description(webpage) or self._html_search_meta(
|
||||
'description', webpage)
|
||||
data = self._search_nextjs_data(webpage, playlist_id)
|
||||
|
||||
return self.playlist_result(
|
||||
self._entries(data), playlist_id, title, description=description)
|
||||
|
|
|
@ -1,14 +1,27 @@
|
|||
import re
|
||||
|
||||
from ..utils import parse_duration, unescapeHTML
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_by_attribute,
|
||||
get_element_by_class,
|
||||
get_element_html_by_class,
|
||||
get_elements_by_class,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_count,
|
||||
parse_duration,
|
||||
unescapeHTML,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class Rule34VideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?rule34video\.com/videos/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?rule34video\.com/videos?/(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://rule34video.com/videos/3065157/shot-it-mmd-hmv/',
|
||||
'url': 'https://rule34video.com/video/3065157/shot-it-mmd-hmv/',
|
||||
'md5': 'ffccac2c23799dabbd192621ae4d04f3',
|
||||
'info_dict': {
|
||||
'id': '3065157',
|
||||
|
@ -17,7 +30,16 @@ class Rule34VideoIE(InfoExtractor):
|
|||
'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065157/preview.jpg',
|
||||
'duration': 347.0,
|
||||
'age_limit': 18,
|
||||
'tags': 'count:14'
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'timestamp': 1639872000,
|
||||
'description': 'https://discord.gg/aBqPrHSHvv',
|
||||
'upload_date': '20211219',
|
||||
'uploader': 'Sweet HMV',
|
||||
'uploader_url': 'https://rule34video.com/members/22119/',
|
||||
'categories': ['3D', 'MMD', 'iwara'],
|
||||
'tags': 'mincount:10'
|
||||
}
|
||||
},
|
||||
{
|
||||
|
@ -30,7 +52,17 @@ class Rule34VideoIE(InfoExtractor):
|
|||
'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065296/preview.jpg',
|
||||
'duration': 938.0,
|
||||
'age_limit': 18,
|
||||
'tags': 'count:50'
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'timestamp': 1640131200,
|
||||
'description': '',
|
||||
'creator': 'WildeerStudio',
|
||||
'upload_date': '20211222',
|
||||
'uploader': 'CerZule',
|
||||
'uploader_url': 'https://rule34video.com/members/36281/',
|
||||
'categories': ['3D', 'Tomb Raider'],
|
||||
'tags': 'mincount:40'
|
||||
}
|
||||
},
|
||||
]
|
||||
|
@ -49,17 +81,44 @@ def _real_extract(self, url):
|
|||
'quality': quality,
|
||||
})
|
||||
|
||||
title = self._html_extract_title(webpage)
|
||||
thumbnail = self._html_search_regex(r'preview_url:\s+\'([^\']+)\'', webpage, 'thumbnail', default=None)
|
||||
duration = self._html_search_regex(r'"icon-clock"></i>\s+<span>((?:\d+:?)+)', webpage, 'duration', default=None)
|
||||
categories, creator, uploader, uploader_url = [None] * 4
|
||||
for col in get_elements_by_class('col', webpage):
|
||||
label = clean_html(get_element_by_class('label', col))
|
||||
if label == 'Categories:':
|
||||
categories = list(map(clean_html, get_elements_by_class('item', col)))
|
||||
elif label == 'Artist:':
|
||||
creator = join_nonempty(*map(clean_html, get_elements_by_class('item', col)), delim=', ')
|
||||
elif label == 'Uploaded By:':
|
||||
uploader = clean_html(get_element_by_class('name', col))
|
||||
uploader_url = extract_attributes(get_element_html_by_class('name', col) or '').get('href')
|
||||
|
||||
return {
|
||||
**traverse_obj(self._search_json_ld(webpage, video_id, default={}), ({
|
||||
'title': 'title',
|
||||
'view_count': 'view_count',
|
||||
'like_count': 'like_count',
|
||||
'duration': 'duration',
|
||||
'timestamp': 'timestamp',
|
||||
'description': 'description',
|
||||
'thumbnail': ('thumbnails', 0, 'url'),
|
||||
})),
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': parse_duration(duration),
|
||||
'title': self._html_extract_title(webpage),
|
||||
'thumbnail': self._html_search_regex(
|
||||
r'preview_url:\s+\'([^\']+)\'', webpage, 'thumbnail', default=None),
|
||||
'duration': parse_duration(self._html_search_regex(
|
||||
r'"icon-clock"></i>\s+<span>((?:\d+:?)+)', webpage, 'duration', default=None)),
|
||||
'view_count': int_or_none(self._html_search_regex(
|
||||
r'"icon-eye"></i>\s+<span>([ \d]+)', webpage, 'views', default='').replace(' ', '')),
|
||||
'like_count': parse_count(get_element_by_class('voters count', webpage)),
|
||||
'comment_count': int_or_none(self._search_regex(
|
||||
r'[^(]+\((\d+)\)', get_element_by_attribute('href', '#tab_comments', webpage), 'comment count', fatal=False)),
|
||||
'age_limit': 18,
|
||||
'creator': creator,
|
||||
'uploader': uploader,
|
||||
'uploader_url': uploader_url,
|
||||
'categories': categories,
|
||||
'tags': list(map(unescapeHTML, re.findall(
|
||||
r'<a class="tag_item"[^>]+\bhref="https://rule34video\.com/tags/\d+/"[^>]*>(?P<tag>[^>]*)</a>', webpage))),
|
||||
}
|
||||
|
|
|
@ -383,7 +383,7 @@ def entries(self, url, playlist_id):
|
|||
if isinstance(e.cause, HTTPError) and e.cause.status == 404:
|
||||
break
|
||||
raise
|
||||
for video_url in re.findall(r'class=video-item--a\s?href=([^>]+\.html)', webpage):
|
||||
for video_url in re.findall(r'class="[^>"]*videostream__link[^>]+href="([^"]+\.html)"', webpage):
|
||||
yield self.url_result('https://rumble.com' + video_url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
|
218
yt_dlp/extractor/sejmpl.py
Normal file
|
@ -0,0 +1,218 @@
|
|||
import datetime
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .redge import RedCDNLivxIE
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
strip_or_none,
|
||||
update_url_query,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
def is_dst(date):
    last_march = datetime.datetime(date.year, 3, 31)
    last_october = datetime.datetime(date.year, 10, 31)
    last_sunday_march = last_march - datetime.timedelta(days=last_march.isoweekday() % 7)
    last_sunday_october = last_october - datetime.timedelta(days=last_october.isoweekday() % 7)
    return last_sunday_march.replace(hour=2) <= date <= last_sunday_october.replace(hour=3)


def rfc3339_to_atende(date):
    date = datetime.datetime.fromisoformat(date)
    date = date + datetime.timedelta(hours=1 if is_dst(date) else 0)
    return int((date.timestamp() - 978307200) * 1000)

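The magic constant in rfc3339_to_atende() is the Unix timestamp of the Atende/RedCDN epoch, so the startTime/stopTime values fed to the .livx URLs appear to be milliseconds elapsed since 2001-01-01 00:00:00 UTC, shifted by one hour while DST is in effect. A minimal sanity check (illustrative only, not part of the commit):

import datetime

# 978307200 seconds after the Unix epoch is 2001-01-01T00:00:00Z
epoch = datetime.datetime(2001, 1, 1, tzinfo=datetime.timezone.utc)
assert epoch.timestamp() == 978307200
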
class SejmIE(InfoExtractor):
|
||||
_VALID_URL = (
|
||||
r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp(?:\?[^#]*)?#(?P<id>[\dA-F]+)',
|
||||
r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp\?(?:[^#]+&)?unid=(?P<id>[\dA-F]+)',
|
||||
r'https?://sejm-embed\.redcdn\.pl/[Ss]ejm(?P<term>\d+)\.nsf/VideoFrame\.xsp/(?P<id>[\dA-F]+)',
|
||||
)
|
||||
IE_NAME = 'sejm'
|
||||
|
||||
_TESTS = [{
|
||||
# multiple cameras, Polish SL interpreter
|
||||
'url': 'https://www.sejm.gov.pl/Sejm10.nsf/transmisje_arch.xsp#6181EF1AD9CEEBB5C1258A6D006452B5',
|
||||
'info_dict': {
|
||||
'id': '6181EF1AD9CEEBB5C1258A6D006452B5',
|
||||
'title': '1. posiedzenie Sejmu X kadencji',
|
||||
'duration': 20145,
|
||||
'live_status': 'was_live',
|
||||
'location': 'Sala Posiedzeń',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'ENC01-722340000000-722360145000',
|
||||
'ext': 'mp4',
|
||||
'duration': 20145,
|
||||
'title': '1. posiedzenie Sejmu X kadencji - ENC01',
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'ENC30-722340000000-722360145000',
|
||||
'ext': 'mp4',
|
||||
'duration': 20145,
|
||||
'title': '1. posiedzenie Sejmu X kadencji - ENC30',
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'ENC31-722340000000-722360145000',
|
||||
'ext': 'mp4',
|
||||
'duration': 20145,
|
||||
'title': '1. posiedzenie Sejmu X kadencji - ENC31',
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'ENC32-722340000000-722360145000',
|
||||
'ext': 'mp4',
|
||||
'duration': 20145,
|
||||
'title': '1. posiedzenie Sejmu X kadencji - ENC32',
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
}, {
|
||||
# sign lang interpreter
|
||||
'info_dict': {
|
||||
'id': 'Migacz-ENC01-1-722340000000-722360145000',
|
||||
'ext': 'mp4',
|
||||
'duration': 20145,
|
||||
'title': '1. posiedzenie Sejmu X kadencji - Migacz-ENC01',
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
'url': 'https://www.sejm.gov.pl/Sejm8.nsf/transmisje.xsp?unid=9377A9D65518E9A5C125808E002E9FF2',
|
||||
'info_dict': {
|
||||
'id': '9377A9D65518E9A5C125808E002E9FF2',
|
||||
'title': 'Debata "Lepsza Polska: obywatelska"',
|
||||
'description': 'KP .Nowoczesna',
|
||||
'duration': 8770,
|
||||
'live_status': 'was_live',
|
||||
'location': 'sala kolumnowa im. Kazimierza Pużaka (bud. C-D)',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'ENC08-1-503831270000-503840040000',
|
||||
'ext': 'mp4',
|
||||
'duration': 8770,
|
||||
'title': 'Debata "Lepsza Polska: obywatelska" - ENC08',
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
# 7th term is very special, since it does not use redcdn livx
|
||||
'url': 'https://www.sejm.gov.pl/sejm7.nsf/transmisje_arch.xsp?rok=2015&month=11#A6E6D475ECCC6FE5C1257EF90034817F',
|
||||
'info_dict': {
|
||||
'id': 'A6E6D475ECCC6FE5C1257EF90034817F',
|
||||
'title': 'Konferencja prasowa - Stanowisko SLD ws. składu nowego rządu',
|
||||
'description': 'SLD - Biuro Prasowe Klubu',
|
||||
'duration': 514,
|
||||
'location': 'sala 101/bud. C',
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'A6E6D475ECCC6FE5C1257EF90034817F',
|
||||
'ext': 'mp4',
|
||||
'title': 'Konferencja prasowa - Stanowisko SLD ws. składu nowego rządu',
|
||||
'duration': 514,
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
'url': 'https://sejm-embed.redcdn.pl/Sejm10.nsf/VideoFrame.xsp/FED58EABB97FBD53C1258A7400386492',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
term, video_id = self._match_valid_url(url).group('term', 'id')
|
||||
frame = self._download_webpage(
|
||||
f'https://sejm-embed.redcdn.pl/Sejm{term}.nsf/VideoFrame.xsp/{video_id}',
|
||||
video_id)
|
||||
# despite saying "transmisje_arch", this endpoint works for live streams too!
|
||||
data = self._download_json(
|
||||
f'https://www.sejm.gov.pl/Sejm{term}.nsf/transmisje_arch.xsp/json/{video_id}',
|
||||
video_id)
|
||||
params = data['params']
|
||||
|
||||
title = strip_or_none(data.get('title'))
|
||||
|
||||
if data.get('status') == 'VIDEO_ENDED':
|
||||
live_status = 'was_live'
|
||||
elif data.get('status') == 'VIDEO_PLAYING':
|
||||
live_status = 'is_live'
|
||||
else:
|
||||
live_status = None
|
||||
self.report_warning(f'unknown status: {data.get("status")}')
|
||||
|
||||
start_time = rfc3339_to_atende(params['start'])
|
||||
# current streams have a stop time of *expected* end of session, but actual times
|
||||
# can change during the transmission. setting a stop_time would artificially
|
||||
# end the stream at that time, while the session actually keeps going.
|
||||
if live_status == 'was_live':
|
||||
stop_time = rfc3339_to_atende(params['stop'])
|
||||
duration = (stop_time - start_time) // 1000
|
||||
else:
|
||||
stop_time, duration = None, None
|
||||
|
||||
entries = []
|
||||
|
||||
def add_entry(file, legacy_file=False):
|
||||
if not file:
|
||||
return
|
||||
file = self._proto_relative_url(file)
|
||||
if not legacy_file:
|
||||
file = update_url_query(file, {'startTime': start_time})
|
||||
if stop_time is not None:
|
||||
file = update_url_query(file, {'stopTime': stop_time})
|
||||
stream_id = self._search_regex(r'/o2/sejm/([^/]+)/[^./]+\.livx', file, 'stream id')
|
||||
common_info = {
|
||||
'url': file,
|
||||
'duration': duration,
|
||||
}
|
||||
if legacy_file:
|
||||
entries.append({
|
||||
**common_info,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
})
|
||||
else:
|
||||
entries.append({
|
||||
**common_info,
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': RedCDNLivxIE.ie_key(),
|
||||
'id': stream_id,
|
||||
'title': join_nonempty(title, stream_id, delim=' - '),
|
||||
})
|
||||
|
||||
cameras = self._search_json(
|
||||
r'var\s+cameras\s*=', frame, 'camera list', video_id,
|
||||
contains_pattern=r'\[(?s:.+)\]', transform_source=js_to_json,
|
||||
fatal=False) or []
|
||||
for camera_file in traverse_obj(cameras, (..., 'file', {dict})):
|
||||
if camera_file.get('flv'):
|
||||
add_entry(camera_file['flv'])
|
||||
elif camera_file.get('mp4'):
|
||||
# this is only a thing in 7th term. no streams before, and starting 8th it's redcdn livx
|
||||
add_entry(camera_file['mp4'], legacy_file=True)
|
||||
else:
|
||||
self.report_warning('Unknown camera stream type found')
|
||||
|
||||
if params.get('mig'):
|
||||
add_entry(self._search_regex(r"var sliUrl\s*=\s*'([^']+)'", frame, 'sign language interpreter url', fatal=False))
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': entries,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': clean_html(data.get('desc')) or None,
|
||||
'duration': duration,
|
||||
'live_status': live_status,
|
||||
'location': strip_or_none(data.get('location')),
|
||||
}
|
101
yt_dlp/extractor/trtworld.py
Normal file
|
@ -0,0 +1,101 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, determine_ext, parse_iso8601, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class TrtWorldIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.trtworld\.com/video/[\w-]+/[\w-]+-(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.trtworld.com/video/news/turkiye-switches-to-sustainable-tourism-16067690',
|
||||
'info_dict': {
|
||||
'id': '16067690',
|
||||
'ext': 'mp4',
|
||||
'title': 'Türkiye switches to sustainable tourism',
|
||||
'release_timestamp': 1701529569,
|
||||
'release_date': '20231202',
|
||||
'thumbnail': 'https://cdn-i.pr.trt.com.tr/trtworld/17647563_0-0-1920-1080.jpeg',
|
||||
'description': 'md5:0a975c04257fb529c8f99c7b76a2cf12',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.trtworld.com/video/one-offs/frames-from-anatolia-recreating-a-james-bond-scene-in-istanbuls-grand-bazaar-14541780',
|
||||
'info_dict': {
|
||||
'id': '14541780',
|
||||
'ext': 'mp4',
|
||||
'title': 'Frames From Anatolia: Recreating a ‘James Bond’ Scene in Istanbul’s Grand Bazaar',
|
||||
'release_timestamp': 1692440844,
|
||||
'release_date': '20230819',
|
||||
'thumbnail': 'https://cdn-i.pr.trt.com.tr/trtworld/16939810_0-0-1920-1080.jpeg',
|
||||
'description': 'md5:4050e21570cc3c40b6c9badae800a94f',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.trtworld.com/video/the-newsmakers/can-sudan-find-peace-amidst-failed-transition-to-democracy-12904760',
|
||||
'info_dict': {
|
||||
'id': '12904760',
|
||||
'ext': 'mp4',
|
||||
'title': 'Can Sudan find peace amidst failed transition to democracy?',
|
||||
'release_timestamp': 1681972747,
|
||||
'release_date': '20230420',
|
||||
'thumbnail': 'http://cdni0.trtworld.com/w768/q70/154214_NMYOUTUBETEMPLATE1_1681833018736.jpg'
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.trtworld.com/video/africa-matters/locals-learning-to-cope-with-rising-tides-of-kenyas-great-lakes-16059545',
|
||||
'info_dict': {
|
||||
'id': 'zEns2dWl00w',
|
||||
'ext': 'mp4',
|
||||
'title': "Locals learning to cope with rising tides of Kenya's Great Lakes",
|
||||
'thumbnail': 'https://i.ytimg.com/vi/zEns2dWl00w/maxresdefault.jpg',
|
||||
'description': 'md5:3ad9d7c5234d752a4ead4340c79c6b8d',
|
||||
'channel_id': 'UC7fWeaHhqgM4Ry-RMpM2YYw',
|
||||
'channel_url': 'https://www.youtube.com/channel/UC7fWeaHhqgM4Ry-RMpM2YYw',
|
||||
'duration': 210,
|
||||
'view_count': int,
|
||||
'age_limit': 0,
|
||||
'webpage_url': 'https://www.youtube.com/watch?v=zEns2dWl00w',
|
||||
'categories': ['News & Politics'],
|
||||
'channel': 'TRT World',
|
||||
'channel_follower_count': int,
|
||||
'channel_is_verified': True,
|
||||
'uploader': 'TRT World',
|
||||
'uploader_id': '@trtworld',
|
||||
'uploader_url': 'https://www.youtube.com/@trtworld',
|
||||
'upload_date': '20231202',
|
||||
'availability': 'public',
|
||||
'comment_count': int,
|
||||
'playable_in_embed': True,
|
||||
'tags': [],
|
||||
'live_status': 'not_live',
|
||||
'like_count': int,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
nuxtjs_data = self._search_nuxt_data(webpage, display_id)['videoData']['content']['platforms']
|
||||
formats = []
|
||||
for media_url in traverse_obj(nuxtjs_data, (
|
||||
('website', 'ott'), 'metadata', ('hls_url', 'url'), {url_or_none})):
|
||||
# NB: Website sometimes serves mp4 files under `hls_url` key
|
||||
if determine_ext(media_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(media_url, display_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': 'http',
|
||||
'url': media_url,
|
||||
})
|
||||
if not formats:
|
||||
if youtube_id := traverse_obj(nuxtjs_data, ('youtube', 'metadata', 'youtubeId')):
|
||||
return self.url_result(youtube_id, 'Youtube')
|
||||
raise ExtractorError('No video found', expected=True)
|
||||
|
||||
return {
|
||||
'id': display_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(nuxtjs_data, (('website', 'ott'), {
|
||||
'title': ('fields', 'title', 'text', {str}),
|
||||
'description': ('fields', 'description', 'text', {str}),
|
||||
'thumbnail': ('fields', 'thumbnail', 'url', {url_or_none}),
|
||||
'release_timestamp': ('published', 'date', {parse_iso8601}),
|
||||
}), get_all=False),
|
||||
}
|
|
@ -10,6 +10,7 @@
|
|||
parse_duration,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
variadic,
|
||||
)
|
||||
|
@ -83,6 +84,7 @@ class TxxxIE(InfoExtractor):
|
|||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.txxx.tube/contents/videos_sources/16574000/16574965/screenshots/1.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://txxx.tube/videos/16574965/digital-desire-malena-morgan/',
|
||||
|
@ -98,6 +100,7 @@ class TxxxIE(InfoExtractor):
|
|||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.txxx.tube/contents/videos_sources/16574000/16574965/screenshots/1.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://vxxx.com/video-68925/',
|
||||
|
@ -113,6 +116,7 @@ class TxxxIE(InfoExtractor):
|
|||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.vxxx.com/contents/videos_sources/68000/68925/screenshots/1.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://hclips.com/videos/6291073/malena-morgan-masturbates-her-sweet/',
|
||||
|
@ -128,6 +132,7 @@ class TxxxIE(InfoExtractor):
|
|||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://hctn.nv7s.com/contents/videos_sources/6291000/6291073/screenshots/1.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://hdzog.com/videos/67063/gorgeous-malena-morgan-will-seduce-you-at-the-first-glance/',
|
||||
|
@ -143,6 +148,7 @@ class TxxxIE(InfoExtractor):
|
|||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.hdzog.com/contents/videos_sources/67000/67063/screenshots/1.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://hdzog.tube/videos/67063/gorgeous-malena-morgan-will-seduce-you-at-the-first-glance/',
|
||||
|
@ -158,6 +164,7 @@ class TxxxIE(InfoExtractor):
|
|||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.hdzog.com/contents/videos_sources/67000/67063/screenshots/1.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://hotmovs.com/videos/8789287/unbelievable-malena-morgan-performing-in-incredible-masturantion/',
|
||||
|
@ -173,6 +180,7 @@ class TxxxIE(InfoExtractor):
|
|||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.hotmovs.com/contents/videos_sources/8789000/8789287/screenshots/10.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://hotmovs.tube/videos/8789287/unbelievable-malena-morgan-performing-in-incredible-masturantion/',
|
||||
|
@ -188,6 +196,7 @@ class TxxxIE(InfoExtractor):
|
|||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.hotmovs.com/contents/videos_sources/8789000/8789287/screenshots/10.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://inporn.com/video/517897/malena-morgan-solo/',
|
||||
|
@ -203,6 +212,7 @@ class TxxxIE(InfoExtractor):
|
|||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://iptn.m3pd.com/media/tn/sources/517897_1.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://privatehomeclips.com/videos/3630599/malena-morgan-cam-show/',
|
||||
|
@ -218,6 +228,7 @@ class TxxxIE(InfoExtractor):
|
|||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://hctn.nv7s.com/contents/videos_sources/3630000/3630599/screenshots/15.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://tubepornclassic.com/videos/1015455/mimi-rogers-full-body-massage-nude-compilation/',
|
||||
|
@ -233,6 +244,7 @@ class TxxxIE(InfoExtractor):
|
|||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.tubepornclassic.com/contents/videos_sources/1015000/1015455/screenshots/6.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://upornia.com/videos/1498858/twistys-malena-morgan-starring-at-dr-morgan-baller/',
|
||||
|
@ -248,6 +260,7 @@ class TxxxIE(InfoExtractor):
|
|||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.upornia.com/contents/videos_sources/1498000/1498858/screenshots/1.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://upornia.tube/videos/1498858/twistys-malena-morgan-starring-at-dr-morgan-baller/',
|
||||
|
@ -263,6 +276,7 @@ class TxxxIE(InfoExtractor):
|
|||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.upornia.com/contents/videos_sources/1498000/1498858/screenshots/1.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://vjav.com/videos/11761/yui-hatano-in-if-yui-was-my-girlfriend2/',
|
||||
|
@ -278,6 +292,7 @@ class TxxxIE(InfoExtractor):
|
|||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.vjav.com/contents/videos_sources/11000/11761/screenshots/23.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://vjav.tube/videos/11761/yui-hatano-in-if-yui-was-my-girlfriend2/',
|
||||
|
@ -293,6 +308,7 @@ class TxxxIE(InfoExtractor):
|
|||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.vjav.com/contents/videos_sources/11000/11761/screenshots/23.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://voyeurhit.com/videos/332875/charlotte-stokely-elle-alexandra-malena-morgan-lingerie/',
|
||||
|
@ -308,6 +324,7 @@ class TxxxIE(InfoExtractor):
|
|||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.voyeurhit.com/contents/videos_sources/332000/332875/screenshots/1.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://voyeurhit.tube/videos/332875/charlotte-stokely-elle-alexandra-malena-morgan-lingerie/',
|
||||
|
@ -323,6 +340,7 @@ class TxxxIE(InfoExtractor):
|
|||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.voyeurhit.com/contents/videos_sources/332000/332875/screenshots/1.jpg',
|
||||
}
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
|
@ -338,6 +356,7 @@ class TxxxIE(InfoExtractor):
|
|||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://hctn.nv7s.com/contents/videos_sources/5119000/5119660/screenshots/1.jpg',
|
||||
}
|
||||
}]
|
||||
|
||||
|
@ -371,6 +390,7 @@ def _real_extract(self, url):
|
|||
'like_count': int_or_none(traverse_obj(video_info, ('video', 'statistics', 'likes'))),
|
||||
'dislike_count': int_or_none(traverse_obj(video_info, ('video', 'statistics', 'dislikes'))),
|
||||
'age_limit': 18,
|
||||
'thumbnail': traverse_obj(video_info, ('video', 'thumbsrc', {url_or_none})),
|
||||
'formats': get_formats(host, video_file),
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
from ..utils import ExtractorError, base_url, int_or_none, url_basename
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class Vbox7IE(InfoExtractor):
|
||||
|
@ -19,7 +20,7 @@ class Vbox7IE(InfoExtractor):
|
|||
_GEO_COUNTRIES = ['BG']
|
||||
_TESTS = [{
|
||||
'url': 'http://vbox7.com/play:0946fff23c',
|
||||
'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
|
||||
'md5': '50ca1f78345a9c15391af47d8062d074',
|
||||
'info_dict': {
|
||||
'id': '0946fff23c',
|
||||
'ext': 'mp4',
|
||||
|
@ -29,19 +30,25 @@ class Vbox7IE(InfoExtractor):
|
|||
'timestamp': 1470982814,
|
||||
'upload_date': '20160812',
|
||||
'uploader': 'zdraveibulgaria',
|
||||
},
|
||||
'params': {
|
||||
'proxy': '127.0.0.1:8118',
|
||||
'view_count': int,
|
||||
'duration': 2640,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://vbox7.com/play:249bb972c2',
|
||||
'md5': '99f65c0c9ef9b682b97313e052734c3f',
|
||||
'md5': 'da1dd2eb245200cb86e6d09d43232116',
|
||||
'info_dict': {
|
||||
'id': '249bb972c2',
|
||||
'ext': 'mp4',
|
||||
'title': 'Смях! Чудо - чист за секунди - Скрита камера',
|
||||
'uploader': 'svideteliat_ot_varshava',
|
||||
'view_count': int,
|
||||
'timestamp': 1360215023,
|
||||
'thumbnail': 'https://i49.vbox7.com/design/iconci/png/noimg6.png',
|
||||
'description': 'Смях! Чудо - чист за секунди - Скрита камера',
|
||||
'upload_date': '20130207',
|
||||
'duration': 83,
|
||||
},
|
||||
'skip': 'georestricted',
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1',
|
||||
'only_matching': True,
|
||||
|
@ -53,41 +60,38 @@ class Vbox7IE(InfoExtractor):
|
|||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
response = self._download_json(
|
||||
'https://www.vbox7.com/ajax/video/nextvideo.php?vid=%s' % video_id,
|
||||
video_id)
|
||||
data = self._download_json(
|
||||
'https://www.vbox7.com/aj/player/item/options', video_id,
|
||||
query={'vid': video_id})['options']
|
||||
|
||||
if 'error' in response:
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, response['error']), expected=True)
|
||||
src_url = data.get('src')
|
||||
if src_url in (None, '', 'blank'):
|
||||
raise ExtractorError('Video is unavailable', expected=True)
|
||||
|
||||
video = response['options']
|
||||
fmt_base = url_basename(src_url).rsplit('.', 1)[0].rsplit('_', 1)[0]
|
||||
if fmt_base == 'vn':
|
||||
self.raise_geo_restricted()
|
||||
|
||||
title = video['title']
|
||||
video_url = video['src']
|
||||
fmt_base = base_url(src_url) + fmt_base
|
||||
|
||||
if '/na.mp4' in video_url:
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
formats = self._extract_m3u8_formats(
|
||||
f'{fmt_base}.m3u8', video_id, m3u8_id='hls', fatal=False)
|
||||
# TODO: Add MPD formats, when dash range support is added
|
||||
for res in traverse_obj(data, ('resolutions', lambda _, v: v != 0, {int})):
|
||||
formats.append({
|
||||
'url': f'{fmt_base}_{res}.mp4',
|
||||
'format_id': f'http-{res}',
|
||||
'height': res,
|
||||
})
|
||||
|
||||
uploader = video.get('uploader')
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://vbox7.com/play:%s' % video_id, video_id, fatal=None)
|
||||
|
||||
info = {}
|
||||
|
||||
if webpage:
|
||||
info = self._search_json_ld(
|
||||
webpage.replace('"/*@context"', '"@context"'), video_id,
|
||||
fatal=False)
|
||||
|
||||
info.update({
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'uploader': uploader,
|
||||
'thumbnail': self._proto_relative_url(
|
||||
info.get('thumbnail') or self._og_search_thumbnail(webpage),
|
||||
'http:'),
|
||||
})
|
||||
return info
|
||||
'formats': formats,
|
||||
**self._search_json_ld(self._download_webpage(
|
||||
f'https://www.vbox7.com/play:{video_id}', video_id, fatal=False) or '', video_id, fatal=False),
|
||||
**traverse_obj(data, {
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('uploader', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
}),
|
||||
}
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
|
||||
class ViewLiftBaseIE(InfoExtractor):
|
||||
_API_BASE = 'https://prod-api.viewlift.com/'
|
||||
_DOMAINS_REGEX = r'(?:(?:main\.)?snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm|failarmy|ftfnext|lnppass\.legapallacanestro|moviespree|app\.myoutdoortv|neoufitness|pflmma|theidentitytb)\.com|(?:hoichoi|app\.horseandcountry|kronon|marquee|supercrosslive)\.tv'
|
||||
_DOMAINS_REGEX = r'(?:(?:main\.)?snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm|failarmy|ftfnext|lnppass\.legapallacanestro|moviespree|app\.myoutdoortv|neoufitness|pflmma|theidentitytb|chorki)\.com|(?:hoichoi|app\.horseandcountry|kronon|marquee|supercrosslive)\.tv'
|
||||
_SITE_MAP = {
|
||||
'ftfnext': 'lax',
|
||||
'funnyforfree': 'snagfilms',
|
||||
|
@ -27,6 +27,7 @@ class ViewLiftBaseIE(InfoExtractor):
|
|||
'snagxtreme': 'snagfilms',
|
||||
'theidentitytb': 'tampabay',
|
||||
'vayafilm': 'snagfilms',
|
||||
'chorki': 'prothomalo',
|
||||
}
|
||||
_TOKENS = {}
|
||||
|
||||
|
@ -296,6 +297,33 @@ class ViewLiftIE(ViewLiftBaseIE):
|
|||
}, { # Premium movie
|
||||
'url': 'https://www.hoichoi.tv/movies/detective-2020',
|
||||
'only_matching': True
|
||||
}, { # Chorki Premium series
|
||||
'url': 'https://www.chorki.com/bn/series/sinpaat',
|
||||
'playlist_mincount': 7,
|
||||
'info_dict': {
|
||||
'id': 'bn/series/sinpaat',
|
||||
},
|
||||
}, { # Chorki free movie
|
||||
'url': 'https://www.chorki.com/bn/videos/bangla-movie-bikkhov',
|
||||
'info_dict': {
|
||||
'id': '564e755b-f5c7-4515-aee6-8959bee18c93',
|
||||
'title': 'Bikkhov',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20230824',
|
||||
'timestamp': 1692860553,
|
||||
'categories': ['Action Movies', 'Salman Special'],
|
||||
'tags': 'count:14',
|
||||
'thumbnail': 'https://snagfilms-a.akamaihd.net/dd078ff5-b16e-45e4-9723-501b56b9df0a/images/2023/08/24/1692860450729_1920x1080_16x9Images.jpg',
|
||||
'display_id': 'bn/videos/bangla-movie-bikkhov',
|
||||
'description': 'md5:71492b086450625f4374a3eb824f27dc',
|
||||
'duration': 8002,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, { # Chorki Premium movie
|
||||
'url': 'https://www.chorki.com/bn/videos/something-like-an-autobiography',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
|
||||
from ..dependencies import brotli, requests, urllib3
|
||||
from ..utils import bug_reports_message, int_or_none, variadic
|
||||
from ..utils.networking import normalize_url
|
||||
|
||||
if requests is None:
|
||||
raise ImportError('requests module is not installed')
|
||||
|
@ -199,6 +200,10 @@ def rebuild_method(self, prepared_request, response):
|
|||
|
||||
prepared_request.method = new_method
|
||||
|
||||
        # Requests fails to resolve dot segments on absolute redirect locations
        # See: https://github.com/yt-dlp/yt-dlp/issues/9020
        prepared_request.url = normalize_url(prepared_request.url)
|
||||
|
||||
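The one-line fix above relies on normalize_url() collapsing RFC 3986 dot segments in the redirect target. A hedged illustration, assuming the helper strips dot segments as the linked issue describes:

from yt_dlp.utils.networking import normalize_url

# '/a/b/./../../headers' collapses to '/headers'
assert normalize_url('http://127.0.0.1/a/b/./../../headers') == 'http://127.0.0.1/headers'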
def rebuild_auth(self, prepared_request, response):
|
||||
# HACK: undo status code change from rebuild_method, if applicable.
|
||||
# rebuild_auth runs after requests would remove headers/body based on status code
|
||||
|
@ -1,9 +1,8 @@
from __future__ import annotations

import typing
import urllib.error

from ..utils import YoutubeDLError, deprecation_warning
from ..utils import YoutubeDLError

if typing.TYPE_CHECKING:
    from .common import RequestHandler, Response

@ -101,117 +100,4 @@ class ProxyError(TransportError):
    pass


class _CompatHTTPError(urllib.error.HTTPError, HTTPError):
    """
    Provides backwards compatibility with urllib.error.HTTPError.
    Do not use this class directly, use HTTPError instead.
    """

    def __init__(self, http_error: HTTPError):
        super().__init__(
            url=http_error.response.url,
            code=http_error.status,
            msg=http_error.msg,
            hdrs=http_error.response.headers,
            fp=http_error.response
        )
        self._closer.close_called = True  # Disable auto close
        self._http_error = http_error
        HTTPError.__init__(self, http_error.response, redirect_loop=http_error.redirect_loop)

    @property
    def status(self):
        return self._http_error.status

    @status.setter
    def status(self, value):
        return

    @property
    def reason(self):
        return self._http_error.reason

    @reason.setter
    def reason(self, value):
        return

    @property
    def headers(self):
        deprecation_warning('HTTPError.headers is deprecated, use HTTPError.response.headers instead')
        return self._http_error.response.headers

    @headers.setter
    def headers(self, value):
        return

    def info(self):
        deprecation_warning('HTTPError.info() is deprecated, use HTTPError.response.headers instead')
        return self.response.headers

    def getcode(self):
        deprecation_warning('HTTPError.getcode is deprecated, use HTTPError.status instead')
        return self.status

    def geturl(self):
        deprecation_warning('HTTPError.geturl is deprecated, use HTTPError.response.url instead')
        return self.response.url

    @property
    def code(self):
        deprecation_warning('HTTPError.code is deprecated, use HTTPError.status instead')
        return self.status

    @code.setter
    def code(self, value):
        return

    @property
    def url(self):
        deprecation_warning('HTTPError.url is deprecated, use HTTPError.response.url instead')
        return self.response.url

    @url.setter
    def url(self, value):
        return

    @property
    def hdrs(self):
        deprecation_warning('HTTPError.hdrs is deprecated, use HTTPError.response.headers instead')
        return self.response.headers

    @hdrs.setter
    def hdrs(self, value):
        return

    @property
    def filename(self):
        deprecation_warning('HTTPError.filename is deprecated, use HTTPError.response.url instead')
        return self.response.url

    @filename.setter
    def filename(self, value):
        return

    def __getattr__(self, name):
        # File operations are passed through the response.
        # Warn for some commonly used ones
        passthrough_warnings = {
            'read': 'response.read()',
            # technically possible due to passthrough, but we should discourage this
            'get_header': 'response.get_header()',
            'readable': 'response.readable()',
            'closed': 'response.closed',
            'tell': 'response.tell()',
        }
        if name in passthrough_warnings:
            deprecation_warning(f'HTTPError.{name} is deprecated, use HTTPError.{passthrough_warnings[name]} instead')
        return super().__getattr__(name)

    def __str__(self):
        return str(self._http_error)

    def __repr__(self):
        return repr(self._http_error)


network_exceptions = (HTTPError, TransportError)
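With the urllib-compat shim removed above, callers that still used the old `urllib.error.HTTPError` attribute names have to read the native fields instead. A hedged sketch of the mapping, using only the replacements named in the deprecation messages above (the helper itself is illustrative):

```python
from yt_dlp.networking.exceptions import HTTPError


def describe_http_error(err: HTTPError) -> str:
    # Old urllib-style access             ->  native replacement
    # err.code / err.getcode()            ->  err.status
    # err.headers / err.hdrs / err.info() ->  err.response.headers
    # err.geturl() / err.url              ->  err.response.url
    return f'HTTP Error {err.status}: {err.reason} at {err.response.url}'
```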
@ -476,7 +476,8 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
            'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx'],
            'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx'],
            '2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'],
            '2022': ['no-external-downloader-progress', 'playlist-match-filter', 'prefer-legacy-http-handler', 'manifest-filesize-approx'],
            '2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter'],
            '2023': ['prefer-legacy-http-handler', 'manifest-filesize-approx'],
        }
    }, help=(
        'Options that can help keep compatibility with youtube-dl or youtube-dlc '
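The effect of the split is that `--compat-options 2022` now chains through the new `2023` preset, so it still enables `prefer-legacy-http-handler` and `manifest-filesize-approx` on top of its own entries. A minimal sketch of how such chained aliases could be flattened (illustrative only, not the actual option-parser code):

```python
ALIASES = {
    '2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'],
    '2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter'],
    '2023': ['prefer-legacy-http-handler', 'manifest-filesize-approx'],
}


def expand(option):
    """Recursively flatten a compat-options alias into concrete option names (sketch)."""
    result = []
    for item in ALIASES.get(option, [option]):
        result.extend(expand(item) if item in ALIASES else [item])
    return result


print(expand('2022'))
# ['prefer-legacy-http-handler', 'manifest-filesize-approx',
#  'no-external-downloader-progress', 'playlist-match-filter']
```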