mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-25 18:26:49 +00:00
Merge branch 'yt-dlp:master' into pr/live-sections
This commit is contained in:
commit
54ad67d785
14
.github/workflows/build.yml
vendored
14
.github/workflows/build.yml
vendored
|
@ -360,7 +360,7 @@ jobs:
|
||||||
- name: Install Requirements
|
- name: Install Requirements
|
||||||
run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
|
run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
|
||||||
python devscripts/install_deps.py -o --include build
|
python devscripts/install_deps.py -o --include build
|
||||||
python devscripts/install_deps.py --include py2exe --include curl-cffi
|
python devscripts/install_deps.py --include curl-cffi
|
||||||
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl"
|
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl"
|
||||||
|
|
||||||
- name: Prepare
|
- name: Prepare
|
||||||
|
@ -369,12 +369,20 @@ jobs:
|
||||||
python devscripts/make_lazy_extractors.py
|
python devscripts/make_lazy_extractors.py
|
||||||
- name: Build
|
- name: Build
|
||||||
run: |
|
run: |
|
||||||
python -m bundle.py2exe
|
|
||||||
Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe
|
|
||||||
python -m bundle.pyinstaller
|
python -m bundle.pyinstaller
|
||||||
python -m bundle.pyinstaller --onedir
|
python -m bundle.pyinstaller --onedir
|
||||||
|
Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_real.exe
|
||||||
Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip
|
Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip
|
||||||
|
|
||||||
|
- name: Install Requirements (py2exe)
|
||||||
|
run: |
|
||||||
|
python devscripts/install_deps.py --include py2exe
|
||||||
|
- name: Build (py2exe)
|
||||||
|
run: |
|
||||||
|
python -m bundle.py2exe
|
||||||
|
Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe
|
||||||
|
Move-Item ./dist/yt-dlp_real.exe ./dist/yt-dlp.exe
|
||||||
|
|
||||||
- name: Verify --update-to
|
- name: Verify --update-to
|
||||||
if: vars.UPDATE_TO_VERIFICATION
|
if: vars.UPDATE_TO_VERIFICATION
|
||||||
run: |
|
run: |
|
||||||
|
|
14
README.md
14
README.md
|
@ -263,7 +263,7 @@ ### Platform-independent Binary (UNIX)
|
||||||
|
|
||||||
### Standalone Py2Exe Builds (Windows)
|
### Standalone Py2Exe Builds (Windows)
|
||||||
|
|
||||||
While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi` and needs VC++14** on the target computer to run.
|
While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi` and need VC++14** on the target computer to run.
|
||||||
|
|
||||||
If you wish to build it anyway, install Python (if it is not already installed) and you can run the following commands:
|
If you wish to build it anyway, install Python (if it is not already installed) and you can run the following commands:
|
||||||
|
|
||||||
|
@ -666,7 +666,7 @@ ## Filesystem Options:
|
||||||
The name of the browser to load cookies
|
The name of the browser to load cookies
|
||||||
from. Currently supported browsers are:
|
from. Currently supported browsers are:
|
||||||
brave, chrome, chromium, edge, firefox,
|
brave, chrome, chromium, edge, firefox,
|
||||||
opera, safari, vivaldi. Optionally, the
|
opera, safari, vivaldi, whale. Optionally, the
|
||||||
KEYRING used for decrypting Chromium cookies
|
KEYRING used for decrypting Chromium cookies
|
||||||
on Linux, the name/path of the PROFILE to
|
on Linux, the name/path of the PROFILE to
|
||||||
load cookies from, and the CONTAINER name
|
load cookies from, and the CONTAINER name
|
||||||
|
@ -1760,7 +1760,7 @@ # EXTRACTOR ARGUMENTS
|
||||||
#### youtube
|
#### youtube
|
||||||
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
|
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
|
||||||
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
|
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
|
||||||
* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
|
* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. The `android` clients will always be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients.
|
||||||
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
|
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
|
||||||
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
|
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
|
||||||
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
|
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
|
||||||
|
@ -1813,8 +1813,9 @@ #### tiktok
|
||||||
* `app_name`: Default app name to use with mobile API calls, e.g. `trill`
|
* `app_name`: Default app name to use with mobile API calls, e.g. `trill`
|
||||||
* `app_version`: Default app version to use with mobile API calls - should be set along with `manifest_app_version`, e.g. `34.1.2`
|
* `app_version`: Default app version to use with mobile API calls - should be set along with `manifest_app_version`, e.g. `34.1.2`
|
||||||
* `manifest_app_version`: Default numeric app version to use with mobile API calls, e.g. `2023401020`
|
* `manifest_app_version`: Default numeric app version to use with mobile API calls, e.g. `2023401020`
|
||||||
* `aid`: Default app ID to use with API calls, e.g. `1180`
|
* `aid`: Default app ID to use with mobile API calls, e.g. `1180`
|
||||||
* `app_info`: One or more app info strings in the format of `<iid>/[app_name]/[app_version]/[manifest_app_version]/[aid]`, where `iid` is the unique app install ID. `iid` is the only required value; all other values and their `/` separators can be omitted, e.g. `tiktok:app_info=1234567890123456789` or `tiktok:app_info=123,456/trill///1180,789//34.0.1/340001`
|
* `app_info`: Enable mobile API extraction with one or more app info strings in the format of `<iid>/[app_name]/[app_version]/[manifest_app_version]/[aid]`, where `iid` is the unique app install ID. `iid` is the only required value; all other values and their `/` separators can be omitted, e.g. `tiktok:app_info=1234567890123456789` or `tiktok:app_info=123,456/trill///1180,789//34.0.1/340001`
|
||||||
|
* `device_id`: Enable mobile API extraction with a genuine device ID to be used with mobile API calls. Default is a random 19-digit string
|
||||||
|
|
||||||
#### rokfinchannel
|
#### rokfinchannel
|
||||||
* `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`
|
* `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`
|
||||||
|
@ -1840,6 +1841,9 @@ #### jiosaavn
|
||||||
#### afreecatvlive
|
#### afreecatvlive
|
||||||
* `cdn`: One or more CDN IDs to use with the API call for stream URLs, e.g. `gcp_cdn`, `gs_cdn_pc_app`, `gs_cdn_mobile_web`, `gs_cdn_pc_web`
|
* `cdn`: One or more CDN IDs to use with the API call for stream URLs, e.g. `gcp_cdn`, `gs_cdn_pc_app`, `gs_cdn_mobile_web`, `gs_cdn_pc_web`
|
||||||
|
|
||||||
|
#### soundcloud
|
||||||
|
* `formats`: Formats to request from the API. Requested values should be in the format of `{protocol}_{extension}` (omitting the bitrate), e.g. `hls_opus,http_aac`. The `*` character functions as a wildcard, e.g. `*_mp3`, and can be passed by itself to request all formats. Known protocols include `http`, `hls` and `hls-aes`; known extensions include `aac`, `opus` and `mp3`. Original `download` formats are always extracted. Default is `http_aac,hls_aac,http_opus,hls_opus,http_mp3,hls_mp3`
|
||||||
|
|
||||||
**Note**: These options may be changed/removed in the future without concern for backward compatibility
|
**Note**: These options may be changed/removed in the future without concern for backward compatibility
|
||||||
|
|
||||||
<!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
|
<!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
|
||||||
|
|
|
@ -62,6 +62,7 @@ build = [
|
||||||
"build",
|
"build",
|
||||||
"hatchling",
|
"hatchling",
|
||||||
"pip",
|
"pip",
|
||||||
|
"setuptools>=66.1.0,<70",
|
||||||
"wheel",
|
"wheel",
|
||||||
]
|
]
|
||||||
dev = [
|
dev = [
|
||||||
|
@ -73,7 +74,10 @@ pyinstaller = [
|
||||||
"pyinstaller>=6.3; sys_platform!='darwin'",
|
"pyinstaller>=6.3; sys_platform!='darwin'",
|
||||||
"pyinstaller==5.13.2; sys_platform=='darwin'", # needed for curl_cffi
|
"pyinstaller==5.13.2; sys_platform=='darwin'", # needed for curl_cffi
|
||||||
]
|
]
|
||||||
py2exe = ["py2exe>=0.12"]
|
py2exe = [
|
||||||
|
"py2exe>=0.12",
|
||||||
|
"requests==2.31.*",
|
||||||
|
]
|
||||||
|
|
||||||
[project.urls]
|
[project.urls]
|
||||||
Documentation = "https://github.com/yt-dlp/yt-dlp#readme"
|
Documentation = "https://github.com/yt-dlp/yt-dlp#readme"
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
import functools
|
|
||||||
import inspect
|
import inspect
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
@ -10,7 +9,9 @@
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def handler(request):
|
def handler(request):
|
||||||
RH_KEY = request.param
|
RH_KEY = getattr(request, 'param', None)
|
||||||
|
if not RH_KEY:
|
||||||
|
return
|
||||||
if inspect.isclass(RH_KEY) and issubclass(RH_KEY, RequestHandler):
|
if inspect.isclass(RH_KEY) and issubclass(RH_KEY, RequestHandler):
|
||||||
handler = RH_KEY
|
handler = RH_KEY
|
||||||
elif RH_KEY in _REQUEST_HANDLERS:
|
elif RH_KEY in _REQUEST_HANDLERS:
|
||||||
|
@ -18,9 +19,46 @@ def handler(request):
|
||||||
else:
|
else:
|
||||||
pytest.skip(f'{RH_KEY} request handler is not available')
|
pytest.skip(f'{RH_KEY} request handler is not available')
|
||||||
|
|
||||||
return functools.partial(handler, logger=FakeLogger)
|
class HandlerWrapper(handler):
|
||||||
|
RH_KEY = handler.RH_KEY
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(logger=FakeLogger, *args, **kwargs)
|
||||||
|
|
||||||
|
return HandlerWrapper
|
||||||
|
|
||||||
|
|
||||||
def validate_and_send(rh, req):
|
@pytest.fixture(autouse=True)
|
||||||
rh.validate(req)
|
def skip_handler(request, handler):
|
||||||
return rh.send(req)
|
"""usage: pytest.mark.skip_handler('my_handler', 'reason')"""
|
||||||
|
for marker in request.node.iter_markers('skip_handler'):
|
||||||
|
if marker.args[0] == handler.RH_KEY:
|
||||||
|
pytest.skip(marker.args[1] if len(marker.args) > 1 else '')
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
def skip_handler_if(request, handler):
    """usage: pytest.mark.skip_handler_if('my_handler', lambda request: True, 'reason')

    Skips the current test when it is running against the handler named by the
    marker's first argument and the marker's condition callable returns true.
    The optional third marker argument is used as the skip reason.
    """
    for marker in request.node.iter_markers('skip_handler_if'):
        # The `handler` fixture yields None for tests that are not
        # parametrized with a handler, so guard before reading RH_KEY.
        if handler and marker.args[0] == handler.RH_KEY and marker.args[1](request):
            pytest.skip(marker.args[2] if len(marker.args) > 2 else '')
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
def skip_handlers_if(request, handler):
    """usage: pytest.mark.skip_handlers_if(lambda request, handler: True, 'reason')

    Skips the current test for whichever handler it runs against when the
    marker's condition callable returns true for ``(request, handler)``.
    """
    if not handler:
        # No handler for this test: no marker condition is ever evaluated.
        return
    for marker in request.node.iter_markers('skip_handlers_if'):
        condition = marker.args[0]
        if condition(request, handler):
            reason = marker.args[1] if len(marker.args) > 1 else ''
            pytest.skip(reason)
|
||||||
|
|
||||||
|
|
||||||
|
def pytest_configure(config):
    """Register the custom handler-skipping markers with pytest.

    The text before the colon is the signature shown by ``pytest --markers``;
    it mirrors the usage documented on the corresponding fixtures.
    """
    marker_help = (
        'skip_handler(handler, reason): skip test for the given handler',
        'skip_handler_if(handler, condition, reason): skip test for the given handler if condition is true',
        'skip_handlers_if(condition, reason): skip test for handlers when the condition is true',
    )
    for line in marker_help:
        config.addinivalue_line('markers', line)
|
||||||
|
|
|
@ -338,3 +338,8 @@ def http_server_port(httpd):
|
||||||
def verify_address_availability(address):
|
def verify_address_availability(address):
|
||||||
if find_available_port(address) is None:
|
if find_available_port(address) is None:
|
||||||
pytest.skip(f'Unable to bind to source address {address} (address may not exist)')
|
pytest.skip(f'Unable to bind to source address {address} (address may not exist)')
|
||||||
|
|
||||||
|
|
||||||
|
def validate_and_send(rh, req):
    """Validate *req* with request handler *rh*, then send it and return the result."""
    rh.validate(req)
    response = rh.send(req)
    return response
|
||||||
|
|
379
test/test_http_proxy.py
Normal file
379
test/test_http_proxy.py
Normal file
|
@ -0,0 +1,379 @@
|
||||||
|
import abc
|
||||||
|
import base64
|
||||||
|
import contextlib
|
||||||
|
import functools
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import random
|
||||||
|
import ssl
|
||||||
|
import threading
|
||||||
|
from http.server import BaseHTTPRequestHandler
|
||||||
|
from socketserver import ThreadingTCPServer
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from test.helper import http_server_port, verify_address_availability
|
||||||
|
from test.test_networking import TEST_DIR
|
||||||
|
from test.test_socks import IPv6ThreadingTCPServer
|
||||||
|
from yt_dlp.dependencies import urllib3
|
||||||
|
from yt_dlp.networking import Request
|
||||||
|
from yt_dlp.networking.exceptions import HTTPError, ProxyError, SSLError
|
||||||
|
|
||||||
|
|
||||||
|
class HTTPProxyAuthMixin:
    """Mixin implementing HTTP Basic proxy authentication for test proxy handlers.

    The host class must provide ``headers``, ``send_response``, ``send_header``
    and ``end_headers``; these are the only members the mixin uses.
    """

    def proxy_auth_error(self):
        """Send a 407 response with a Basic challenge and return False."""
        self.send_response(407)
        self.send_header('Proxy-Authenticate', 'Basic realm="test http proxy"')
        self.end_headers()
        return False

    def do_proxy_auth(self, username, password):
        """Check the request's ``Proxy-Authorization`` header.

        Returns True when no credentials are required (both *username* and
        *password* are None) or when the supplied Basic credentials match.
        Otherwise a 407 response is sent and False is returned.
        """
        if username is None and password is None:
            return True

        proxy_auth_header = self.headers.get('Proxy-Authorization', None)
        if proxy_auth_header is None or not proxy_auth_header.startswith('Basic '):
            return self.proxy_auth_error()

        encoded_credentials = proxy_auth_header[len('Basic '):]
        try:
            auth_username, auth_password = base64.b64decode(encoded_credentials).decode().split(':', 1)
        except ValueError:
            # ValueError covers invalid base64 (binascii.Error), undecodable
            # bytes (UnicodeDecodeError) and a missing ':' separator.
            return self.proxy_auth_error()

        if auth_username != (username or '') or auth_password != (password or ''):
            return self.proxy_auth_error()
        return True
|
||||||
|
|
||||||
|
|
||||||
|
class HTTPProxyHandler(BaseHTTPRequestHandler, HTTPProxyAuthMixin):
    """Test proxy handler that answers GET requests itself.

    For paths ending in ``/proxy_info`` it replies with a JSON description of
    the request as the proxy saw it (or with the pre-supplied *proxy_info*);
    any other path gets a 404.
    """

    def __init__(self, *args, proxy_info=None, username=None, password=None, request_handler=None, **kwargs):
        """Store the proxy settings, then let the base class handle the request.

        *request_handler* is accepted but not used by this class.
        """
        # Attributes must exist before the base initializer runs, because
        # socketserver.BaseRequestHandler processes the request from __init__.
        self.username = username
        self.password = password
        self.proxy_info = proxy_info
        super().__init__(*args, **kwargs)

    def do_GET(self):
        """Authenticate, answer the request and close the connection."""
        if not self.do_proxy_auth(self.username, self.password):
            self.server.close_request(self.request)
            return
        if self.path.endswith('/proxy_info'):
            # Encode first so Content-Length is a byte count, not a character count.
            body = json.dumps(self.proxy_info or {
                'client_address': self.client_address,
                'connect': False,
                'connect_host': None,
                'connect_port': None,
                'headers': dict(self.headers),
                'path': self.path,
                'proxy': ':'.join(str(y) for y in self.connection.getsockname()),
            }).encode()
            self.send_response(200)
            self.send_header('Content-Type', 'application/json; charset=utf-8')
            self.send_header('Content-Length', str(len(body)))
            self.end_headers()
            self.wfile.write(body)
        else:
            self.send_response(404)
            self.end_headers()

        self.server.close_request(self.request)
|
||||||
|
|
||||||
|
|
||||||
|
if urllib3:
    import urllib3.util.ssltransport

    class SSLTransport(urllib3.util.ssltransport.SSLTransport):
        """
        Modified version of urllib3 SSLTransport to support server side SSL

        This allows us to chain multiple TLS connections.
        """

        def __init__(self, socket, ssl_context, server_hostname=None, suppress_ragged_eofs=True, server_side=False):
            # The parent initializer is deliberately not called; the only
            # visible difference here is that `server_side` is forwarded to
            # wrap_bio(). NOTE(review): confirm the remaining attributes still
            # match what the installed urllib3 parent class expects.
            self.socket = socket
            self.suppress_ragged_eofs = suppress_ragged_eofs

            self.incoming = ssl.MemoryBIO()
            self.outgoing = ssl.MemoryBIO()

            self.sslobj = ssl_context.wrap_bio(
                self.incoming, self.outgoing,
                server_hostname=server_hostname, server_side=server_side)
            # Run the TLS handshake through the parent's I/O loop helper.
            self._ssl_io_loop(self.sslobj.do_handshake)

        @property
        def _io_refs(self):
            # Forwarded to the wrapped socket's own counter.
            return self.socket._io_refs

        @_io_refs.setter
        def _io_refs(self, value):
            self.socket._io_refs = value

        def shutdown(self, *args, **kwargs):
            """Delegate shutdown to the wrapped socket."""
            self.socket.shutdown(*args, **kwargs)
else:
    # urllib3 is unavailable, so no transport class can be built.
    SSLTransport = None
|
||||||
|
|
||||||
|
|
||||||
|
class HTTPSProxyHandler(HTTPProxyHandler):
    """``HTTPProxyHandler`` variant that speaks TLS with the client."""

    def __init__(self, request, *args, **kwargs):
        """Wrap *request* in server-side TLS using the test certificate, then handle it."""
        tls_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        tls_context.load_cert_chain(os.path.join(TEST_DIR, 'testcert.pem'), None)
        if isinstance(request, ssl.SSLSocket):
            # Already a TLS socket: layer a second TLS session on top of it.
            tls_request = SSLTransport(request, ssl_context=tls_context, server_side=True)
        else:
            tls_request = tls_context.wrap_socket(request, server_side=True)
        super().__init__(tls_request, *args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
class HTTPConnectProxyHandler(BaseHTTPRequestHandler, HTTPProxyAuthMixin):
    """Test proxy handler for the HTTP ``CONNECT`` method.

    After replying 200 it does not open a tunnel to the target. It hands the
    client socket to *request_handler*, which is constructed with a
    ``proxy_info`` dict describing the CONNECT request.
    """
    protocol_version = 'HTTP/1.1'
    default_request_version = 'HTTP/1.1'

    def __init__(self, *args, username=None, password=None, request_handler=None, **kwargs):
        """Store the credentials and inner handler, then let the base class handle the request."""
        # Attributes must exist before the base initializer runs, because
        # socketserver.BaseRequestHandler processes the request from __init__.
        self.username = username
        self.password = password
        self.request_handler = request_handler
        super().__init__(*args, **kwargs)

    def do_CONNECT(self):
        """Authenticate, accept the CONNECT and run the inner handler on the same socket."""
        if not self.do_proxy_auth(self.username, self.password):
            self.server.close_request(self.request)
            return
        self.send_response(200)
        self.end_headers()
        # The CONNECT target is "host:port". Split on the last colon so that
        # bracketed IPv6 literals such as "[::1]:443" are parsed correctly.
        connect_host, _, connect_port = self.path.rpartition(':')
        proxy_info = {
            'client_address': self.client_address,
            'connect': True,
            'connect_host': connect_host,
            'connect_port': int(connect_port),
            'headers': dict(self.headers),
            'path': self.path,
            'proxy': ':'.join(str(y) for y in self.connection.getsockname()),
        }
        self.request_handler(self.request, self.client_address, self.server, proxy_info=proxy_info)
        self.server.close_request(self.request)
|
||||||
|
|
||||||
|
|
||||||
|
class HTTPSConnectProxyHandler(HTTPConnectProxyHandler):
    """``HTTPConnectProxyHandler`` variant whose client connection is TLS-wrapped."""

    def __init__(self, request, *args, **kwargs):
        """Wrap *request* in server-side TLS using the test certificate, then handle it."""
        tls_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        tls_context.load_cert_chain(os.path.join(TEST_DIR, 'testcert.pem'), None)
        tls_request = tls_context.wrap_socket(request, server_side=True)
        # Keep a reference to the TLS-wrapped socket so do_CONNECT can close it.
        self._original_request = tls_request
        super().__init__(tls_request, *args, **kwargs)

    def do_CONNECT(self):
        """Run the parent CONNECT handling, then close the stored socket."""
        super().do_CONNECT()
        self.server.close_request(self._original_request)
|
||||||
|
|
||||||
|
|
||||||
|
@contextlib.contextmanager
def proxy_server(proxy_server_class, request_handler, bind_ip=None, **proxy_server_kwargs):
    """Run a threaded test proxy server for the duration of the ``with`` block.

    @param proxy_server_class    Handler class instantiated for each connection
    @param request_handler       Passed to the handler class as ``request_handler``
    @param bind_ip               Address to bind to (default ``127.0.0.1``); an address
                                 without a ``.`` is served by ``IPv6ThreadingTCPServer``
    @param proxy_server_kwargs   Extra keyword arguments for the handler class

    Yields the ``host:port`` the server listens on; the host is wrapped in
    brackets when the address does not contain a ``.``.
    """
    server = server_thread = None
    try:
        bind_address = bind_ip or '127.0.0.1'
        is_ipv4 = '.' in bind_address
        server_type = ThreadingTCPServer if is_ipv4 else IPv6ThreadingTCPServer
        server = server_type(
            (bind_address, 0), functools.partial(proxy_server_class, request_handler=request_handler, **proxy_server_kwargs))
        server_port = http_server_port(server)
        server_thread = threading.Thread(target=server.serve_forever)
        server_thread.daemon = True
        server_thread.start()
        if is_ipv4:
            yield f'{bind_address}:{server_port}'
        else:
            yield f'[{bind_address}]:{server_port}'
    finally:
        # Setup may have failed part-way. Only tear down what exists, so the
        # original exception is not masked by an AttributeError on None.
        # shutdown() waits for serve_forever() to exit, so it must only be
        # called when the serving thread is actually running.
        thread_running = server_thread is not None and server_thread.is_alive()
        if thread_running:
            server.shutdown()
        if server is not None:
            server.server_close()
        if thread_running:
            server_thread.join(2.0)
|
||||||
|
|
||||||
|
|
||||||
|
class HTTPProxyTestContext(abc.ABC):
    """Base class tying a request protocol to the handler class used behind the proxy.

    Subclasses set ``REQUEST_HANDLER_CLASS`` and ``REQUEST_PROTO`` and
    implement ``proxy_info_request``.
    """
    REQUEST_HANDLER_CLASS = None
    REQUEST_PROTO = None

    def http_server(self, server_class, *args, **kwargs):
        """Return a ``proxy_server`` context manager for *server_class*.

        ``REQUEST_HANDLER_CLASS`` is supplied as the inner request handler;
        remaining arguments are forwarded unchanged.
        """
        inner_handler = self.REQUEST_HANDLER_CLASS
        return proxy_server(server_class, inner_handler, *args, **kwargs)

    @abc.abstractmethod
    def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs) -> dict:
        """return a dict of proxy_info"""
|
||||||
|
|
||||||
|
|
||||||
|
class HTTPProxyHTTPTestContext(HTTPProxyTestContext):
    # Standard HTTP Proxy for http requests
    REQUEST_HANDLER_CLASS = HTTPProxyHandler
    REQUEST_PROTO = 'http'

    def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs):
        """Request ``/proxy_info`` over http via *handler* and return the decoded JSON.

        The target defaults to ``127.0.0.1:40000``.
        """
        host = target_domain or "127.0.0.1"
        port = target_port or "40000"
        request = Request(f'http://{host}:{port}/proxy_info', **req_kwargs)
        handler.validate(request)
        raw_body = handler.send(request).read()
        return json.loads(raw_body.decode())
|
||||||
|
|
||||||
|
|
||||||
|
class HTTPProxyHTTPSTestContext(HTTPProxyTestContext):
    # HTTP Connect proxy, for https requests
    REQUEST_HANDLER_CLASS = HTTPSProxyHandler
    REQUEST_PROTO = 'https'

    def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs):
        """Request ``/proxy_info`` over https via *handler* and return the decoded JSON.

        The target defaults to ``127.0.0.1:40000``.
        """
        host = target_domain or "127.0.0.1"
        port = target_port or "40000"
        request = Request(f'https://{host}:{port}/proxy_info', **req_kwargs)
        handler.validate(request)
        raw_body = handler.send(request).read()
        return json.loads(raw_body.decode())
|
||||||
|
|
||||||
|
|
||||||
|
# Maps the `ctx` fixture parameter to the test-context class to instantiate.
CTX_MAP = {
    'http': HTTPProxyHTTPTestContext,
    'https': HTTPProxyHTTPSTestContext,
}


@pytest.fixture(scope='module')
def ctx(request):
    """Return a new test context for the protocol named by ``request.param``."""
    context_class = CTX_MAP[request.param]
    return context_class()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
@pytest.mark.parametrize('ctx', ['http'], indirect=True)  # pure http proxy can only support http
class TestHTTPProxy:
    """Requests sent through a proxy that answers GET requests directly (no CONNECT)."""

    def test_http_no_auth(self, handler, ctx):
        """No credentials configured: the request is served and carries no Proxy-Authorization."""
        with ctx.http_server(HTTPProxyHandler) as proxy_addr:
            proxies = {ctx.REQUEST_PROTO: f'http://{proxy_addr}'}
            with handler(proxies=proxies) as rh:
                info = ctx.proxy_info_request(rh)
                assert info['proxy'] == proxy_addr
                assert info['connect'] is False
                assert 'Proxy-Authorization' not in info['headers']

    def test_http_auth(self, handler, ctx):
        """Matching credentials in the proxy URL are sent as Proxy-Authorization."""
        with ctx.http_server(HTTPProxyHandler, username='test', password='test') as proxy_addr:
            proxies = {ctx.REQUEST_PROTO: f'http://test:test@{proxy_addr}'}
            with handler(proxies=proxies) as rh:
                info = ctx.proxy_info_request(rh)
                assert info['proxy'] == proxy_addr
                assert 'Proxy-Authorization' in info['headers']

    def test_http_bad_auth(self, handler, ctx):
        """A wrong password surfaces as an HTTPError with status 407."""
        with ctx.http_server(HTTPProxyHandler, username='test', password='test') as proxy_addr:
            proxies = {ctx.REQUEST_PROTO: f'http://test:bad@{proxy_addr}'}
            with handler(proxies=proxies) as rh:
                with pytest.raises(HTTPError) as exc_info:
                    ctx.proxy_info_request(rh)
                error_response = exc_info.value.response
                assert error_response.status == 407
                error_response.close()

    def test_http_source_address(self, handler, ctx):
        """The proxy sees the configured source address as the client address."""
        with ctx.http_server(HTTPProxyHandler) as proxy_addr:
            source_address = f'127.0.0.{random.randint(5, 255)}'
            # Skips the test when the chosen address cannot be bound.
            verify_address_availability(source_address)
            proxies = {ctx.REQUEST_PROTO: f'http://{proxy_addr}'}
            with handler(proxies=proxies, source_address=source_address) as rh:
                info = ctx.proxy_info_request(rh)
                assert info['proxy'] == proxy_addr
                assert info['client_address'][0] == source_address

    @pytest.mark.skip_handler('Urllib', 'urllib does not support https proxies')
    def test_https(self, handler, ctx):
        """The proxy itself is reached over TLS, with verification disabled."""
        with ctx.http_server(HTTPSProxyHandler) as proxy_addr:
            proxies = {ctx.REQUEST_PROTO: f'https://{proxy_addr}'}
            with handler(verify=False, proxies=proxies) as rh:
                info = ctx.proxy_info_request(rh)
                assert info['proxy'] == proxy_addr
                assert info['connect'] is False
                assert 'Proxy-Authorization' not in info['headers']

    @pytest.mark.skip_handler('Urllib', 'urllib does not support https proxies')
    def test_https_verify_failed(self, handler, ctx):
        """With verification enabled, the request through the TLS proxy must fail."""
        with ctx.http_server(HTTPSProxyHandler) as proxy_addr:
            proxies = {ctx.REQUEST_PROTO: f'https://{proxy_addr}'}
            with handler(verify=True, proxies=proxies) as rh:
                # Accept SSLError as may not be feasible to tell if it is proxy or request error.
                # note: if request proto also does ssl verification, this may also be the error of the request.
                # Until we can support passing custom cacerts to handlers, we cannot properly test this for all cases.
                with pytest.raises((ProxyError, SSLError)):
                    ctx.proxy_info_request(rh)

    def test_http_with_idn(self, handler, ctx):
        """An internationalized target domain reaches the proxy in punycode form."""
        with ctx.http_server(HTTPProxyHandler) as proxy_addr:
            proxies = {ctx.REQUEST_PROTO: f'http://{proxy_addr}'}
            with handler(proxies=proxies) as rh:
                info = ctx.proxy_info_request(rh, target_domain='中文.tw')
                assert info['proxy'] == proxy_addr
                assert info['path'].startswith('http://xn--fiq228c.tw')
                host_without_port = info['headers']['Host'].split(':', 1)[0]
                assert host_without_port == 'xn--fiq228c.tw'
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
'handler,ctx', [
|
||||||
|
('Requests', 'https'),
|
||||||
|
('CurlCFFI', 'https'),
|
||||||
|
], indirect=True)
|
||||||
|
class TestHTTPConnectProxy:
|
||||||
|
def test_http_connect_no_auth(self, handler, ctx):
|
||||||
|
with ctx.http_server(HTTPConnectProxyHandler) as server_address:
|
||||||
|
with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh:
|
||||||
|
proxy_info = ctx.proxy_info_request(rh)
|
||||||
|
assert proxy_info['proxy'] == server_address
|
||||||
|
assert proxy_info['connect'] is True
|
||||||
|
assert 'Proxy-Authorization' not in proxy_info['headers']
|
||||||
|
|
||||||
|
def test_http_connect_auth(self, handler, ctx):
|
||||||
|
with ctx.http_server(HTTPConnectProxyHandler, username='test', password='test') as server_address:
|
||||||
|
with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://test:test@{server_address}'}) as rh:
|
||||||
|
proxy_info = ctx.proxy_info_request(rh)
|
||||||
|
assert proxy_info['proxy'] == server_address
|
||||||
|
assert 'Proxy-Authorization' in proxy_info['headers']
|
||||||
|
|
||||||
|
@pytest.mark.skip_handler(
|
||||||
|
'Requests',
|
||||||
|
'bug in urllib3 causes unclosed socket: https://github.com/urllib3/urllib3/issues/3374'
|
||||||
|
)
|
||||||
|
def test_http_connect_bad_auth(self, handler, ctx):
|
||||||
|
with ctx.http_server(HTTPConnectProxyHandler, username='test', password='test') as server_address:
|
||||||
|
with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://test:bad@{server_address}'}) as rh:
|
||||||
|
with pytest.raises(ProxyError):
|
||||||
|
ctx.proxy_info_request(rh)
|
||||||
|
|
||||||
|
def test_http_connect_source_address(self, handler, ctx):
|
||||||
|
with ctx.http_server(HTTPConnectProxyHandler) as server_address:
|
||||||
|
source_address = f'127.0.0.{random.randint(5, 255)}'
|
||||||
|
verify_address_availability(source_address)
|
||||||
|
with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'},
|
||||||
|
source_address=source_address,
|
||||||
|
verify=False) as rh:
|
||||||
|
proxy_info = ctx.proxy_info_request(rh)
|
||||||
|
assert proxy_info['proxy'] == server_address
|
||||||
|
assert proxy_info['client_address'][0] == source_address
|
||||||
|
|
||||||
|
@pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test')
def test_https_connect_proxy(self, handler, ctx):
    """Send a request through an HTTPS CONNECT proxy without credentials.

    Certificate verification is disabled on the handler. The test asserts
    that the reported proxy is the server address, that ``connect`` is
    ``True`` and that no ``Proxy-Authorization`` entry is in the reported
    headers.
    """
    with ctx.http_server(HTTPSConnectProxyHandler) as proxy_address:
        proxy_url = f'https://{proxy_address}'
        with handler(verify=False, proxies={ctx.REQUEST_PROTO: proxy_url}) as request_handler:
            info = ctx.proxy_info_request(request_handler)
            assert info['proxy'] == proxy_address
            assert info['connect'] is True
            assert 'Proxy-Authorization' not in info['headers']
|
||||||
|
|
||||||
|
@pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test')
def test_https_connect_verify_failed(self, handler, ctx):
    """Expect a failure when certificate verification is on for an HTTPS proxy.

    Either ``ProxyError`` or ``SSLError`` is accepted as the outcome.
    """
    with ctx.http_server(HTTPSConnectProxyHandler) as proxy_address:
        proxies = {ctx.REQUEST_PROTO: f'https://{proxy_address}'}
        # SSLError is accepted because it may not be feasible to tell whether
        # the failure came from the proxy or from the request itself.
        # Note: if the request protocol also performs SSL verification, the
        # error may originate from the request rather than the proxy.
        # Until custom cacerts can be passed to handlers, this cannot be
        # tested properly for all cases.
        with handler(verify=True, proxies=proxies) as request_handler, pytest.raises((ProxyError, SSLError)):
            ctx.proxy_info_request(request_handler)
|
||||||
|
|
||||||
|
@pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test')
def test_https_connect_proxy_auth(self, handler, ctx):
    """Send a request through an HTTPS CONNECT proxy using matching credentials.

    The proxy server is started with ``test``/``test`` and the same pair is
    embedded in the ``https://`` proxy URL. The test asserts that the
    reported proxy is the server address and that a ``Proxy-Authorization``
    entry is present in the reported headers.
    """
    credentials = {'username': 'test', 'password': 'test'}
    with ctx.http_server(HTTPSConnectProxyHandler, **credentials) as proxy_address:
        proxy_url = f'https://test:test@{proxy_address}'
        with handler(verify=False, proxies={ctx.REQUEST_PROTO: proxy_url}) as request_handler:
            info = ctx.proxy_info_request(request_handler)
            assert info['proxy'] == proxy_address
            assert 'Proxy-Authorization' in info['headers']
|
|
@ -6,6 +6,8 @@
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from yt_dlp.networking.common import Features, DEFAULT_TIMEOUT
|
||||||
|
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
import gzip
|
import gzip
|
||||||
|
@ -27,8 +29,12 @@
|
||||||
from email.message import Message
|
from email.message import Message
|
||||||
from http.cookiejar import CookieJar
|
from http.cookiejar import CookieJar
|
||||||
|
|
||||||
from test.conftest import validate_and_send
|
from test.helper import (
|
||||||
from test.helper import FakeYDL, http_server_port, verify_address_availability
|
FakeYDL,
|
||||||
|
http_server_port,
|
||||||
|
validate_and_send,
|
||||||
|
verify_address_availability,
|
||||||
|
)
|
||||||
from yt_dlp.cookies import YoutubeDLCookieJar
|
from yt_dlp.cookies import YoutubeDLCookieJar
|
||||||
from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3
|
from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3
|
||||||
from yt_dlp.networking import (
|
from yt_dlp.networking import (
|
||||||
|
@ -62,21 +68,6 @@
|
||||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
|
||||||
|
|
||||||
def _build_proxy_handler(name):
|
|
||||||
class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
|
|
||||||
proxy_name = name
|
|
||||||
|
|
||||||
def log_message(self, format, *args):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def do_GET(self):
|
|
||||||
self.send_response(200)
|
|
||||||
self.send_header('Content-Type', 'text/plain; charset=utf-8')
|
|
||||||
self.end_headers()
|
|
||||||
self.wfile.write(f'{self.proxy_name}: {self.path}'.encode())
|
|
||||||
return HTTPTestRequestHandler
|
|
||||||
|
|
||||||
|
|
||||||
class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
|
class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
|
||||||
protocol_version = 'HTTP/1.1'
|
protocol_version = 'HTTP/1.1'
|
||||||
default_request_version = 'HTTP/1.1'
|
default_request_version = 'HTTP/1.1'
|
||||||
|
@ -317,8 +308,9 @@ def setup_class(cls):
|
||||||
cls.https_server_thread.start()
|
cls.https_server_thread.start()
|
||||||
|
|
||||||
|
|
||||||
class TestHTTPRequestHandler(TestRequestHandlerBase):
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||||
|
class TestHTTPRequestHandler(TestRequestHandlerBase):
|
||||||
|
|
||||||
def test_verify_cert(self, handler):
|
def test_verify_cert(self, handler):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
with pytest.raises(CertificateVerifyError):
|
with pytest.raises(CertificateVerifyError):
|
||||||
|
@ -329,7 +321,6 @@ def test_verify_cert(self, handler):
|
||||||
assert r.status == 200
|
assert r.status == 200
|
||||||
r.close()
|
r.close()
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
def test_ssl_error(self, handler):
|
def test_ssl_error(self, handler):
|
||||||
# HTTPS server with too old TLS version
|
# HTTPS server with too old TLS version
|
||||||
# XXX: is there a better way to test this than to create a new server?
|
# XXX: is there a better way to test this than to create a new server?
|
||||||
|
@ -347,7 +338,6 @@ def test_ssl_error(self, handler):
|
||||||
validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
|
validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
|
||||||
assert not issubclass(exc_info.type, CertificateVerifyError)
|
assert not issubclass(exc_info.type, CertificateVerifyError)
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
def test_percent_encode(self, handler):
|
def test_percent_encode(self, handler):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
# Unicode characters should be encoded with uppercase percent-encoding
|
# Unicode characters should be encoded with uppercase percent-encoding
|
||||||
|
@ -359,7 +349,6 @@ def test_percent_encode(self, handler):
|
||||||
assert res.status == 200
|
assert res.status == 200
|
||||||
res.close()
|
res.close()
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
@pytest.mark.parametrize('path', [
|
@pytest.mark.parametrize('path', [
|
||||||
'/a/b/./../../headers',
|
'/a/b/./../../headers',
|
||||||
'/redirect_dotsegments',
|
'/redirect_dotsegments',
|
||||||
|
@ -375,15 +364,13 @@ def test_remove_dot_segments(self, handler, path):
|
||||||
assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
|
assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
|
||||||
res.close()
|
res.close()
|
||||||
|
|
||||||
# Not supported by CurlCFFI (non-standard)
|
@pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi (non-standard)')
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
|
||||||
def test_unicode_path_redirection(self, handler):
|
def test_unicode_path_redirection(self, handler):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
|
r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
|
||||||
assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html'
|
assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html'
|
||||||
r.close()
|
r.close()
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
def test_raise_http_error(self, handler):
|
def test_raise_http_error(self, handler):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
for bad_status in (400, 500, 599, 302):
|
for bad_status in (400, 500, 599, 302):
|
||||||
|
@ -393,7 +380,6 @@ def test_raise_http_error(self, handler):
|
||||||
# Should not raise an error
|
# Should not raise an error
|
||||||
validate_and_send(rh, Request('http://127.0.0.1:%d/gen_200' % self.http_port)).close()
|
validate_and_send(rh, Request('http://127.0.0.1:%d/gen_200' % self.http_port)).close()
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
def test_response_url(self, handler):
|
def test_response_url(self, handler):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
# Response url should be that of the last url in redirect chain
|
# Response url should be that of the last url in redirect chain
|
||||||
|
@ -405,7 +391,6 @@ def test_response_url(self, handler):
|
||||||
res2.close()
|
res2.close()
|
||||||
|
|
||||||
# Covers some basic cases we expect some level of consistency between request handlers for
|
# Covers some basic cases we expect some level of consistency between request handlers for
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
@pytest.mark.parametrize('redirect_status,method,expected', [
|
@pytest.mark.parametrize('redirect_status,method,expected', [
|
||||||
# A 303 must either use GET or HEAD for subsequent request
|
# A 303 must either use GET or HEAD for subsequent request
|
||||||
(303, 'POST', ('', 'GET', False)),
|
(303, 'POST', ('', 'GET', False)),
|
||||||
|
@ -447,7 +432,6 @@ def test_redirect(self, handler, redirect_status, method, expected):
|
||||||
assert expected[1] == res.headers.get('method')
|
assert expected[1] == res.headers.get('method')
|
||||||
assert expected[2] == ('content-length' in headers.decode().lower())
|
assert expected[2] == ('content-length' in headers.decode().lower())
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
def test_request_cookie_header(self, handler):
|
def test_request_cookie_header(self, handler):
|
||||||
# We should accept a Cookie header being passed as in normal headers and handle it appropriately.
|
# We should accept a Cookie header being passed as in normal headers and handle it appropriately.
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
|
@ -480,19 +464,16 @@ def test_request_cookie_header(self, handler):
|
||||||
assert b'cookie: test=ytdlp' not in data.lower()
|
assert b'cookie: test=ytdlp' not in data.lower()
|
||||||
assert b'cookie: test=test3' in data.lower()
|
assert b'cookie: test=test3' in data.lower()
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
def test_redirect_loop(self, handler):
|
def test_redirect_loop(self, handler):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
with pytest.raises(HTTPError, match='redirect loop'):
|
with pytest.raises(HTTPError, match='redirect loop'):
|
||||||
validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop'))
|
validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop'))
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
def test_incompleteread(self, handler):
|
def test_incompleteread(self, handler):
|
||||||
with handler(timeout=2) as rh:
|
with handler(timeout=2) as rh:
|
||||||
with pytest.raises(IncompleteRead, match='13 bytes read, 234221 more expected'):
|
with pytest.raises(IncompleteRead, match='13 bytes read, 234221 more expected'):
|
||||||
validate_and_send(rh, Request('http://127.0.0.1:%d/incompleteread' % self.http_port)).read()
|
validate_and_send(rh, Request('http://127.0.0.1:%d/incompleteread' % self.http_port)).read()
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
def test_cookies(self, handler):
|
def test_cookies(self, handler):
|
||||||
cookiejar = YoutubeDLCookieJar()
|
cookiejar = YoutubeDLCookieJar()
|
||||||
cookiejar.set_cookie(http.cookiejar.Cookie(
|
cookiejar.set_cookie(http.cookiejar.Cookie(
|
||||||
|
@ -509,7 +490,6 @@ def test_cookies(self, handler):
|
||||||
rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read()
|
rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read()
|
||||||
assert b'cookie: test=ytdlp' in data.lower()
|
assert b'cookie: test=ytdlp' in data.lower()
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
def test_headers(self, handler):
|
def test_headers(self, handler):
|
||||||
|
|
||||||
with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
|
with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
|
||||||
|
@ -525,7 +505,6 @@ def test_headers(self, handler):
|
||||||
assert b'test2: test2' not in data
|
assert b'test2: test2' not in data
|
||||||
assert b'test3: test3' in data
|
assert b'test3: test3' in data
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
def test_read_timeout(self, handler):
|
def test_read_timeout(self, handler):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
# Default timeout is 20 seconds, so this should go through
|
# Default timeout is 20 seconds, so this should go through
|
||||||
|
@ -541,26 +520,21 @@ def test_read_timeout(self, handler):
|
||||||
validate_and_send(
|
validate_and_send(
|
||||||
rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4}))
|
rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4}))
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
def test_connect_timeout(self, handler):
|
def test_connect_timeout(self, handler):
|
||||||
# nothing should be listening on this port
|
# nothing should be listening on this port
|
||||||
connect_timeout_url = 'http://10.255.255.255'
|
connect_timeout_url = 'http://10.255.255.255'
|
||||||
with handler(timeout=0.01) as rh:
|
with handler(timeout=0.01) as rh, pytest.raises(TransportError):
|
||||||
now = time.time()
|
now = time.time()
|
||||||
with pytest.raises(TransportError):
|
validate_and_send(rh, Request(connect_timeout_url))
|
||||||
validate_and_send(
|
assert time.time() - now < DEFAULT_TIMEOUT
|
||||||
rh, Request(connect_timeout_url))
|
|
||||||
assert 0.01 <= time.time() - now < 20
|
|
||||||
|
|
||||||
with handler() as rh:
|
|
||||||
with pytest.raises(TransportError):
|
|
||||||
# Per request timeout, should override handler timeout
|
# Per request timeout, should override handler timeout
|
||||||
|
request = Request(connect_timeout_url, extensions={'timeout': 0.01})
|
||||||
|
with handler() as rh, pytest.raises(TransportError):
|
||||||
now = time.time()
|
now = time.time()
|
||||||
validate_and_send(
|
validate_and_send(rh, request)
|
||||||
rh, Request(connect_timeout_url, extensions={'timeout': 0.01}))
|
assert time.time() - now < DEFAULT_TIMEOUT
|
||||||
assert 0.01 <= time.time() - now < 20
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
def test_source_address(self, handler):
|
def test_source_address(self, handler):
|
||||||
source_address = f'127.0.0.{random.randint(5, 255)}'
|
source_address = f'127.0.0.{random.randint(5, 255)}'
|
||||||
# on some systems these loopback addresses we need for testing may not be available
|
# on some systems these loopback addresses we need for testing may not be available
|
||||||
|
@ -572,13 +546,13 @@ def test_source_address(self, handler):
|
||||||
assert source_address == data
|
assert source_address == data
|
||||||
|
|
||||||
# Not supported by CurlCFFI
|
# Not supported by CurlCFFI
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
@pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi')
|
||||||
def test_gzip_trailing_garbage(self, handler):
|
def test_gzip_trailing_garbage(self, handler):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode()
|
data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode()
|
||||||
assert data == '<html><video src="/vid.mp4" /></html>'
|
assert data == '<html><video src="/vid.mp4" /></html>'
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
@pytest.mark.skip_handler('CurlCFFI', 'not applicable to curl-cffi')
|
||||||
@pytest.mark.skipif(not brotli, reason='brotli support is not installed')
|
@pytest.mark.skipif(not brotli, reason='brotli support is not installed')
|
||||||
def test_brotli(self, handler):
|
def test_brotli(self, handler):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
|
@ -589,7 +563,6 @@ def test_brotli(self, handler):
|
||||||
assert res.headers.get('Content-Encoding') == 'br'
|
assert res.headers.get('Content-Encoding') == 'br'
|
||||||
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
def test_deflate(self, handler):
|
def test_deflate(self, handler):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
res = validate_and_send(
|
res = validate_and_send(
|
||||||
|
@ -599,7 +572,6 @@ def test_deflate(self, handler):
|
||||||
assert res.headers.get('Content-Encoding') == 'deflate'
|
assert res.headers.get('Content-Encoding') == 'deflate'
|
||||||
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
def test_gzip(self, handler):
|
def test_gzip(self, handler):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
res = validate_and_send(
|
res = validate_and_send(
|
||||||
|
@ -609,7 +581,6 @@ def test_gzip(self, handler):
|
||||||
assert res.headers.get('Content-Encoding') == 'gzip'
|
assert res.headers.get('Content-Encoding') == 'gzip'
|
||||||
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
def test_multiple_encodings(self, handler):
|
def test_multiple_encodings(self, handler):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
|
for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
|
||||||
|
@ -620,8 +591,7 @@ def test_multiple_encodings(self, handler):
|
||||||
assert res.headers.get('Content-Encoding') == pair
|
assert res.headers.get('Content-Encoding') == pair
|
||||||
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
||||||
|
|
||||||
# Not supported by curl_cffi
|
@pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi')
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
|
||||||
def test_unsupported_encoding(self, handler):
|
def test_unsupported_encoding(self, handler):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
res = validate_and_send(
|
res = validate_and_send(
|
||||||
|
@ -631,7 +601,6 @@ def test_unsupported_encoding(self, handler):
|
||||||
assert res.headers.get('Content-Encoding') == 'unsupported'
|
assert res.headers.get('Content-Encoding') == 'unsupported'
|
||||||
assert res.read() == b'raw'
|
assert res.read() == b'raw'
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
def test_read(self, handler):
|
def test_read(self, handler):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
res = validate_and_send(
|
res = validate_and_send(
|
||||||
|
@ -642,83 +611,48 @@ def test_read(self, handler):
|
||||||
assert res.read().decode().endswith('\n\n')
|
assert res.read().decode().endswith('\n\n')
|
||||||
assert res.read() == b''
|
assert res.read() == b''
|
||||||
|
|
||||||
|
def test_request_disable_proxy(self, handler):
|
||||||
class TestHTTPProxy(TestRequestHandlerBase):
|
for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['http']:
|
||||||
# Note: this only tests http urls over non-CONNECT proxy
|
# Given the handler is configured with a proxy
|
||||||
@classmethod
|
with handler(proxies={'http': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh:
|
||||||
def setup_class(cls):
|
# When a proxy is explicitly set to None for the request
|
||||||
super().setup_class()
|
|
||||||
# HTTP Proxy server
|
|
||||||
cls.proxy = http.server.ThreadingHTTPServer(
|
|
||||||
('127.0.0.1', 0), _build_proxy_handler('normal'))
|
|
||||||
cls.proxy_port = http_server_port(cls.proxy)
|
|
||||||
cls.proxy_thread = threading.Thread(target=cls.proxy.serve_forever)
|
|
||||||
cls.proxy_thread.daemon = True
|
|
||||||
cls.proxy_thread.start()
|
|
||||||
|
|
||||||
# Geo proxy server
|
|
||||||
cls.geo_proxy = http.server.ThreadingHTTPServer(
|
|
||||||
('127.0.0.1', 0), _build_proxy_handler('geo'))
|
|
||||||
cls.geo_port = http_server_port(cls.geo_proxy)
|
|
||||||
cls.geo_proxy_thread = threading.Thread(target=cls.geo_proxy.serve_forever)
|
|
||||||
cls.geo_proxy_thread.daemon = True
|
|
||||||
cls.geo_proxy_thread.start()
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
def test_http_proxy(self, handler):
|
|
||||||
http_proxy = f'http://127.0.0.1:{self.proxy_port}'
|
|
||||||
geo_proxy = f'http://127.0.0.1:{self.geo_port}'
|
|
||||||
|
|
||||||
# Test global http proxy
|
|
||||||
# Test per request http proxy
|
|
||||||
# Test per request http proxy disables proxy
|
|
||||||
url = 'http://foo.com/bar'
|
|
||||||
|
|
||||||
# Global HTTP proxy
|
|
||||||
with handler(proxies={'http': http_proxy}) as rh:
|
|
||||||
res = validate_and_send(rh, Request(url)).read().decode()
|
|
||||||
assert res == f'normal: {url}'
|
|
||||||
|
|
||||||
# Per request proxy overrides global
|
|
||||||
res = validate_and_send(rh, Request(url, proxies={'http': geo_proxy})).read().decode()
|
|
||||||
assert res == f'geo: {url}'
|
|
||||||
|
|
||||||
# and setting to None disables all proxies for that request
|
|
||||||
real_url = f'http://127.0.0.1:{self.http_port}/headers'
|
|
||||||
res = validate_and_send(
|
res = validate_and_send(
|
||||||
rh, Request(real_url, proxies={'http': None})).read().decode()
|
rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'http': None}))
|
||||||
assert res != f'normal: {real_url}'
|
# Then no proxy should be used
|
||||||
assert 'Accept' in res
|
res.close()
|
||||||
|
assert res.status == 200
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
@pytest.mark.skip_handlers_if(
|
||||||
|
lambda _, handler: Features.NO_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support NO_PROXY')
|
||||||
def test_noproxy(self, handler):
|
def test_noproxy(self, handler):
|
||||||
with handler(proxies={'proxy': f'http://127.0.0.1:{self.proxy_port}'}) as rh:
|
for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['http']:
|
||||||
# NO_PROXY
|
# Given the handler is configured with a proxy
|
||||||
|
with handler(proxies={'http': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh:
|
||||||
for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'):
|
for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'):
|
||||||
|
# When request no proxy includes the request url host
|
||||||
nop_response = validate_and_send(
|
nop_response = validate_and_send(
|
||||||
rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'no': no_proxy})).read().decode(
|
rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'no': no_proxy}))
|
||||||
'utf-8')
|
# Then the proxy should not be used
|
||||||
assert 'Accept' in nop_response
|
assert nop_response.status == 200
|
||||||
|
nop_response.close()
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
@pytest.mark.skip_handlers_if(
|
||||||
|
lambda _, handler: Features.ALL_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support ALL_PROXY')
|
||||||
def test_allproxy(self, handler):
|
def test_allproxy(self, handler):
|
||||||
url = 'http://foo.com/bar'
|
# This is a bit of a hacky test, but it should be enough to check whether the handler is using the proxy.
|
||||||
with handler() as rh:
|
# 0.1s might not be enough of a timeout if proxy is not used in all cases, but should still get failures.
|
||||||
response = validate_and_send(rh, Request(url, proxies={'all': f'http://127.0.0.1:{self.proxy_port}'})).read().decode(
|
with handler(proxies={'all': 'http://10.255.255.255'}, timeout=0.1) as rh:
|
||||||
'utf-8')
|
with pytest.raises(TransportError):
|
||||||
assert response == f'normal: {url}'
|
validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).close()
|
||||||
|
|
||||||
|
with handler(timeout=0.1) as rh:
|
||||||
|
with pytest.raises(TransportError):
|
||||||
|
validate_and_send(
|
||||||
|
rh, Request(
|
||||||
|
f'http://127.0.0.1:{self.http_port}/headers', proxies={'all': 'http://10.255.255.255'})).close()
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||||
def test_http_proxy_with_idn(self, handler):
|
|
||||||
with handler(proxies={
|
|
||||||
'http': f'http://127.0.0.1:{self.proxy_port}',
|
|
||||||
}) as rh:
|
|
||||||
url = 'http://中文.tw/'
|
|
||||||
response = rh.send(Request(url)).read().decode()
|
|
||||||
# b'xn--fiq228c' is '中文'.encode('idna')
|
|
||||||
assert response == 'normal: http://xn--fiq228c.tw/'
|
|
||||||
|
|
||||||
|
|
||||||
class TestClientCertificate:
|
class TestClientCertificate:
|
||||||
@classmethod
|
@classmethod
|
||||||
def setup_class(cls):
|
def setup_class(cls):
|
||||||
|
@ -745,27 +679,23 @@ def _run_test(self, handler, **handler_kwargs):
|
||||||
) as rh:
|
) as rh:
|
||||||
validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html')).read().decode()
|
validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html')).read().decode()
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
def test_certificate_combined_nopass(self, handler):
|
def test_certificate_combined_nopass(self, handler):
|
||||||
self._run_test(handler, client_cert={
|
self._run_test(handler, client_cert={
|
||||||
'client_certificate': os.path.join(self.certdir, 'clientwithkey.crt'),
|
'client_certificate': os.path.join(self.certdir, 'clientwithkey.crt'),
|
||||||
})
|
})
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
def test_certificate_nocombined_nopass(self, handler):
|
def test_certificate_nocombined_nopass(self, handler):
|
||||||
self._run_test(handler, client_cert={
|
self._run_test(handler, client_cert={
|
||||||
'client_certificate': os.path.join(self.certdir, 'client.crt'),
|
'client_certificate': os.path.join(self.certdir, 'client.crt'),
|
||||||
'client_certificate_key': os.path.join(self.certdir, 'client.key'),
|
'client_certificate_key': os.path.join(self.certdir, 'client.key'),
|
||||||
})
|
})
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
def test_certificate_combined_pass(self, handler):
|
def test_certificate_combined_pass(self, handler):
|
||||||
self._run_test(handler, client_cert={
|
self._run_test(handler, client_cert={
|
||||||
'client_certificate': os.path.join(self.certdir, 'clientwithencryptedkey.crt'),
|
'client_certificate': os.path.join(self.certdir, 'clientwithencryptedkey.crt'),
|
||||||
'client_certificate_password': 'foobar',
|
'client_certificate_password': 'foobar',
|
||||||
})
|
})
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
def test_certificate_nocombined_pass(self, handler):
|
def test_certificate_nocombined_pass(self, handler):
|
||||||
self._run_test(handler, client_cert={
|
self._run_test(handler, client_cert={
|
||||||
'client_certificate': os.path.join(self.certdir, 'client.crt'),
|
'client_certificate': os.path.join(self.certdir, 'client.crt'),
|
||||||
|
@ -824,8 +754,8 @@ def test_remove_logging_handler(self, handler, logger_name):
|
||||||
assert len(logging_handlers) == before_count
|
assert len(logging_handlers) == before_count
|
||||||
|
|
||||||
|
|
||||||
class TestUrllibRequestHandler(TestRequestHandlerBase):
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||||
|
class TestUrllibRequestHandler(TestRequestHandlerBase):
|
||||||
def test_file_urls(self, handler):
|
def test_file_urls(self, handler):
|
||||||
# See https://github.com/ytdl-org/youtube-dl/issues/8227
|
# See https://github.com/ytdl-org/youtube-dl/issues/8227
|
||||||
tf = tempfile.NamedTemporaryFile(delete=False)
|
tf = tempfile.NamedTemporaryFile(delete=False)
|
||||||
|
@ -847,7 +777,6 @@ def test_file_urls(self, handler):
|
||||||
|
|
||||||
os.unlink(tf.name)
|
os.unlink(tf.name)
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
|
||||||
def test_http_error_returns_content(self, handler):
|
def test_http_error_returns_content(self, handler):
|
||||||
# urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
|
# urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
|
||||||
def get_response():
|
def get_response():
|
||||||
|
@ -860,7 +789,6 @@ def get_response():
|
||||||
|
|
||||||
assert get_response().read() == b'<html></html>'
|
assert get_response().read() == b'<html></html>'
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
|
||||||
def test_verify_cert_error_text(self, handler):
|
def test_verify_cert_error_text(self, handler):
|
||||||
# Check the output of the error message
|
# Check the output of the error message
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
|
@ -870,7 +798,6 @@ def test_verify_cert_error_text(self, handler):
|
||||||
):
|
):
|
||||||
validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
|
validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
|
||||||
@pytest.mark.parametrize('req,match,version_check', [
|
@pytest.mark.parametrize('req,match,version_check', [
|
||||||
# https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256
|
# https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256
|
||||||
# bpo-39603: Check implemented in 3.7.9+, 3.8.5+
|
# bpo-39603: Check implemented in 3.7.9+, 3.8.5+
|
||||||
|
@ -1202,7 +1129,7 @@ class HTTPSupportedRH(ValidationRH):
|
||||||
]
|
]
|
||||||
|
|
||||||
PROXY_SCHEME_TESTS = [
|
PROXY_SCHEME_TESTS = [
|
||||||
# scheme, expected to fail
|
# proxy scheme, expected to fail
|
||||||
('Urllib', 'http', [
|
('Urllib', 'http', [
|
||||||
('http', False),
|
('http', False),
|
||||||
('https', UnsupportedRequest),
|
('https', UnsupportedRequest),
|
||||||
|
@ -1228,30 +1155,41 @@ class HTTPSupportedRH(ValidationRH):
|
||||||
('socks5', False),
|
('socks5', False),
|
||||||
('socks5h', False),
|
('socks5h', False),
|
||||||
]),
|
]),
|
||||||
|
('Websockets', 'ws', [
|
||||||
|
('http', UnsupportedRequest),
|
||||||
|
('https', UnsupportedRequest),
|
||||||
|
('socks4', False),
|
||||||
|
('socks4a', False),
|
||||||
|
('socks5', False),
|
||||||
|
('socks5h', False),
|
||||||
|
]),
|
||||||
(NoCheckRH, 'http', [('http', False)]),
|
(NoCheckRH, 'http', [('http', False)]),
|
||||||
(HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]),
|
(HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]),
|
||||||
('Websockets', 'ws', [('http', UnsupportedRequest)]),
|
|
||||||
(NoCheckRH, 'http', [('http', False)]),
|
(NoCheckRH, 'http', [('http', False)]),
|
||||||
(HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]),
|
(HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]),
|
||||||
]
|
]
|
||||||
|
|
||||||
PROXY_KEY_TESTS = [
|
PROXY_KEY_TESTS = [
|
||||||
# key, expected to fail
|
# proxy key, proxy scheme, expected to fail
|
||||||
('Urllib', [
|
('Urllib', 'http', [
|
||||||
('all', False),
|
('all', 'http', False),
|
||||||
('unrelated', False),
|
('unrelated', 'http', False),
|
||||||
]),
|
]),
|
||||||
('Requests', [
|
('Requests', 'http', [
|
||||||
('all', False),
|
('all', 'http', False),
|
||||||
('unrelated', False),
|
('unrelated', 'http', False),
|
||||||
]),
|
]),
|
||||||
('CurlCFFI', [
|
('CurlCFFI', 'http', [
|
||||||
('all', False),
|
('all', 'http', False),
|
||||||
('unrelated', False),
|
('unrelated', 'http', False),
|
||||||
]),
|
]),
|
||||||
(NoCheckRH, [('all', False)]),
|
('Websockets', 'ws', [
|
||||||
(HTTPSupportedRH, [('all', UnsupportedRequest)]),
|
('all', 'socks5', False),
|
||||||
(HTTPSupportedRH, [('no', UnsupportedRequest)]),
|
('unrelated', 'socks5', False),
|
||||||
|
]),
|
||||||
|
(NoCheckRH, 'http', [('all', 'http', False)]),
|
||||||
|
(HTTPSupportedRH, 'http', [('all', 'http', UnsupportedRequest)]),
|
||||||
|
(HTTPSupportedRH, 'http', [('no', 'http', UnsupportedRequest)]),
|
||||||
]
|
]
|
||||||
|
|
||||||
EXTENSION_TESTS = [
|
EXTENSION_TESTS = [
|
||||||
|
@ -1293,28 +1231,54 @@ class HTTPSupportedRH(ValidationRH):
|
||||||
]),
|
]),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('handler,fail,scheme', [
|
||||||
|
('Urllib', False, 'http'),
|
||||||
|
('Requests', False, 'http'),
|
||||||
|
('CurlCFFI', False, 'http'),
|
||||||
|
('Websockets', False, 'ws')
|
||||||
|
], indirect=['handler'])
|
||||||
|
def test_no_proxy(self, handler, fail, scheme):
|
||||||
|
run_validation(handler, fail, Request(f'{scheme}://', proxies={'no': '127.0.0.1,github.com'}))
|
||||||
|
run_validation(handler, fail, Request(f'{scheme}://'), proxies={'no': '127.0.0.1,github.com'})
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('handler,scheme', [
|
||||||
|
('Urllib', 'http'),
|
||||||
|
(HTTPSupportedRH, 'http'),
|
||||||
|
('Requests', 'http'),
|
||||||
|
('CurlCFFI', 'http'),
|
||||||
|
('Websockets', 'ws')
|
||||||
|
], indirect=['handler'])
|
||||||
|
def test_empty_proxy(self, handler, scheme):
|
||||||
|
run_validation(handler, False, Request(f'{scheme}://', proxies={scheme: None}))
|
||||||
|
run_validation(handler, False, Request(f'{scheme}://'), proxies={scheme: None})
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
|
||||||
|
@pytest.mark.parametrize('handler,scheme', [
|
||||||
|
('Urllib', 'http'),
|
||||||
|
(HTTPSupportedRH, 'http'),
|
||||||
|
('Requests', 'http'),
|
||||||
|
('CurlCFFI', 'http'),
|
||||||
|
('Websockets', 'ws')
|
||||||
|
], indirect=['handler'])
|
||||||
|
def test_invalid_proxy_url(self, handler, scheme, proxy_url):
|
||||||
|
run_validation(handler, UnsupportedRequest, Request(f'{scheme}://', proxies={scheme: proxy_url}))
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [
|
@pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [
|
||||||
(handler_tests[0], scheme, fail, handler_kwargs)
|
(handler_tests[0], scheme, fail, handler_kwargs)
|
||||||
for handler_tests in URL_SCHEME_TESTS
|
for handler_tests in URL_SCHEME_TESTS
|
||||||
for scheme, fail, handler_kwargs in handler_tests[1]
|
for scheme, fail, handler_kwargs in handler_tests[1]
|
||||||
|
|
||||||
], indirect=['handler'])
|
], indirect=['handler'])
|
||||||
def test_url_scheme(self, handler, scheme, fail, handler_kwargs):
|
def test_url_scheme(self, handler, scheme, fail, handler_kwargs):
|
||||||
run_validation(handler, fail, Request(f'{scheme}://'), **(handler_kwargs or {}))
|
run_validation(handler, fail, Request(f'{scheme}://'), **(handler_kwargs or {}))
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler,fail', [('Urllib', False), ('Requests', False), ('CurlCFFI', False)], indirect=['handler'])
|
@pytest.mark.parametrize('handler,scheme,proxy_key,proxy_scheme,fail', [
|
||||||
def test_no_proxy(self, handler, fail):
|
(handler_tests[0], handler_tests[1], proxy_key, proxy_scheme, fail)
|
||||||
run_validation(handler, fail, Request('http://', proxies={'no': '127.0.0.1,github.com'}))
|
|
||||||
run_validation(handler, fail, Request('http://'), proxies={'no': '127.0.0.1,github.com'})
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler,proxy_key,fail', [
|
|
||||||
(handler_tests[0], proxy_key, fail)
|
|
||||||
for handler_tests in PROXY_KEY_TESTS
|
for handler_tests in PROXY_KEY_TESTS
|
||||||
for proxy_key, fail in handler_tests[1]
|
for proxy_key, proxy_scheme, fail in handler_tests[2]
|
||||||
], indirect=['handler'])
|
], indirect=['handler'])
|
||||||
def test_proxy_key(self, handler, proxy_key, fail):
|
def test_proxy_key(self, handler, scheme, proxy_key, proxy_scheme, fail):
|
||||||
run_validation(handler, fail, Request('http://', proxies={proxy_key: 'http://example.com'}))
|
run_validation(handler, fail, Request(f'{scheme}://', proxies={proxy_key: f'{proxy_scheme}://example.com'}))
|
||||||
run_validation(handler, fail, Request('http://'), proxies={proxy_key: 'http://example.com'})
|
run_validation(handler, fail, Request(f'{scheme}://'), proxies={proxy_key: f'{proxy_scheme}://example.com'})
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler,req_scheme,scheme,fail', [
|
@pytest.mark.parametrize('handler,req_scheme,scheme,fail', [
|
||||||
(handler_tests[0], handler_tests[1], scheme, fail)
|
(handler_tests[0], handler_tests[1], scheme, fail)
|
||||||
|
@ -1325,16 +1289,6 @@ def test_proxy_scheme(self, handler, req_scheme, scheme, fail):
|
||||||
run_validation(handler, fail, Request(f'{req_scheme}://', proxies={req_scheme: f'{scheme}://example.com'}))
|
run_validation(handler, fail, Request(f'{req_scheme}://', proxies={req_scheme: f'{scheme}://example.com'}))
|
||||||
run_validation(handler, fail, Request(f'{req_scheme}://'), proxies={req_scheme: f'{scheme}://example.com'})
|
run_validation(handler, fail, Request(f'{req_scheme}://'), proxies={req_scheme: f'{scheme}://example.com'})
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH, 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
def test_empty_proxy(self, handler):
|
|
||||||
run_validation(handler, False, Request('http://', proxies={'http': None}))
|
|
||||||
run_validation(handler, False, Request('http://'), proxies={'http': None})
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
def test_invalid_proxy_url(self, handler, proxy_url):
|
|
||||||
run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': proxy_url}))
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler,scheme,extensions,fail', [
|
@pytest.mark.parametrize('handler,scheme,extensions,fail', [
|
||||||
(handler_tests[0], handler_tests[1], extensions, fail)
|
(handler_tests[0], handler_tests[1], extensions, fail)
|
||||||
for handler_tests in EXTENSION_TESTS
|
for handler_tests in EXTENSION_TESTS
|
||||||
|
|
|
@ -3,10 +3,12 @@
|
||||||
# Allow direct execution
|
# Allow direct execution
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from test.helper import verify_address_availability
|
from test.helper import verify_address_availability
|
||||||
|
from yt_dlp.networking.common import Features, DEFAULT_TIMEOUT
|
||||||
|
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
@ -18,7 +20,7 @@
|
||||||
import ssl
|
import ssl
|
||||||
import threading
|
import threading
|
||||||
|
|
||||||
from yt_dlp import socks
|
from yt_dlp import socks, traverse_obj
|
||||||
from yt_dlp.cookies import YoutubeDLCookieJar
|
from yt_dlp.cookies import YoutubeDLCookieJar
|
||||||
from yt_dlp.dependencies import websockets
|
from yt_dlp.dependencies import websockets
|
||||||
from yt_dlp.networking import Request
|
from yt_dlp.networking import Request
|
||||||
|
@ -114,6 +116,7 @@ def ws_validate_and_send(rh, req):
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(not websockets, reason='websockets must be installed to test websocket request handlers')
|
@pytest.mark.skipif(not websockets, reason='websockets must be installed to test websocket request handlers')
|
||||||
|
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
||||||
class TestWebsSocketRequestHandlerConformance:
|
class TestWebsSocketRequestHandlerConformance:
|
||||||
@classmethod
|
@classmethod
|
||||||
def setup_class(cls):
|
def setup_class(cls):
|
||||||
|
@ -129,7 +132,6 @@ def setup_class(cls):
|
||||||
cls.mtls_wss_thread, cls.mtls_wss_port = create_mtls_wss_websocket_server()
|
cls.mtls_wss_thread, cls.mtls_wss_port = create_mtls_wss_websocket_server()
|
||||||
cls.mtls_wss_base_url = f'wss://127.0.0.1:{cls.mtls_wss_port}'
|
cls.mtls_wss_base_url = f'wss://127.0.0.1:{cls.mtls_wss_port}'
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
|
||||||
def test_basic_websockets(self, handler):
|
def test_basic_websockets(self, handler):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
ws = ws_validate_and_send(rh, Request(self.ws_base_url))
|
ws = ws_validate_and_send(rh, Request(self.ws_base_url))
|
||||||
|
@ -141,7 +143,6 @@ def test_basic_websockets(self, handler):
|
||||||
|
|
||||||
# https://www.rfc-editor.org/rfc/rfc6455.html#section-5.6
|
# https://www.rfc-editor.org/rfc/rfc6455.html#section-5.6
|
||||||
@pytest.mark.parametrize('msg,opcode', [('str', 1), (b'bytes', 2)])
|
@pytest.mark.parametrize('msg,opcode', [('str', 1), (b'bytes', 2)])
|
||||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
|
||||||
def test_send_types(self, handler, msg, opcode):
|
def test_send_types(self, handler, msg, opcode):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
ws = ws_validate_and_send(rh, Request(self.ws_base_url))
|
ws = ws_validate_and_send(rh, Request(self.ws_base_url))
|
||||||
|
@ -149,7 +150,6 @@ def test_send_types(self, handler, msg, opcode):
|
||||||
assert int(ws.recv()) == opcode
|
assert int(ws.recv()) == opcode
|
||||||
ws.close()
|
ws.close()
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
|
||||||
def test_verify_cert(self, handler):
|
def test_verify_cert(self, handler):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
with pytest.raises(CertificateVerifyError):
|
with pytest.raises(CertificateVerifyError):
|
||||||
|
@ -160,14 +160,12 @@ def test_verify_cert(self, handler):
|
||||||
assert ws.status == 101
|
assert ws.status == 101
|
||||||
ws.close()
|
ws.close()
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
|
||||||
def test_ssl_error(self, handler):
|
def test_ssl_error(self, handler):
|
||||||
with handler(verify=False) as rh:
|
with handler(verify=False) as rh:
|
||||||
with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info:
|
with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info:
|
||||||
ws_validate_and_send(rh, Request(self.bad_wss_host))
|
ws_validate_and_send(rh, Request(self.bad_wss_host))
|
||||||
assert not issubclass(exc_info.type, CertificateVerifyError)
|
assert not issubclass(exc_info.type, CertificateVerifyError)
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
|
||||||
@pytest.mark.parametrize('path,expected', [
|
@pytest.mark.parametrize('path,expected', [
|
||||||
# Unicode characters should be encoded with uppercase percent-encoding
|
# Unicode characters should be encoded with uppercase percent-encoding
|
||||||
('/中文', '/%E4%B8%AD%E6%96%87'),
|
('/中文', '/%E4%B8%AD%E6%96%87'),
|
||||||
|
@ -182,7 +180,6 @@ def test_percent_encode(self, handler, path, expected):
|
||||||
assert ws.status == 101
|
assert ws.status == 101
|
||||||
ws.close()
|
ws.close()
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
|
||||||
def test_remove_dot_segments(self, handler):
|
def test_remove_dot_segments(self, handler):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
# This isn't a comprehensive test,
|
# This isn't a comprehensive test,
|
||||||
|
@ -195,7 +192,6 @@ def test_remove_dot_segments(self, handler):
|
||||||
|
|
||||||
# We are restricted to known HTTP status codes in http.HTTPStatus
|
# We are restricted to known HTTP status codes in http.HTTPStatus
|
||||||
# Redirects are not supported for websockets
|
# Redirects are not supported for websockets
|
||||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
|
||||||
@pytest.mark.parametrize('status', (200, 204, 301, 302, 303, 400, 500, 511))
|
@pytest.mark.parametrize('status', (200, 204, 301, 302, 303, 400, 500, 511))
|
||||||
def test_raise_http_error(self, handler, status):
|
def test_raise_http_error(self, handler, status):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
|
@ -203,17 +199,30 @@ def test_raise_http_error(self, handler, status):
|
||||||
ws_validate_and_send(rh, Request(f'{self.ws_base_url}/gen_{status}'))
|
ws_validate_and_send(rh, Request(f'{self.ws_base_url}/gen_{status}'))
|
||||||
assert exc_info.value.status == status
|
assert exc_info.value.status == status
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
|
||||||
@pytest.mark.parametrize('params,extensions', [
|
@pytest.mark.parametrize('params,extensions', [
|
||||||
({'timeout': sys.float_info.min}, {}),
|
({'timeout': sys.float_info.min}, {}),
|
||||||
({}, {'timeout': sys.float_info.min}),
|
({}, {'timeout': sys.float_info.min}),
|
||||||
])
|
])
|
||||||
def test_timeout(self, handler, params, extensions):
|
def test_read_timeout(self, handler, params, extensions):
|
||||||
with handler(**params) as rh:
|
with handler(**params) as rh:
|
||||||
with pytest.raises(TransportError):
|
with pytest.raises(TransportError):
|
||||||
ws_validate_and_send(rh, Request(self.ws_base_url, extensions=extensions))
|
ws_validate_and_send(rh, Request(self.ws_base_url, extensions=extensions))
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
def test_connect_timeout(self, handler):
|
||||||
|
# nothing should be listening on this port
|
||||||
|
connect_timeout_url = 'ws://10.255.255.255'
|
||||||
|
with handler(timeout=0.01) as rh, pytest.raises(TransportError):
|
||||||
|
now = time.time()
|
||||||
|
ws_validate_and_send(rh, Request(connect_timeout_url))
|
||||||
|
assert time.time() - now < DEFAULT_TIMEOUT
|
||||||
|
|
||||||
|
# Per request timeout, should override handler timeout
|
||||||
|
request = Request(connect_timeout_url, extensions={'timeout': 0.01})
|
||||||
|
with handler() as rh, pytest.raises(TransportError):
|
||||||
|
now = time.time()
|
||||||
|
ws_validate_and_send(rh, request)
|
||||||
|
assert time.time() - now < DEFAULT_TIMEOUT
|
||||||
|
|
||||||
def test_cookies(self, handler):
|
def test_cookies(self, handler):
|
||||||
cookiejar = YoutubeDLCookieJar()
|
cookiejar = YoutubeDLCookieJar()
|
||||||
cookiejar.set_cookie(http.cookiejar.Cookie(
|
cookiejar.set_cookie(http.cookiejar.Cookie(
|
||||||
|
@ -239,7 +248,6 @@ def test_cookies(self, handler):
|
||||||
assert json.loads(ws.recv())['cookie'] == 'test=ytdlp'
|
assert json.loads(ws.recv())['cookie'] == 'test=ytdlp'
|
||||||
ws.close()
|
ws.close()
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
|
||||||
def test_source_address(self, handler):
|
def test_source_address(self, handler):
|
||||||
source_address = f'127.0.0.{random.randint(5, 255)}'
|
source_address = f'127.0.0.{random.randint(5, 255)}'
|
||||||
verify_address_availability(source_address)
|
verify_address_availability(source_address)
|
||||||
|
@ -249,7 +257,6 @@ def test_source_address(self, handler):
|
||||||
assert source_address == ws.recv()
|
assert source_address == ws.recv()
|
||||||
ws.close()
|
ws.close()
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
|
||||||
def test_response_url(self, handler):
|
def test_response_url(self, handler):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
url = f'{self.ws_base_url}/something'
|
url = f'{self.ws_base_url}/something'
|
||||||
|
@ -257,7 +264,6 @@ def test_response_url(self, handler):
|
||||||
assert ws.url == url
|
assert ws.url == url
|
||||||
ws.close()
|
ws.close()
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
|
||||||
def test_request_headers(self, handler):
|
def test_request_headers(self, handler):
|
||||||
with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
|
with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
|
||||||
# Global Headers
|
# Global Headers
|
||||||
|
@ -293,7 +299,6 @@ def test_request_headers(self, handler):
|
||||||
'client_certificate_password': 'foobar',
|
'client_certificate_password': 'foobar',
|
||||||
}
|
}
|
||||||
))
|
))
|
||||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
|
||||||
def test_mtls(self, handler, client_cert):
|
def test_mtls(self, handler, client_cert):
|
||||||
with handler(
|
with handler(
|
||||||
# Disable client-side validation of unacceptable self-signed testcert.pem
|
# Disable client-side validation of unacceptable self-signed testcert.pem
|
||||||
|
@ -303,6 +308,44 @@ def test_mtls(self, handler, client_cert):
|
||||||
) as rh:
|
) as rh:
|
||||||
ws_validate_and_send(rh, Request(self.mtls_wss_base_url)).close()
|
ws_validate_and_send(rh, Request(self.mtls_wss_base_url)).close()
|
||||||
|
|
||||||
|
def test_request_disable_proxy(self, handler):
|
||||||
|
for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['ws']:
|
||||||
|
# Given handler is configured with a proxy
|
||||||
|
with handler(proxies={'ws': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh:
|
||||||
|
# When a proxy is explicitly set to None for the request
|
||||||
|
ws = ws_validate_and_send(rh, Request(self.ws_base_url, proxies={'http': None}))
|
||||||
|
# Then no proxy should be used
|
||||||
|
assert ws.status == 101
|
||||||
|
ws.close()
|
||||||
|
|
||||||
|
@pytest.mark.skip_handlers_if(
|
||||||
|
lambda _, handler: Features.NO_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support NO_PROXY')
|
||||||
|
def test_noproxy(self, handler):
|
||||||
|
for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['ws']:
|
||||||
|
# Given the handler is configured with a proxy
|
||||||
|
with handler(proxies={'ws': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh:
|
||||||
|
for no_proxy in (f'127.0.0.1:{self.ws_port}', '127.0.0.1', 'localhost'):
|
||||||
|
# When request no proxy includes the request url host
|
||||||
|
ws = ws_validate_and_send(rh, Request(self.ws_base_url, proxies={'no': no_proxy}))
|
||||||
|
# Then the proxy should not be used
|
||||||
|
assert ws.status == 101
|
||||||
|
ws.close()
|
||||||
|
|
||||||
|
@pytest.mark.skip_handlers_if(
|
||||||
|
lambda _, handler: Features.ALL_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support ALL_PROXY')
|
||||||
|
def test_allproxy(self, handler):
|
||||||
|
supported_proto = traverse_obj(handler._SUPPORTED_PROXY_SCHEMES, 0, default='ws')
|
||||||
|
# This is a bit of a hacky test, but it should be enough to check whether the handler is using the proxy.
|
||||||
|
# 0.1s might not be enough of a timeout if proxy is not used in all cases, but should still get failures.
|
||||||
|
with handler(proxies={'all': f'{supported_proto}://10.255.255.255'}, timeout=0.1) as rh:
|
||||||
|
with pytest.raises(TransportError):
|
||||||
|
ws_validate_and_send(rh, Request(self.ws_base_url)).close()
|
||||||
|
|
||||||
|
with handler(timeout=0.1) as rh:
|
||||||
|
with pytest.raises(TransportError):
|
||||||
|
ws_validate_and_send(
|
||||||
|
rh, Request(self.ws_base_url, proxies={'all': f'{supported_proto}://10.255.255.255'})).close()
|
||||||
|
|
||||||
|
|
||||||
def create_fake_ws_connection(raised):
|
def create_fake_ws_connection(raised):
|
||||||
import websockets.sync.client
|
import websockets.sync.client
|
||||||
|
|
|
@ -3076,7 +3076,7 @@ def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
|
||||||
f = formats[-1]
|
f = formats[-1]
|
||||||
self.report_warning(
|
self.report_warning(
|
||||||
'No subtitle format found matching "%s" for language %s, '
|
'No subtitle format found matching "%s" for language %s, '
|
||||||
'using %s' % (formats_query, lang, f['ext']))
|
'using %s. Use --list-subs for a list of available subtitles' % (formats_query, lang, f['ext']))
|
||||||
subs[lang] = f
|
subs[lang] = f
|
||||||
return subs
|
return subs
|
||||||
|
|
||||||
|
|
|
@ -46,7 +46,7 @@
|
||||||
from .utils._utils import _YDLLogger
|
from .utils._utils import _YDLLogger
|
||||||
from .utils.networking import normalize_url
|
from .utils.networking import normalize_url
|
||||||
|
|
||||||
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
|
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi', 'whale'}
|
||||||
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
|
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
|
||||||
|
|
||||||
|
|
||||||
|
@ -219,6 +219,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
||||||
'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
|
'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
|
||||||
'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
|
'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
|
||||||
'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
|
'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
|
||||||
|
'whale': os.path.join(appdata_local, R'Naver\Naver Whale\User Data'),
|
||||||
}[browser_name]
|
}[browser_name]
|
||||||
|
|
||||||
elif sys.platform == 'darwin':
|
elif sys.platform == 'darwin':
|
||||||
|
@ -230,6 +231,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
||||||
'edge': os.path.join(appdata, 'Microsoft Edge'),
|
'edge': os.path.join(appdata, 'Microsoft Edge'),
|
||||||
'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
|
'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
|
||||||
'vivaldi': os.path.join(appdata, 'Vivaldi'),
|
'vivaldi': os.path.join(appdata, 'Vivaldi'),
|
||||||
|
'whale': os.path.join(appdata, 'Naver/Whale'),
|
||||||
}[browser_name]
|
}[browser_name]
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
@ -241,6 +243,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
||||||
'edge': os.path.join(config, 'microsoft-edge'),
|
'edge': os.path.join(config, 'microsoft-edge'),
|
||||||
'opera': os.path.join(config, 'opera'),
|
'opera': os.path.join(config, 'opera'),
|
||||||
'vivaldi': os.path.join(config, 'vivaldi'),
|
'vivaldi': os.path.join(config, 'vivaldi'),
|
||||||
|
'whale': os.path.join(config, 'naver-whale'),
|
||||||
}[browser_name]
|
}[browser_name]
|
||||||
|
|
||||||
# Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
|
# Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
|
||||||
|
@ -252,6 +255,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
||||||
'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
|
'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
|
||||||
'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
|
'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
|
||||||
'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
|
'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
|
||||||
|
'whale': 'Whale',
|
||||||
}[browser_name]
|
}[browser_name]
|
||||||
|
|
||||||
browsers_without_profiles = {'opera'}
|
browsers_without_profiles = {'opera'}
|
||||||
|
@ -347,6 +351,11 @@ def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, pa
|
||||||
if value is None:
|
if value is None:
|
||||||
return is_encrypted, None
|
return is_encrypted, None
|
||||||
|
|
||||||
|
# In chrome, session cookies have expires_utc set to 0
|
||||||
|
# In our cookie-store, cookies that do not expire should have expires set to None
|
||||||
|
if not expires_utc:
|
||||||
|
expires_utc = None
|
||||||
|
|
||||||
return is_encrypted, http.cookiejar.Cookie(
|
return is_encrypted, http.cookiejar.Cookie(
|
||||||
version=0, name=name, value=value, port=None, port_specified=False,
|
version=0, name=name, value=value, port=None, port_specified=False,
|
||||||
domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
|
domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
|
||||||
|
|
|
@ -288,7 +288,6 @@
|
||||||
from .buzzfeed import BuzzFeedIE
|
from .buzzfeed import BuzzFeedIE
|
||||||
from .byutv import BYUtvIE
|
from .byutv import BYUtvIE
|
||||||
from .c56 import C56IE
|
from .c56 import C56IE
|
||||||
from .cableav import CableAVIE
|
|
||||||
from .callin import CallinIE
|
from .callin import CallinIE
|
||||||
from .caltrans import CaltransIE
|
from .caltrans import CaltransIE
|
||||||
from .cam4 import CAM4IE
|
from .cam4 import CAM4IE
|
||||||
|
@ -548,7 +547,6 @@
|
||||||
EggheadLessonIE,
|
EggheadLessonIE,
|
||||||
)
|
)
|
||||||
from .eighttracks import EightTracksIE
|
from .eighttracks import EightTracksIE
|
||||||
from .einthusan import EinthusanIE
|
|
||||||
from .eitb import EitbIE
|
from .eitb import EitbIE
|
||||||
from .elementorembed import ElementorEmbedIE
|
from .elementorembed import ElementorEmbedIE
|
||||||
from .elonet import ElonetIE
|
from .elonet import ElonetIE
|
||||||
|
@ -717,6 +715,7 @@
|
||||||
from .gmanetwork import GMANetworkVideoIE
|
from .gmanetwork import GMANetworkVideoIE
|
||||||
from .go import GoIE
|
from .go import GoIE
|
||||||
from .godtube import GodTubeIE
|
from .godtube import GodTubeIE
|
||||||
|
from .godresource import GodResourceIE
|
||||||
from .gofile import GofileIE
|
from .gofile import GofileIE
|
||||||
from .golem import GolemIE
|
from .golem import GolemIE
|
||||||
from .goodgame import GoodGameIE
|
from .goodgame import GoodGameIE
|
||||||
|
@ -861,10 +860,6 @@
|
||||||
)
|
)
|
||||||
from .ixigua import IxiguaIE
|
from .ixigua import IxiguaIE
|
||||||
from .izlesene import IzleseneIE
|
from .izlesene import IzleseneIE
|
||||||
from .jable import (
|
|
||||||
JableIE,
|
|
||||||
JablePlaylistIE,
|
|
||||||
)
|
|
||||||
from .jamendo import (
|
from .jamendo import (
|
||||||
JamendoIE,
|
JamendoIE,
|
||||||
JamendoAlbumIE,
|
JamendoAlbumIE,
|
||||||
|
@ -1338,6 +1333,7 @@
|
||||||
NRKTVSeriesIE,
|
NRKTVSeriesIE,
|
||||||
)
|
)
|
||||||
from .nrl import NRLTVIE
|
from .nrl import NRLTVIE
|
||||||
|
from .nts import NTSLiveIE
|
||||||
from .ntvcojp import NTVCoJpCUIE
|
from .ntvcojp import NTVCoJpCUIE
|
||||||
from .ntvde import NTVDeIE
|
from .ntvde import NTVDeIE
|
||||||
from .ntvru import NTVRuIE
|
from .ntvru import NTVRuIE
|
||||||
|
@ -1499,7 +1495,6 @@
|
||||||
)
|
)
|
||||||
from .popcorntimes import PopcorntimesIE
|
from .popcorntimes import PopcorntimesIE
|
||||||
from .popcorntv import PopcornTVIE
|
from .popcorntv import PopcornTVIE
|
||||||
from .porn91 import Porn91IE
|
|
||||||
from .pornbox import PornboxIE
|
from .pornbox import PornboxIE
|
||||||
from .pornflip import PornFlipIE
|
from .pornflip import PornFlipIE
|
||||||
from .pornhub import (
|
from .pornhub import (
|
||||||
|
@ -2377,7 +2372,6 @@
|
||||||
)
|
)
|
||||||
from .xanimu import XanimuIE
|
from .xanimu import XanimuIE
|
||||||
from .xboxclips import XboxClipsIE
|
from .xboxclips import XboxClipsIE
|
||||||
from .xfileshare import XFileShareIE
|
|
||||||
from .xhamster import (
|
from .xhamster import (
|
||||||
XHamsterIE,
|
XHamsterIE,
|
||||||
XHamsterEmbedIE,
|
XHamsterEmbedIE,
|
||||||
|
@ -2432,8 +2426,6 @@
|
||||||
YouNowMomentIE,
|
YouNowMomentIE,
|
||||||
)
|
)
|
||||||
from .youporn import YouPornIE
|
from .youporn import YouPornIE
|
||||||
from .yourporn import YourPornIE
|
|
||||||
from .yourupload import YourUploadIE
|
|
||||||
from .zaiko import (
|
from .zaiko import (
|
||||||
ZaikoIE,
|
ZaikoIE,
|
||||||
ZaikoETicketIE,
|
ZaikoETicketIE,
|
||||||
|
|
|
@ -39,7 +39,7 @@ class AluraIE(InfoExtractor):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
||||||
course, video_id = self._match_valid_url(url)
|
course, video_id = self._match_valid_url(url).group('course_name', 'id')
|
||||||
video_url = self._VIDEO_URL % (course, video_id)
|
video_url = self._VIDEO_URL % (course, video_id)
|
||||||
|
|
||||||
video_dict = self._download_json(video_url, video_id, 'Searching for videos')
|
video_dict = self._download_json(video_url, video_id, 'Searching for videos')
|
||||||
|
@ -52,7 +52,7 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for video_obj in video_dict:
|
for video_obj in video_dict:
|
||||||
video_url_m3u8 = video_obj.get('link')
|
video_url_m3u8 = video_obj.get('mp4')
|
||||||
video_format = self._extract_m3u8_formats(
|
video_format = self._extract_m3u8_formats(
|
||||||
video_url_m3u8, None, 'mp4', entry_protocol='m3u8_native',
|
video_url_m3u8, None, 'mp4', entry_protocol='m3u8_native',
|
||||||
m3u8_id='hls', fatal=False)
|
m3u8_id='hls', fatal=False)
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
GeoRestrictedError,
|
GeoRestrictedError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
parse_qs,
|
parse_qs,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
|
@ -31,20 +32,6 @@ class ArteTVIE(ArteTVBaseIE):
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
|
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
|
||||||
'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '100103-000-A',
|
|
||||||
'title': 'USA: Dyskryminacja na porodówce',
|
|
||||||
'description': 'md5:242017b7cce59ffae340a54baefcafb1',
|
|
||||||
'alt_title': 'ARTE Reportage',
|
|
||||||
'upload_date': '20201103',
|
|
||||||
'duration': 554,
|
|
||||||
'thumbnail': r're:https://api-cdn\.arte\.tv/.+940x530',
|
|
||||||
'timestamp': 1604417980,
|
|
||||||
'ext': 'mp4',
|
|
||||||
},
|
|
||||||
'params': {'skip_download': 'm3u8'}
|
|
||||||
}, {
|
}, {
|
||||||
'note': 'No alt_title',
|
'note': 'No alt_title',
|
||||||
'url': 'https://www.arte.tv/fr/videos/110371-000-A/la-chaleur-supplice-des-arbres-de-rue/',
|
'url': 'https://www.arte.tv/fr/videos/110371-000-A/la-chaleur-supplice-des-arbres-de-rue/',
|
||||||
|
@ -58,6 +45,23 @@ class ArteTVIE(ArteTVBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.arte.tv/de/videos/110203-006-A/zaz/',
|
'url': 'https://www.arte.tv/de/videos/110203-006-A/zaz/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.arte.tv/fr/videos/109067-000-A/la-loi-de-teheran/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '109067-000-A',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'description': 'md5:d2ca367b8ecee028dddaa8bd1aebc739',
|
||||||
|
'timestamp': 1713927600,
|
||||||
|
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/3rR6PLzfbigSkkeHtkCZNF/940x530',
|
||||||
|
'duration': 7599,
|
||||||
|
'title': 'La loi de Téhéran',
|
||||||
|
'upload_date': '20240424',
|
||||||
|
'subtitles': {
|
||||||
|
'fr': 'mincount:1',
|
||||||
|
'fr-acc': 'mincount:1',
|
||||||
|
'fr-forced': 'mincount:1',
|
||||||
|
},
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'note': 'age-restricted',
|
'note': 'age-restricted',
|
||||||
'url': 'https://www.arte.tv/de/videos/006785-000-A/the-element-of-crime/',
|
'url': 'https://www.arte.tv/de/videos/006785-000-A/the-element-of-crime/',
|
||||||
|
@ -71,23 +75,7 @@ class ArteTVIE(ArteTVBaseIE):
|
||||||
'upload_date': '20230930',
|
'upload_date': '20230930',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
},
|
},
|
||||||
}, {
|
'skip': '404 Not Found',
|
||||||
'url': 'https://www.arte.tv/de/videos/085374-003-A/im-hohen-norden-geboren/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '085374-003-A',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'description': 'md5:ab79ec7cc472a93164415b4e4916abf9',
|
|
||||||
'timestamp': 1702872000,
|
|
||||||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/TnyHBfPxv3v2GEY3suXGZP/940x530',
|
|
||||||
'duration': 2594,
|
|
||||||
'title': 'Die kurze Zeit der Jugend',
|
|
||||||
'alt_title': 'Im hohen Norden geboren',
|
|
||||||
'upload_date': '20231218',
|
|
||||||
'subtitles': {
|
|
||||||
'fr': 'mincount:1',
|
|
||||||
'fr-acc': 'mincount:1',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_GEO_BYPASS = True
|
_GEO_BYPASS = True
|
||||||
|
@ -143,16 +131,18 @@ def _fix_accessible_subs_locale(subs):
|
||||||
updated_subs = {}
|
updated_subs = {}
|
||||||
for lang, sub_formats in subs.items():
|
for lang, sub_formats in subs.items():
|
||||||
for fmt in sub_formats:
|
for fmt in sub_formats:
|
||||||
if fmt.get('url', '').endswith('-MAL.m3u8'):
|
url = fmt.get('url') or ''
|
||||||
lang += '-acc'
|
suffix = ('acc' if url.endswith('-MAL.m3u8')
|
||||||
updated_subs.setdefault(lang, []).append(fmt)
|
else 'forced' if '_VO' not in url
|
||||||
|
else None)
|
||||||
|
updated_subs.setdefault(join_nonempty(lang, suffix), []).append(fmt)
|
||||||
return updated_subs
|
return updated_subs
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = self._match_valid_url(url)
|
mobj = self._match_valid_url(url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
lang = mobj.group('lang') or mobj.group('lang_2')
|
lang = mobj.group('lang') or mobj.group('lang_2')
|
||||||
langauge_code = self._LANG_MAP.get(lang)
|
language_code = self._LANG_MAP.get(lang)
|
||||||
|
|
||||||
config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id, headers={
|
config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id, headers={
|
||||||
'x-validated-age': '18'
|
'x-validated-age': '18'
|
||||||
|
@ -180,10 +170,10 @@ def _real_extract(self, url):
|
||||||
m = self._VERSION_CODE_RE.match(stream_version_code)
|
m = self._VERSION_CODE_RE.match(stream_version_code)
|
||||||
if m:
|
if m:
|
||||||
lang_pref = int(''.join('01'[x] for x in (
|
lang_pref = int(''.join('01'[x] for x in (
|
||||||
m.group('vlang') == langauge_code, # we prefer voice in the requested language
|
m.group('vlang') == language_code, # we prefer voice in the requested language
|
||||||
not m.group('audio_desc'), # and not the audio description version
|
not m.group('audio_desc'), # and not the audio description version
|
||||||
bool(m.group('original_voice')), # but if voice is not in the requested language, at least choose the original voice
|
bool(m.group('original_voice')), # but if voice is not in the requested language, at least choose the original voice
|
||||||
m.group('sub_lang') == langauge_code, # if subtitles are present, we prefer them in the requested language
|
m.group('sub_lang') == language_code, # if subtitles are present, we prefer them in the requested language
|
||||||
not m.group('has_sub'), # but we prefer no subtitles otherwise
|
not m.group('has_sub'), # but we prefer no subtitles otherwise
|
||||||
not m.group('sdh_sub'), # and we prefer not the hard-of-hearing subtitles if there are subtitles
|
not m.group('sdh_sub'), # and we prefer not the hard-of-hearing subtitles if there are subtitles
|
||||||
)))
|
)))
|
||||||
|
|
|
@ -602,7 +602,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||||
'url': 'http://www.bbc.com/news/world-europe-32668511',
|
'url': 'http://www.bbc.com/news/world-europe-32668511',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'world-europe-32668511',
|
'id': 'world-europe-32668511',
|
||||||
'title': 'Russia stages massive WW2 parade',
|
'title': 'Russia stages massive WW2 parade despite Western boycott',
|
||||||
'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
|
'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
|
||||||
},
|
},
|
||||||
'playlist_count': 2,
|
'playlist_count': 2,
|
||||||
|
@ -623,6 +623,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3662a707-0af9-3149-963f-47bea720b460',
|
'id': '3662a707-0af9-3149-963f-47bea720b460',
|
||||||
'title': 'BUGGER',
|
'title': 'BUGGER',
|
||||||
|
'description': r're:BUGGER The recent revelations by the whistleblower Edward Snowden were fascinating. .{211}\.{3}$',
|
||||||
},
|
},
|
||||||
'playlist_count': 18,
|
'playlist_count': 18,
|
||||||
}, {
|
}, {
|
||||||
|
@ -631,14 +632,14 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'p02mprgb',
|
'id': 'p02mprgb',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
|
'title': 'Germanwings crash site aerial video',
|
||||||
'description': 'md5:2868290467291b37feda7863f7a83f54',
|
'description': r're:(?s)Aerial video showed the site where the Germanwings flight 4U 9525, .{156} BFM TV\.$',
|
||||||
'duration': 47,
|
'duration': 47,
|
||||||
'timestamp': 1427219242,
|
'timestamp': 1427219242,
|
||||||
'upload_date': '20150324',
|
'upload_date': '20150324',
|
||||||
|
'thumbnail': 'https://ichef.bbci.co.uk/news/1024/media/images/81879000/jpg/_81879090_81879089.jpg',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
|
@ -656,21 +657,24 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
},
|
||||||
|
'skip': 'now SIMORGH_DATA with no video',
|
||||||
}, {
|
}, {
|
||||||
# single video embedded with data-playable containing XML playlists (regional section)
|
# single video embedded with data-playable containing XML playlists (regional section)
|
||||||
'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
|
'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
|
'id': '39275083',
|
||||||
|
'display_id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
|
'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
|
||||||
'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
|
'description': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
|
||||||
'timestamp': 1434713142,
|
'timestamp': 1434713142,
|
||||||
'upload_date': '20150619',
|
'upload_date': '20150619',
|
||||||
|
'thumbnail': 'https://a.files.bbci.co.uk/worldservice/live/assets/images/2015/06/19/150619132146_honduras_hsopitales_militares_640x360_aptn_nocredit.jpg',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
},
|
||||||
}, {
|
}, {
|
||||||
# single video from video playlist embedded with vxp-playlist-data JSON
|
# single video from video playlist embedded with vxp-playlist-data JSON
|
||||||
'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
|
'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
|
||||||
|
@ -683,22 +687,21 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
},
|
||||||
|
'skip': '404 Not Found',
|
||||||
}, {
|
}, {
|
||||||
# single video story with digitalData
|
# single video story with __PWA_PRELOADED_STATE__
|
||||||
'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
|
'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'p02q6gc4',
|
'id': 'p02q6gc4',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Sri Lanka’s spicy secret',
|
'title': 'Tasting the spice of life in Jaffna',
|
||||||
'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
|
'description': r're:(?s)BBC Travel Show’s Henry Golding explores the city of Jaffna .{151} aftertaste\.$',
|
||||||
'timestamp': 1437674293,
|
'timestamp': 1646058397,
|
||||||
'upload_date': '20150723',
|
'upload_date': '20220228',
|
||||||
|
'duration': 255,
|
||||||
|
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1920xn/p02vxvkn.jpg',
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
|
||||||
}
|
|
||||||
}, {
|
}, {
|
||||||
# single video story without digitalData
|
# single video story without digitalData
|
||||||
'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
|
'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
|
||||||
|
@ -710,12 +713,10 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||||
'timestamp': 1415867444,
|
'timestamp': 1415867444,
|
||||||
'upload_date': '20141113',
|
'upload_date': '20141113',
|
||||||
},
|
},
|
||||||
'params': {
|
'skip': 'redirects to TopGear home page',
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
|
||||||
}
|
|
||||||
}, {
|
}, {
|
||||||
# single video embedded with Morph
|
# single video embedded with Morph
|
||||||
|
# TODO: replacement test page
|
||||||
'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
|
'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'p041vhd0',
|
'id': 'p041vhd0',
|
||||||
|
@ -726,27 +727,22 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||||
'uploader': 'BBC Sport',
|
'uploader': 'BBC Sport',
|
||||||
'uploader_id': 'bbc_sport',
|
'uploader_id': 'bbc_sport',
|
||||||
},
|
},
|
||||||
'params': {
|
'skip': 'Video no longer in page',
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
'skip': 'Georestricted to UK',
|
|
||||||
}, {
|
}, {
|
||||||
# single video with playlist.sxml URL in playlist param
|
# single video in __INITIAL_DATA__
|
||||||
'url': 'http://www.bbc.com/sport/0/football/33653409',
|
'url': 'http://www.bbc.com/sport/0/football/33653409',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'p02xycnp',
|
'id': 'p02xycnp',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
|
'title': 'Ronaldo to Man Utd, Arsenal to spend?',
|
||||||
'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
|
'description': r're:(?s)BBC Sport\'s David Ornstein rounds up the latest transfer reports, .{359} here\.$',
|
||||||
|
'timestamp': 1437750175,
|
||||||
|
'upload_date': '20150724',
|
||||||
|
'thumbnail': r're:https?://.+/.+media/images/69320000/png/_69320754_mmgossipcolumnextraaugust18.png',
|
||||||
'duration': 140,
|
'duration': 140,
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
|
||||||
}
|
|
||||||
}, {
|
}, {
|
||||||
# article with multiple videos embedded with playlist.sxml in playlist param
|
# article with multiple videos embedded with Morph.setPayload
|
||||||
'url': 'http://www.bbc.com/sport/0/football/34475836',
|
'url': 'http://www.bbc.com/sport/0/football/34475836',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '34475836',
|
'id': '34475836',
|
||||||
|
@ -754,6 +750,21 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||||
'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
|
'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
|
||||||
},
|
},
|
||||||
'playlist_count': 3,
|
'playlist_count': 3,
|
||||||
|
}, {
|
||||||
|
# Testing noplaylist
|
||||||
|
'url': 'http://www.bbc.com/sport/0/football/34475836',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'p034ppnv',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'All you need to know about Jurgen Klopp',
|
||||||
|
'timestamp': 1444335081,
|
||||||
|
'upload_date': '20151008',
|
||||||
|
'duration': 122.0,
|
||||||
|
'thumbnail': 'https://ichef.bbci.co.uk/onesport/cps/976/cpsprodpb/7542/production/_85981003_klopp.jpg',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'noplaylist': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
# school report article with single video
|
# school report article with single video
|
||||||
'url': 'http://www.bbc.co.uk/schoolreport/35744779',
|
'url': 'http://www.bbc.co.uk/schoolreport/35744779',
|
||||||
|
@ -762,6 +773,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||||
'title': 'School which breaks down barriers in Jerusalem',
|
'title': 'School which breaks down barriers in Jerusalem',
|
||||||
},
|
},
|
||||||
'playlist_count': 1,
|
'playlist_count': 1,
|
||||||
|
'skip': 'redirects to Young Reporter home page https://www.bbc.co.uk/news/topics/cg41ylwv43pt',
|
||||||
}, {
|
}, {
|
||||||
# single video with playlist URL from weather section
|
# single video with playlist URL from weather section
|
||||||
'url': 'http://www.bbc.com/weather/features/33601775',
|
'url': 'http://www.bbc.com/weather/features/33601775',
|
||||||
|
@ -778,18 +790,33 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||||
'timestamp': 1437785037,
|
'timestamp': 1437785037,
|
||||||
'upload_date': '20150725',
|
'upload_date': '20150725',
|
||||||
|
'duration': 105,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# video with window.__INITIAL_DATA__ and value as JSON string
|
# video with window.__INITIAL_DATA__ and value as JSON string
|
||||||
'url': 'https://www.bbc.com/news/av/world-europe-59468682',
|
'url': 'https://www.bbc.com/news/av/world-europe-59468682',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'p0b71qth',
|
'id': 'p0b779gc',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Why France is making this woman a national hero',
|
'title': 'Why France is making this woman a national hero',
|
||||||
'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
|
'description': r're:(?s)France is honouring the US-born 20th Century singer and activist Josephine .{208} Second World War.',
|
||||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||||
'timestamp': 1638230731,
|
'timestamp': 1638215626,
|
||||||
'upload_date': '20211130',
|
'upload_date': '20211129',
|
||||||
|
'duration': 125,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# video with script id __NEXT_DATA__ and value as JSON string
|
||||||
|
'url': 'https://www.bbc.com/news/uk-68546268',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'p0hj0lq7',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Nasser Hospital doctor describes his treatment by IDF',
|
||||||
|
'description': r're:(?s)Doctor Abu Sabha said he was detained by Israeli forces after .{276} hostages\."$',
|
||||||
|
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||||
|
'timestamp': 1710188248,
|
||||||
|
'upload_date': '20240311',
|
||||||
|
'duration': 104,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# single video article embedded with data-media-vpid
|
# single video article embedded with data-media-vpid
|
||||||
|
@ -817,6 +844,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||||
'uploader': 'Radio 3',
|
'uploader': 'Radio 3',
|
||||||
'uploader_id': 'bbc_radio_three',
|
'uploader_id': 'bbc_radio_three',
|
||||||
},
|
},
|
||||||
|
'skip': '404 Not Found',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
|
'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -824,6 +852,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'md5:2fabf12a726603193a2879a055f72514',
|
'title': 'md5:2fabf12a726603193a2879a055f72514',
|
||||||
'description': 'Learn English words and phrases from this story',
|
'description': 'Learn English words and phrases from this story',
|
||||||
|
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1200x675/p06pq9gk.jpg',
|
||||||
},
|
},
|
||||||
'add_ie': [BBCCoUkIE.ie_key()],
|
'add_ie': [BBCCoUkIE.ie_key()],
|
||||||
}, {
|
}, {
|
||||||
|
@ -832,28 +861,30 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'p07c6sb9',
|
'id': 'p07c6sb9',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'How positive thinking is harming your happiness',
|
'title': 'The downsides of positive thinking',
|
||||||
'alt_title': 'The downsides of positive thinking',
|
'description': 'The downsides of positive thinking',
|
||||||
'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
|
|
||||||
'duration': 235,
|
'duration': 235,
|
||||||
'thumbnail': r're:https?://.+/p07c9dsr.jpg',
|
'thumbnail': r're:https?://.+/p07c9dsr\.(?:jpg|webp|png)',
|
||||||
'upload_date': '20190604',
|
'upload_date': '20220223',
|
||||||
'categories': ['Psychology'],
|
'timestamp': 1645632746,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# BBC Sounds
|
# BBC Sounds
|
||||||
'url': 'https://www.bbc.co.uk/sounds/play/m001q78b',
|
'url': 'https://www.bbc.co.uk/sounds/play/w3ct5rgx',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'm001q789',
|
'id': 'p0hrw4nr',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'The Night Tracks Mix - Music for the darkling hour',
|
'title': 'Are our coastlines being washed away?',
|
||||||
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0c00hym.jpg',
|
'description': r're:(?s)Around the world, coastlines are constantly changing .{2000,} Images\)$',
|
||||||
'chapters': 'count:8',
|
'timestamp': 1713556800,
|
||||||
'description': 'md5:815fb51cbdaa270040aab8145b3f1d67',
|
'upload_date': '20240419',
|
||||||
'uploader': 'Radio 3',
|
'duration': 1588,
|
||||||
'duration': 1800,
|
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0hrnxbl.jpg',
|
||||||
'uploader_id': 'bbc_radio_three',
|
'uploader': 'World Service',
|
||||||
},
|
'uploader_id': 'bbc_world_service',
|
||||||
|
'series': 'CrowdScience',
|
||||||
|
'chapters': [],
|
||||||
|
}
|
||||||
}, { # onion routes
|
}, { # onion routes
|
||||||
'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
|
'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -1008,8 +1039,7 @@ def _real_extract(self, url):
|
||||||
webpage, 'group id', default=None)
|
webpage, 'group id', default=None)
|
||||||
if group_id:
|
if group_id:
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
'https://www.bbc.co.uk/programmes/%s' % group_id,
|
f'https://www.bbc.co.uk/programmes/{group_id}', BBCCoUkIE)
|
||||||
ie=BBCCoUkIE.ie_key())
|
|
||||||
|
|
||||||
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
||||||
programme_id = self._search_regex(
|
programme_id = self._search_regex(
|
||||||
|
@ -1069,84 +1099,134 @@ def _real_extract(self, url):
|
||||||
}
|
}
|
||||||
|
|
||||||
# Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
|
# Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
|
||||||
# There are several setPayload calls may be present but the video
|
# Several setPayload calls may be present but the video(s)
|
||||||
# seems to be always related to the first one
|
# should be in one that mentions leadMedia or videoData
|
||||||
morph_payload = self._parse_json(
|
morph_payload = self._search_json(
|
||||||
self._search_regex(
|
r'\bMorph\s*\.\s*setPayload\s*\([^,]+,', webpage, 'morph payload', playlist_id,
|
||||||
r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
|
contains_pattern=r'{(?s:(?:(?!</script>).)+(?:"leadMedia"|\\"videoData\\")\s*:.+)}',
|
||||||
webpage, 'morph payload', default='{}'),
|
default={})
|
||||||
playlist_id, fatal=False)
|
|
||||||
if morph_payload:
|
if morph_payload:
|
||||||
components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
|
for lead_media in traverse_obj(morph_payload, (
|
||||||
for component in components:
|
'body', 'components', ..., 'props', 'leadMedia', {dict})):
|
||||||
if not isinstance(component, dict):
|
programme_id = traverse_obj(lead_media, ('identifiers', ('vpid', 'playablePid'), {str}, any))
|
||||||
continue
|
|
||||||
lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
|
|
||||||
if not lead_media:
|
|
||||||
continue
|
|
||||||
identifiers = lead_media.get('identifiers')
|
|
||||||
if not identifiers or not isinstance(identifiers, dict):
|
|
||||||
continue
|
|
||||||
programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
|
|
||||||
if not programme_id:
|
if not programme_id:
|
||||||
continue
|
continue
|
||||||
title = lead_media.get('title') or self._og_search_title(webpage)
|
|
||||||
formats, subtitles = self._download_media_selector(programme_id)
|
formats, subtitles = self._download_media_selector(programme_id)
|
||||||
description = lead_media.get('summary')
|
|
||||||
uploader = lead_media.get('masterBrand')
|
|
||||||
uploader_id = lead_media.get('mid')
|
|
||||||
duration = None
|
|
||||||
duration_d = lead_media.get('duration')
|
|
||||||
if isinstance(duration_d, dict):
|
|
||||||
duration = parse_duration(dict_get(
|
|
||||||
duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
|
|
||||||
return {
|
return {
|
||||||
'id': programme_id,
|
'id': programme_id,
|
||||||
'title': title,
|
'title': lead_media.get('title') or self._og_search_title(webpage),
|
||||||
'description': description,
|
**traverse_obj(lead_media, {
|
||||||
'duration': duration,
|
'description': ('summary', {str}),
|
||||||
'uploader': uploader,
|
'duration': ('duration', ('rawDuration', 'formattedDuration', 'spokenDuration'), {parse_duration}),
|
||||||
'uploader_id': uploader_id,
|
'uploader': ('masterBrand', {str}),
|
||||||
|
'uploader_id': ('mid', {str}),
|
||||||
|
}),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
body = self._parse_json(traverse_obj(morph_payload, (
|
||||||
|
'body', 'content', 'article', 'body')), playlist_id, fatal=False)
|
||||||
|
for video_data in traverse_obj(body, (lambda _, v: v['videoData']['pid'], 'videoData')):
|
||||||
|
if video_data.get('vpid'):
|
||||||
|
video_id = video_data['vpid']
|
||||||
|
formats, subtitles = self._download_media_selector(video_id)
|
||||||
|
entry = {
|
||||||
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
video_id = video_data['pid']
|
||||||
|
entry = self.url_result(
|
||||||
|
f'https://www.bbc.co.uk/programmes/{video_id}', BBCCoUkIE,
|
||||||
|
video_id, url_transparent=True)
|
||||||
|
entry.update({
|
||||||
|
'timestamp': traverse_obj(morph_payload, (
|
||||||
|
'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601})
|
||||||
|
),
|
||||||
|
**traverse_obj(video_data, {
|
||||||
|
'thumbnail': (('iChefImage', 'image'), {url_or_none}, any),
|
||||||
|
'title': (('title', 'caption'), {str}, any),
|
||||||
|
'duration': ('duration', {parse_duration}),
|
||||||
|
}),
|
||||||
|
})
|
||||||
|
if video_data.get('isLead') and not self._yes_playlist(playlist_id, video_id):
|
||||||
|
return entry
|
||||||
|
entries.append(entry)
|
||||||
|
if entries:
|
||||||
|
playlist_title = traverse_obj(morph_payload, (
|
||||||
|
'body', 'content', 'article', 'headline', {str})) or playlist_title
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
preload_state = self._parse_json(self._search_regex(
|
# various PRELOADED_STATE JSON
|
||||||
r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
|
preload_state = self._search_json(
|
||||||
'preload state', default='{}'), playlist_id, fatal=False)
|
r'window\.__(?:PWA_)?PRELOADED_STATE__\s*=', webpage,
|
||||||
if preload_state:
|
'preload state', playlist_id, transform_source=js_to_json, default={})
|
||||||
current_programme = preload_state.get('programmes', {}).get('current') or {}
|
# PRELOADED_STATE with current programme
|
||||||
programme_id = current_programme.get('id')
|
current_programme = traverse_obj(preload_state, ('programmes', 'current', {dict}))
|
||||||
if current_programme and programme_id and current_programme.get('type') == 'playable_item':
|
programme_id = traverse_obj(current_programme, ('id', {str}))
|
||||||
title = current_programme.get('titles', {}).get('tertiary') or playlist_title
|
if programme_id and current_programme.get('type') == 'playable_item':
|
||||||
|
title = traverse_obj(current_programme, ('titles', ('tertiary', 'secondary'), {str}, any)) or playlist_title
|
||||||
formats, subtitles = self._download_media_selector(programme_id)
|
formats, subtitles = self._download_media_selector(programme_id)
|
||||||
synopses = current_programme.get('synopses') or {}
|
|
||||||
network = current_programme.get('network') or {}
|
|
||||||
duration = int_or_none(
|
|
||||||
current_programme.get('duration', {}).get('value'))
|
|
||||||
thumbnail = None
|
|
||||||
image_url = current_programme.get('image_url')
|
|
||||||
if image_url:
|
|
||||||
thumbnail = image_url.replace('{recipe}', 'raw')
|
|
||||||
return {
|
return {
|
||||||
'id': programme_id,
|
'id': programme_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': dict_get(synopses, ('long', 'medium', 'short')),
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'duration': duration,
|
|
||||||
'uploader': network.get('short_title'),
|
|
||||||
'uploader_id': network.get('id'),
|
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
**traverse_obj(current_programme, {
|
||||||
|
'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
|
||||||
|
'thumbnail': ('image_url', {lambda u: url_or_none(u.replace('{recipe}', 'raw'))}),
|
||||||
|
'duration': ('duration', 'value', {int_or_none}),
|
||||||
|
'uploader': ('network', 'short_title', {str}),
|
||||||
|
'uploader_id': ('network', 'id', {str}),
|
||||||
|
'timestamp': ((('availability', 'from'), ('release', 'date')), {parse_iso8601}, any),
|
||||||
|
'series': ('titles', 'primary', {str}),
|
||||||
|
}),
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'chapters': traverse_obj(preload_state, (
|
'chapters': traverse_obj(preload_state, (
|
||||||
'tracklist', 'tracks', lambda _, v: float_or_none(v['offset']['start']), {
|
'tracklist', 'tracks', lambda _, v: float(v['offset']['start']), {
|
||||||
'title': ('titles', {lambda x: join_nonempty(
|
'title': ('titles', {lambda x: join_nonempty(
|
||||||
'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
|
'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
|
||||||
'start_time': ('offset', 'start', {float_or_none}),
|
'start_time': ('offset', 'start', {float_or_none}),
|
||||||
'end_time': ('offset', 'end', {float_or_none}),
|
'end_time': ('offset', 'end', {float_or_none}),
|
||||||
})) or None,
|
})
|
||||||
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# PWA_PRELOADED_STATE with article video asset
|
||||||
|
asset_id = traverse_obj(preload_state, (
|
||||||
|
'entities', 'articles', lambda k, _: k.rsplit('/', 1)[-1] == playlist_id,
|
||||||
|
'assetVideo', 0, {str}, any))
|
||||||
|
if asset_id:
|
||||||
|
video_id = traverse_obj(preload_state, ('entities', 'videos', asset_id, 'vpid', {str}))
|
||||||
|
if video_id:
|
||||||
|
article = traverse_obj(preload_state, (
|
||||||
|
'entities', 'articles', lambda _, v: v['assetVideo'][0] == asset_id, any))
|
||||||
|
|
||||||
|
def image_url(image_id):
|
||||||
|
return traverse_obj(preload_state, (
|
||||||
|
'entities', 'images', image_id, 'url',
|
||||||
|
{lambda u: url_or_none(u.replace('$recipe', 'raw'))}))
|
||||||
|
|
||||||
|
formats, subtitles = self._download_media_selector(video_id)
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
**traverse_obj(preload_state, ('entities', 'videos', asset_id, {
|
||||||
|
'title': ('title', {str}),
|
||||||
|
'description': (('synopsisLong', 'synopsisMedium', 'synopsisShort'), {str}, any),
|
||||||
|
'thumbnail': (0, {image_url}),
|
||||||
|
'duration': ('duration', {int_or_none}),
|
||||||
|
})),
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'timestamp': traverse_obj(article, ('displayDate', {parse_iso8601})),
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
return self.url_result(
|
||||||
|
f'https://www.bbc.co.uk/programmes/{asset_id}', BBCCoUkIE,
|
||||||
|
asset_id, playlist_title, display_id=playlist_id,
|
||||||
|
description=playlist_description)
|
||||||
|
|
||||||
bbc3_config = self._parse_json(
|
bbc3_config = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
|
r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
|
||||||
|
@ -1191,6 +1271,28 @@ def _real_extract(self, url):
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries, playlist_id, playlist_title, playlist_description)
|
entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
|
def parse_model(model):
|
||||||
|
"""Extract single video from model structure"""
|
||||||
|
item_id = traverse_obj(model, ('versions', 0, 'versionId', {str}))
|
||||||
|
if not item_id:
|
||||||
|
return
|
||||||
|
formats, subtitles = self._download_media_selector(item_id)
|
||||||
|
return {
|
||||||
|
'id': item_id,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
**traverse_obj(model, {
|
||||||
|
'title': ('title', {str}),
|
||||||
|
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
|
||||||
|
'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any),
|
||||||
|
'duration': ('versions', 0, 'duration', {int}),
|
||||||
|
'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
def is_type(*types):
|
||||||
|
return lambda _, v: v['type'] in types
|
||||||
|
|
||||||
initial_data = self._search_regex(
|
initial_data = self._search_regex(
|
||||||
r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
|
r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
|
||||||
'quoted preload state', default=None)
|
'quoted preload state', default=None)
|
||||||
|
@ -1202,6 +1304,19 @@ def _real_extract(self, url):
|
||||||
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
|
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
|
||||||
initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
|
initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
|
||||||
if initial_data:
|
if initial_data:
|
||||||
|
for video_data in traverse_obj(initial_data, (
|
||||||
|
'stores', 'article', 'articleBodyContent', is_type('video'))):
|
||||||
|
model = traverse_obj(video_data, (
|
||||||
|
'model', 'blocks', is_type('aresMedia'),
|
||||||
|
'model', 'blocks', is_type('aresMediaMetadata'),
|
||||||
|
'model', {dict}, any))
|
||||||
|
entry = parse_model(model)
|
||||||
|
if entry:
|
||||||
|
entries.append(entry)
|
||||||
|
if entries:
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
def parse_media(media):
|
def parse_media(media):
|
||||||
if not media:
|
if not media:
|
||||||
return
|
return
|
||||||
|
@ -1234,19 +1349,64 @@ def parse_media(media):
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'timestamp': item_time,
|
'timestamp': item_time,
|
||||||
'description': strip_or_none(item_desc),
|
'description': strip_or_none(item_desc),
|
||||||
|
'duration': int_or_none(item.get('duration')),
|
||||||
})
|
})
|
||||||
for resp in (initial_data.get('data') or {}).values():
|
|
||||||
name = resp.get('name')
|
for resp in traverse_obj(initial_data, ('data', lambda _, v: v['name'])):
|
||||||
|
name = resp['name']
|
||||||
if name == 'media-experience':
|
if name == 'media-experience':
|
||||||
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
|
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
|
||||||
elif name == 'article':
|
elif name == 'article':
|
||||||
for block in (try_get(resp,
|
for block in traverse_obj(resp, (
|
||||||
(lambda x: x['data']['blocks'],
|
'data', (None, ('content', 'model')), 'blocks',
|
||||||
lambda x: x['data']['content']['model']['blocks'],),
|
is_type('media', 'video'), 'model', {dict})):
|
||||||
list) or []):
|
parse_media(block)
|
||||||
if block.get('type') not in ['media', 'video']:
|
return self.playlist_result(
|
||||||
continue
|
entries, playlist_id, playlist_title, playlist_description)
|
||||||
parse_media(block.get('model'))
|
|
||||||
|
# extract from SIMORGH_DATA hydration JSON
|
||||||
|
simorgh_data = self._search_json(
|
||||||
|
r'window\s*\.\s*SIMORGH_DATA\s*=', webpage,
|
||||||
|
'simorgh data', playlist_id, default={})
|
||||||
|
if simorgh_data:
|
||||||
|
done = False
|
||||||
|
for video_data in traverse_obj(simorgh_data, (
|
||||||
|
'pageData', 'content', 'model', 'blocks', is_type('video', 'legacyMedia'))):
|
||||||
|
model = traverse_obj(video_data, (
|
||||||
|
'model', 'blocks', is_type('aresMedia'),
|
||||||
|
'model', 'blocks', is_type('aresMediaMetadata'),
|
||||||
|
'model', {dict}, any))
|
||||||
|
if video_data['type'] == 'video':
|
||||||
|
entry = parse_model(model)
|
||||||
|
else: # legacyMedia: no duration, subtitles
|
||||||
|
block_id, entry = traverse_obj(model, ('blockId', {str})), None
|
||||||
|
media_data = traverse_obj(simorgh_data, (
|
||||||
|
'pageData', 'promo', 'media',
|
||||||
|
{lambda x: x if x['id'] == block_id else None}))
|
||||||
|
formats = traverse_obj(media_data, ('playlist', lambda _, v: url_or_none(v['url']), {
|
||||||
|
'url': ('url', {url_or_none}),
|
||||||
|
'ext': ('format', {str}),
|
||||||
|
'tbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
|
||||||
|
}))
|
||||||
|
if formats:
|
||||||
|
entry = {
|
||||||
|
'id': block_id,
|
||||||
|
'display_id': playlist_id,
|
||||||
|
'formats': formats,
|
||||||
|
'description': traverse_obj(simorgh_data, ('pageData', 'promo', 'summary', {str})),
|
||||||
|
**traverse_obj(model, {
|
||||||
|
'title': ('title', {str}),
|
||||||
|
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
|
||||||
|
'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
|
||||||
|
'timestamp': ('firstPublished', {functools.partial(int_or_none, scale=1000)}),
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
done = True
|
||||||
|
if entry:
|
||||||
|
entries.append(entry)
|
||||||
|
if done:
|
||||||
|
break
|
||||||
|
if entries:
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries, playlist_id, playlist_title, playlist_description)
|
entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
|
@ -1255,6 +1415,24 @@ def extract_all(pattern):
|
||||||
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
||||||
re.findall(pattern, webpage))))
|
re.findall(pattern, webpage))))
|
||||||
|
|
||||||
|
# US accessed article with single embedded video (e.g.
|
||||||
|
# https://www.bbc.com/news/uk-68546268)
|
||||||
|
next_data = traverse_obj(self._search_nextjs_data(webpage, playlist_id, default={}),
|
||||||
|
('props', 'pageProps', 'page'))
|
||||||
|
model = traverse_obj(next_data, (
|
||||||
|
..., 'contents', is_type('video'),
|
||||||
|
'model', 'blocks', is_type('media'),
|
||||||
|
'model', 'blocks', is_type('mediaMetadata'),
|
||||||
|
'model', {dict}, any))
|
||||||
|
if model and (entry := parse_model(model)):
|
||||||
|
if not entry.get('timestamp'):
|
||||||
|
entry['timestamp'] = traverse_obj(next_data, (
|
||||||
|
..., 'contents', is_type('timestamp'), 'model',
|
||||||
|
'timestamp', {functools.partial(int_or_none, scale=1000)}, any))
|
||||||
|
entries.append(entry)
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
# Multiple video article (e.g.
|
# Multiple video article (e.g.
|
||||||
# http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
|
# http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
|
||||||
EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
|
EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
|
||||||
|
|
|
@ -1045,7 +1045,8 @@ def fetch_page(page_idx):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
|
response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
|
||||||
playlist_id, note=f'Downloading page {page_idx}', query=query)
|
playlist_id, note=f'Downloading page {page_idx}', query=query,
|
||||||
|
headers={'referer': url})
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, HTTPError) and e.cause.status == 412:
|
if isinstance(e.cause, HTTPError) and e.cause.status == 412:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
|
|
|
@ -1,32 +0,0 @@
|
||||||
from .common import InfoExtractor
|
|
||||||
|
|
||||||
|
|
||||||
class CableAVIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://cableav\.tv/(?P<id>[a-zA-Z0-9]+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://cableav.tv/lS4iR9lWjN8/',
|
|
||||||
'md5': '7e3fe5e49d61c4233b7f5b0f69b15e18',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'lS4iR9lWjN8',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': '國產麻豆AV 叮叮映畫 DDF001 情欲小說家 - CableAV',
|
|
||||||
'description': '國產AV 480p, 720p 国产麻豆AV 叮叮映画 DDF001 情欲小说家',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
}
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
video_url = self._og_search_video_url(webpage, secure=False)
|
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(video_url, video_id, 'mp4')
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': self._og_search_title(webpage),
|
|
||||||
'description': self._og_search_description(webpage),
|
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
|
@ -16,7 +16,6 @@
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
multipart_encode,
|
multipart_encode,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
random_birthday,
|
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
try_call,
|
try_call,
|
||||||
try_get,
|
try_get,
|
||||||
|
@ -63,38 +62,57 @@ class CDAIE(InfoExtractor):
|
||||||
'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
|
'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'uploader': 'crash404',
|
'uploader': 'crash404',
|
||||||
'view_count': int,
|
|
||||||
'average_rating': float,
|
'average_rating': float,
|
||||||
'duration': 137,
|
'duration': 137,
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
'upload_date': '20160220',
|
||||||
|
'timestamp': 1455968218,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# Age-restricted
|
# Age-restricted with vfilm redirection
|
||||||
'url': 'http://www.cda.pl/video/1273454c4',
|
'url': 'https://www.cda.pl/video/8753244c4',
|
||||||
|
'md5': 'd8eeb83d63611289507010d3df3bb8b3',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1273454c4',
|
'id': '8753244c4',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Bronson (2008) napisy HD 1080p',
|
'title': '[18+] Bez Filtra: Rezerwowe Psy czyli... najwulgarniejsza polska gra?',
|
||||||
'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c',
|
'description': 'md5:ae80bac31bd6a9f077a6cce03c7c077e',
|
||||||
'height': 1080,
|
'height': 1080,
|
||||||
'uploader': 'boniek61',
|
'uploader': 'arhn eu',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'duration': 5554,
|
'duration': 991,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
'view_count': int,
|
|
||||||
'average_rating': float,
|
'average_rating': float,
|
||||||
},
|
'timestamp': 1633888264,
|
||||||
|
'upload_date': '20211010',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
# Age-restricted without vfilm redirection
|
||||||
|
'url': 'https://www.cda.pl/video/17028157b8',
|
||||||
|
'md5': 'c1fe5ff4582bace95d4f0ce0fbd0f992',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '17028157b8',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'STENDUPY MICHAŁ OGIŃSKI',
|
||||||
|
'description': 'md5:5851f3272bfc31f762d616040a1d609a',
|
||||||
|
'height': 480,
|
||||||
|
'uploader': 'oginski',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'duration': 18855,
|
||||||
|
'age_limit': 18,
|
||||||
|
'average_rating': float,
|
||||||
|
'timestamp': 1699705901,
|
||||||
|
'upload_date': '20231111',
|
||||||
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://ebd.cda.pl/0x0/5749950c',
|
'url': 'http://ebd.cda.pl/0x0/5749950c',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
|
def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
|
||||||
form_data = random_birthday('rok', 'miesiac', 'dzien')
|
data, content_type = multipart_encode({'age_confirm': ''})
|
||||||
form_data.update({'return': url, 'module': 'video', 'module_id': video_id})
|
|
||||||
data, content_type = multipart_encode(form_data)
|
|
||||||
return self._download_webpage(
|
return self._download_webpage(
|
||||||
urljoin(url, '/a/validatebirth'), video_id, *args,
|
url, video_id, *args,
|
||||||
data=data, headers={
|
data=data, headers={
|
||||||
'Referer': url,
|
'Referer': url,
|
||||||
'Content-Type': content_type,
|
'Content-Type': content_type,
|
||||||
|
@ -164,7 +182,7 @@ def _real_extract(self, url):
|
||||||
if 'Authorization' in self._API_HEADERS:
|
if 'Authorization' in self._API_HEADERS:
|
||||||
return self._api_extract(video_id)
|
return self._api_extract(video_id)
|
||||||
else:
|
else:
|
||||||
return self._web_extract(video_id, url)
|
return self._web_extract(video_id)
|
||||||
|
|
||||||
def _api_extract(self, video_id):
|
def _api_extract(self, video_id):
|
||||||
meta = self._download_json(
|
meta = self._download_json(
|
||||||
|
@ -197,9 +215,9 @@ def _api_extract(self, video_id):
|
||||||
'view_count': meta.get('views'),
|
'view_count': meta.get('views'),
|
||||||
}
|
}
|
||||||
|
|
||||||
def _web_extract(self, video_id, url):
|
def _web_extract(self, video_id):
|
||||||
self._set_cookie('cda.pl', 'cda.player', 'html5')
|
self._set_cookie('cda.pl', 'cda.player', 'html5')
|
||||||
webpage = self._download_webpage(
|
webpage, urlh = self._download_webpage_handle(
|
||||||
f'{self._BASE_URL}/video/{video_id}/vfilm', video_id)
|
f'{self._BASE_URL}/video/{video_id}/vfilm', video_id)
|
||||||
|
|
||||||
if 'Ten film jest dostępny dla użytkowników premium' in webpage:
|
if 'Ten film jest dostępny dla użytkowników premium' in webpage:
|
||||||
|
@ -209,10 +227,10 @@ def _web_extract(self, video_id, url):
|
||||||
self.raise_geo_restricted()
|
self.raise_geo_restricted()
|
||||||
|
|
||||||
need_confirm_age = False
|
need_confirm_age = False
|
||||||
if self._html_search_regex(r'(<form[^>]+action="[^"]*/a/validatebirth[^"]*")',
|
if self._html_search_regex(r'(<button[^>]+name="[^"]*age_confirm[^"]*")',
|
||||||
webpage, 'birthday validate form', default=None):
|
webpage, 'birthday validate form', default=None):
|
||||||
webpage = self._download_age_confirm_page(
|
webpage = self._download_age_confirm_page(
|
||||||
url, video_id, note='Confirming age')
|
urlh.url, video_id, note='Confirming age')
|
||||||
need_confirm_age = True
|
need_confirm_age = True
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
@ -222,9 +240,6 @@ def _web_extract(self, video_id, url):
|
||||||
(?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
|
(?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
|
||||||
<(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
|
<(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
|
||||||
''', webpage, 'uploader', default=None, group='uploader')
|
''', webpage, 'uploader', default=None, group='uploader')
|
||||||
view_count = self._search_regex(
|
|
||||||
r'Odsłony:(?:\s| )*([0-9]+)', webpage,
|
|
||||||
'view_count', default=None)
|
|
||||||
average_rating = self._search_regex(
|
average_rating = self._search_regex(
|
||||||
(r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
|
(r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
|
||||||
r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
|
r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
|
||||||
|
@ -235,7 +250,6 @@ def _web_extract(self, video_id, url):
|
||||||
'title': self._og_search_title(webpage),
|
'title': self._og_search_title(webpage),
|
||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage),
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'view_count': int_or_none(view_count),
|
|
||||||
'average_rating': float_or_none(average_rating),
|
'average_rating': float_or_none(average_rating),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
|
|
@ -957,7 +957,8 @@ def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=
|
||||||
if urlh is False:
|
if urlh is False:
|
||||||
assert not fatal
|
assert not fatal
|
||||||
return False
|
return False
|
||||||
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding)
|
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal,
|
||||||
|
encoding=encoding, data=data)
|
||||||
return (content, urlh)
|
return (content, urlh)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -1005,8 +1006,10 @@ def __check_blocked(self, content):
|
||||||
'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
|
'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
|
||||||
expected=True)
|
expected=True)
|
||||||
|
|
||||||
def _request_dump_filename(self, url, video_id):
|
def _request_dump_filename(self, url, video_id, data=None):
|
||||||
basen = f'{video_id}_{url}'
|
if data is not None:
|
||||||
|
data = hashlib.md5(data).hexdigest()
|
||||||
|
basen = join_nonempty(video_id, data, url, delim='_')
|
||||||
trim_length = self.get_param('trim_file_name') or 240
|
trim_length = self.get_param('trim_file_name') or 240
|
||||||
if len(basen) > trim_length:
|
if len(basen) > trim_length:
|
||||||
h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
|
h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
|
||||||
|
@ -1028,7 +1031,8 @@ def __decode_webpage(self, webpage_bytes, encoding, headers):
|
||||||
except LookupError:
|
except LookupError:
|
||||||
return webpage_bytes.decode('utf-8', 'replace')
|
return webpage_bytes.decode('utf-8', 'replace')
|
||||||
|
|
||||||
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
|
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True,
|
||||||
|
prefix=None, encoding=None, data=None):
|
||||||
webpage_bytes = urlh.read()
|
webpage_bytes = urlh.read()
|
||||||
if prefix is not None:
|
if prefix is not None:
|
||||||
webpage_bytes = prefix + webpage_bytes
|
webpage_bytes = prefix + webpage_bytes
|
||||||
|
@ -1037,7 +1041,9 @@ def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errno
|
||||||
dump = base64.b64encode(webpage_bytes).decode('ascii')
|
dump = base64.b64encode(webpage_bytes).decode('ascii')
|
||||||
self._downloader.to_screen(dump)
|
self._downloader.to_screen(dump)
|
||||||
if self.get_param('write_pages'):
|
if self.get_param('write_pages'):
|
||||||
filename = self._request_dump_filename(urlh.url, video_id)
|
if isinstance(url_or_request, Request):
|
||||||
|
data = self._create_request(url_or_request, data).data
|
||||||
|
filename = self._request_dump_filename(urlh.url, video_id, data)
|
||||||
self.to_screen(f'Saving request to {filename}')
|
self.to_screen(f'Saving request to {filename}')
|
||||||
with open(filename, 'wb') as outf:
|
with open(filename, 'wb') as outf:
|
||||||
outf.write(webpage_bytes)
|
outf.write(webpage_bytes)
|
||||||
|
@ -1098,7 +1104,7 @@ def download_content(self, url_or_request, video_id, note=note, errnote=errnote,
|
||||||
impersonate=None, require_impersonation=False):
|
impersonate=None, require_impersonation=False):
|
||||||
if self.get_param('load_pages'):
|
if self.get_param('load_pages'):
|
||||||
url_or_request = self._create_request(url_or_request, data, headers, query)
|
url_or_request = self._create_request(url_or_request, data, headers, query)
|
||||||
filename = self._request_dump_filename(url_or_request.url, video_id)
|
filename = self._request_dump_filename(url_or_request.url, video_id, url_or_request.data)
|
||||||
self.to_screen(f'Loading request from {filename}')
|
self.to_screen(f'Loading request from {filename}')
|
||||||
try:
|
try:
|
||||||
with open(filename, 'rb') as dumpf:
|
with open(filename, 'rb') as dumpf:
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..networking import Request
|
||||||
from ..networking.exceptions import HTTPError
|
from ..networking.exceptions import HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
@ -24,6 +25,7 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||||
_BASE_URL = 'https://www.crunchyroll.com'
|
_BASE_URL = 'https://www.crunchyroll.com'
|
||||||
_API_BASE = 'https://api.crunchyroll.com'
|
_API_BASE = 'https://api.crunchyroll.com'
|
||||||
_NETRC_MACHINE = 'crunchyroll'
|
_NETRC_MACHINE = 'crunchyroll'
|
||||||
|
_SWITCH_USER_AGENT = 'Crunchyroll/1.8.0 Nintendo Switch/12.3.12.0 UE4/4.27'
|
||||||
_REFRESH_TOKEN = None
|
_REFRESH_TOKEN = None
|
||||||
_AUTH_HEADERS = None
|
_AUTH_HEADERS = None
|
||||||
_AUTH_EXPIRY = None
|
_AUTH_EXPIRY = None
|
||||||
|
@ -179,10 +181,19 @@ def _extract_stream(self, identifier, display_id=None):
|
||||||
display_id = identifier
|
display_id = identifier
|
||||||
|
|
||||||
self._update_auth()
|
self._update_auth()
|
||||||
|
headers = {**CrunchyrollBaseIE._AUTH_HEADERS, 'User-Agent': self._SWITCH_USER_AGENT}
|
||||||
|
try:
|
||||||
stream_response = self._download_json(
|
stream_response = self._download_json(
|
||||||
f'https://cr-play-service.prd.crunchyrollsvc.com/v1/{identifier}/console/switch/play',
|
f'https://cr-play-service.prd.crunchyrollsvc.com/v1/{identifier}/console/switch/play',
|
||||||
display_id, note='Downloading stream info', errnote='Failed to download stream info',
|
display_id, note='Downloading stream info', errnote='Failed to download stream info', headers=headers)
|
||||||
headers=CrunchyrollBaseIE._AUTH_HEADERS)
|
except ExtractorError as error:
|
||||||
|
if self.get_param('ignore_no_formats_error'):
|
||||||
|
self.report_warning(error.orig_msg)
|
||||||
|
return [], {}
|
||||||
|
elif isinstance(error.cause, HTTPError) and error.cause.status == 420:
|
||||||
|
raise ExtractorError(
|
||||||
|
'You have reached the rate-limit for active streams; try again later', expected=True)
|
||||||
|
raise
|
||||||
|
|
||||||
available_formats = {'': ('', '', stream_response['url'])}
|
available_formats = {'': ('', '', stream_response['url'])}
|
||||||
for hardsub_lang, stream in traverse_obj(stream_response, ('hardSubs', {dict.items}, lambda _, v: v[1]['url'])):
|
for hardsub_lang, stream in traverse_obj(stream_response, ('hardSubs', {dict.items}, lambda _, v: v[1]['url'])):
|
||||||
|
@ -211,7 +222,7 @@ def _extract_stream(self, identifier, display_id=None):
|
||||||
fatal=False, note=f'Downloading {f"{format_id} " if hardsub_lang else ""}MPD manifest')
|
fatal=False, note=f'Downloading {f"{format_id} " if hardsub_lang else ""}MPD manifest')
|
||||||
self._merge_subtitles(dash_subs, target=subtitles)
|
self._merge_subtitles(dash_subs, target=subtitles)
|
||||||
else:
|
else:
|
||||||
continue # XXX: Update this if/when meta mpd formats are working
|
continue # XXX: Update this if meta mpd formats work; will be tricky with token invalidation
|
||||||
for f in adaptive_formats:
|
for f in adaptive_formats:
|
||||||
if f.get('acodec') != 'none':
|
if f.get('acodec') != 'none':
|
||||||
f['language'] = audio_locale
|
f['language'] = audio_locale
|
||||||
|
@ -221,6 +232,15 @@ def _extract_stream(self, identifier, display_id=None):
|
||||||
for locale, subtitle in traverse_obj(stream_response, (('subtitles', 'captions'), {dict.items}, ...)):
|
for locale, subtitle in traverse_obj(stream_response, (('subtitles', 'captions'), {dict.items}, ...)):
|
||||||
subtitles.setdefault(locale, []).append(traverse_obj(subtitle, {'url': 'url', 'ext': 'format'}))
|
subtitles.setdefault(locale, []).append(traverse_obj(subtitle, {'url': 'url', 'ext': 'format'}))
|
||||||
|
|
||||||
|
# Invalidate stream token to avoid rate-limit
|
||||||
|
error_msg = 'Unable to invalidate stream token; you may experience rate-limiting'
|
||||||
|
if stream_token := stream_response.get('token'):
|
||||||
|
self._request_webpage(Request(
|
||||||
|
f'https://cr-play-service.prd.crunchyrollsvc.com/v1/token/{identifier}/{stream_token}/inactive',
|
||||||
|
headers=headers, method='PATCH'), display_id, 'Invalidating stream token', error_msg, fatal=False)
|
||||||
|
else:
|
||||||
|
self.report_warning(error_msg)
|
||||||
|
|
||||||
return formats, subtitles
|
return formats, subtitles
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,105 +0,0 @@
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..compat import (
|
|
||||||
compat_b64decode,
|
|
||||||
compat_str,
|
|
||||||
compat_urlparse,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
|
||||||
extract_attributes,
|
|
||||||
ExtractorError,
|
|
||||||
get_elements_by_class,
|
|
||||||
urlencode_postdata,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class EinthusanIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?P<host>einthusan\.(?:tv|com|ca))/movie/watch/(?P<id>[^/?#&]+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://einthusan.tv/movie/watch/9097/',
|
|
||||||
'md5': 'ff0f7f2065031b8a2cf13a933731c035',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '9097',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Ae Dil Hai Mushkil',
|
|
||||||
'description': 'md5:33ef934c82a671a94652a9b4e54d931b',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'https://einthusan.com/movie/watch/9097/',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'https://einthusan.ca/movie/watch/4E9n/?lang=hindi',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
# reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
|
|
||||||
def _decrypt(self, encrypted_data, video_id):
|
|
||||||
return self._parse_json(compat_b64decode((
|
|
||||||
encrypted_data[:10] + encrypted_data[-1] + encrypted_data[12:-1]
|
|
||||||
)).decode('utf-8'), video_id)
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = self._match_valid_url(url)
|
|
||||||
host = mobj.group('host')
|
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
title = self._html_search_regex(r'<h3>([^<]+)</h3>', webpage, 'title')
|
|
||||||
|
|
||||||
player_params = extract_attributes(self._search_regex(
|
|
||||||
r'(<section[^>]+id="UIVideoPlayer"[^>]+>)', webpage, 'player parameters'))
|
|
||||||
|
|
||||||
page_id = self._html_search_regex(
|
|
||||||
'<html[^>]+data-pageid="([^"]+)"', webpage, 'page ID')
|
|
||||||
video_data = self._download_json(
|
|
||||||
'https://%s/ajax/movie/watch/%s/' % (host, video_id), video_id,
|
|
||||||
data=urlencode_postdata({
|
|
||||||
'xEvent': 'UIVideoPlayer.PingOutcome',
|
|
||||||
'xJson': json.dumps({
|
|
||||||
'EJOutcomes': player_params['data-ejpingables'],
|
|
||||||
'NativeHLS': False
|
|
||||||
}),
|
|
||||||
'arcVersion': 3,
|
|
||||||
'appVersion': 59,
|
|
||||||
'gorilla.csrf.Token': page_id,
|
|
||||||
}))['Data']
|
|
||||||
|
|
||||||
if isinstance(video_data, compat_str) and video_data.startswith('/ratelimited/'):
|
|
||||||
raise ExtractorError(
|
|
||||||
'Download rate reached. Please try again later.', expected=True)
|
|
||||||
|
|
||||||
ej_links = self._decrypt(video_data['EJLinks'], video_id)
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
|
|
||||||
m3u8_url = ej_links.get('HLSLink')
|
|
||||||
if m3u8_url:
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native'))
|
|
||||||
|
|
||||||
mp4_url = ej_links.get('MP4Link')
|
|
||||||
if mp4_url:
|
|
||||||
formats.append({
|
|
||||||
'url': mp4_url,
|
|
||||||
})
|
|
||||||
|
|
||||||
description = get_elements_by_class('synopsis', webpage)[0]
|
|
||||||
thumbnail = self._html_search_regex(
|
|
||||||
r'''<img[^>]+src=(["'])(?P<url>(?!\1).+?/moviecovers/(?!\1).+?)\1''',
|
|
||||||
webpage, 'thumbnail url', fatal=False, group='url')
|
|
||||||
if thumbnail is not None:
|
|
||||||
thumbnail = compat_urlparse.urljoin(url, thumbnail)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'formats': formats,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'description': description,
|
|
||||||
}
|
|
|
@ -16,13 +16,31 @@ class EplusIbIE(InfoExtractor):
|
||||||
_VALID_URL = [r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)',
|
_VALID_URL = [r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)',
|
||||||
r'https?://live\.eplus\.jp/(?P<id>sample|\d+)']
|
r'https?://live\.eplus\.jp/(?P<id>sample|\d+)']
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://live.eplus.jp/ex/player?ib=YEFxb3Vyc2Dombnjg7blkrLlrablnJLjgrnjgq%2Fjg7zjg6vjgqLjgqTjg4njg6vlkIzlpb3kvJpgTGllbGxhIQ%3D%3D',
|
'url': 'https://live.eplus.jp/ex/player?ib=41K6Wzbr3PlcMD%2FOKHFlC%2FcZCe2Eaw7FK%2BpJS1ooUHki8d0vGSy2mYqxillQBe1dSnOxU%2B8%2FzXKls4XPBSb3vw%3D%3D',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '354502-0001-002',
|
'id': '335699-0001-006',
|
||||||
'title': 'LoveLive!Series Presents COUNTDOWN LoveLive! 2021→2022~LIVE with a smile!~【Streaming+(配信)】',
|
'title': '少女☆歌劇 レヴュースタァライト -The LIVE 青嵐- BLUE GLITTER <定点映像配信>【Streaming+(配信)】',
|
||||||
'live_status': 'was_live',
|
'live_status': 'was_live',
|
||||||
'release_date': '20211231',
|
'release_date': '20201221',
|
||||||
'release_timestamp': 1640952000,
|
'release_timestamp': 1608544800,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
'ignore_no_formats_error': True,
|
||||||
|
},
|
||||||
|
'expected_warnings': [
|
||||||
|
'This event may not be accessible',
|
||||||
|
'No video formats found',
|
||||||
|
'Requested format is not available',
|
||||||
|
],
|
||||||
|
}, {
|
||||||
|
'url': 'https://live.eplus.jp/ex/player?ib=6QSsQdyRAwOFZrEHWlhRm7vocgV%2FO0YzBZ%2BaBEBg1XR%2FmbLn0R%2F048dUoAY038%2F%2F92MJ73BsoAtvUpbV6RLtDQ%3D%3D&show_id=2371511',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '348021-0054-001',
|
||||||
|
'title': 'ラブライブ!スーパースター!! Liella! First LoveLive! Tour ~Starlines~【東京/DAY.1】',
|
||||||
|
'live_status': 'was_live',
|
||||||
|
'release_date': '20220115',
|
||||||
|
'release_timestamp': 1642233600,
|
||||||
'description': str,
|
'description': str,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
|
@ -124,6 +142,10 @@ def _real_extract(self, url):
|
||||||
if data_json.get('drm_mode') == 'ON':
|
if data_json.get('drm_mode') == 'ON':
|
||||||
self.report_drm(video_id)
|
self.report_drm(video_id)
|
||||||
|
|
||||||
|
if data_json.get('is_pass_ticket') == 'YES':
|
||||||
|
raise ExtractorError(
|
||||||
|
'This URL is for a pass ticket instead of a player page', expected=True)
|
||||||
|
|
||||||
delivery_status = data_json.get('delivery_status')
|
delivery_status = data_json.get('delivery_status')
|
||||||
archive_mode = data_json.get('archive_mode')
|
archive_mode = data_json.get('archive_mode')
|
||||||
release_timestamp = try_call(lambda: unified_timestamp(data_json['event_datetime']) - 32400)
|
release_timestamp = try_call(lambda: unified_timestamp(data_json['event_datetime']) - 32400)
|
||||||
|
|
|
@ -94,13 +94,14 @@ def get_item(type_, preference):
|
||||||
|
|
||||||
class EuroParlWebstreamIE(InfoExtractor):
|
class EuroParlWebstreamIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://multimedia\.europarl\.europa\.eu/[^/#?]+/
|
https?://multimedia\.europarl\.europa\.eu/
|
||||||
(?:(?!video)[^/#?]+/[\w-]+_)(?P<id>[\w-]+)
|
(?:\w+/)?webstreaming/(?:[\w-]+_)?(?P<id>[\w-]+)
|
||||||
'''
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/plenary-session_20220914-0900-PLENARY',
|
'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/plenary-session_20220914-0900-PLENARY',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '62388b15-d85b-4add-99aa-ba12ccf64f0d',
|
'id': '62388b15-d85b-4add-99aa-ba12ccf64f0d',
|
||||||
|
'display_id': '20220914-0900-PLENARY',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Plenary session',
|
'title': 'Plenary session',
|
||||||
'release_timestamp': 1663139069,
|
'release_timestamp': 1663139069,
|
||||||
|
@ -125,6 +126,7 @@ class EuroParlWebstreamIE(InfoExtractor):
|
||||||
'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/committee-on-culture-and-education_20230301-1130-COMMITTEE-CULT',
|
'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/committee-on-culture-and-education_20230301-1130-COMMITTEE-CULT',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '7355662c-8eac-445e-4bb9-08db14b0ddd7',
|
'id': '7355662c-8eac-445e-4bb9-08db14b0ddd7',
|
||||||
|
'display_id': '20230301-1130-COMMITTEE-CULT',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'release_date': '20230301',
|
'release_date': '20230301',
|
||||||
'title': 'Committee on Culture and Education',
|
'title': 'Committee on Culture and Education',
|
||||||
|
@ -142,6 +144,19 @@ class EuroParlWebstreamIE(InfoExtractor):
|
||||||
'live_status': 'is_live',
|
'live_status': 'is_live',
|
||||||
},
|
},
|
||||||
'skip': 'Not live anymore'
|
'skip': 'Not live anymore'
|
||||||
|
}, {
|
||||||
|
'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/20240320-1345-SPECIAL-PRESSER',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'c1f11567-5b52-470a-f3e1-08dc3c216ace',
|
||||||
|
'display_id': '20240320-1345-SPECIAL-PRESSER',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'release_date': '20240320',
|
||||||
|
'title': 'md5:7c6c814cac55dea5e2d87bf8d3db2234',
|
||||||
|
'release_timestamp': 1710939767,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://multimedia.europarl.europa.eu/webstreaming/briefing-for-media-on-2024-european-elections_20240429-1000-SPECIAL-OTHER',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -166,6 +181,7 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': json_info['id'],
|
'id': json_info['id'],
|
||||||
|
'display_id': display_id,
|
||||||
'title': traverse_obj(webpage_nextjs, (('mediaItem', 'title'), ('title', )), get_all=False),
|
'title': traverse_obj(webpage_nextjs, (('mediaItem', 'title'), ('title', )), get_all=False),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
|
|
79
yt_dlp/extractor/godresource.py
Normal file
79
yt_dlp/extractor/godresource.py
Normal file
|
@ -0,0 +1,79 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
determine_ext,
|
||||||
|
str_or_none,
|
||||||
|
unified_timestamp,
|
||||||
|
url_or_none
|
||||||
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class GodResourceIE(InfoExtractor):
    """Extractor for single videos on new.godresource.com.

    The stream location and the metadata are both read from the site's JSON
    ``Streams`` API. A stream is served either as an HLS manifest or as a
    direct MP4 link (see the two test cases).
    """

    _VALID_URL = r'https?://new\.godresource\.com/video/(?P<id>\w+)'
    _TESTS = [{
        # hls stream
        'url': 'https://new.godresource.com/video/A01mTKjyf6w',
        'info_dict': {
            'id': 'A01mTKjyf6w',
            'ext': 'mp4',
            'view_count': int,
            'timestamp': 1710978666,
            'channel_id': '5',
            'thumbnail': 'https://cdn-02.godresource.com/e42968ac-9e8b-4231-ab86-f4f9d775841f/thumbnail.jpg',
            'channel': 'Stedfast Baptist Church',
            'upload_date': '20240320',
            'title': 'GodResource video #A01mTKjyf6w',
        },
    }, {
        # mp4 link
        'url': 'https://new.godresource.com/video/01DXmBbQv_X',
        'md5': '0e8f72aa89a106b9d5c011ba6f8717b7',
        'info_dict': {
            'id': '01DXmBbQv_X',
            'ext': 'mp4',
            'channel_id': '12',
            'view_count': int,
            'timestamp': 1687996800,
            'thumbnail': 'https://cdn-02.godresource.com/sodomitedeception/thumbnail.jpg',
            'channel': 'Documentaries',
            'title': 'The Sodomite Deception',
            'upload_date': '20230629',
        },
    }]

    def _extract_stream(self, stream_url, video_id, is_live):
        """Return ``(formats, subtitles)`` for *stream_url*, chosen by its extension.

        ``m3u8`` URLs are expanded through the HLS manifest extractor; ``mp4``
        URLs become a single direct format with no subtitles.

        Raises:
            ExtractorError: if the extension is neither ``m3u8`` nor ``mp4``.
        """
        ext = determine_ext(stream_url)
        if ext == 'mp4':
            return [{'url': stream_url, 'ext': ext}], {}
        if ext != 'm3u8':
            raise ExtractorError(f'Unexpected video format {ext}')
        return self._extract_m3u8_formats_and_subtitles(
            stream_url, video_id, live=is_live)

    def _real_extract(self, url):
        """Build the info dict for the video page at *url* from the Streams API."""
        video_id = self._match_id(url)
        stream_info = self._download_json(
            f'https://api.godresource.com/api/Streams/{video_id}', video_id)

        # 'streamUrl' is indexed directly: without it there is nothing to extract
        stream_url = stream_info['streamUrl']
        # Normalise a missing or null 'isLive' to False
        is_live = stream_info.get('isLive') or False
        formats, subtitles = self._extract_stream(stream_url, video_id, is_live)

        metadata = traverse_obj(stream_info, {
            'title': ('title', {str}),
            'thumbnail': ('thumbnail', {url_or_none}),
            'view_count': ('views', {int}),
            'channel': ('channelName', {str}),
            'channel_id': ('channelId', {str_or_none}),
            'timestamp': ('streamDateCreated', {unified_timestamp}),
            # NOTE(review): key is spelled 'streamDataModified' (not 'Date') -- confirm against the API
            'modified_timestamp': ('streamDataModified', {unified_timestamp}),
        })

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            # Empty placeholder keeps the 'title' key present; it is overridden
            # below whenever the API supplies a string title
            'title': '',
            'is_live': is_live,
            **metadata,
        }
|
|
@ -1,9 +1,11 @@
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from .youtube import YoutubeIE
|
||||||
from ..compat import compat_parse_qs
|
from ..compat import compat_parse_qs
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
bug_reports_message,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
get_element_by_class,
|
get_element_by_class,
|
||||||
|
@ -38,6 +40,17 @@ class GoogleDriveIE(InfoExtractor):
|
||||||
'duration': 45,
|
'duration': 45,
|
||||||
'thumbnail': 'https://drive.google.com/thumbnail?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
|
'thumbnail': 'https://drive.google.com/thumbnail?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# has itag 50 which is not in YoutubeIE._formats (royalty Free music from 1922)
|
||||||
|
'url': 'https://drive.google.com/uc?id=1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
|
||||||
|
'md5': '322db8d63dd19788c04050a4bba67073',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'My Buddy - Henry Burr - Gus Kahn - Walter Donaldson.mp3',
|
||||||
|
'duration': 184,
|
||||||
|
'thumbnail': 'https://drive.google.com/thumbnail?id=1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
# video can't be watched anonymously due to view count limit reached,
|
# video can't be watched anonymously due to view count limit reached,
|
||||||
# but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
|
# but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
|
||||||
|
@ -58,22 +71,8 @@ class GoogleDriveIE(InfoExtractor):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
_FORMATS_EXT = {
|
_FORMATS_EXT = {
|
||||||
'5': 'flv',
|
**{k: v['ext'] for k, v in YoutubeIE._formats.items() if v.get('ext')},
|
||||||
'6': 'flv',
|
'50': 'm4a',
|
||||||
'13': '3gp',
|
|
||||||
'17': '3gp',
|
|
||||||
'18': 'mp4',
|
|
||||||
'22': 'mp4',
|
|
||||||
'34': 'flv',
|
|
||||||
'35': 'flv',
|
|
||||||
'36': '3gp',
|
|
||||||
'37': 'mp4',
|
|
||||||
'38': 'mp4',
|
|
||||||
'43': 'webm',
|
|
||||||
'44': 'webm',
|
|
||||||
'45': 'webm',
|
|
||||||
'46': 'webm',
|
|
||||||
'59': 'mp4',
|
|
||||||
}
|
}
|
||||||
_BASE_URL_CAPTIONS = 'https://drive.google.com/timedtext'
|
_BASE_URL_CAPTIONS = 'https://drive.google.com/timedtext'
|
||||||
_CAPTIONS_ENTRY_TAG = {
|
_CAPTIONS_ENTRY_TAG = {
|
||||||
|
@ -194,10 +193,13 @@ def get_value(key):
|
||||||
if len(fmt_stream_split) < 2:
|
if len(fmt_stream_split) < 2:
|
||||||
continue
|
continue
|
||||||
format_id, format_url = fmt_stream_split[:2]
|
format_id, format_url = fmt_stream_split[:2]
|
||||||
|
ext = self._FORMATS_EXT.get(format_id)
|
||||||
|
if not ext:
|
||||||
|
self.report_warning(f'Unknown format {format_id}{bug_reports_message()}')
|
||||||
f = {
|
f = {
|
||||||
'url': lowercase_escape(format_url),
|
'url': lowercase_escape(format_url),
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'ext': self._FORMATS_EXT[format_id],
|
'ext': ext,
|
||||||
}
|
}
|
||||||
resolution = resolutions.get(format_id)
|
resolution = resolutions.get(format_id)
|
||||||
if resolution:
|
if resolution:
|
||||||
|
|
|
@ -7,13 +7,14 @@
|
||||||
|
|
||||||
|
|
||||||
class HearThisAtIE(InfoExtractor):
|
class HearThisAtIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/]+)/(?P<title>[A-Za-z0-9\-]+)/?$'
|
_VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/?#]+)/(?P<title>[\w.-]+)'
|
||||||
_PLAYLIST_URL = 'https://hearthis.at/playlist.php'
|
_PLAYLIST_URL = 'https://hearthis.at/playlist.php'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://hearthis.at/moofi/dr-kreep',
|
'url': 'https://hearthis.at/moofi/dr-kreep',
|
||||||
'md5': 'ab6ec33c8fed6556029337c7885eb4e0',
|
'md5': 'ab6ec33c8fed6556029337c7885eb4e0',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '150939',
|
'id': '150939',
|
||||||
|
'display_id': 'moofi - dr-kreep',
|
||||||
'ext': 'wav',
|
'ext': 'wav',
|
||||||
'title': 'Moofi - Dr. Kreep',
|
'title': 'Moofi - Dr. Kreep',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
@ -21,15 +22,16 @@ class HearThisAtIE(InfoExtractor):
|
||||||
'description': 'md5:1adb0667b01499f9d27e97ddfd53852a',
|
'description': 'md5:1adb0667b01499f9d27e97ddfd53852a',
|
||||||
'upload_date': '20150118',
|
'upload_date': '20150118',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'duration': 71,
|
'duration': 70,
|
||||||
'genre': 'Experimental',
|
'genres': ['Experimental'],
|
||||||
}
|
},
|
||||||
}, {
|
}, {
|
||||||
# 'download' link redirects to the original webpage
|
# 'download' link redirects to the original webpage
|
||||||
'url': 'https://hearthis.at/twitchsf/dj-jim-hopkins-totally-bitchin-80s-dance-mix/',
|
'url': 'https://hearthis.at/twitchsf/dj-jim-hopkins-totally-bitchin-80s-dance-mix/',
|
||||||
'md5': '5980ceb7c461605d30f1f039df160c6e',
|
'md5': '5980ceb7c461605d30f1f039df160c6e',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '811296',
|
'id': '811296',
|
||||||
|
'display_id': 'twitchsf - dj-jim-hopkins-totally-bitchin-80s-dance-mix',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'TwitchSF - DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix!',
|
'title': 'TwitchSF - DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix!',
|
||||||
'description': 'md5:ef26815ca8f483272a87b137ff175be2',
|
'description': 'md5:ef26815ca8f483272a87b137ff175be2',
|
||||||
|
@ -38,7 +40,39 @@ class HearThisAtIE(InfoExtractor):
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'duration': 4360,
|
'duration': 4360,
|
||||||
'genre': 'Dance',
|
'genres': ['Dance'],
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://hearthis.at/tindalos/0001-tindalos-gnrique/eQd/',
|
||||||
|
'md5': 'cd08e51911f147f6da2d9678905b0bd9',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2685222',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'duration': 86,
|
||||||
|
'view_count': int,
|
||||||
|
'timestamp': 1545471670,
|
||||||
|
'display_id': 'tindalos - 0001-tindalos-gnrique',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'genres': ['Other'],
|
||||||
|
'title': 'Tindalos - Tindalos - générique n°1',
|
||||||
|
'description': '',
|
||||||
|
'upload_date': '20181222',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://hearthis.at/sithi2/biochip-c-classics-set-wolle-xdp-tresor.core-special-tresor-globus-berlin-13.07.20011/',
|
||||||
|
'md5': 'b45ac60f0c8111eef6ddc10ec232e312',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '7145959',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'description': 'md5:d7ae36a453d78903f6b7ed6eb2fce1f2',
|
||||||
|
'duration': 8986,
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'title': 'md5:62669ce5b1b67f45c6f846033f37d3b9',
|
||||||
|
'timestamp': 1588699409,
|
||||||
|
'display_id': 'sithi2 - biochip-c-classics-set-wolle-xdp-tresor.core-special-tresor-globus-berlin-13.07.20011',
|
||||||
|
'view_count': int,
|
||||||
|
'upload_date': '20200505',
|
||||||
|
'genres': ['Other'],
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
from .cloudflarestream import CloudflareStreamIE
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import traverse_obj
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class HytaleIE(InfoExtractor):
|
class HytaleIE(InfoExtractor):
|
||||||
|
@ -49,7 +50,7 @@ def _real_extract(self, url):
|
||||||
entries = [
|
entries = [
|
||||||
self.url_result(
|
self.url_result(
|
||||||
f'https://cloudflarestream.com/{video_hash}/manifest/video.mpd?parentOrigin=https%3A%2F%2Fhytale.com',
|
f'https://cloudflarestream.com/{video_hash}/manifest/video.mpd?parentOrigin=https%3A%2F%2Fhytale.com',
|
||||||
title=self._titles.get(video_hash), url_transparent=True)
|
CloudflareStreamIE, title=self._titles.get(video_hash), url_transparent=True)
|
||||||
for video_hash in re.findall(
|
for video_hash in re.findall(
|
||||||
r'<stream\s+class\s*=\s*"ql-video\s+cf-stream"\s+src\s*=\s*"([a-f0-9]{32})"',
|
r'<stream\s+class\s*=\s*"ql-video\s+cf-stream"\s+src\s*=\s*"([a-f0-9]{32})"',
|
||||||
webpage)
|
webpage)
|
||||||
|
|
|
@ -255,7 +255,7 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
|
|
||||||
class InstagramIE(InstagramBaseIE):
|
class InstagramIE(InstagramBaseIE):
|
||||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com(?:/[^/]+)?/(?:p|tv|reel)/(?P<id>[^/?#&]+))'
|
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com(?:/[^/]+)?/(?:p|tv|reels?(?!/audio/))/(?P<id>[^/?#&]+))'
|
||||||
_EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1']
|
_EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1']
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
||||||
|
@ -379,6 +379,9 @@ class InstagramIE(InstagramBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.instagram.com/marvelskies.fc/reel/CWqAgUZgCku/',
|
'url': 'https://www.instagram.com/marvelskies.fc/reel/CWqAgUZgCku/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.instagram.com/reels/Cop84x6u7CP/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|
|
@ -1,103 +0,0 @@
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
InAdvancePagedList,
|
|
||||||
int_or_none,
|
|
||||||
orderedSet,
|
|
||||||
unified_strdate,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class JableIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?jable\.tv/videos/(?P<id>[\w-]+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://jable.tv/videos/pppd-812/',
|
|
||||||
'md5': 'f1537283a9bc073c31ff86ca35d9b2a6',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'pppd-812',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'PPPD-812 只要表現好巨乳女教師吉根柚莉愛就獎勵學生們在白虎穴內射出精液',
|
|
||||||
'description': 'md5:5b6d4199a854f62c5e56e26ccad19967',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'age_limit': 18,
|
|
||||||
'like_count': int,
|
|
||||||
'view_count': int,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'https://jable.tv/videos/apak-220/',
|
|
||||||
'md5': '71f9239d69ced58ab74a816908847cc1',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'apak-220',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'md5:5c3861b7cf80112a6e2b70bccf170824',
|
|
||||||
'description': '',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'age_limit': 18,
|
|
||||||
'like_count': int,
|
|
||||||
'view_count': int,
|
|
||||||
'upload_date': '20220319',
|
|
||||||
},
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
self._search_regex(r'var\s+hlsUrl\s*=\s*\'([^\']+)', webpage, 'hls_url'), video_id, 'mp4', m3u8_id='hls')
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': self._og_search_title(webpage),
|
|
||||||
'description': self._og_search_description(webpage, default=''),
|
|
||||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
|
||||||
'formats': formats,
|
|
||||||
'age_limit': 18,
|
|
||||||
'upload_date': unified_strdate(self._search_regex(
|
|
||||||
r'class="inactive-color">\D+\s+(\d{4}-\d+-\d+)', webpage, 'upload_date', default=None)),
|
|
||||||
'view_count': int_or_none(self._search_regex(
|
|
||||||
r'#icon-eye"></use></svg>\n*<span class="mr-3">([\d ]+)',
|
|
||||||
webpage, 'view_count', default='').replace(' ', '')),
|
|
||||||
'like_count': int_or_none(self._search_regex(
|
|
||||||
r'#icon-heart"></use></svg><span class="count">(\d+)', webpage, 'link_count', default=None)),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class JablePlaylistIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?jable\.tv/(?:categories|models|tags)/(?P<id>[\w-]+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://jable.tv/models/kaede-karen/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'kaede-karen',
|
|
||||||
'title': '楓カレン',
|
|
||||||
},
|
|
||||||
'playlist_count': 34,
|
|
||||||
}, {
|
|
||||||
'url': 'https://jable.tv/categories/roleplay/',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'https://jable.tv/tags/girl/',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
playlist_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
|
||||||
|
|
||||||
def page_func(page_num):
|
|
||||||
return [
|
|
||||||
self.url_result(player_url, JableIE)
|
|
||||||
for player_url in orderedSet(re.findall(
|
|
||||||
r'href="(https://jable.tv/videos/[\w-]+/?)"',
|
|
||||||
self._download_webpage(url, playlist_id, query={
|
|
||||||
'mode': 'async',
|
|
||||||
'from': page_num + 1,
|
|
||||||
'function': 'get_block',
|
|
||||||
'block_id': 'list_videos_common_videos_list',
|
|
||||||
}, note=f'Downloading page {page_num + 1}')))]
|
|
||||||
|
|
||||||
return self.playlist_result(
|
|
||||||
InAdvancePagedList(page_func, int_or_none(self._search_regex(
|
|
||||||
r'from:(\d+)">[^<]+\s*»', webpage, 'last page number', default=1)), 24),
|
|
||||||
playlist_id, self._search_regex(
|
|
||||||
r'<h2 class="h3-md mb-1">([^<]+)', webpage, 'playlist title', default=None))
|
|
|
@ -14,7 +14,7 @@ class MoviepilotIE(InfoExtractor):
|
||||||
'display_id': 'interstellar-2',
|
'display_id': 'interstellar-2',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Interstellar',
|
'title': 'Interstellar',
|
||||||
'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaV-q1ZganMw4HVXg/x1080',
|
'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaV-q1.*/x1080',
|
||||||
'timestamp': 1605010596,
|
'timestamp': 1605010596,
|
||||||
'description': 'md5:0ae9cb452af52610c9ffc60f2fd0474c',
|
'description': 'md5:0ae9cb452af52610c9ffc60f2fd0474c',
|
||||||
'uploader': 'Moviepilot',
|
'uploader': 'Moviepilot',
|
||||||
|
@ -71,7 +71,7 @@ class MoviepilotIE(InfoExtractor):
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'duration': 82,
|
'duration': 82,
|
||||||
'upload_date': '20201109',
|
'upload_date': '20201109',
|
||||||
'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaMes1Zg3lxLv9j5u/x1080',
|
'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaMes1Z.*/x1080',
|
||||||
'uploader': 'Moviepilot',
|
'uploader': 'Moviepilot',
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
|
@ -92,6 +92,6 @@ def _real_extract(self, url):
|
||||||
'ie_key': DailymotionIE.ie_key(),
|
'ie_key': DailymotionIE.ie_key(),
|
||||||
'display_id': video_id,
|
'display_id': video_id,
|
||||||
'title': clip.get('title'),
|
'title': clip.get('title'),
|
||||||
'url': f'https://www.dailymotion.com/video/{clip["videoRemoteId"]}',
|
'url': f'https://www.dailymotion.com/video/{clip["video"]["remoteId"]}',
|
||||||
'description': clip.get('summary'),
|
'description': clip.get('summary'),
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,7 +5,6 @@
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
parse_count,
|
parse_count,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urljoin,
|
|
||||||
)
|
)
|
||||||
from ..utils.traversal import traverse_obj
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
@ -16,8 +15,7 @@ class NFBBaseIE(InfoExtractor):
|
||||||
|
|
||||||
def _extract_ep_data(self, webpage, video_id, fatal=False):
|
def _extract_ep_data(self, webpage, video_id, fatal=False):
|
||||||
return self._search_json(
|
return self._search_json(
|
||||||
r'const\s+episodesData\s*=', webpage, 'episode data', video_id,
|
r'episodesData\s*:', webpage, 'episode data', video_id, fatal=fatal) or {}
|
||||||
contains_pattern=r'\[\s*{(?s:.+)}\s*\]', fatal=fatal) or []
|
|
||||||
|
|
||||||
def _extract_ep_info(self, data, video_id, slug=None):
|
def _extract_ep_info(self, data, video_id, slug=None):
|
||||||
info = traverse_obj(data, (lambda _, v: video_id in v['embed_url'], {
|
info = traverse_obj(data, (lambda _, v: video_id in v['embed_url'], {
|
||||||
|
@ -224,18 +222,14 @@ def _real_extract(self, url):
|
||||||
# type_ can change from film to serie(s) after redirect; new slug may have episode number
|
# type_ can change from film to serie(s) after redirect; new slug may have episode number
|
||||||
type_, slug = self._match_valid_url(urlh.url).group('type', 'id')
|
type_, slug = self._match_valid_url(urlh.url).group('type', 'id')
|
||||||
|
|
||||||
embed_url = urljoin(f'https://www.{site}.ca', self._html_search_regex(
|
player_data = self._search_json(
|
||||||
r'<[^>]+\bid=["\']player-iframe["\'][^>]*\bsrc=["\']([^"\']+)', webpage, 'embed url'))
|
r'window\.PLAYER_OPTIONS\[[^\]]+\]\s*=', webpage, 'player data', slug)
|
||||||
video_id = self._match_id(embed_url) # embed url has unique slug
|
video_id = self._match_id(player_data['overlay']['url']) # overlay url always has unique slug
|
||||||
player = self._download_webpage(embed_url, video_id, 'Downloading player page')
|
|
||||||
if 'MESSAGE_GEOBLOCKED' in player:
|
|
||||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
|
||||||
|
|
||||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||||
self._html_search_regex(r'source:\s*\'([^\']+)', player, 'm3u8 url'),
|
player_data['source'], video_id, 'mp4', m3u8_id='hls')
|
||||||
video_id, 'mp4', m3u8_id='hls')
|
|
||||||
|
|
||||||
if dv_source := self._html_search_regex(r'dvSource:\s*\'([^\']+)', player, 'dv', default=None):
|
if dv_source := url_or_none(player_data.get('dvSource')):
|
||||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||||
dv_source, video_id, 'mp4', m3u8_id='dv', preference=-2, fatal=False)
|
dv_source, video_id, 'mp4', m3u8_id='dv', preference=-2, fatal=False)
|
||||||
for fmt in fmts:
|
for fmt in fmts:
|
||||||
|
@ -246,17 +240,16 @@ def _real_extract(self, url):
|
||||||
info = {
|
info = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._html_search_regex(
|
'title': self._html_search_regex(
|
||||||
r'<[^>]+\bid=["\']titleHeader["\'][^>]*>\s*<h1[^>]*>\s*([^<]+?)\s*</h1>',
|
r'["\']nfb_version_title["\']\s*:\s*["\']([^"\']+)',
|
||||||
webpage, 'title', default=None),
|
webpage, 'title', default=None),
|
||||||
'description': self._html_search_regex(
|
'description': self._html_search_regex(
|
||||||
r'<[^>]+\bid=["\']tabSynopsis["\'][^>]*>\s*<p[^>]*>\s*([^<]+)',
|
r'<[^>]+\bid=["\']tabSynopsis["\'][^>]*>\s*<p[^>]*>\s*([^<]+)',
|
||||||
webpage, 'description', default=None),
|
webpage, 'description', default=None),
|
||||||
'thumbnail': self._html_search_regex(
|
'thumbnail': url_or_none(player_data.get('poster')),
|
||||||
r'poster:\s*\'([^\']+)', player, 'thumbnail', default=None),
|
|
||||||
'uploader': self._html_search_regex(
|
'uploader': self._html_search_regex(
|
||||||
r'<[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)', webpage, 'uploader', default=None),
|
r'<[^>]+\bitemprop=["\']director["\'][^>]*>([^<]+)', webpage, 'uploader', default=None),
|
||||||
'release_year': int_or_none(self._html_search_regex(
|
'release_year': int_or_none(self._html_search_regex(
|
||||||
r'<[^>]+\bitemprop=["\']datePublished["\'][^>]*>([^<]+)',
|
r'["\']nfb_version_year["\']\s*:\s*["\']([^"\']+)',
|
||||||
webpage, 'release_year', default=None)),
|
webpage, 'release_year', default=None)),
|
||||||
} if type_ == 'film' else self._extract_ep_info(self._extract_ep_data(webpage, video_id, slug), video_id)
|
} if type_ == 'film' else self._extract_ep_info(self._extract_ep_data(webpage, video_id, slug), video_id)
|
||||||
|
|
||||||
|
|
76
yt_dlp/extractor/nts.py
Normal file
76
yt_dlp/extractor/nts.py
Normal file
|
@ -0,0 +1,76 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import parse_iso8601, url_or_none
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class NTSLiveIE(InfoExtractor):
    """Extractor for archived show episodes on nts.live.

    The episode page embeds its state as JSON; the audio itself is hosted
    elsewhere (the test cases cover SoundCloud and Mixcloud embeds), so the
    result is a ``url_transparent`` hand-off carrying NTS's own metadata.
    """

    IE_NAME = 'nts.live'
    _VALID_URL = r'https?://(?:www\.)?nts\.live/shows/[^/?#]+/episodes/(?P<id>[^/?#]+)'
    _TESTS = [{
        # embedded soundcloud
        'url': 'https://www.nts.live/shows/yu-su/episodes/yu-su-2nd-april-2024',
        'md5': 'b5444c04888c869d68758982de1a27d8',
        'info_dict': {
            'id': '1791563518',
            'ext': 'opus',
            'uploader_id': '995579326',
            'title': 'Pender Street Steppers & YU SU',
            'timestamp': 1712073600,
            'upload_date': '20240402',
            'thumbnail': 'https://i1.sndcdn.com/artworks-qKcNO0z0AQGGbv9s-GljJCw-original.jpg',
            'license': 'all-rights-reserved',
            'repost_count': int,
            'uploader_url': 'https://soundcloud.com/user-643553014',
            'uploader': 'NTS Latest',
            'description': 'md5:cd00ac535a63caaad722483ae3ff802a',
            'duration': 10784.157,
            'genres': ['Deep House', 'House', 'Leftfield Disco', 'Jazz Fusion', 'Dream Pop'],
            'modified_timestamp': 1712564687,
            'modified_date': '20240408',
        },
    }, {
        # embedded mixcloud
        'url': 'https://www.nts.live/shows/absolute-fiction/episodes/absolute-fiction-23rd-july-2022',
        'info_dict': {
            'id': 'NTSRadio_absolute-fiction-23rd-july-2022',
            'ext': 'webm',
            'like_count': int,
            'title': 'Absolute Fiction',
            'comment_count': int,
            'uploader_url': 'https://www.mixcloud.com/NTSRadio/',
            'description': 'md5:ba49da971ae8d71ee45813c52c5e2a04',
            'tags': [],
            'duration': 3529,
            'timestamp': 1658588400,
            'repost_count': int,
            'upload_date': '20220723',
            'uploader_id': 'NTSRadio',
            'thumbnail': 'https://thumbnailer.mixcloud.com/unsafe/1024x1024/extaudio/5/1/a/d/ae3e-1be9-4fd4-983e-9c3294226eac',
            'uploader': 'Mixcloud NTS Radio',
            'genres': ['Minimal Synth', 'Post Punk', 'Industrial '],
            'modified_timestamp': 1658842165,
            'modified_date': '20220726',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Return a ``url_transparent`` result pointing at the episode's audio source."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        react_state = self._search_json(
            r'window\._REACT_STATE_\s*=', webpage, 'react state', video_id)

        # Output field -> path inside the 'episode' object of the page state
        episode_fields = {
            # first entry of audio_sources that carries a valid URL
            'url': ('audio_sources', ..., 'url', {url_or_none}, any),
            'title': ('name', {str}),
            'description': ('description', {str}),
            'genres': ('genres', ..., 'value', {str}),
            'timestamp': ('broadcast', {parse_iso8601}),
            'modified_timestamp': ('updated', {parse_iso8601}),
        }
        episode_info = traverse_obj(react_state, ('episode', episode_fields))

        return {
            '_type': 'url_transparent',
            **episode_info,
        }
|
|
@ -14,6 +14,7 @@
|
||||||
make_archive_id,
|
make_archive_id,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
|
parse_age_limit,
|
||||||
remove_end,
|
remove_end,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
strip_jsonp,
|
strip_jsonp,
|
||||||
|
@ -569,7 +570,7 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
class ORFONIE(InfoExtractor):
|
class ORFONIE(InfoExtractor):
|
||||||
IE_NAME = 'orf:on'
|
IE_NAME = 'orf:on'
|
||||||
_VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d{8})/(?P<slug>[\w-]+)'
|
_VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://on.orf.at/video/14210000/school-of-champions-48',
|
'url': 'https://on.orf.at/video/14210000/school-of-champions-48',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -583,32 +584,55 @@ class ORFONIE(InfoExtractor):
|
||||||
'timestamp': 1706472362,
|
'timestamp': 1706472362,
|
||||||
'upload_date': '20240128',
|
'upload_date': '20240128',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://on.orf.at/video/3220355',
|
||||||
|
'md5': 'f94d98e667cf9a3851317efb4e136662',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3220355',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'duration': 445.04,
|
||||||
|
'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0002/60/thumb_159573_segments_highlight_teaser.png',
|
||||||
|
'title': '50 Jahre Burgenland: Der Festumzug',
|
||||||
|
'description': 'md5:1560bf855119544ee8c4fa5376a2a6b0',
|
||||||
|
'media_type': 'episode',
|
||||||
|
'timestamp': 52916400,
|
||||||
|
'upload_date': '19710905',
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_video(self, video_id, display_id):
|
def _extract_video(self, video_id):
|
||||||
encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode()
|
encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode()
|
||||||
api_json = self._download_json(
|
api_json = self._download_json(
|
||||||
f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', display_id)
|
f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', video_id)
|
||||||
|
|
||||||
|
if traverse_obj(api_json, 'is_drm_protected'):
|
||||||
|
self.report_drm(video_id)
|
||||||
|
|
||||||
formats, subtitles = [], {}
|
formats, subtitles = [], {}
|
||||||
for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)):
|
for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)):
|
||||||
for manifest_url in traverse_obj(api_json, ('sources', manifest_type, ..., 'src', {url_or_none})):
|
for manifest_url in traverse_obj(api_json, ('sources', manifest_type, ..., 'src', {url_or_none})):
|
||||||
if manifest_type == 'hls':
|
if manifest_type == 'hls':
|
||||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||||
manifest_url, display_id, fatal=False, m3u8_id='hls')
|
manifest_url, video_id, fatal=False, m3u8_id='hls')
|
||||||
elif manifest_type == 'dash':
|
elif manifest_type == 'dash':
|
||||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||||
manifest_url, display_id, fatal=False, mpd_id='dash')
|
manifest_url, video_id, fatal=False, mpd_id='dash')
|
||||||
else:
|
else:
|
||||||
continue
|
continue
|
||||||
formats.extend(fmts)
|
formats.extend(fmts)
|
||||||
self._merge_subtitles(subs, target=subtitles)
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
|
|
||||||
|
for sub_url in traverse_obj(api_json, (
|
||||||
|
'_embedded', 'subtitle',
|
||||||
|
('xml_url', 'sami_url', 'stl_url', 'ttml_url', 'srt_url', 'vtt_url'), {url_or_none})):
|
||||||
|
self._merge_subtitles({'de': [{'url': sub_url}]}, target=subtitles)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
**traverse_obj(api_json, {
|
**traverse_obj(api_json, {
|
||||||
|
'age_limit': ('age_classification', {parse_age_limit}),
|
||||||
'duration': ('duration_second', {float_or_none}),
|
'duration': ('duration_second', {float_or_none}),
|
||||||
'title': (('title', 'headline'), {str}),
|
'title': (('title', 'headline'), {str}),
|
||||||
'description': (('description', 'teaser_text'), {str}),
|
'description': (('description', 'teaser_text'), {str}),
|
||||||
|
@ -617,14 +641,14 @@ def _extract_video(self, video_id, display_id):
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id, display_id = self._match_valid_url(url).group('id', 'slug')
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None),
|
'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None),
|
||||||
'description': self._html_search_meta(
|
'description': self._html_search_meta(
|
||||||
['description', 'og:description', 'twitter:description'], webpage, default=None),
|
['description', 'og:description', 'twitter:description'], webpage, default=None),
|
||||||
**self._search_json_ld(webpage, display_id, fatal=False),
|
**self._search_json_ld(webpage, video_id, fatal=False),
|
||||||
**self._extract_video(video_id, display_id),
|
**self._extract_video(video_id),
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,95 +0,0 @@
|
||||||
import urllib.parse
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
determine_ext,
|
|
||||||
int_or_none,
|
|
||||||
parse_duration,
|
|
||||||
remove_end,
|
|
||||||
unified_strdate,
|
|
||||||
ExtractorError,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class Porn91IE(InfoExtractor):
|
|
||||||
IE_NAME = '91porn'
|
|
||||||
_VALID_URL = r'(?:https?://)(?:www\.|)91porn\.com/view_video.php\?([^#]+&)?viewkey=(?P<id>\w+)'
|
|
||||||
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://91porn.com/view_video.php?viewkey=7e42283b4f5ab36da134',
|
|
||||||
'md5': 'd869db281402e0ef4ddef3c38b866f86',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '7e42283b4f5ab36da134',
|
|
||||||
'title': '18岁大一漂亮学妹,水嫩性感,再爽一次!',
|
|
||||||
'description': 'md5:1ff241f579b07ae936a54e810ad2e891',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'duration': 431,
|
|
||||||
'upload_date': '20150520',
|
|
||||||
'comment_count': int,
|
|
||||||
'view_count': int,
|
|
||||||
'age_limit': 18,
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'https://91porn.com/view_video.php?viewkey=7ef0cf3d362c699ab91c',
|
|
||||||
'md5': 'f8fd50540468a6d795378cd778b40226',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '7ef0cf3d362c699ab91c',
|
|
||||||
'title': '真实空乘,冲上云霄第二部',
|
|
||||||
'description': 'md5:618bf9652cafcc66cd277bd96789baea',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'duration': 248,
|
|
||||||
'upload_date': '20221119',
|
|
||||||
'comment_count': int,
|
|
||||||
'view_count': int,
|
|
||||||
'age_limit': 18,
|
|
||||||
}
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
self._set_cookie('91porn.com', 'language', 'cn_CN')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
|
||||||
'http://91porn.com/view_video.php?viewkey=%s' % video_id, video_id)
|
|
||||||
|
|
||||||
if '视频不存在,可能已经被删除或者被举报为不良内容!' in webpage:
|
|
||||||
raise ExtractorError('91 Porn says: Video does not exist', expected=True)
|
|
||||||
|
|
||||||
daily_limit = self._search_regex(
|
|
||||||
r'作为游客,你每天只可观看([\d]+)个视频', webpage, 'exceeded daily limit', default=None, fatal=False)
|
|
||||||
if daily_limit:
|
|
||||||
raise ExtractorError(f'91 Porn says: Daily limit {daily_limit} videos exceeded', expected=True)
|
|
||||||
|
|
||||||
video_link_url = self._search_regex(
|
|
||||||
r'document\.write\(\s*strencode2\s*\(\s*((?:"[^"]+")|(?:\'[^\']+\'))', webpage, 'video link')
|
|
||||||
video_link_url = self._search_regex(
|
|
||||||
r'src=["\']([^"\']+)["\']', urllib.parse.unquote(video_link_url), 'unquoted video link')
|
|
||||||
|
|
||||||
formats, subtitles = self._get_formats_and_subtitle(video_link_url, video_id)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': remove_end(self._html_extract_title(webpage).replace('\n', ''), 'Chinese homemade video').strip(),
|
|
||||||
'formats': formats,
|
|
||||||
'subtitles': subtitles,
|
|
||||||
'upload_date': unified_strdate(self._search_regex(
|
|
||||||
r'<span\s+class=["\']title-yakov["\']>(\d{4}-\d{2}-\d{2})</span>', webpage, 'upload_date', fatal=False)),
|
|
||||||
'description': self._html_search_regex(
|
|
||||||
r'<span\s+class=["\']more title["\']>\s*([^<]+)', webpage, 'description', fatal=False),
|
|
||||||
'duration': parse_duration(self._search_regex(
|
|
||||||
r'时长:\s*<span[^>]*>\s*(\d+(?::\d+){1,2})', webpage, 'duration', fatal=False)),
|
|
||||||
'comment_count': int_or_none(self._search_regex(
|
|
||||||
r'留言:\s*<span[^>]*>\s*(\d+)\s*</span>', webpage, 'comment count', fatal=False)),
|
|
||||||
'view_count': int_or_none(self._search_regex(
|
|
||||||
r'热度:\s*<span[^>]*>\s*(\d+)\s*</span>', webpage, 'view count', fatal=False)),
|
|
||||||
'age_limit': 18,
|
|
||||||
}
|
|
||||||
|
|
||||||
def _get_formats_and_subtitle(self, video_link_url, video_id):
|
|
||||||
ext = determine_ext(video_link_url)
|
|
||||||
if ext == 'm3u8':
|
|
||||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_link_url, video_id, ext='mp4')
|
|
||||||
else:
|
|
||||||
formats = [{'url': video_link_url, 'ext': ext}]
|
|
||||||
subtitles = {}
|
|
||||||
|
|
||||||
return formats, subtitles
|
|
|
@ -97,7 +97,7 @@ def is_logged(webpage):
|
||||||
login_form = self._hidden_inputs(login_page)
|
login_form = self._hidden_inputs(login_page)
|
||||||
|
|
||||||
login_form.update({
|
login_form.update({
|
||||||
'username': username,
|
'email': username,
|
||||||
'password': password,
|
'password': password,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
import functools
|
||||||
import itertools
|
import itertools
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
@ -12,6 +13,7 @@
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
parse_qs,
|
parse_qs,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
|
@ -68,6 +70,16 @@ class SoundcloudBaseIE(InfoExtractor):
|
||||||
'original': 0,
|
'original': 0,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_DEFAULT_FORMATS = ['http_aac', 'hls_aac', 'http_opus', 'hls_opus', 'http_mp3', 'hls_mp3']
|
||||||
|
|
||||||
|
@functools.cached_property
|
||||||
|
def _is_requested(self):
|
||||||
|
return re.compile(r'|'.join(set(
|
||||||
|
re.escape(pattern).replace(r'\*', r'.*') if pattern != 'default'
|
||||||
|
else '|'.join(map(re.escape, self._DEFAULT_FORMATS))
|
||||||
|
for pattern in self._configuration_arg('formats', ['default'], ie_key=SoundcloudIE)
|
||||||
|
))).fullmatch
|
||||||
|
|
||||||
def _store_client_id(self, client_id):
|
def _store_client_id(self, client_id):
|
||||||
self.cache.store('soundcloud', 'client_id', client_id)
|
self.cache.store('soundcloud', 'client_id', client_id)
|
||||||
|
|
||||||
|
@ -216,7 +228,7 @@ def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_f
|
||||||
redirect_url = (self._download_json(download_url, track_id, fatal=False) or {}).get('redirectUri')
|
redirect_url = (self._download_json(download_url, track_id, fatal=False) or {}).get('redirectUri')
|
||||||
if redirect_url:
|
if redirect_url:
|
||||||
urlh = self._request_webpage(
|
urlh = self._request_webpage(
|
||||||
HEADRequest(redirect_url), track_id, fatal=False)
|
HEADRequest(redirect_url), track_id, 'Checking for original download format', fatal=False)
|
||||||
if urlh:
|
if urlh:
|
||||||
format_url = urlh.url
|
format_url = urlh.url
|
||||||
format_urls.add(format_url)
|
format_urls.add(format_url)
|
||||||
|
@ -258,7 +270,7 @@ def add_format(f, protocol, is_preview=False):
|
||||||
abr = f.get('abr')
|
abr = f.get('abr')
|
||||||
if abr:
|
if abr:
|
||||||
f['abr'] = int(abr)
|
f['abr'] = int(abr)
|
||||||
if protocol == 'hls':
|
if protocol in ('hls', 'hls-aes'):
|
||||||
protocol = 'm3u8' if ext == 'aac' else 'm3u8_native'
|
protocol = 'm3u8' if ext == 'aac' else 'm3u8_native'
|
||||||
else:
|
else:
|
||||||
protocol = 'http'
|
protocol = 'http'
|
||||||
|
@ -274,11 +286,32 @@ def add_format(f, protocol, is_preview=False):
|
||||||
if extract_flat:
|
if extract_flat:
|
||||||
break
|
break
|
||||||
format_url = t['url']
|
format_url = t['url']
|
||||||
stream = None
|
|
||||||
|
|
||||||
|
protocol = traverse_obj(t, ('format', 'protocol', {str}))
|
||||||
|
if protocol == 'progressive':
|
||||||
|
protocol = 'http'
|
||||||
|
if protocol != 'hls' and '/hls' in format_url:
|
||||||
|
protocol = 'hls'
|
||||||
|
if protocol == 'encrypted-hls' or '/encrypted-hls' in format_url:
|
||||||
|
protocol = 'hls-aes'
|
||||||
|
|
||||||
|
ext = None
|
||||||
|
if preset := traverse_obj(t, ('preset', {str_or_none})):
|
||||||
|
ext = preset.split('_')[0]
|
||||||
|
if ext not in KNOWN_EXTENSIONS:
|
||||||
|
ext = mimetype2ext(traverse_obj(t, ('format', 'mime_type', {str})))
|
||||||
|
|
||||||
|
identifier = join_nonempty(protocol, ext, delim='_')
|
||||||
|
if not self._is_requested(identifier):
|
||||||
|
self.write_debug(f'"{identifier}" is not a requested format, skipping')
|
||||||
|
continue
|
||||||
|
|
||||||
|
stream = None
|
||||||
for retry in self.RetryManager(fatal=False):
|
for retry in self.RetryManager(fatal=False):
|
||||||
try:
|
try:
|
||||||
stream = self._download_json(format_url, track_id, query=query, headers=self._HEADERS)
|
stream = self._download_json(
|
||||||
|
format_url, track_id, f'Downloading {identifier} format info JSON',
|
||||||
|
query=query, headers=self._HEADERS)
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, HTTPError) and e.cause.status == 429:
|
if isinstance(e.cause, HTTPError) and e.cause.status == 429:
|
||||||
self.report_warning(
|
self.report_warning(
|
||||||
|
@ -289,27 +322,14 @@ def add_format(f, protocol, is_preview=False):
|
||||||
else:
|
else:
|
||||||
self.report_warning(e.msg)
|
self.report_warning(e.msg)
|
||||||
|
|
||||||
if not isinstance(stream, dict):
|
stream_url = traverse_obj(stream, ('url', {url_or_none}))
|
||||||
continue
|
|
||||||
stream_url = url_or_none(stream.get('url'))
|
|
||||||
if invalid_url(stream_url):
|
if invalid_url(stream_url):
|
||||||
continue
|
continue
|
||||||
format_urls.add(stream_url)
|
format_urls.add(stream_url)
|
||||||
stream_format = t.get('format') or {}
|
|
||||||
protocol = stream_format.get('protocol')
|
|
||||||
if protocol != 'hls' and '/hls' in format_url:
|
|
||||||
protocol = 'hls'
|
|
||||||
ext = None
|
|
||||||
preset = str_or_none(t.get('preset'))
|
|
||||||
if preset:
|
|
||||||
ext = preset.split('_')[0]
|
|
||||||
if ext not in KNOWN_EXTENSIONS:
|
|
||||||
ext = mimetype2ext(stream_format.get('mime_type'))
|
|
||||||
add_format({
|
add_format({
|
||||||
'url': stream_url,
|
'url': stream_url,
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
}, 'http' if protocol == 'progressive' else protocol,
|
}, protocol, t.get('snipped') or '/preview/' in format_url)
|
||||||
t.get('snipped') or '/preview/' in format_url)
|
|
||||||
|
|
||||||
for f in formats:
|
for f in formats:
|
||||||
f['vcodec'] = 'none'
|
f['vcodec'] = 'none'
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
|
import functools
|
||||||
import itertools
|
import itertools
|
||||||
import json
|
import json
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
import string
|
|
||||||
import time
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
|
@ -15,10 +15,13 @@
|
||||||
UnsupportedError,
|
UnsupportedError,
|
||||||
UserNotLive,
|
UserNotLive,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
filter_dict,
|
||||||
format_field,
|
format_field,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
join_nonempty,
|
join_nonempty,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
|
mimetype2ext,
|
||||||
|
parse_qs,
|
||||||
qualities,
|
qualities,
|
||||||
remove_start,
|
remove_start,
|
||||||
srt_subtitles_timecode,
|
srt_subtitles_timecode,
|
||||||
|
@ -45,19 +48,28 @@ class TikTokBaseIE(InfoExtractor):
|
||||||
# "app id": aweme = 1128, trill = 1180, musical_ly = 1233, universal = 0
|
# "app id": aweme = 1128, trill = 1180, musical_ly = 1233, universal = 0
|
||||||
'aid': '0',
|
'aid': '0',
|
||||||
}
|
}
|
||||||
_KNOWN_APP_INFO = [
|
|
||||||
'7351144126450059040',
|
|
||||||
'7351149742343391009',
|
|
||||||
'7351153174894626592',
|
|
||||||
]
|
|
||||||
_APP_INFO_POOL = None
|
_APP_INFO_POOL = None
|
||||||
_APP_INFO = None
|
_APP_INFO = None
|
||||||
_APP_USER_AGENT = None
|
_APP_USER_AGENT = None
|
||||||
|
|
||||||
@property
|
@functools.cached_property
|
||||||
|
def _KNOWN_APP_INFO(self):
|
||||||
|
# If we have a genuine device ID, we may not need any IID
|
||||||
|
default = [''] if self._KNOWN_DEVICE_ID else []
|
||||||
|
return self._configuration_arg('app_info', default, ie_key=TikTokIE)
|
||||||
|
|
||||||
|
@functools.cached_property
|
||||||
|
def _KNOWN_DEVICE_ID(self):
|
||||||
|
return self._configuration_arg('device_id', [None], ie_key=TikTokIE)[0]
|
||||||
|
|
||||||
|
@functools.cached_property
|
||||||
|
def _DEVICE_ID(self):
|
||||||
|
return self._KNOWN_DEVICE_ID or str(random.randint(7250000000000000000, 7351147085025500000))
|
||||||
|
|
||||||
|
@functools.cached_property
|
||||||
def _API_HOSTNAME(self):
|
def _API_HOSTNAME(self):
|
||||||
return self._configuration_arg(
|
return self._configuration_arg(
|
||||||
'api_hostname', ['api22-normal-c-useast2a.tiktokv.com'], ie_key=TikTokIE)[0]
|
'api_hostname', ['api16-normal-c-useast1a.tiktokv.com'], ie_key=TikTokIE)[0]
|
||||||
|
|
||||||
def _get_next_app_info(self):
|
def _get_next_app_info(self):
|
||||||
if self._APP_INFO_POOL is None:
|
if self._APP_INFO_POOL is None:
|
||||||
|
@ -66,13 +78,10 @@ def _get_next_app_info(self):
|
||||||
for key, default in self._APP_INFO_DEFAULTS.items()
|
for key, default in self._APP_INFO_DEFAULTS.items()
|
||||||
if key != 'iid'
|
if key != 'iid'
|
||||||
}
|
}
|
||||||
app_info_list = (
|
|
||||||
self._configuration_arg('app_info', ie_key=TikTokIE)
|
|
||||||
or random.sample(self._KNOWN_APP_INFO, len(self._KNOWN_APP_INFO)))
|
|
||||||
self._APP_INFO_POOL = [
|
self._APP_INFO_POOL = [
|
||||||
{**defaults, **dict(
|
{**defaults, **dict(
|
||||||
(k, v) for k, v in zip(self._APP_INFO_DEFAULTS, app_info.split('/')) if v
|
(k, v) for k, v in zip(self._APP_INFO_DEFAULTS, app_info.split('/')) if v
|
||||||
)} for app_info in app_info_list
|
)} for app_info in self._KNOWN_APP_INFO
|
||||||
]
|
]
|
||||||
|
|
||||||
if not self._APP_INFO_POOL:
|
if not self._APP_INFO_POOL:
|
||||||
|
@ -119,7 +128,7 @@ def _call_api_impl(self, ep, query, video_id, fatal=True,
|
||||||
}, query=query)
|
}, query=query)
|
||||||
|
|
||||||
def _build_api_query(self, query):
|
def _build_api_query(self, query):
|
||||||
return {
|
return filter_dict({
|
||||||
**query,
|
**query,
|
||||||
'device_platform': 'android',
|
'device_platform': 'android',
|
||||||
'os': 'android',
|
'os': 'android',
|
||||||
|
@ -160,10 +169,10 @@ def _build_api_query(self, query):
|
||||||
'build_number': self._APP_INFO['app_version'],
|
'build_number': self._APP_INFO['app_version'],
|
||||||
'region': 'US',
|
'region': 'US',
|
||||||
'ts': int(time.time()),
|
'ts': int(time.time()),
|
||||||
'iid': self._APP_INFO['iid'],
|
'iid': self._APP_INFO.get('iid'),
|
||||||
'device_id': random.randint(7250000000000000000, 7351147085025500000),
|
'device_id': self._DEVICE_ID,
|
||||||
'openudid': ''.join(random.choices('0123456789abcdef', k=16)),
|
'openudid': ''.join(random.choices('0123456789abcdef', k=16)),
|
||||||
}
|
})
|
||||||
|
|
||||||
def _call_api(self, ep, query, video_id, fatal=True,
|
def _call_api(self, ep, query, video_id, fatal=True,
|
||||||
note='Downloading API JSON', errnote='Unable to download API page'):
|
note='Downloading API JSON', errnote='Unable to download API page'):
|
||||||
|
@ -203,7 +212,31 @@ def _extract_aweme_app(self, aweme_id):
|
||||||
raise ExtractorError('Unable to find video in feed', video_id=aweme_id)
|
raise ExtractorError('Unable to find video in feed', video_id=aweme_id)
|
||||||
return self._parse_aweme_video_app(aweme_detail)
|
return self._parse_aweme_video_app(aweme_detail)
|
||||||
|
|
||||||
def _get_subtitles(self, aweme_detail, aweme_id):
|
def _extract_web_data_and_status(self, url, video_id, fatal=True):
|
||||||
|
webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'Mozilla/5.0'}, fatal=fatal) or ''
|
||||||
|
video_data, status = {}, None
|
||||||
|
|
||||||
|
if universal_data := self._get_universal_data(webpage, video_id):
|
||||||
|
self.write_debug('Found universal data for rehydration')
|
||||||
|
status = traverse_obj(universal_data, ('webapp.video-detail', 'statusCode', {int})) or 0
|
||||||
|
video_data = traverse_obj(universal_data, ('webapp.video-detail', 'itemInfo', 'itemStruct', {dict}))
|
||||||
|
|
||||||
|
elif sigi_data := self._get_sigi_state(webpage, video_id):
|
||||||
|
self.write_debug('Found sigi state data')
|
||||||
|
status = traverse_obj(sigi_data, ('VideoPage', 'statusCode', {int})) or 0
|
||||||
|
video_data = traverse_obj(sigi_data, ('ItemModule', video_id, {dict}))
|
||||||
|
|
||||||
|
elif next_data := self._search_nextjs_data(webpage, video_id, default={}):
|
||||||
|
self.write_debug('Found next.js data')
|
||||||
|
status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode', {int})) or 0
|
||||||
|
video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct', {dict}))
|
||||||
|
|
||||||
|
elif fatal:
|
||||||
|
raise ExtractorError('Unable to extract webpage video data')
|
||||||
|
|
||||||
|
return video_data, status
|
||||||
|
|
||||||
|
def _get_subtitles(self, aweme_detail, aweme_id, user_url):
|
||||||
# TODO: Extract text positioning info
|
# TODO: Extract text positioning info
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
# aweme/detail endpoint subs
|
# aweme/detail endpoint subs
|
||||||
|
@ -234,20 +267,17 @@ def _get_subtitles(self, aweme_detail, aweme_id):
|
||||||
})
|
})
|
||||||
# webpage subs
|
# webpage subs
|
||||||
if not subtitles:
|
if not subtitles:
|
||||||
for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', ...), expected_type=dict):
|
if user_url: # only _parse_aweme_video_app needs to extract the webpage here
|
||||||
if not caption.get('Url'):
|
aweme_detail, _ = self._extract_web_data_and_status(
|
||||||
continue
|
f'{user_url}/video/{aweme_id}', aweme_id, fatal=False)
|
||||||
|
for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', lambda _, v: v['Url'])):
|
||||||
subtitles.setdefault(caption.get('LanguageCodeName') or 'en', []).append({
|
subtitles.setdefault(caption.get('LanguageCodeName') or 'en', []).append({
|
||||||
'ext': remove_start(caption.get('Format'), 'web'),
|
'ext': remove_start(caption.get('Format'), 'web'),
|
||||||
'url': caption['Url'],
|
'url': caption['Url'],
|
||||||
})
|
})
|
||||||
return subtitles
|
return subtitles
|
||||||
|
|
||||||
def _parse_aweme_video_app(self, aweme_detail):
|
def _parse_url_key(self, url_key):
|
||||||
aweme_id = aweme_detail['aweme_id']
|
|
||||||
video_info = aweme_detail['video']
|
|
||||||
|
|
||||||
def parse_url_key(url_key):
|
|
||||||
format_id, codec, res, bitrate = self._search_regex(
|
format_id, codec, res, bitrate = self._search_regex(
|
||||||
r'v[^_]+_(?P<id>(?P<codec>[^_]+)_(?P<res>\d+p)_(?P<bitrate>\d+))', url_key,
|
r'v[^_]+_(?P<id>(?P<codec>[^_]+)_(?P<res>\d+p)_(?P<bitrate>\d+))', url_key,
|
||||||
'url key', default=(None, None, None, None), group=('id', 'codec', 'res', 'bitrate'))
|
'url key', default=(None, None, None, None), group=('id', 'codec', 'res', 'bitrate'))
|
||||||
|
@ -260,6 +290,9 @@ def parse_url_key(url_key):
|
||||||
'quality': qualities(self.QUALITIES)(res),
|
'quality': qualities(self.QUALITIES)(res),
|
||||||
}, res
|
}, res
|
||||||
|
|
||||||
|
def _parse_aweme_video_app(self, aweme_detail):
|
||||||
|
aweme_id = aweme_detail['aweme_id']
|
||||||
|
video_info = aweme_detail['video']
|
||||||
known_resolutions = {}
|
known_resolutions = {}
|
||||||
|
|
||||||
def audio_meta(url):
|
def audio_meta(url):
|
||||||
|
@ -274,7 +307,7 @@ def audio_meta(url):
|
||||||
} if ext == 'mp3' or '-music-' in url else {}
|
} if ext == 'mp3' or '-music-' in url else {}
|
||||||
|
|
||||||
def extract_addr(addr, add_meta={}):
|
def extract_addr(addr, add_meta={}):
|
||||||
parsed_meta, res = parse_url_key(addr.get('url_key', ''))
|
parsed_meta, res = self._parse_url_key(addr.get('url_key', ''))
|
||||||
is_bytevc2 = parsed_meta.get('vcodec') == 'bytevc2'
|
is_bytevc2 = parsed_meta.get('vcodec') == 'bytevc2'
|
||||||
if res:
|
if res:
|
||||||
known_resolutions.setdefault(res, {}).setdefault('height', int_or_none(addr.get('height')))
|
known_resolutions.setdefault(res, {}).setdefault('height', int_or_none(addr.get('height')))
|
||||||
|
@ -288,7 +321,7 @@ def extract_addr(addr, add_meta={}):
|
||||||
'acodec': 'aac',
|
'acodec': 'aac',
|
||||||
'source_preference': -2 if 'aweme/v1' in url else -1, # Downloads from API might get blocked
|
'source_preference': -2 if 'aweme/v1' in url else -1, # Downloads from API might get blocked
|
||||||
**add_meta, **parsed_meta,
|
**add_meta, **parsed_meta,
|
||||||
# bytevc2 is bytedance's proprietary (unplayable) video codec
|
# bytevc2 is bytedance's own custom h266/vvc codec, as-of-yet unplayable
|
||||||
'preference': -100 if is_bytevc2 else -1,
|
'preference': -100 if is_bytevc2 else -1,
|
||||||
'format_note': join_nonempty(
|
'format_note': join_nonempty(
|
||||||
add_meta.get('format_note'), '(API)' if 'aweme/v1' in url else None,
|
add_meta.get('format_note'), '(API)' if 'aweme/v1' in url else None,
|
||||||
|
@ -300,6 +333,7 @@ def extract_addr(addr, add_meta={}):
|
||||||
formats = []
|
formats = []
|
||||||
width = int_or_none(video_info.get('width'))
|
width = int_or_none(video_info.get('width'))
|
||||||
height = int_or_none(video_info.get('height'))
|
height = int_or_none(video_info.get('height'))
|
||||||
|
ratio = try_call(lambda: width / height) or 0.5625
|
||||||
if video_info.get('play_addr'):
|
if video_info.get('play_addr'):
|
||||||
formats.extend(extract_addr(video_info['play_addr'], {
|
formats.extend(extract_addr(video_info['play_addr'], {
|
||||||
'format_id': 'play_addr',
|
'format_id': 'play_addr',
|
||||||
|
@ -316,8 +350,8 @@ def extract_addr(addr, add_meta={}):
|
||||||
'format_id': 'download_addr',
|
'format_id': 'download_addr',
|
||||||
'format_note': 'Download video%s' % (', watermarked' if video_info.get('has_watermark') else ''),
|
'format_note': 'Download video%s' % (', watermarked' if video_info.get('has_watermark') else ''),
|
||||||
'vcodec': 'h264',
|
'vcodec': 'h264',
|
||||||
'width': dl_width or width,
|
'width': dl_width,
|
||||||
'height': try_call(lambda: int(dl_width / 0.5625)) or height, # download_addr['height'] is wrong
|
'height': try_call(lambda: int(dl_width / ratio)), # download_addr['height'] is wrong
|
||||||
'preference': -2 if video_info.get('has_watermark') else -1,
|
'preference': -2 if video_info.get('has_watermark') else -1,
|
||||||
}))
|
}))
|
||||||
if video_info.get('play_addr_h264'):
|
if video_info.get('play_addr_h264'):
|
||||||
|
@ -403,7 +437,7 @@ def extract_addr(addr, add_meta={}):
|
||||||
'album': str_or_none(music_info.get('album')) or None,
|
'album': str_or_none(music_info.get('album')) or None,
|
||||||
'artists': re.split(r'(?:, | & )', music_author) if music_author else None,
|
'artists': re.split(r'(?:, | & )', music_author) if music_author else None,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': self.extract_subtitles(aweme_detail, aweme_id),
|
'subtitles': self.extract_subtitles(aweme_detail, aweme_id, user_url),
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'duration': int_or_none(traverse_obj(video_info, 'duration', ('download_addr', 'duration')), scale=1000),
|
'duration': int_or_none(traverse_obj(video_info, 'duration', ('download_addr', 'duration')), scale=1000),
|
||||||
'availability': self._availability(
|
'availability': self._availability(
|
||||||
|
@ -424,26 +458,88 @@ def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id):
|
||||||
formats = []
|
formats = []
|
||||||
width = int_or_none(video_info.get('width'))
|
width = int_or_none(video_info.get('width'))
|
||||||
height = int_or_none(video_info.get('height'))
|
height = int_or_none(video_info.get('height'))
|
||||||
|
ratio = try_call(lambda: width / height) or 0.5625
|
||||||
|
COMMON_FORMAT_INFO = {
|
||||||
|
'ext': 'mp4',
|
||||||
|
'vcodec': 'h264',
|
||||||
|
'acodec': 'aac',
|
||||||
|
}
|
||||||
|
|
||||||
|
for bitrate_info in traverse_obj(video_info, ('bitrateInfo', lambda _, v: v['PlayAddr']['UrlList'])):
|
||||||
|
format_info, res = self._parse_url_key(
|
||||||
|
traverse_obj(bitrate_info, ('PlayAddr', 'UrlKey', {str})) or '')
|
||||||
|
# bytevc2 is bytedance's own custom h266/vvc codec, as-of-yet unplayable
|
||||||
|
is_bytevc2 = format_info.get('vcodec') == 'bytevc2'
|
||||||
|
format_info.update({
|
||||||
|
'format_note': 'UNPLAYABLE' if is_bytevc2 else None,
|
||||||
|
'preference': -100 if is_bytevc2 else -1,
|
||||||
|
'filesize': traverse_obj(bitrate_info, ('PlayAddr', 'DataSize', {int_or_none})),
|
||||||
|
})
|
||||||
|
|
||||||
|
if dimension := (res and int(res[:-1])):
|
||||||
|
if dimension == 540: # '540p' is actually 576p
|
||||||
|
dimension = 576
|
||||||
|
if ratio < 1: # portrait: res/dimension is width
|
||||||
|
y = int(dimension / ratio)
|
||||||
|
format_info.update({
|
||||||
|
'width': dimension,
|
||||||
|
'height': y - (y % 2),
|
||||||
|
})
|
||||||
|
else: # landscape: res/dimension is height
|
||||||
|
x = int(dimension * ratio)
|
||||||
|
format_info.update({
|
||||||
|
'width': x - (x % 2),
|
||||||
|
'height': dimension,
|
||||||
|
})
|
||||||
|
|
||||||
|
for video_url in traverse_obj(bitrate_info, ('PlayAddr', 'UrlList', ..., {url_or_none})):
|
||||||
|
formats.append({
|
||||||
|
**COMMON_FORMAT_INFO,
|
||||||
|
**format_info,
|
||||||
|
'url': self._proto_relative_url(video_url),
|
||||||
|
})
|
||||||
|
|
||||||
|
# We don't have res string for play formats, but need quality for sorting & de-duplication
|
||||||
|
play_quality = traverse_obj(formats, (lambda _, v: v['width'] == width, 'quality', any))
|
||||||
|
|
||||||
for play_url in traverse_obj(video_info, ('playAddr', ((..., 'src'), None), {url_or_none})):
|
for play_url in traverse_obj(video_info, ('playAddr', ((..., 'src'), None), {url_or_none})):
|
||||||
formats.append({
|
formats.append({
|
||||||
|
**COMMON_FORMAT_INFO,
|
||||||
|
'format_id': 'play',
|
||||||
'url': self._proto_relative_url(play_url),
|
'url': self._proto_relative_url(play_url),
|
||||||
'ext': 'mp4',
|
|
||||||
'width': width,
|
'width': width,
|
||||||
'height': height,
|
'height': height,
|
||||||
|
'quality': play_quality,
|
||||||
})
|
})
|
||||||
|
|
||||||
for download_url in traverse_obj(video_info, (('downloadAddr', ('download', 'url')), {url_or_none})):
|
for download_url in traverse_obj(video_info, (('downloadAddr', ('download', 'url')), {url_or_none})):
|
||||||
formats.append({
|
formats.append({
|
||||||
|
**COMMON_FORMAT_INFO,
|
||||||
'format_id': 'download',
|
'format_id': 'download',
|
||||||
'url': self._proto_relative_url(download_url),
|
'url': self._proto_relative_url(download_url),
|
||||||
'ext': 'mp4',
|
|
||||||
'width': width,
|
|
||||||
'height': height,
|
|
||||||
})
|
})
|
||||||
|
|
||||||
self._remove_duplicate_formats(formats)
|
self._remove_duplicate_formats(formats)
|
||||||
|
|
||||||
|
for f in traverse_obj(formats, lambda _, v: 'unwatermarked' not in v['url']):
|
||||||
|
f.update({
|
||||||
|
'format_note': join_nonempty(f.get('format_note'), 'watermarked', delim=', '),
|
||||||
|
'preference': f.get('preference') or -2,
|
||||||
|
})
|
||||||
|
|
||||||
|
# Is it a slideshow with only audio for download?
|
||||||
|
if not formats and traverse_obj(music_info, ('playUrl', {url_or_none})):
|
||||||
|
audio_url = music_info['playUrl']
|
||||||
|
ext = traverse_obj(parse_qs(audio_url), (
|
||||||
|
'mime_type', -1, {lambda x: x.replace('_', '/')}, {mimetype2ext})) or 'm4a'
|
||||||
|
formats.append({
|
||||||
|
'format_id': 'audio',
|
||||||
|
'url': self._proto_relative_url(audio_url),
|
||||||
|
'ext': ext,
|
||||||
|
'acodec': 'aac' if ext == 'm4a' else ext,
|
||||||
|
'vcodec': 'none',
|
||||||
|
})
|
||||||
|
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
for thumb_url in traverse_obj(aweme_detail, (
|
for thumb_url in traverse_obj(aweme_detail, (
|
||||||
(None, 'video'), ('thumbnail', 'cover', 'dynamicCover', 'originCover'), {url_or_none})):
|
(None, 'video'), ('thumbnail', 'cover', 'dynamicCover', 'originCover'), {url_or_none})):
|
||||||
|
@ -455,10 +551,17 @@ def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id):
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
**traverse_obj(music_info, {
|
||||||
|
'track': ('title', {str}),
|
||||||
|
'album': ('album', {str}, {lambda x: x or None}),
|
||||||
|
'artists': ('authorName', {str}, {lambda x: [x] if x else None}),
|
||||||
|
'duration': ('duration', {int_or_none}),
|
||||||
|
}),
|
||||||
**traverse_obj(aweme_detail, {
|
**traverse_obj(aweme_detail, {
|
||||||
'title': ('desc', {str}),
|
'title': ('desc', {str}),
|
||||||
'description': ('desc', {str}),
|
'description': ('desc', {str}),
|
||||||
'duration': ('video', 'duration', {int_or_none}),
|
# audio-only slideshows have a video duration of 0 and an actual audio duration
|
||||||
|
'duration': ('video', 'duration', {int_or_none}, {lambda x: x or None}),
|
||||||
'timestamp': ('createTime', {int_or_none}),
|
'timestamp': ('createTime', {int_or_none}),
|
||||||
}),
|
}),
|
||||||
**traverse_obj(author_info or aweme_detail, {
|
**traverse_obj(author_info or aweme_detail, {
|
||||||
|
@ -473,14 +576,10 @@ def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id):
|
||||||
'repost_count': 'shareCount',
|
'repost_count': 'shareCount',
|
||||||
'comment_count': 'commentCount',
|
'comment_count': 'commentCount',
|
||||||
}, expected_type=int_or_none),
|
}, expected_type=int_or_none),
|
||||||
**traverse_obj(music_info, {
|
|
||||||
'track': ('title', {str}),
|
|
||||||
'album': ('album', {str}, {lambda x: x or None}),
|
|
||||||
'artists': ('authorName', {str}, {lambda x: [x] if x else None}),
|
|
||||||
}),
|
|
||||||
'channel_id': channel_id,
|
'channel_id': channel_id,
|
||||||
'uploader_url': user_url,
|
'uploader_url': user_url,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'subtitles': self.extract_subtitles(aweme_detail, video_id, None),
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'http_headers': {
|
'http_headers': {
|
||||||
'Referer': webpage_url,
|
'Referer': webpage_url,
|
||||||
|
@ -757,6 +856,8 @@ class TikTokIE(TikTokBaseIE):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id, user_id = self._match_valid_url(url).group('id', 'user_id')
|
video_id, user_id = self._match_valid_url(url).group('id', 'user_id')
|
||||||
|
|
||||||
|
if self._KNOWN_APP_INFO:
|
||||||
try:
|
try:
|
||||||
return self._extract_aweme_app(video_id)
|
return self._extract_aweme_app(video_id)
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
|
@ -764,25 +865,7 @@ def _real_extract(self, url):
|
||||||
self.report_warning(f'{e}; trying with webpage')
|
self.report_warning(f'{e}; trying with webpage')
|
||||||
|
|
||||||
url = self._create_url(user_id, video_id)
|
url = self._create_url(user_id, video_id)
|
||||||
webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'Mozilla/5.0'})
|
video_data, status = self._extract_web_data_and_status(url, video_id)
|
||||||
|
|
||||||
if universal_data := self._get_universal_data(webpage, video_id):
|
|
||||||
self.write_debug('Found universal data for rehydration')
|
|
||||||
status = traverse_obj(universal_data, ('webapp.video-detail', 'statusCode', {int})) or 0
|
|
||||||
video_data = traverse_obj(universal_data, ('webapp.video-detail', 'itemInfo', 'itemStruct', {dict}))
|
|
||||||
|
|
||||||
elif sigi_data := self._get_sigi_state(webpage, video_id):
|
|
||||||
self.write_debug('Found sigi state data')
|
|
||||||
status = traverse_obj(sigi_data, ('VideoPage', 'statusCode', {int})) or 0
|
|
||||||
video_data = traverse_obj(sigi_data, ('ItemModule', video_id, {dict}))
|
|
||||||
|
|
||||||
elif next_data := self._search_nextjs_data(webpage, video_id, default={}):
|
|
||||||
self.write_debug('Found next.js data')
|
|
||||||
status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode', {int})) or 0
|
|
||||||
video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct', {dict}))
|
|
||||||
|
|
||||||
else:
|
|
||||||
raise ExtractorError('Unable to extract webpage video data')
|
|
||||||
|
|
||||||
if video_data and status == 0:
|
if video_data and status == 0:
|
||||||
return self._parse_aweme_video_web(video_data, url, video_id)
|
return self._parse_aweme_video_web(video_data, url, video_id)
|
||||||
|
@ -850,7 +933,7 @@ def _video_entries_api(self, webpage, user_id, username):
|
||||||
'max_cursor': 0,
|
'max_cursor': 0,
|
||||||
'min_cursor': 0,
|
'min_cursor': 0,
|
||||||
'retry_type': 'no_retry',
|
'retry_type': 'no_retry',
|
||||||
'device_id': ''.join(random.choices(string.digits, k=19)), # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api.
|
'device_id': self._DEVICE_ID, # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api.
|
||||||
}
|
}
|
||||||
|
|
||||||
for page in itertools.count(1):
|
for page in itertools.count(1):
|
||||||
|
@ -898,7 +981,7 @@ def _entries(self, list_id, display_id):
|
||||||
'cursor': 0,
|
'cursor': 0,
|
||||||
'count': 20,
|
'count': 20,
|
||||||
'type': 5,
|
'type': 5,
|
||||||
'device_id': ''.join(random.choices(string.digits, k=19))
|
'device_id': self._DEVICE_ID,
|
||||||
}
|
}
|
||||||
|
|
||||||
for page in itertools.count(1):
|
for page in itertools.count(1):
|
||||||
|
|
|
@ -2,85 +2,88 @@
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
|
get_element_by_class,
|
||||||
|
get_element_html_by_class,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
|
||||||
traverse_obj,
|
|
||||||
try_get,
|
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class TV5MondePlusIE(InfoExtractor):
|
class TV5MondePlusIE(InfoExtractor):
|
||||||
IE_DESC = 'TV5MONDE+'
|
IE_NAME = 'TV5MONDE'
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:tv5mondeplus|revoir\.tv5monde)\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?:www\.)?tv5monde\.com/tv/video/(?P<id>[^/?#]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# movie
|
# documentary
|
||||||
'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/les-novices',
|
'url': 'https://www.tv5monde.com/tv/video/65931-baudouin-l-heritage-d-un-roi-baudouin-l-heritage-d-un-roi',
|
||||||
'md5': 'c86f60bf8b75436455b1b205f9745955',
|
'md5': 'd2a708902d3df230a357c99701aece05',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'ZX0ipMyFQq_6D4BA7b',
|
'id': '3FPa7JMu21_6D4BA7b',
|
||||||
'display_id': 'les-novices',
|
'display_id': '65931-baudouin-l-heritage-d-un-roi-baudouin-l-heritage-d-un-roi',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Les novices',
|
'title': "Baudouin, l'héritage d'un roi",
|
||||||
'description': 'md5:2e7c33ba3ad48dabfcc2a956b88bde2b',
|
'thumbnail': 'https://psi.tv5monde.com/upsilon-images/960x540/6f/baudouin-f49c6b0e.jpg',
|
||||||
'upload_date': '20230821',
|
'duration': 4842,
|
||||||
'thumbnail': 'https://revoir.tv5monde.com/uploads/media/video_thumbnail/0738/60/01e952b7ccf36b7c6007ec9131588954ab651de9.jpeg',
|
'upload_date': '20240130',
|
||||||
'duration': 5177,
|
'timestamp': 1706641242,
|
||||||
'episode': 'Les novices',
|
'episode': "BAUDOUIN, L'HERITAGE D'UN ROI",
|
||||||
|
'description': 'md5:78125c74a5cac06d7743a2d09126edad',
|
||||||
|
'series': "Baudouin, l'héritage d'un roi",
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# series episode
|
# series episode
|
||||||
'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/opj-les-dents-de-la-terre-2',
|
'url': 'https://www.tv5monde.com/tv/video/52952-toute-la-vie-mardi-23-mars-2021',
|
||||||
|
'md5': 'f5e09637cadd55639c05874e22eb56bf',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'wJ0eeEPozr_6D4BA7b',
|
'id': 'obRRZ8m6g9_6D4BA7b',
|
||||||
'display_id': 'opj-les-dents-de-la-terre-2',
|
'display_id': '52952-toute-la-vie-mardi-23-mars-2021',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "OPJ - Les dents de la Terre (2)",
|
'title': 'Toute la vie',
|
||||||
'description': 'md5:288f87fd68d993f814e66e60e5302d9d',
|
'description': 'md5:a824a2e1dfd94cf45fa379a1fb43ce65',
|
||||||
'upload_date': '20230823',
|
'thumbnail': 'https://psi.tv5monde.com/media/image/960px/5880553.jpg',
|
||||||
'series': 'OPJ',
|
'duration': 2526,
|
||||||
'episode': 'Les dents de la Terre (2)',
|
'upload_date': '20230721',
|
||||||
'duration': 2877,
|
'timestamp': 1689971646,
|
||||||
'thumbnail': 'https://dl-revoir.tv5monde.com/images/1a/5753448.jpg'
|
'series': 'Toute la vie',
|
||||||
|
'episode': 'Mardi 23 mars 2021',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# movie
|
# movie
|
||||||
'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/ceux-qui-travaillent',
|
'url': 'https://www.tv5monde.com/tv/video/8771-ce-fleuve-qui-nous-charrie-ce-fleuve-qui-nous-charrie-p001-ce-fleuve-qui-nous-charrie',
|
||||||
'md5': '32fa0cde16a4480d1251502a66856d5f',
|
'md5': '87cefc34e10a6bf4f7823cccd7b36eb2',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'dc57a011-ec4b-4648-2a9a-4f03f8352ed3',
|
'id': 'DOcfvdLKXL_6D4BA7b',
|
||||||
'display_id': 'ceux-qui-travaillent',
|
'display_id': '8771-ce-fleuve-qui-nous-charrie-ce-fleuve-qui-nous-charrie-p001-ce-fleuve-qui-nous-charrie',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Ceux qui travaillent',
|
'title': 'Ce fleuve qui nous charrie',
|
||||||
'description': 'md5:570e8bb688036ace873b2d50d24c026d',
|
'description': 'md5:62ba3f875343c7fc4082bdfbbc1be992',
|
||||||
'upload_date': '20210819',
|
'thumbnail': 'https://psi.tv5monde.com/media/image/960px/5476617.jpg',
|
||||||
|
'duration': 5300,
|
||||||
|
'upload_date': '20210822',
|
||||||
|
'timestamp': 1629594105,
|
||||||
|
'episode': 'CE FLEUVE QUI NOUS CHARRIE-P001-CE FLEUVE QUI NOUS CHARRIE',
|
||||||
|
'series': 'Ce fleuve qui nous charrie',
|
||||||
},
|
},
|
||||||
'skip': 'no longer available',
|
|
||||||
}, {
|
}, {
|
||||||
# series episode
|
# news
|
||||||
'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/vestiaires-caro-actrice',
|
'url': 'https://www.tv5monde.com/tv/video/70402-tv5monde-le-journal-edition-du-08-05-24-11h',
|
||||||
|
'md5': 'c62977d6d10754a2ecebba70ad370479',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '9e9d599e-23af-6915-843e-ecbf62e97925',
|
'id': 'LgQFrOCNsc_6D4BA7b',
|
||||||
'display_id': 'vestiaires-caro-actrice',
|
'display_id': '70402-tv5monde-le-journal-edition-du-08-05-24-11h',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "Vestiaires - Caro actrice",
|
'title': 'TV5MONDE, le journal',
|
||||||
'description': 'md5:db15d2e1976641e08377f942778058ea',
|
'description': 'md5:777dc209eaa4423b678477c36b0b04a8',
|
||||||
'upload_date': '20210819',
|
'thumbnail': 'https://psi.tv5monde.com/media/image/960px/6184105.jpg',
|
||||||
'series': "Vestiaires",
|
'duration': 854,
|
||||||
'episode': 'Caro actrice',
|
'upload_date': '20240508',
|
||||||
|
'timestamp': 1715159640,
|
||||||
|
'series': 'TV5MONDE, le journal',
|
||||||
|
'episode': 'EDITION DU 08/05/24 - 11H',
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
'skip': 'no longer available',
|
|
||||||
}, {
|
|
||||||
'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/neuf-jours-en-hiver-neuf-jours-en-hiver',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'https://revoir.tv5monde.com/toutes-les-videos/info-societe/le-journal-de-la-rts-edition-du-30-01-20-19h30',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
}]
|
||||||
_GEO_BYPASS = False
|
_GEO_BYPASS = False
|
||||||
|
|
||||||
|
@ -98,7 +101,6 @@ def _real_extract(self, url):
|
||||||
if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage:
|
if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage:
|
||||||
self.raise_geo_restricted(countries=['FR'])
|
self.raise_geo_restricted(countries=['FR'])
|
||||||
|
|
||||||
title = episode = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
|
|
||||||
vpl_data = extract_attributes(self._search_regex(
|
vpl_data = extract_attributes(self._search_regex(
|
||||||
r'(<[^>]+class="video_player_loader"[^>]+>)',
|
r'(<[^>]+class="video_player_loader"[^>]+>)',
|
||||||
webpage, 'video player loader'))
|
webpage, 'video player loader'))
|
||||||
|
@ -147,26 +149,7 @@ def process_video_files(v):
|
||||||
process_video_files(video_files)
|
process_video_files(video_files)
|
||||||
|
|
||||||
metadata = self._parse_json(
|
metadata = self._parse_json(
|
||||||
vpl_data['data-metadata'], display_id)
|
vpl_data.get('data-metadata') or '{}', display_id, fatal=False)
|
||||||
duration = (int_or_none(try_get(metadata, lambda x: x['content']['duration']))
|
|
||||||
or parse_duration(self._html_search_meta('duration', webpage)))
|
|
||||||
|
|
||||||
description = self._html_search_regex(
|
|
||||||
r'(?s)<div[^>]+class=["\']episode-texte[^>]+>(.+?)</div>', webpage,
|
|
||||||
'description', fatal=False)
|
|
||||||
|
|
||||||
series = self._html_search_regex(
|
|
||||||
r'<p[^>]+class=["\']episode-emission[^>]+>([^<]+)', webpage,
|
|
||||||
'series', default=None)
|
|
||||||
|
|
||||||
if series and series != title:
|
|
||||||
title = '%s - %s' % (series, title)
|
|
||||||
|
|
||||||
upload_date = self._search_regex(
|
|
||||||
r'(?:date_publication|publish_date)["\']\s*:\s*["\'](\d{4}_\d{2}_\d{2})',
|
|
||||||
webpage, 'upload date', default=None)
|
|
||||||
if upload_date:
|
|
||||||
upload_date = upload_date.replace('_', '')
|
|
||||||
|
|
||||||
if not video_id:
|
if not video_id:
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
|
@ -175,16 +158,20 @@ def process_video_files(v):
|
||||||
default=display_id)
|
default=display_id)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
**traverse_obj(metadata, ('content', {
|
||||||
|
'id': ('id', {str}),
|
||||||
|
'title': ('title', {str}),
|
||||||
|
'episode': ('title', {str}),
|
||||||
|
'series': ('series', {str}),
|
||||||
|
'timestamp': ('publishDate_ts', {int_or_none}),
|
||||||
|
'duration': ('duration', {int_or_none}),
|
||||||
|
})),
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': clean_html(get_element_by_class('main-title', webpage)),
|
||||||
'description': description,
|
'description': clean_html(get_element_by_class('text', get_element_html_by_class('ep-summary', webpage) or '')),
|
||||||
'thumbnail': vpl_data.get('data-image'),
|
'thumbnail': url_or_none(vpl_data.get('data-image')),
|
||||||
'duration': duration,
|
|
||||||
'upload_date': upload_date,
|
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': self._extract_subtitles(self._parse_json(
|
'subtitles': self._extract_subtitles(self._parse_json(
|
||||||
traverse_obj(vpl_data, ('data-captions', {str}), default='{}'), display_id, fatal=False)),
|
traverse_obj(vpl_data, ('data-captions', {str}), default='{}'), display_id, fatal=False)),
|
||||||
'series': series,
|
|
||||||
'episode': episode,
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,9 +34,9 @@
|
||||||
|
|
||||||
class TwitterBaseIE(InfoExtractor):
|
class TwitterBaseIE(InfoExtractor):
|
||||||
_NETRC_MACHINE = 'twitter'
|
_NETRC_MACHINE = 'twitter'
|
||||||
_API_BASE = 'https://api.twitter.com/1.1/'
|
_API_BASE = 'https://api.x.com/1.1/'
|
||||||
_GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
|
_GRAPHQL_API_BASE = 'https://x.com/i/api/graphql/'
|
||||||
_BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
|
_BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:(?:twitter|x)\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
|
||||||
_AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
|
_AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
|
||||||
_LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
|
_LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
|
||||||
_flow_token = None
|
_flow_token = None
|
||||||
|
@ -153,6 +153,14 @@ def _search_dimensions_in_video_url(a_format, video_url):
|
||||||
def is_logged_in(self):
|
def is_logged_in(self):
|
||||||
return bool(self._get_cookies(self._API_BASE).get('auth_token'))
|
return bool(self._get_cookies(self._API_BASE).get('auth_token'))
|
||||||
|
|
||||||
|
# XXX: Temporary workaround until twitter.com => x.com migration is completed
|
||||||
|
def _real_initialize(self):
|
||||||
|
if self.is_logged_in or not self._get_cookies('https://twitter.com/').get('auth_token'):
|
||||||
|
return
|
||||||
|
# User has not yet been migrated to x.com and has passed twitter.com cookies
|
||||||
|
TwitterBaseIE._API_BASE = 'https://api.twitter.com/1.1/'
|
||||||
|
TwitterBaseIE._GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
|
||||||
|
|
||||||
@functools.cached_property
|
@functools.cached_property
|
||||||
def _selected_api(self):
|
def _selected_api(self):
|
||||||
return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]
|
return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]
|
||||||
|
@ -196,17 +204,15 @@ def _perform_login(self, username, password):
|
||||||
if self.is_logged_in:
|
if self.is_logged_in:
|
||||||
return
|
return
|
||||||
|
|
||||||
webpage = self._download_webpage('https://twitter.com/', None, 'Downloading login page')
|
guest_token = self._fetch_guest_token(None)
|
||||||
guest_token = self._search_regex(
|
|
||||||
r'\.cookie\s*=\s*["\']gt=(\d+);', webpage, 'gt', default=None) or self._fetch_guest_token(None)
|
|
||||||
headers = {
|
headers = {
|
||||||
**self._set_base_headers(),
|
**self._set_base_headers(),
|
||||||
'content-type': 'application/json',
|
'content-type': 'application/json',
|
||||||
'x-guest-token': guest_token,
|
'x-guest-token': guest_token,
|
||||||
'x-twitter-client-language': 'en',
|
'x-twitter-client-language': 'en',
|
||||||
'x-twitter-active-user': 'yes',
|
'x-twitter-active-user': 'yes',
|
||||||
'Referer': 'https://twitter.com/',
|
'Referer': 'https://x.com/',
|
||||||
'Origin': 'https://twitter.com',
|
'Origin': 'https://x.com',
|
||||||
}
|
}
|
||||||
|
|
||||||
def build_login_json(*subtask_inputs):
|
def build_login_json(*subtask_inputs):
|
||||||
|
@ -1191,6 +1197,31 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'_old_archive_ids': ['twitter 1724884212803834154'],
|
'_old_archive_ids': ['twitter 1724884212803834154'],
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# x.com
|
||||||
|
'url': 'https://x.com/historyinmemes/status/1790637656616943991',
|
||||||
|
'md5': 'daca3952ba0defe2cfafb1276d4c1ea5',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1790637589910654976',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Historic Vids - One of the most intense moments in history',
|
||||||
|
'description': 'One of the most intense moments in history https://t.co/Zgzhvix8ES',
|
||||||
|
'display_id': '1790637656616943991',
|
||||||
|
'uploader': 'Historic Vids',
|
||||||
|
'uploader_id': 'historyinmemes',
|
||||||
|
'uploader_url': 'https://twitter.com/historyinmemes',
|
||||||
|
'channel_id': '855481986290524160',
|
||||||
|
'upload_date': '20240515',
|
||||||
|
'timestamp': 1715756260.0,
|
||||||
|
'duration': 15.488,
|
||||||
|
'tags': [],
|
||||||
|
'comment_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
|
||||||
|
'age_limit': 0,
|
||||||
|
'_old_archive_ids': ['twitter 1790637656616943991'],
|
||||||
|
}
|
||||||
}, {
|
}, {
|
||||||
# onion route
|
# onion route
|
||||||
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
|
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
|
||||||
|
|
|
@ -173,6 +173,20 @@ class KnownPiracyIE(UnsupportedInfoExtractor):
|
||||||
r'filemoon\.sx',
|
r'filemoon\.sx',
|
||||||
r'hentai\.animestigma\.com',
|
r'hentai\.animestigma\.com',
|
||||||
r'thisav\.com',
|
r'thisav\.com',
|
||||||
|
r'gounlimited\.to',
|
||||||
|
r'highstream\.tv',
|
||||||
|
r'uqload\.com',
|
||||||
|
r'vedbam\.xyz',
|
||||||
|
r'vadbam\.net',
|
||||||
|
r'vidlo\.us',
|
||||||
|
r'wolfstream\.tv',
|
||||||
|
r'xvideosharing\.com',
|
||||||
|
r'(?:\w+\.)?viidshar\.com',
|
||||||
|
r'sxyprn\.com',
|
||||||
|
r'jable\.tv',
|
||||||
|
r'91porn\.com',
|
||||||
|
r'einthusan\.(?:tv|com|ca)',
|
||||||
|
r'yourupload\.com',
|
||||||
)
|
)
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
|
|
@ -1,198 +0,0 @@
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
decode_packed_codes,
|
|
||||||
determine_ext,
|
|
||||||
int_or_none,
|
|
||||||
js_to_json,
|
|
||||||
urlencode_postdata,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58
|
|
||||||
def aa_decode(aa_code):
|
|
||||||
symbol_table = [
|
|
||||||
('7', '((゚ー゚) + (o^_^o))'),
|
|
||||||
('6', '((o^_^o) +(o^_^o))'),
|
|
||||||
('5', '((゚ー゚) + (゚Θ゚))'),
|
|
||||||
('2', '((o^_^o) - (゚Θ゚))'),
|
|
||||||
('4', '(゚ー゚)'),
|
|
||||||
('3', '(o^_^o)'),
|
|
||||||
('1', '(゚Θ゚)'),
|
|
||||||
('0', '(c^_^o)'),
|
|
||||||
]
|
|
||||||
delim = '(゚Д゚)[゚ε゚]+'
|
|
||||||
ret = ''
|
|
||||||
for aa_char in aa_code.split(delim):
|
|
||||||
for val, pat in symbol_table:
|
|
||||||
aa_char = aa_char.replace(pat, val)
|
|
||||||
aa_char = aa_char.replace('+ ', '')
|
|
||||||
m = re.match(r'^\d+', aa_char)
|
|
||||||
if m:
|
|
||||||
ret += chr(int(m.group(0), 8))
|
|
||||||
else:
|
|
||||||
m = re.match(r'^u([\da-f]+)', aa_char)
|
|
||||||
if m:
|
|
||||||
ret += chr(int(m.group(1), 16))
|
|
||||||
return ret
|
|
||||||
|
|
||||||
|
|
||||||
class XFileShareIE(InfoExtractor):
|
|
||||||
_SITES = (
|
|
||||||
(r'aparat\.cam', 'Aparat'),
|
|
||||||
(r'clipwatching\.com', 'ClipWatching'),
|
|
||||||
(r'gounlimited\.to', 'GoUnlimited'),
|
|
||||||
(r'govid\.me', 'GoVid'),
|
|
||||||
(r'holavid\.com', 'HolaVid'),
|
|
||||||
(r'streamty\.com', 'Streamty'),
|
|
||||||
(r'thevideobee\.to', 'TheVideoBee'),
|
|
||||||
(r'uqload\.com', 'Uqload'),
|
|
||||||
(r'vidbom\.com', 'VidBom'),
|
|
||||||
(r'vidlo\.us', 'vidlo'),
|
|
||||||
(r'vidlocker\.xyz', 'VidLocker'),
|
|
||||||
(r'vidshare\.tv', 'VidShare'),
|
|
||||||
(r'vup\.to', 'VUp'),
|
|
||||||
(r'wolfstream\.tv', 'WolfStream'),
|
|
||||||
(r'xvideosharing\.com', 'XVideoSharing'),
|
|
||||||
)
|
|
||||||
|
|
||||||
IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
|
|
||||||
_VALID_URL = (r'https?://(?:www\.)?(?P<host>%s)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
|
|
||||||
% '|'.join(site for site in list(zip(*_SITES))[0]))
|
|
||||||
_EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1' % '|'.join(site for site in list(zip(*_SITES))[0])]
|
|
||||||
|
|
||||||
_FILE_NOT_FOUND_REGEXES = (
|
|
||||||
r'>(?:404 - )?File Not Found<',
|
|
||||||
r'>The file was removed by administrator<',
|
|
||||||
)
|
|
||||||
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://uqload.com/dltx1wztngdz',
|
|
||||||
'md5': '3cfbb65e4c90e93d7b37bcb65a595557',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'dltx1wztngdz',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Rick Astley Never Gonna Give You mp4',
|
|
||||||
'thumbnail': r're:https://.*\.jpg'
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'http://xvideosharing.com/fq65f94nd2ve',
|
|
||||||
'md5': '4181f63957e8fe90ac836fa58dc3c8a6',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'fq65f94nd2ve',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'sample',
|
|
||||||
'thumbnail': r're:http://.*\.jpg',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'https://aparat.cam/n4d6dh0wvlpr',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'https://wolfstream.tv/nthme29v9u2x',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
host, video_id = self._match_valid_url(url).groups()
|
|
||||||
|
|
||||||
url = 'https://%s/' % host + ('embed-%s.html' % video_id if host in ('govid.me', 'vidlo.us') else video_id)
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES):
|
|
||||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
|
||||||
|
|
||||||
fields = self._hidden_inputs(webpage)
|
|
||||||
|
|
||||||
if fields.get('op') == 'download1':
|
|
||||||
countdown = int_or_none(self._search_regex(
|
|
||||||
r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>',
|
|
||||||
webpage, 'countdown', default=None))
|
|
||||||
if countdown:
|
|
||||||
self._sleep(countdown, video_id)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
|
||||||
url, video_id, 'Downloading video page',
|
|
||||||
data=urlencode_postdata(fields), headers={
|
|
||||||
'Referer': url,
|
|
||||||
'Content-type': 'application/x-www-form-urlencoded',
|
|
||||||
})
|
|
||||||
|
|
||||||
title = (self._search_regex(
|
|
||||||
(r'style="z-index: [0-9]+;">([^<]+)</span>',
|
|
||||||
r'<td nowrap>([^<]+)</td>',
|
|
||||||
r'h4-fine[^>]*>([^<]+)<',
|
|
||||||
r'>Watch (.+)[ <]',
|
|
||||||
r'<h2 class="video-page-head">([^<]+)</h2>',
|
|
||||||
r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<', # streamin.to
|
|
||||||
r'title\s*:\s*"([^"]+)"'), # govid.me
|
|
||||||
webpage, 'title', default=None) or self._og_search_title(
|
|
||||||
webpage, default=None) or video_id).strip()
|
|
||||||
|
|
||||||
for regex, func in (
|
|
||||||
(r'(eval\(function\(p,a,c,k,e,d\){.+)', decode_packed_codes),
|
|
||||||
(r'(゚.+)', aa_decode)):
|
|
||||||
obf_code = self._search_regex(regex, webpage, 'obfuscated code', default=None)
|
|
||||||
if obf_code:
|
|
||||||
webpage = webpage.replace(obf_code, func(obf_code))
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
|
|
||||||
jwplayer_data = self._search_regex(
|
|
||||||
[
|
|
||||||
r'jwplayer\("[^"]+"\)\.load\(\[({.+?})\]\);',
|
|
||||||
r'jwplayer\("[^"]+"\)\.setup\(({.+?})\);',
|
|
||||||
], webpage,
|
|
||||||
'jwplayer data', default=None)
|
|
||||||
if jwplayer_data:
|
|
||||||
jwplayer_data = self._parse_json(
|
|
||||||
jwplayer_data.replace(r"\'", "'"), video_id, js_to_json)
|
|
||||||
if jwplayer_data:
|
|
||||||
formats = self._parse_jwplayer_data(
|
|
||||||
jwplayer_data, video_id, False,
|
|
||||||
m3u8_id='hls', mpd_id='dash')['formats']
|
|
||||||
|
|
||||||
if not formats:
|
|
||||||
urls = []
|
|
||||||
for regex in (
|
|
||||||
r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
|
|
||||||
r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
|
|
||||||
r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
|
|
||||||
r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'):
|
|
||||||
for mobj in re.finditer(regex, webpage):
|
|
||||||
video_url = mobj.group('url')
|
|
||||||
if video_url not in urls:
|
|
||||||
urls.append(video_url)
|
|
||||||
|
|
||||||
sources = self._search_regex(
|
|
||||||
r'sources\s*:\s*(\[(?!{)[^\]]+\])', webpage, 'sources', default=None)
|
|
||||||
if sources:
|
|
||||||
urls.extend(self._parse_json(sources, video_id))
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for video_url in urls:
|
|
||||||
if determine_ext(video_url) == 'm3u8':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
video_url, video_id, 'mp4',
|
|
||||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
|
||||||
fatal=False))
|
|
||||||
else:
|
|
||||||
formats.append({
|
|
||||||
'url': video_url,
|
|
||||||
'format_id': 'sd',
|
|
||||||
})
|
|
||||||
|
|
||||||
thumbnail = self._search_regex(
|
|
||||||
[
|
|
||||||
r'<video[^>]+poster="([^"]+)"',
|
|
||||||
r'(?:image|poster)\s*:\s*["\'](http[^"\']+)["\'],',
|
|
||||||
], webpage, 'thumbnail', default=None)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'formats': formats,
|
|
||||||
'http_headers': {'Referer': url}
|
|
||||||
}
|
|
|
@ -173,8 +173,41 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
class XVideosQuickiesIE(InfoExtractor):
|
class XVideosQuickiesIE(InfoExtractor):
|
||||||
IE_NAME = 'xvideos:quickies'
|
IE_NAME = 'xvideos:quickies'
|
||||||
_VALID_URL = r'https?://(?P<domain>(?:[^/]+\.)?xvideos2?\.com)/amateur-channels/[^#]+#quickies/a/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?P<domain>(?:[^/?#]+\.)?xvideos2?\.com)/(?:profiles/|amateur-channels/)?[^/?#]+#quickies/a/(?P<id>\w+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
'url': 'https://www.xvideos.com/lili_love#quickies/a/ipdtikh1a4c',
|
||||||
|
'md5': 'f9e4f518ff1de14b99a400bbd0fc5ee0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'ipdtikh1a4c',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Mexican chichóna putisima',
|
||||||
|
'age_limit': 18,
|
||||||
|
'duration': 81,
|
||||||
|
'thumbnail': r're:^https://cdn.*-pic.xvideos-cdn.com/.+\.jpg',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.xvideos.com/profiles/lili_love#quickies/a/ipphaob6fd1',
|
||||||
|
'md5': '5340938aac6b46e19ebdd1d84535862e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'ipphaob6fd1',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Puta chichona mexicana squirting',
|
||||||
|
'age_limit': 18,
|
||||||
|
'duration': 56,
|
||||||
|
'thumbnail': r're:^https://cdn.*-pic.xvideos-cdn.com/.+\.jpg',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.xvideos.com/amateur-channels/lili_love#quickies/a/hfmffmd7661',
|
||||||
|
'md5': '92428518bbabcb4c513e55922e022491',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'hfmffmd7661',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Chichona mexican slut',
|
||||||
|
'age_limit': 18,
|
||||||
|
'duration': 9,
|
||||||
|
'thumbnail': r're:^https://cdn.*-pic.xvideos-cdn.com/.+\.jpg',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
'url': 'https://www.xvideos.com/amateur-channels/wifeluna#quickies/a/47258683',
|
'url': 'https://www.xvideos.com/amateur-channels/wifeluna#quickies/a/47258683',
|
||||||
'md5': '16e322a93282667f1963915568f782c1',
|
'md5': '16e322a93282667f1963915568f782c1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -189,4 +222,4 @@ class XVideosQuickiesIE(InfoExtractor):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
domain, id_ = self._match_valid_url(url).group('domain', 'id')
|
domain, id_ = self._match_valid_url(url).group('domain', 'id')
|
||||||
return self.url_result(f'https://{domain}/video{id_}/_', XVideosIE, id_)
|
return self.url_result(f'https://{domain}/video{"" if id_.isdecimal() else "."}{id_}/_', XVideosIE, id_)
|
||||||
|
|
|
@ -1,65 +0,0 @@
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..compat import compat_str
|
|
||||||
from ..utils import (
|
|
||||||
parse_duration,
|
|
||||||
urljoin,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class YourPornIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?sxyprn\.com/post/(?P<id>[^/?#&.]+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://sxyprn.com/post/57ffcb2e1179b.html',
|
|
||||||
'md5': '6f8682b6464033d87acaa7a8ff0c092e',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '57ffcb2e1179b',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'md5:c9f43630bd968267672651ba905a7d35',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'duration': 165,
|
|
||||||
'age_limit': 18,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'https://sxyprn.com/post/57ffcb2e1179b.html',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
parts = self._parse_json(
|
|
||||||
self._search_regex(
|
|
||||||
r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info',
|
|
||||||
group='data'),
|
|
||||||
video_id)[video_id].split('/')
|
|
||||||
|
|
||||||
num = 0
|
|
||||||
for c in parts[6] + parts[7]:
|
|
||||||
if c.isnumeric():
|
|
||||||
num += int(c)
|
|
||||||
parts[5] = compat_str(int(parts[5]) - num)
|
|
||||||
parts[1] += '8'
|
|
||||||
video_url = urljoin(url, '/'.join(parts))
|
|
||||||
|
|
||||||
title = (self._search_regex(
|
|
||||||
r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title',
|
|
||||||
default=None) or self._og_search_description(webpage)).strip()
|
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
|
||||||
duration = parse_duration(self._search_regex(
|
|
||||||
r'duration\s*:\s*<[^>]+>([\d:]+)', webpage, 'duration',
|
|
||||||
default=None))
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'url': video_url,
|
|
||||||
'title': title,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'duration': duration,
|
|
||||||
'age_limit': 18,
|
|
||||||
'ext': 'mp4',
|
|
||||||
}
|
|
|
@ -1,43 +0,0 @@
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import urljoin
|
|
||||||
|
|
||||||
|
|
||||||
class YourUploadIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:yourupload\.com/(?:watch|embed)|embed\.yourupload\.com)/(?P<id>[A-Za-z0-9]+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://yourupload.com/watch/14i14h',
|
|
||||||
'md5': '5e2c63385454c557f97c4c4131a393cd',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '14i14h',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'BigBuckBunny_320x180.mp4',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpe?g',
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.yourupload.com/embed/14i14h',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://embed.yourupload.com/14i14h',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
|
|
||||||
embed_url = 'http://www.yourupload.com/embed/%s' % video_id
|
|
||||||
|
|
||||||
webpage = self._download_webpage(embed_url, video_id)
|
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
|
||||||
video_url = urljoin(embed_url, self._og_search_video_url(webpage))
|
|
||||||
thumbnail = self._og_search_thumbnail(webpage, default=None)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'url': video_url,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'http_headers': {
|
|
||||||
'Referer': embed_url,
|
|
||||||
},
|
|
||||||
}
|
|
|
@ -240,6 +240,16 @@
|
||||||
},
|
},
|
||||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 85
|
'INNERTUBE_CONTEXT_CLIENT_NAME': 85
|
||||||
},
|
},
|
||||||
|
# This client has pre-merged video+audio 720p/1080p streams
|
||||||
|
'mediaconnect': {
|
||||||
|
'INNERTUBE_CONTEXT': {
|
||||||
|
'client': {
|
||||||
|
'clientName': 'MEDIA_CONNECT_FRONTEND',
|
||||||
|
'clientVersion': '0.1',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
'INNERTUBE_CONTEXT_CLIENT_NAME': 95
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1171,7 +1181,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
|
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
|
||||||
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
|
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
|
||||||
)
|
)
|
||||||
_formats = {
|
_formats = { # NB: Used in YoutubeWebArchiveIE and GoogleDriveIE
|
||||||
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
|
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
|
||||||
'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
|
'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
|
||||||
'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
|
'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
|
||||||
|
@ -2343,6 +2353,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
'format': '17', # 3gp format available on android
|
'format': '17', # 3gp format available on android
|
||||||
'extractor_args': {'youtube': {'player_client': ['android']}},
|
'extractor_args': {'youtube': {'player_client': ['android']}},
|
||||||
},
|
},
|
||||||
|
'skip': 'android client broken',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# Skip download of additional client configs (remix client config in this case)
|
# Skip download of additional client configs (remix client config in this case)
|
||||||
|
@ -2720,7 +2731,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
'heatmap': 'count:100',
|
'heatmap': 'count:100',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'extractor_args': {'youtube': {'player_client': ['android'], 'player_skip': ['webpage']}},
|
'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
@ -3336,7 +3347,36 @@ def _extract_heatmap(self, data):
|
||||||
'value': ('intensityScoreNormalized', {float_or_none}),
|
'value': ('intensityScoreNormalized', {float_or_none}),
|
||||||
})) or None
|
})) or None
|
||||||
|
|
||||||
def _extract_comment(self, comment_renderer, parent=None):
|
def _extract_comment(self, entities, parent=None):
|
||||||
|
comment_entity_payload = get_first(entities, ('payload', 'commentEntityPayload', {dict}))
|
||||||
|
if not (comment_id := traverse_obj(comment_entity_payload, ('properties', 'commentId', {str}))):
|
||||||
|
return
|
||||||
|
|
||||||
|
toolbar_entity_payload = get_first(entities, ('payload', 'engagementToolbarStateEntityPayload', {dict}))
|
||||||
|
time_text = traverse_obj(comment_entity_payload, ('properties', 'publishedTime', {str})) or ''
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': comment_id,
|
||||||
|
'parent': parent or 'root',
|
||||||
|
**traverse_obj(comment_entity_payload, {
|
||||||
|
'text': ('properties', 'content', 'content', {str}),
|
||||||
|
'like_count': ('toolbar', 'likeCountA11y', {parse_count}),
|
||||||
|
'author_id': ('author', 'channelId', {self.ucid_or_none}),
|
||||||
|
'author': ('author', 'displayName', {str}),
|
||||||
|
'author_thumbnail': ('author', 'avatarThumbnailUrl', {url_or_none}),
|
||||||
|
'author_is_uploader': ('author', 'isCreator', {bool}),
|
||||||
|
'author_is_verified': ('author', 'isVerified', {bool}),
|
||||||
|
'author_url': ('author', 'channelCommand', 'innertubeCommand', (
|
||||||
|
('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url')
|
||||||
|
), {lambda x: urljoin('https://www.youtube.com', x)}),
|
||||||
|
}, get_all=False),
|
||||||
|
'is_favorited': (None if toolbar_entity_payload is None else
|
||||||
|
toolbar_entity_payload.get('heartState') == 'TOOLBAR_HEART_STATE_HEARTED'),
|
||||||
|
'_time_text': time_text, # FIXME: non-standard, but we need a way of showing that it is an estimate.
|
||||||
|
'timestamp': self._parse_time_text(time_text),
|
||||||
|
}
|
||||||
|
|
||||||
|
def _extract_comment_old(self, comment_renderer, parent=None):
|
||||||
comment_id = comment_renderer.get('commentId')
|
comment_id = comment_renderer.get('commentId')
|
||||||
if not comment_id:
|
if not comment_id:
|
||||||
return
|
return
|
||||||
|
@ -3417,21 +3457,39 @@ def extract_header(contents):
|
||||||
break
|
break
|
||||||
return _continuation
|
return _continuation
|
||||||
|
|
||||||
def extract_thread(contents):
|
def extract_thread(contents, entity_payloads):
|
||||||
if not parent:
|
if not parent:
|
||||||
tracker['current_page_thread'] = 0
|
tracker['current_page_thread'] = 0
|
||||||
for content in contents:
|
for content in contents:
|
||||||
if not parent and tracker['total_parent_comments'] >= max_parents:
|
if not parent and tracker['total_parent_comments'] >= max_parents:
|
||||||
yield
|
yield
|
||||||
comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
|
comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
|
||||||
|
|
||||||
|
# old comment format
|
||||||
|
if not entity_payloads:
|
||||||
comment_renderer = get_first(
|
comment_renderer = get_first(
|
||||||
(comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
|
(comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
|
||||||
expected_type=dict, default={})
|
expected_type=dict, default={})
|
||||||
|
|
||||||
comment = self._extract_comment(comment_renderer, parent)
|
comment = self._extract_comment_old(comment_renderer, parent)
|
||||||
|
|
||||||
|
# new comment format
|
||||||
|
else:
|
||||||
|
view_model = (
|
||||||
|
traverse_obj(comment_thread_renderer, ('commentViewModel', 'commentViewModel', {dict}))
|
||||||
|
or traverse_obj(content, ('commentViewModel', {dict})))
|
||||||
|
comment_keys = traverse_obj(view_model, (('commentKey', 'toolbarStateKey'), {str}))
|
||||||
|
if not comment_keys:
|
||||||
|
continue
|
||||||
|
entities = traverse_obj(entity_payloads, lambda _, v: v['entityKey'] in comment_keys)
|
||||||
|
comment = self._extract_comment(entities, parent)
|
||||||
|
if comment:
|
||||||
|
comment['is_pinned'] = traverse_obj(view_model, ('pinnedText', {str})) is not None
|
||||||
|
|
||||||
if not comment:
|
if not comment:
|
||||||
continue
|
continue
|
||||||
comment_id = comment['id']
|
comment_id = comment['id']
|
||||||
|
|
||||||
if comment.get('is_pinned'):
|
if comment.get('is_pinned'):
|
||||||
tracker['pinned_comment_ids'].add(comment_id)
|
tracker['pinned_comment_ids'].add(comment_id)
|
||||||
# Sometimes YouTube may break and give us infinite looping comments.
|
# Sometimes YouTube may break and give us infinite looping comments.
|
||||||
|
@ -3524,7 +3582,7 @@ def extract_thread(contents):
|
||||||
check_get_keys = None
|
check_get_keys = None
|
||||||
if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
|
if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
|
||||||
check_get_keys = [[*continuation_items_path, ..., (
|
check_get_keys = [[*continuation_items_path, ..., (
|
||||||
'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentRenderer'))]]
|
'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentViewModel', 'commentRenderer'))]]
|
||||||
try:
|
try:
|
||||||
response = self._extract_response(
|
response = self._extract_response(
|
||||||
item_id=None, query=continuation,
|
item_id=None, query=continuation,
|
||||||
|
@ -3548,6 +3606,7 @@ def extract_thread(contents):
|
||||||
raise
|
raise
|
||||||
is_forced_continuation = False
|
is_forced_continuation = False
|
||||||
continuation = None
|
continuation = None
|
||||||
|
mutations = traverse_obj(response, ('frameworkUpdates', 'entityBatchUpdate', 'mutations', ..., {dict}))
|
||||||
for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
|
for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
|
||||||
if is_first_continuation:
|
if is_first_continuation:
|
||||||
continuation = extract_header(continuation_items)
|
continuation = extract_header(continuation_items)
|
||||||
|
@ -3556,7 +3615,7 @@ def extract_thread(contents):
|
||||||
break
|
break
|
||||||
continue
|
continue
|
||||||
|
|
||||||
for entry in extract_thread(continuation_items):
|
for entry in extract_thread(continuation_items, mutations):
|
||||||
if not entry:
|
if not entry:
|
||||||
return
|
return
|
||||||
yield entry
|
yield entry
|
||||||
|
@ -3633,8 +3692,6 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg,
|
||||||
yt_query = {
|
yt_query = {
|
||||||
'videoId': video_id,
|
'videoId': video_id,
|
||||||
}
|
}
|
||||||
if _split_innertube_client(client)[0] in ('android', 'android_embedscreen'):
|
|
||||||
yt_query['params'] = 'CgIIAQ=='
|
|
||||||
|
|
||||||
pp_arg = self._configuration_arg('player_params', [None], casesense=True)[0]
|
pp_arg = self._configuration_arg('player_params', [None], casesense=True)[0]
|
||||||
if pp_arg:
|
if pp_arg:
|
||||||
|
@ -3650,19 +3707,24 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg,
|
||||||
|
|
||||||
def _get_requested_clients(self, url, smuggled_data):
|
def _get_requested_clients(self, url, smuggled_data):
|
||||||
requested_clients = []
|
requested_clients = []
|
||||||
default = ['ios', 'android', 'web']
|
android_clients = []
|
||||||
|
default = ['ios', 'web']
|
||||||
allowed_clients = sorted(
|
allowed_clients = sorted(
|
||||||
(client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
|
(client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
|
||||||
key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
|
key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
|
||||||
for client in self._configuration_arg('player_client'):
|
for client in self._configuration_arg('player_client'):
|
||||||
if client in allowed_clients:
|
if client == 'default':
|
||||||
requested_clients.append(client)
|
|
||||||
elif client == 'default':
|
|
||||||
requested_clients.extend(default)
|
requested_clients.extend(default)
|
||||||
elif client == 'all':
|
elif client == 'all':
|
||||||
requested_clients.extend(allowed_clients)
|
requested_clients.extend(allowed_clients)
|
||||||
else:
|
elif client not in allowed_clients:
|
||||||
self.report_warning(f'Skipping unsupported client {client}')
|
self.report_warning(f'Skipping unsupported client {client}')
|
||||||
|
elif client.startswith('android'):
|
||||||
|
android_clients.append(client)
|
||||||
|
else:
|
||||||
|
requested_clients.append(client)
|
||||||
|
# Force deprioritization of broken Android clients for format de-duplication
|
||||||
|
requested_clients.extend(android_clients)
|
||||||
if not requested_clients:
|
if not requested_clients:
|
||||||
requested_clients = default
|
requested_clients = default
|
||||||
|
|
||||||
|
@ -3881,6 +3943,14 @@ def build_fragments(f):
|
||||||
f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
|
f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
|
||||||
|
|
||||||
client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
|
client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
|
||||||
|
# Android client formats are broken due to integrity check enforcement
|
||||||
|
# Ref: https://github.com/yt-dlp/yt-dlp/issues/9554
|
||||||
|
is_broken = client_name and client_name.startswith(short_client_name('android'))
|
||||||
|
if is_broken:
|
||||||
|
self.report_warning(
|
||||||
|
f'{video_id}: Android client formats are broken and may yield HTTP Error 403. '
|
||||||
|
'They will be deprioritized', only_once=True)
|
||||||
|
|
||||||
name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
|
name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
|
||||||
fps = int_or_none(fmt.get('fps')) or 0
|
fps = int_or_none(fmt.get('fps')) or 0
|
||||||
dct = {
|
dct = {
|
||||||
|
@ -3893,7 +3963,7 @@ def build_fragments(f):
|
||||||
name, fmt.get('isDrc') and 'DRC',
|
name, fmt.get('isDrc') and 'DRC',
|
||||||
try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
|
try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
|
||||||
try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
|
try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
|
||||||
throttled and 'THROTTLED', is_damaged and 'DAMAGED',
|
throttled and 'THROTTLED', is_damaged and 'DAMAGED', is_broken and 'BROKEN',
|
||||||
(self.get_param('verbose') or all_formats) and client_name,
|
(self.get_param('verbose') or all_formats) and client_name,
|
||||||
delim=', '),
|
delim=', '),
|
||||||
# Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
|
# Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
|
||||||
|
@ -3911,8 +3981,8 @@ def build_fragments(f):
|
||||||
'language': join_nonempty(audio_track.get('id', '').split('.')[0],
|
'language': join_nonempty(audio_track.get('id', '').split('.')[0],
|
||||||
'desc' if language_preference < -1 else '') or None,
|
'desc' if language_preference < -1 else '') or None,
|
||||||
'language_preference': language_preference,
|
'language_preference': language_preference,
|
||||||
# Strictly de-prioritize damaged and 3gp formats
|
# Strictly de-prioritize broken, damaged and 3gp formats
|
||||||
'preference': -10 if is_damaged else -2 if itag == '17' else None,
|
'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None,
|
||||||
}
|
}
|
||||||
mime_mobj = re.match(
|
mime_mobj = re.match(
|
||||||
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
|
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
|
||||||
|
|
|
@ -21,7 +21,7 @@
|
||||||
TransportError,
|
TransportError,
|
||||||
)
|
)
|
||||||
from .impersonate import ImpersonateRequestHandler, ImpersonateTarget
|
from .impersonate import ImpersonateRequestHandler, ImpersonateTarget
|
||||||
from ..dependencies import curl_cffi
|
from ..dependencies import curl_cffi, certifi
|
||||||
from ..utils import int_or_none
|
from ..utils import int_or_none
|
||||||
|
|
||||||
if curl_cffi is None:
|
if curl_cffi is None:
|
||||||
|
@ -166,6 +166,13 @@ def _send(self, request: Request):
|
||||||
# See: https://curl.se/libcurl/c/CURLOPT_HTTPPROXYTUNNEL.html
|
# See: https://curl.se/libcurl/c/CURLOPT_HTTPPROXYTUNNEL.html
|
||||||
session.curl.setopt(CurlOpt.HTTPPROXYTUNNEL, 1)
|
session.curl.setopt(CurlOpt.HTTPPROXYTUNNEL, 1)
|
||||||
|
|
||||||
|
# curl_cffi does not currently set these for proxies
|
||||||
|
session.curl.setopt(CurlOpt.PROXY_CAINFO, certifi.where())
|
||||||
|
|
||||||
|
if not self.verify:
|
||||||
|
session.curl.setopt(CurlOpt.PROXY_SSL_VERIFYPEER, 0)
|
||||||
|
session.curl.setopt(CurlOpt.PROXY_SSL_VERIFYHOST, 0)
|
||||||
|
|
||||||
headers = self._get_impersonate_headers(request)
|
headers = self._get_impersonate_headers(request)
|
||||||
|
|
||||||
if self._client_cert:
|
if self._client_cert:
|
||||||
|
@ -213,7 +220,10 @@ def _send(self, request: Request):
|
||||||
max_redirects_exceeded = True
|
max_redirects_exceeded = True
|
||||||
curl_response = e.response
|
curl_response = e.response
|
||||||
|
|
||||||
elif e.code == CurlECode.PROXY:
|
elif (
|
||||||
|
e.code == CurlECode.PROXY
|
||||||
|
or (e.code == CurlECode.RECV_ERROR and 'Received HTTP code 407 from proxy after CONNECT' in str(e))
|
||||||
|
):
|
||||||
raise ProxyError(cause=e) from e
|
raise ProxyError(cause=e) from e
|
||||||
else:
|
else:
|
||||||
raise TransportError(cause=e) from e
|
raise TransportError(cause=e) from e
|
||||||
|
|
|
@ -28,6 +28,7 @@
|
||||||
import requests.utils
|
import requests.utils
|
||||||
import urllib3.connection
|
import urllib3.connection
|
||||||
import urllib3.exceptions
|
import urllib3.exceptions
|
||||||
|
import urllib3.util
|
||||||
|
|
||||||
from ._helper import (
|
from ._helper import (
|
||||||
InstanceStoreMixin,
|
InstanceStoreMixin,
|
||||||
|
@ -180,10 +181,25 @@ def proxy_manager_for(self, proxy, **proxy_kwargs):
|
||||||
extra_kwargs['proxy_ssl_context'] = self._proxy_ssl_context
|
extra_kwargs['proxy_ssl_context'] = self._proxy_ssl_context
|
||||||
return super().proxy_manager_for(proxy, **proxy_kwargs, **self._pm_args, **extra_kwargs)
|
return super().proxy_manager_for(proxy, **proxy_kwargs, **self._pm_args, **extra_kwargs)
|
||||||
|
|
||||||
|
# Skip `requests` internal verification; we use our own SSLContext
|
||||||
|
# requests 2.31.0+
|
||||||
def cert_verify(*args, **kwargs):
|
def cert_verify(*args, **kwargs):
|
||||||
# lean on SSLContext for cert verification
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# requests 2.31.0-2.32.1
|
||||||
|
def _get_connection(self, request, *_, proxies=None, **__):
|
||||||
|
return self.get_connection(request.url, proxies)
|
||||||
|
|
||||||
|
# requests 2.32.2+: Reimplementation without `_urllib3_request_context`
|
||||||
|
def get_connection_with_tls_context(self, request, verify, proxies=None, cert=None):
|
||||||
|
url = urllib3.util.parse_url(request.url).url
|
||||||
|
|
||||||
|
manager = self.poolmanager
|
||||||
|
if proxy := select_proxy(url, proxies):
|
||||||
|
manager = self.proxy_manager_for(proxy)
|
||||||
|
|
||||||
|
return manager.connection_from_url(url)
|
||||||
|
|
||||||
|
|
||||||
class RequestsSession(requests.sessions.Session):
|
class RequestsSession(requests.sessions.Session):
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -31,6 +31,8 @@
|
||||||
)
|
)
|
||||||
from ..utils.networking import HTTPHeaderDict, normalize_url
|
from ..utils.networking import HTTPHeaderDict, normalize_url
|
||||||
|
|
||||||
|
DEFAULT_TIMEOUT = 20
|
||||||
|
|
||||||
|
|
||||||
def register_preference(*handlers: type[RequestHandler]):
|
def register_preference(*handlers: type[RequestHandler]):
|
||||||
assert all(issubclass(handler, RequestHandler) for handler in handlers)
|
assert all(issubclass(handler, RequestHandler) for handler in handlers)
|
||||||
|
@ -235,7 +237,7 @@ def __init__(
|
||||||
self._logger = logger
|
self._logger = logger
|
||||||
self.headers = headers or {}
|
self.headers = headers or {}
|
||||||
self.cookiejar = cookiejar if cookiejar is not None else YoutubeDLCookieJar()
|
self.cookiejar = cookiejar if cookiejar is not None else YoutubeDLCookieJar()
|
||||||
self.timeout = float(timeout or 20)
|
self.timeout = float(timeout or DEFAULT_TIMEOUT)
|
||||||
self.proxies = proxies or {}
|
self.proxies = proxies or {}
|
||||||
self.source_address = source_address
|
self.source_address = source_address
|
||||||
self.verbose = verbose
|
self.verbose = verbose
|
||||||
|
|
Loading…
Reference in a new issue