Merge branch 'master' into elemental_tv_to_upstream

This commit is contained in:
mp107 2024-01-21 17:15:15 +01:00 committed by GitHub
commit 1019529884
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
42 changed files with 2225 additions and 498 deletions

View File

@ -1888,6 +1888,9 @@ The following extractors use this feature:
#### nflplusreplay
* `type`: Type(s) of game replays to extract. Valid types are: `full_game`, `full_game_spanish`, `condensed_game` and `all_22`. You can use `all` to extract all available replay types, which is the default
#### jiosaavn
* `bitrate`: Audio bitrates to request. One or more of `16`, `32`, `64`, `128`, `320`. Default is `128,320`
**Note**: These options may be changed/removed in the future without concern for backward compatibility
<!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->

View File

@ -10,7 +10,7 @@ import types
import yt_dlp.extractor
from yt_dlp import YoutubeDL
from yt_dlp.compat import compat_os_name
from yt_dlp.utils import preferredencoding, try_call, write_string
from yt_dlp.utils import preferredencoding, try_call, write_string, find_available_port
if 'pytest' in sys.modules:
import pytest
@ -329,3 +329,8 @@ def http_server_port(httpd):
else:
sock = httpd.socket
return sock.getsockname()[1]
def verify_address_availability(address):
    """Skip the current pytest test if *address* cannot be bound on this host.

    Some systems lack the extra loopback addresses (e.g. 127.0.0.x beyond
    127.0.0.1) that the source-address tests rely on.
    """
    port = find_available_port(address)
    if port is not None:
        return
    pytest.skip(f'Unable to bind to source address {address} (address may not exist)')

View File

@ -26,7 +26,7 @@ import zlib
from email.message import Message
from http.cookiejar import CookieJar
from test.helper import FakeYDL, http_server_port
from test.helper import FakeYDL, http_server_port, verify_address_availability
from yt_dlp.cookies import YoutubeDLCookieJar
from yt_dlp.dependencies import brotli, requests, urllib3
from yt_dlp.networking import (
@ -180,6 +180,12 @@ class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
self.send_header('Location', '/a/b/./../../headers')
self.send_header('Content-Length', '0')
self.end_headers()
elif self.path == '/redirect_dotsegments_absolute':
self.send_response(301)
# redirect to /headers but with dot segments before - absolute url
self.send_header('Location', f'http://127.0.0.1:{http_server_port(self.server)}/a/b/./../../headers')
self.send_header('Content-Length', '0')
self.end_headers()
elif self.path.startswith('/redirect_'):
self._redirect()
elif self.path.startswith('/method'):
@ -345,16 +351,17 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
res.close()
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_remove_dot_segments(self, handler):
with handler() as rh:
@pytest.mark.parametrize('path', [
'/a/b/./../../headers',
'/redirect_dotsegments',
# https://github.com/yt-dlp/yt-dlp/issues/9020
'/redirect_dotsegments_absolute',
])
def test_remove_dot_segments(self, handler, path):
with handler(verbose=True) as rh:
# This isn't a comprehensive test,
# but it should be enough to check whether the handler is removing dot segments
res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/a/b/./../../headers'))
assert res.status == 200
assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
res.close()
res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_dotsegments'))
# but it should be enough to check whether the handler is removing dot segments in required scenarios
res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}{path}'))
assert res.status == 200
assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
res.close()
@ -538,6 +545,9 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_source_address(self, handler):
source_address = f'127.0.0.{random.randint(5, 255)}'
# on some systems these loopback addresses we need for testing may not be available
# see: https://github.com/yt-dlp/yt-dlp/issues/8890
verify_address_availability(source_address)
with handler(source_address=source_address) as rh:
data = validate_and_send(
rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode()

View File

@ -8,13 +8,9 @@ import pytest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import contextlib
import io
import platform
import random
import ssl
import urllib.error
import warnings
from yt_dlp.cookies import YoutubeDLCookieJar
from yt_dlp.dependencies import certifi
@ -30,7 +26,6 @@ from yt_dlp.networking._helper import (
from yt_dlp.networking.exceptions import (
HTTPError,
IncompleteRead,
_CompatHTTPError,
)
from yt_dlp.socks import ProxyType
from yt_dlp.utils.networking import HTTPHeaderDict
@ -179,11 +174,10 @@ class TestNetworkingExceptions:
def create_response(status):
return Response(fp=io.BytesIO(b'test'), url='http://example.com', headers={'tesT': 'test'}, status=status)
@pytest.mark.parametrize('http_error_class', [HTTPError, lambda r: _CompatHTTPError(HTTPError(r))])
def test_http_error(self, http_error_class):
def test_http_error(self):
response = self.create_response(403)
error = http_error_class(response)
error = HTTPError(response)
assert error.status == 403
assert str(error) == error.msg == 'HTTP Error 403: Forbidden'
@ -194,80 +188,12 @@ class TestNetworkingExceptions:
assert data == b'test'
assert repr(error) == '<HTTPError 403: Forbidden>'
@pytest.mark.parametrize('http_error_class', [HTTPError, lambda *args, **kwargs: _CompatHTTPError(HTTPError(*args, **kwargs))])
def test_redirect_http_error(self, http_error_class):
def test_redirect_http_error(self):
response = self.create_response(301)
error = http_error_class(response, redirect_loop=True)
error = HTTPError(response, redirect_loop=True)
assert str(error) == error.msg == 'HTTP Error 301: Moved Permanently (redirect loop detected)'
assert error.reason == 'Moved Permanently'
def test_compat_http_error(self):
response = self.create_response(403)
error = _CompatHTTPError(HTTPError(response))
assert isinstance(error, HTTPError)
assert isinstance(error, urllib.error.HTTPError)
@contextlib.contextmanager
def raises_deprecation_warning():
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always')
yield
if len(w) == 0:
pytest.fail('Did not raise DeprecationWarning')
if len(w) > 1:
pytest.fail(f'Raised multiple warnings: {w}')
if not issubclass(w[-1].category, DeprecationWarning):
pytest.fail(f'Expected DeprecationWarning, got {w[-1].category}')
w.clear()
with raises_deprecation_warning():
assert error.code == 403
with raises_deprecation_warning():
assert error.getcode() == 403
with raises_deprecation_warning():
assert error.hdrs is error.response.headers
with raises_deprecation_warning():
assert error.info() is error.response.headers
with raises_deprecation_warning():
assert error.headers is error.response.headers
with raises_deprecation_warning():
assert error.filename == error.response.url
with raises_deprecation_warning():
assert error.url == error.response.url
with raises_deprecation_warning():
assert error.geturl() == error.response.url
# Passthrough file operations
with raises_deprecation_warning():
assert error.read() == b'test'
with raises_deprecation_warning():
assert not error.closed
with raises_deprecation_warning():
# Technically Response operations are also passed through, which should not be used.
assert error.get_header('test') == 'test'
# Should not raise a warning
error.close()
@pytest.mark.skipif(
platform.python_implementation() == 'PyPy', reason='garbage collector works differently in pypy')
def test_compat_http_error_autoclose(self):
# Compat HTTPError should not autoclose response
response = self.create_response(403)
_CompatHTTPError(HTTPError(response))
assert not response.closed
def test_incomplete_read_error(self):
error = IncompleteRead(4, 3, cause='test')
assert isinstance(error, IncompleteRead)

View File

@ -25,7 +25,7 @@ from socketserver import (
ThreadingTCPServer,
)
from test.helper import http_server_port
from test.helper import http_server_port, verify_address_availability
from yt_dlp.networking import Request
from yt_dlp.networking.exceptions import ProxyError, TransportError
from yt_dlp.socks import (
@ -326,6 +326,7 @@ class TestSocks4Proxy:
def test_ipv4_client_source_address(self, handler, ctx):
with ctx.socks_server(Socks4ProxyHandler) as server_address:
source_address = f'127.0.0.{random.randint(5, 255)}'
verify_address_availability(source_address)
with handler(proxies={'all': f'socks4://{server_address}'},
source_address=source_address) as rh:
response = ctx.socks_info_request(rh)
@ -441,6 +442,7 @@ class TestSocks5Proxy:
def test_ipv4_client_source_address(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler) as server_address:
source_address = f'127.0.0.{random.randint(5, 255)}'
verify_address_availability(source_address)
with handler(proxies={'all': f'socks5://{server_address}'}, source_address=source_address) as rh:
response = ctx.socks_info_request(rh)
assert response['client_address'][0] == source_address

View File

@ -6,6 +6,8 @@ import sys
import pytest
from test.helper import verify_address_availability
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import http.client
@ -227,6 +229,7 @@ class TestWebsSocketRequestHandlerConformance:
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
def test_source_address(self, handler):
source_address = f'127.0.0.{random.randint(5, 255)}'
verify_address_availability(source_address)
with handler(source_address=source_address) as rh:
ws = validate_and_send(rh, Request(self.ws_base_url))
ws.send('source_address')

View File

@ -40,7 +40,6 @@ from .networking.exceptions import (
NoSupportingHandlers,
RequestError,
SSLError,
_CompatHTTPError,
network_exceptions,
)
from .plugins import directories as plugin_directories
@ -4110,8 +4109,6 @@ class YoutubeDL:
'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
'Try using --legacy-server-connect', cause=e) from e
raise
except HTTPError as e: # TODO: Remove in a future release
raise _CompatHTTPError(e) from e
def build_request_director(self, handlers, preferences=None):
logger = _YDLLogger(self)

View File

@ -35,6 +35,7 @@ from .compat_utils import passthrough_module
from ..dependencies import brotli as compat_brotli # noqa: F401
from ..dependencies import websockets as compat_websockets # noqa: F401
from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401
from ..networking.exceptions import HTTPError as compat_HTTPError # noqa: F401
passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode'))
@ -70,7 +71,6 @@ compat_html_parser_HTMLParseError = compat_HTMLParseError
compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser
compat_http_client = http.client
compat_http_server = http.server
compat_HTTPError = urllib.error.HTTPError
compat_input = input
compat_integer_types = (int, )
compat_itertools_count = itertools.count
@ -88,7 +88,7 @@ compat_struct_unpack = struct.unpack
compat_subprocess_get_DEVNULL = lambda: subprocess.DEVNULL
compat_tokenize_tokenize = tokenize.tokenize
compat_urllib_error = urllib.error
compat_urllib_HTTPError = urllib.error.HTTPError
compat_urllib_HTTPError = compat_HTTPError
compat_urllib_parse = urllib.parse
compat_urllib_parse_parse_qs = urllib.parse.parse_qs
compat_urllib_parse_quote = urllib.parse.quote

View File

@ -369,7 +369,10 @@ class HlsFD(FragmentFD):
return output.getvalue().encode()
self.download_and_append_fragments(
ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
if len(fragments) == 1:
self.download_and_append_fragments(ctx, fragments, info_dict)
else:
self.download_and_append_fragments(
ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
else:
return self.download_and_append_fragments(ctx, fragments, info_dict)

View File

@ -47,7 +47,7 @@ from .acast import (
ACastChannelIE,
)
from .acfun import AcFunVideoIE, AcFunBangumiIE
from .adn import ADNIE
from .adn import ADNIE, ADNSeasonIE
from .adobeconnect import AdobeConnectIE
from .adobetv import (
AdobeTVEmbedIE,
@ -93,6 +93,7 @@ from .alura import (
AluraIE,
AluraCourseIE
)
from .amadeustv import AmadeusTVIE
from .amara import AmaraIE
from .amcnetworks import AMCNetworksIE
from .amazon import (
@ -144,6 +145,7 @@ from .arte import (
ArteTVCategoryIE,
)
from .arnes import ArnesIE
from .asobichannel import AsobiChannelIE, AsobiChannelTagURLIE
from .atresplayer import AtresPlayerIE
from .atscaleconf import AtScaleConfEventIE
from .atvat import ATVAtIE
@ -345,6 +347,10 @@ from .chingari import (
ChingariIE,
ChingariUserIE,
)
from .chzzk import (
CHZZKLiveIE,
CHZZKVideoIE,
)
from .cinemax import CinemaxIE
from .cinetecamilano import CinetecaMilanoIE
from .cineverse import (
@ -541,6 +547,7 @@ from .eighttracks import EightTracksIE
from .einthusan import EinthusanIE
from .eitb import EitbIE
from .elemental_tv import ElementalTVIE
from .elementorembed import ElementorEmbedIE
from .elonet import ElonetIE
from .elpais import ElPaisIE
from .eltrecetv import ElTreceTVIE
@ -681,6 +688,10 @@ from .genius import (
GeniusIE,
GeniusLyricsIE,
)
from .getcourseru import (
GetCourseRuPlayerIE,
GetCourseRuIE
)
from .gettr import (
GettrIE,
GettrStreamingIE,
@ -788,6 +799,7 @@ from .iheart import (
IHeartRadioIE,
IHeartRadioPodcastIE,
)
from .ilpost import IlPostIE
from .iltalehti import IltalehtiIE
from .imdb import (
ImdbIE,
@ -900,6 +912,7 @@ from .koo import KooIE
from .kth import KTHIE
from .krasview import KrasViewIE
from .ku6 import Ku6IE
from .kukululive import KukuluLiveIE
from .kusi import KUSIIE
from .kuwo import (
KuwoIE,
@ -997,7 +1010,7 @@ from .lynda import (
)
from .maariv import MaarivIE
from .magellantv import MagellanTVIE
from .magentamusik360 import MagentaMusik360IE
from .magentamusik import MagentaMusikIE
from .mailru import (
MailRuIE,
MailRuMusicIE,
@ -1099,6 +1112,7 @@ from .motherless import (
MotherlessIE,
MotherlessGroupIE,
MotherlessGalleryIE,
MotherlessUploaderIE,
)
from .motorsport import MotorsportIE
from .moviepilot import MoviepilotIE
@ -1125,6 +1139,11 @@ from .musicdex import (
MusicdexArtistIE,
MusicdexPlaylistIE,
)
from .mx3 import (
Mx3IE,
Mx3NeoIE,
Mx3VolksmusikIE,
)
from .mxplayer import (
MxplayerIE,
MxplayerShowIE,
@ -1264,6 +1283,7 @@ from .niconicochannelplus import (
NiconicoChannelPlusChannelLivesIE,
)
from .ninegag import NineGagIE
from .ninenews import NineNewsIE
from .ninenow import NineNowIE
from .nintendo import NintendoIE
from .nitter import NitterIE
@ -1580,6 +1600,7 @@ from .redbulltv import (
RedBullIE,
)
from .reddit import RedditIE
from .redge import RedCDNLivxIE
from .redgifs import (
RedGifsIE,
RedGifsSearchIE,
@ -1595,7 +1616,10 @@ from .restudy import RestudyIE
from .reuters import ReutersIE
from .reverbnation import ReverbNationIE
from .rheinmaintv import RheinMainTVIE
from .rinsefm import RinseFMIE
from .rinsefm import (
RinseFMIE,
RinseFMArtistPlaylistIE,
)
from .rmcdecouverte import RMCDecouverteIE
from .rockstargames import RockstarGamesIE
from .rokfin import (
@ -1711,6 +1735,7 @@ from .scte import (
)
from .scrolller import ScrolllerIE
from .seeker import SeekerIE
from .sejmpl import SejmIE
from .senalcolombia import SenalColombiaLiveIE
from .senategov import SenateISVPIE, SenateGovIE
from .sendtonews import SendtoNewsIE
@ -2003,6 +2028,7 @@ from .trovo import (
TrovoChannelClipIE,
)
from .trtcocuk import TrtCocukVideoIE
from .trtworld import TrtWorldIE
from .trueid import TrueIDIE
from .trunews import TruNewsIE
from .truth import TruthIE

View File

@ -92,6 +92,8 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
class AbemaTVBaseIE(InfoExtractor):
_NETRC_MACHINE = 'abematv'
_USERTOKEN = None
_DEVICE_ID = None
_MEDIATOKEN = None
@ -136,11 +138,15 @@ class AbemaTVBaseIE(InfoExtractor):
if self._USERTOKEN:
return self._USERTOKEN
add_opener(self._downloader, AbemaLicenseHandler(self))
username, _ = self._get_login_info()
AbemaTVBaseIE._USERTOKEN = username and self.cache.load(self._NETRC_MACHINE, username)
auth_cache = username and self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19')
AbemaTVBaseIE._USERTOKEN = auth_cache and auth_cache.get('usertoken')
if AbemaTVBaseIE._USERTOKEN:
# try authentication with locally stored token
try:
AbemaTVBaseIE._DEVICE_ID = auth_cache.get('device_id')
self._get_media_token(True)
return
except ExtractorError as e:
@ -159,7 +165,6 @@ class AbemaTVBaseIE(InfoExtractor):
})
AbemaTVBaseIE._USERTOKEN = user_data['token']
add_opener(self._downloader, AbemaLicenseHandler(self))
return self._USERTOKEN
def _get_media_token(self, invalidate=False, to_show=True):
@ -181,6 +186,37 @@ class AbemaTVBaseIE(InfoExtractor):
return self._MEDIATOKEN
def _perform_login(self, username, password):
self._get_device_token()
if self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19') and self._get_media_token():
self.write_debug('Skipping logging in')
return
if '@' in username: # don't strictly check if it's email address or not
ep, method = 'user/email', 'email'
else:
ep, method = 'oneTimePassword', 'userId'
login_response = self._download_json(
f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
data=json.dumps({
method: username,
'password': password
}).encode('utf-8'), headers={
'Authorization': f'bearer {self._get_device_token()}',
'Origin': 'https://abema.tv',
'Referer': 'https://abema.tv/',
'Content-Type': 'application/json',
})
AbemaTVBaseIE._USERTOKEN = login_response['token']
self._get_media_token(True)
auth_cache = {
'device_id': AbemaTVBaseIE._DEVICE_ID,
'usertoken': AbemaTVBaseIE._USERTOKEN,
}
self.cache.store(self._NETRC_MACHINE, username, auth_cache)
def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'):
return self._download_json(
f'https://api.abema.io/{endpoint}', video_id, query=query or {},
@ -204,7 +240,6 @@ class AbemaTVBaseIE(InfoExtractor):
class AbemaTVIE(AbemaTVBaseIE):
_VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
_NETRC_MACHINE = 'abematv'
_TESTS = [{
'url': 'https://abema.tv/video/episode/194-25_s2_p1',
'info_dict': {
@ -253,33 +288,6 @@ class AbemaTVIE(AbemaTVBaseIE):
}]
_TIMETABLE = None
def _perform_login(self, username, password):
self._get_device_token()
if self.cache.load(self._NETRC_MACHINE, username) and self._get_media_token():
self.write_debug('Skipping logging in')
return
if '@' in username: # don't strictly check if it's email address or not
ep, method = 'user/email', 'email'
else:
ep, method = 'oneTimePassword', 'userId'
login_response = self._download_json(
f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
data=json.dumps({
method: username,
'password': password
}).encode('utf-8'), headers={
'Authorization': f'bearer {self._get_device_token()}',
'Origin': 'https://abema.tv',
'Referer': 'https://abema.tv/',
'Content-Type': 'application/json',
})
AbemaTVBaseIE._USERTOKEN = login_response['token']
self._get_media_token(True)
self.cache.store(self._NETRC_MACHINE, username, AbemaTVBaseIE._USERTOKEN)
def _real_extract(self, url):
# starting download using infojson from this extractor is undefined behavior,
# and never be fixed in the future; you must trigger downloads by directly specifying URL.

View File

@ -19,15 +19,35 @@ from ..utils import (
long_to_bytes,
pkcs1pad,
strip_or_none,
str_or_none,
try_get,
unified_strdate,
urlencode_postdata,
)
from ..utils.traversal import traverse_obj
class ADNIE(InfoExtractor):
class ADNBaseIE(InfoExtractor):
IE_DESC = 'Animation Digital Network'
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
_NETRC_MACHINE = 'animationdigitalnetwork'
_BASE = 'animationdigitalnetwork.fr'
_API_BASE_URL = f'https://gw.api.{_BASE}/'
_PLAYER_BASE_URL = f'{_API_BASE_URL}player/'
_HEADERS = {}
_LOGIN_ERR_MESSAGE = 'Unable to log in'
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
_POS_ALIGN_MAP = {
'start': 1,
'end': 3,
}
_LINE_ALIGN_MAP = {
'middle': 8,
'end': 4,
}
class ADNIE(ADNBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/[^/?#]+/(?P<id>\d+)'
_TESTS = [{
'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
'md5': '1c9ef066ceb302c86f80c2b371615261',
@ -44,29 +64,35 @@ class ADNIE(InfoExtractor):
'season_number': 1,
'episode': 'À ce soir !',
'episode_number': 1,
'thumbnail': str,
'season': 'Season 1',
},
'skip': 'Only available in region (FR, ...)',
'skip': 'Only available in French and German speaking Europe',
}, {
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
'only_matching': True,
}, {
'url': 'https://animationdigitalnetwork.de/video/the-eminence-in-shadow/23550-folge-1',
'md5': '5c5651bf5791fa6fcd7906012b9d94e8',
'info_dict': {
'id': '23550',
'ext': 'mp4',
'episode_number': 1,
'duration': 1417,
'release_date': '20231004',
'series': 'The Eminence in Shadow',
'season_number': 2,
'episode': str,
'title': str,
'thumbnail': str,
'season': 'Season 2',
'comment_count': int,
'average_rating': float,
'description': str,
},
# 'skip': 'Only available in French and German speaking Europe',
}]
_NETRC_MACHINE = 'animationdigitalnetwork'
_BASE = 'animationdigitalnetwork.fr'
_API_BASE_URL = 'https://gw.api.' + _BASE + '/'
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
_HEADERS = {}
_LOGIN_ERR_MESSAGE = 'Unable to log in'
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
_POS_ALIGN_MAP = {
'start': 1,
'end': 3,
}
_LINE_ALIGN_MAP = {
'middle': 8,
'end': 4,
}
def _get_subtitles(self, sub_url, video_id):
if not sub_url:
return None
@ -116,6 +142,8 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
if sub_lang == 'vostf':
sub_lang = 'fr'
elif sub_lang == 'vostde':
sub_lang = 'de'
subtitles.setdefault(sub_lang, []).extend([{
'ext': 'json',
'data': json.dumps(sub),
@ -147,7 +175,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
self.report_warning(message or self._LOGIN_ERR_MESSAGE)
def _real_extract(self, url):
video_id = self._match_id(url)
lang, video_id = self._match_valid_url(url).group('lang', 'id')
video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
player = self._download_json(
video_base_url + 'configuration', video_id,
@ -162,7 +190,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
token = self._download_json(
user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
video_id, 'Downloading access token', headers={
'x-player-refresh-token': user['refreshToken']
'X-Player-Refresh-Token': user['refreshToken'],
}, data=b'')['token']
links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
@ -184,7 +212,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
try:
links_data = self._download_json(
links_url, video_id, 'Downloading links JSON metadata', headers={
'X-Player-Token': authorization
'X-Player-Token': authorization,
'X-Target-Distribution': lang,
**self._HEADERS
}, query={
'freeWithAds': 'true',
'adaptive': 'false',
@ -232,6 +262,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
if format_id == 'vf':
for f in m3u8_formats:
f['language'] = 'fr'
elif format_id == 'vde':
for f in m3u8_formats:
f['language'] = 'de'
formats.extend(m3u8_formats)
video = (self._download_json(
@ -255,3 +288,40 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
'average_rating': float_or_none(video.get('rating') or metas.get('rating')),
'comment_count': int_or_none(video.get('commentsCount')),
}
class ADNSeasonIE(ADNBaseIE):
    # Playlist extractor for a whole show page on Animation Digital Network
    # (French/German site); yields one ADNIE result per episode.
    _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/(?P<id>[^/?#]+)/?(?:$|[#?])'
    _TESTS = [{
        'url': 'https://animationdigitalnetwork.fr/video/tokyo-mew-mew-new',
        'playlist_count': 12,
        'info_dict': {
            'id': '911',
            'title': 'Tokyo Mew Mew New',
        },
        # 'skip': 'Only available in French and German speaking Europe',
    }]

    def _real_extract(self, url):
        """Expand a show slug into a playlist of per-episode ADNIE results."""
        lang, video_show_slug = self._match_valid_url(url).group('lang', 'id')
        show = self._download_json(
            f'{self._API_BASE_URL}show/{video_show_slug}/', video_show_slug,
            'Downloading show JSON metadata', headers=self._HEADERS)['show']
        show_id = str(show['id'])
        episodes = self._download_json(
            f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug,
            'Downloading episode list', headers={
                # language-specific catalogue; same header ADNIE sends
                'X-Target-Distribution': lang,
                **self._HEADERS
            }, query={
                'order': 'asc',
                'limit': '-1',  # -1 disables pagination: fetch all episodes at once
            })

        def entries():
            # lazily build episode URL results so playlist items can stream
            for episode_id in traverse_obj(episodes, ('videos', ..., 'id', {str_or_none})):
                yield self.url_result(
                    f'https://animationdigitalnetwork.{lang}/video/{video_show_slug}/{episode_id}',
                    ADNIE, episode_id)

        return self.playlist_result(entries(), show_id, show.get('title'))

View File

@ -93,7 +93,7 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
resource = self._get_mvpd_resource(
requestor_id, theplatform_metadata['title'],
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
theplatform_metadata['ratings'][0]['rating'])
traverse_obj(theplatform_metadata, ('ratings', 0, 'rating')))
auth = self._extract_mvpd_auth(
url, video_id, requestor_id, resource)
info.update(self._extract_aen_smil(media_url, video_id, auth))

View File

@ -0,0 +1,77 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
parse_iso8601,
url_or_none,
)
from ..utils.traversal import traverse_obj
class AmadeusTVIE(InfoExtractor):
    # Extractor for amadeus.tv library pages. The page ID (kept as display_id)
    # maps, via the page's Nuxt payload, to a Tencent Cloud VOD video ID that
    # is used for the actual playback metadata.
    _VALID_URL = r'https?://(?:www\.)?amadeus\.tv/library/(?P<id>[\da-f]+)'
    _TESTS = [{
        'url': 'http://www.amadeus.tv/library/65091a87ff85af59d9fc54c3',
        'info_dict': {
            'id': '5576678021301411311',
            'ext': 'mp4',
            'title': 'Jieon Park - 第五届珠海莫扎特国际青少年音乐周小提琴C组第三轮',
            'thumbnail': 'http://1253584441.vod2.myqcloud.com/a0046a27vodtransbj1253584441/7db4af535576678021301411311/coverBySnapshot_10_0.jpg',
            'duration': 1264.8,
            'upload_date': '20230918',
            'timestamp': 1695034800,
            'display_id': '65091a87ff85af59d9fc54c3',
            'view_count': int,
            'description': 'md5:a0357b9c215489e2067cbae0b777bb95',
        }
    }]

    def _real_extract(self, url):
        """Resolve the page to its Tencent VOD entry and build HTTP formats."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        nuxt_data = self._search_nuxt_data(webpage, display_id, traverse=('fetch', '0'))
        video_id = traverse_obj(nuxt_data, ('item', 'video', {str}))

        if not video_id:
            raise ExtractorError('Unable to extract actual video ID')

        video_data = self._download_json(
            f'http://playvideo.qcloud.com/getplayinfo/v2/1253584441/{video_id}',
            video_id, headers={'Referer': 'http://www.amadeus.tv/'})

        formats = []
        # sourceVideo is the original upload; transcodeList holds re-encoded variants
        for video in traverse_obj(video_data, ('videoInfo', ('sourceVideo', ('transcodeList', ...)), {dict})):
            if not url_or_none(video.get('url')):
                continue
            formats.append({
                **traverse_obj(video, {
                    'url': 'url',
                    'format_id': ('definition', {lambda x: f'http-{x or "0"}'}),
                    'width': ('width', {int_or_none}),
                    'height': ('height', {int_or_none}),
                    'filesize': (('totalSize', 'size'), {int_or_none}),
                    'vcodec': ('videoStreamList', 0, 'codec'),
                    'acodec': ('audioStreamList', 0, 'codec'),
                    'fps': ('videoStreamList', 0, 'fps', {float_or_none}),
                }, get_all=False),
                # same Referer as the metadata request — presumably required for
                # downloads too; TODO confirm against the CDN's behavior
                'http_headers': {'Referer': 'http://www.amadeus.tv/'},
            })

        return {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            # prefer Tencent VOD metadata ...
            **traverse_obj(video_data, {
                'title': ('videoInfo', 'basicInfo', 'name', {str}),
                'thumbnail': ('coverInfo', 'coverUrl', {url_or_none}),
                'duration': ('videoInfo', 'sourceVideo', ('floatDuration', 'duration'), {float_or_none}),
            }, get_all=False),
            # ... then fall back to / extend with the page's own Nuxt fields
            **traverse_obj(nuxt_data, ('item', {
                'title': (('title', 'title_en', 'title_cn'), {str}),
                'description': (('description', 'description_en', 'description_cn'), {str}),
                'timestamp': ('date', {parse_iso8601}),
                'view_count': ('view', {int_or_none}),
            }), get_all=False),
        }

View File

@ -70,7 +70,24 @@ class ArteTVIE(ArteTVBaseIE):
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/q82dTTfyuCXupPsGxXsd7B/940x530',
'upload_date': '20230930',
'ext': 'mp4',
}
},
}, {
'url': 'https://www.arte.tv/de/videos/085374-003-A/im-hohen-norden-geboren/',
'info_dict': {
'id': '085374-003-A',
'ext': 'mp4',
'description': 'md5:ab79ec7cc472a93164415b4e4916abf9',
'timestamp': 1702872000,
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/TnyHBfPxv3v2GEY3suXGZP/940x530',
'duration': 2594,
'title': 'Die kurze Zeit der Jugend',
'alt_title': 'Im hohen Norden geboren',
'upload_date': '20231218',
'subtitles': {
'fr': 'mincount:1',
'fr-acc': 'mincount:1',
},
},
}]
_GEO_BYPASS = True
@ -121,6 +138,16 @@ class ArteTVIE(ArteTVBaseIE):
),
}
@staticmethod
def _fix_accessible_subs_locale(subs):
updated_subs = {}
for lang, sub_formats in subs.items():
for format in sub_formats:
if format.get('url', '').endswith('-MAL.m3u8'):
lang += '-acc'
updated_subs.setdefault(lang, []).append(format)
return updated_subs
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id')
@ -174,6 +201,7 @@ class ArteTVIE(ArteTVBaseIE):
secondary_formats.extend(fmts)
else:
formats.extend(fmts)
subs = self._fix_accessible_subs_locale(subs)
self._merge_subtitles(subs, target=subtitles)
elif stream['protocol'] in ('HTTPS', 'RTMP'):

View File

@ -0,0 +1,168 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
merge_dicts,
parse_iso8601,
url_or_none,
)
from ..utils.traversal import traverse_obj
class AsobiChannelBaseIE(InfoExtractor):
    # API key for the microCMS backend serving video/channel metadata
    _MICROCMS_HEADER = {'X-MICROCMS-API-KEY': 'qRaKehul9AHU8KtL0dnq1OCLKnFec6yrbcz3'}

    def _extract_info(self, metadata):
        """Map common microCMS media fields onto yt-dlp info-dict keys."""
        return traverse_obj(metadata, {
            'id': ('id', {str}),
            'title': ('title', {str}),
            'description': ('body', {clean_html}),
            'thumbnail': ('contents', 'video_thumb', 'url', {url_or_none}),
            'timestamp': ('publishedAt', {parse_iso8601}),
            'modified_timestamp': ('updatedAt', {parse_iso8601}),
            'channel': ('channel', 'name', {str}),
            'channel_id': ('channel', 'id', {str}),
        })
class AsobiChannelIE(AsobiChannelBaseIE):
    """Extractor for single watch pages on ASOBI CHANNEL (asobistore.jp)."""
    IE_NAME = 'asobichannel'
    IE_DESC = 'ASOBI CHANNEL'
    _VALID_URL = r'https?://asobichannel\.asobistore\.jp/watch/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://asobichannel.asobistore.jp/watch/1ypp48qd32p',
        'md5': '39df74e872afe032c4eb27b89144fc92',
        'info_dict': {
            'id': '1ypp48qd32p',
            'ext': 'mp4',
            'title': 'アイドルマスター ミリオンライブ! 765プロch 原っぱ通信 #1',
            'description': 'md5:b930bd2199c9b2fd75951ce4aaa7efd2',
            'thumbnail': 'https://images.microcms-assets.io/assets/d2420de4b9194e11beb164f99edb1f95/a8e6f84119f54eb9ab4ce16729239905/%E3%82%B5%E3%83%A0%E3%83%8D%20(1).png',
            'timestamp': 1697098247,
            'upload_date': '20231012',
            'modified_timestamp': 1698381162,
            'modified_date': '20231027',
            'channel': 'アイドルマスター',
            'channel_id': 'idolmaster',
        },
    }, {
        'url': 'https://asobichannel.asobistore.jp/watch/redigiwnjzqj',
        'md5': '229fa8fb5c591c75ce8c37a497f113f6',
        'info_dict': {
            'id': 'redigiwnjzqj',
            'ext': 'mp4',
            'title': '【おまけ放送】アイドルマスター ミリオンライブ! 765プロch 原っぱ通信 #1',
            'description': 'md5:7d9cd35fb54425a6967822bd564ea2d9',
            'thumbnail': 'https://images.microcms-assets.io/assets/d2420de4b9194e11beb164f99edb1f95/20e5c1d6184242eebc2512a5dec59bf0/P1_%E5%8E%9F%E3%81%A3%E3%81%B1%E3%82%B5%E3%83%A0%E3%83%8D.png',
            'modified_timestamp': 1697797125,
            'modified_date': '20231020',
            'timestamp': 1697261769,
            'upload_date': '20231014',
            'channel': 'アイドルマスター',
            'channel_id': 'idolmaster',
        },
    }]

    # Authorization header for the survapi playback backend; populated once
    # per run in _real_initialize
    _survapi_header = None

    def _real_initialize(self):
        token = self._download_json(
            'https://asobichannel-api.asobistore.jp/api/v1/vspf/token', None,
            note='Retrieving API token')
        self._survapi_header = {'Authorization': f'Bearer {token}'}

    def _process_vod(self, video_id, metadata):
        """Return format info for an on-demand (VOD) entry."""
        content_id = metadata['contents']['video_id']

        vod_data = self._download_json(
            f'https://survapi.channel.or.jp/proxy/v1/contents/{content_id}/get_by_cuid', video_id,
            headers=self._survapi_header, note='Downloading vod data')

        return {
            'formats': self._extract_m3u8_formats(vod_data['ex_content']['streaming_url'], video_id),
        }

    def _process_live(self, video_id, metadata):
        """Return format/status info for a live (or upcoming) entry."""
        content_id = metadata['contents']['video_id']
        event_data = self._download_json(
            f'https://survapi.channel.or.jp/ex/events/{content_id}?embed=channel', video_id,
            headers=self._survapi_header, note='Downloading event data')

        player_type = traverse_obj(event_data, ('data', 'Player_type', {str}))
        if player_type == 'poster':
            # only a poster image is served before the stream starts
            self.raise_no_formats('Live event has not yet started', expected=True)
            live_status = 'is_upcoming'
            formats = []
        elif player_type == 'player':
            live_status = 'is_live'
            formats = self._extract_m3u8_formats(
                event_data['data']['Channel']['Custom_live_url'], video_id, live=True)
        else:
            # FIX: this was a plain string literal, so the message printed the
            # literal text "{player_type!r}" instead of the actual value
            raise ExtractorError(f'Unsupported player type {player_type!r}')

        return {
            'release_timestamp': traverse_obj(metadata, ('period', 'start', {parse_iso8601})),
            'live_status': live_status,
            'formats': formats,
        }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        metadata = self._download_json(
            f'https://channel.microcms.io/api/v1/media/{video_id}', video_id,
            headers=self._MICROCMS_HEADER)

        info = self._extract_info(metadata)

        video_type = traverse_obj(metadata, ('contents', 'video_type', 0, {str}))
        if video_type == 'VOD':
            return merge_dicts(info, self._process_vod(video_id, metadata))
        if video_type == 'LIVE':
            return merge_dicts(info, self._process_live(video_id, metadata))

        raise ExtractorError(f'Unexpected video type {video_type!r}')
class AsobiChannelTagURLIE(AsobiChannelBaseIE):
    """Playlist extractor for ASOBI CHANNEL tag pages."""
    IE_NAME = 'asobichannel:tag'
    IE_DESC = 'ASOBI CHANNEL'
    _VALID_URL = r'https?://asobichannel\.asobistore\.jp/tag/(?P<id>[a-z0-9-_]+)'
    _TESTS = [{
        'url': 'https://asobichannel.asobistore.jp/tag/bjhh-nbcja',
        'info_dict': {
            'id': 'bjhh-nbcja',
            'title': 'アイドルマスター ミリオンライブ! 765プロch 原っぱ通信',
        },
        'playlist_mincount': 16,
    }, {
        'url': 'https://asobichannel.asobistore.jp/tag/hvm5qw3c6od',
        'info_dict': {
            'id': 'hvm5qw3c6od',
            'title': 'アイマスMOIW2023ラジオ',
        },
        'playlist_mincount': 13,
    }]

    def _real_extract(self, url):
        tag_id = self._match_id(url)
        webpage = self._download_webpage(url, tag_id)

        # The playlist title is only exposed through the Next.js page props
        nextjs_data = self._search_nextjs_data(webpage, tag_id, fatal=False)
        title = traverse_obj(nextjs_data, ('props', 'pageProps', 'data', 'name', {str}))

        media = self._download_json(
            f'https://channel.microcms.io/api/v1/media?limit=999&filters=(tag[contains]{tag_id})',
            tag_id, headers=self._MICROCMS_HEADER)

        def entries():
            for item in traverse_obj(media, ('contents', lambda _, v: v['id'])):
                entry = {
                    '_type': 'url',
                    'url': f'https://asobichannel.asobistore.jp/watch/{item["id"]}',
                    'ie_key': AsobiChannelIE.ie_key(),
                }
                # Per-entry metadata comes straight from the listing payload
                entry.update(self._extract_info(item))
                yield entry

        return self.playlist_result(entries(), tag_id, title)

View File

@ -18,6 +18,7 @@ from ..utils import (
OnDemandPagedList,
bool_or_none,
clean_html,
determine_ext,
filter_dict,
float_or_none,
format_field,
@ -1621,6 +1622,7 @@ class BiliBiliPlayerIE(InfoExtractor):
class BiliIntlBaseIE(InfoExtractor):
_API_URL = 'https://api.bilibili.tv/intl/gateway'
_NETRC_MACHINE = 'biliintl'
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
def _call_api(self, endpoint, *args, **kwargs):
json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
@ -1658,19 +1660,34 @@ class BiliIntlBaseIE(InfoExtractor):
'aid': aid,
})) or {}
subtitles = {}
for sub in sub_json.get('subtitles') or []:
sub_url = sub.get('url')
if not sub_url:
continue
sub_data = self._download_json(
sub_url, ep_id or aid, errnote='Unable to download subtitles', fatal=False,
note='Downloading subtitles%s' % f' for {sub["lang"]}' if sub.get('lang') else '')
if not sub_data:
continue
subtitles.setdefault(sub.get('lang_key', 'en'), []).append({
'ext': 'srt',
'data': self.json2srt(sub_data)
})
fetched_urls = set()
for sub in traverse_obj(sub_json, (('subtitles', 'video_subtitle'), ..., {dict})):
for url in traverse_obj(sub, ((None, 'ass', 'srt'), 'url', {url_or_none})):
if url in fetched_urls:
continue
fetched_urls.add(url)
sub_ext = determine_ext(url)
sub_lang = sub.get('lang_key') or 'en'
if sub_ext == 'ass':
subtitles.setdefault(sub_lang, []).append({
'ext': 'ass',
'url': url,
})
elif sub_ext == 'json':
sub_data = self._download_json(
url, ep_id or aid, fatal=False,
note=f'Downloading subtitles{format_field(sub, "lang", " for %s")} ({sub_lang})',
errnote='Unable to download subtitles')
if sub_data:
subtitles.setdefault(sub_lang, []).append({
'ext': 'srt',
'data': self.json2srt(sub_data),
})
else:
self.report_warning('Unexpected subtitle extension', ep_id or aid)
return subtitles
def _get_formats(self, *, ep_id=None, aid=None):
@ -1716,7 +1733,9 @@ class BiliIntlBaseIE(InfoExtractor):
def _parse_video_metadata(self, video_data):
return {
'title': video_data.get('title_display') or video_data.get('title'),
'description': video_data.get('desc'),
'thumbnail': video_data.get('cover'),
'timestamp': unified_timestamp(video_data.get('formatted_pub_date')),
'episode_number': int_or_none(self._search_regex(
r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
}
@ -1813,17 +1832,6 @@ class BiliIntlIE(BiliIntlBaseIE):
'episode_number': 140,
},
'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
}, {
'url': 'https://www.bilibili.tv/en/video/2041863208',
'info_dict': {
'id': '2041863208',
'ext': 'mp4',
'timestamp': 1670874843,
'description': 'Scheduled for April 2023.\nStudio: ufotable',
'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
'upload_date': '20221212',
'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
},
}, {
# episode comment extraction
'url': 'https://www.bilibili.tv/en/play/34580/340317',
@ -1864,9 +1872,9 @@ class BiliIntlIE(BiliIntlBaseIE):
'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
'timestamp': 1667891924,
'upload_date': '20221108',
'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan - Bstation',
'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan',
'comment_count': int,
'thumbnail': 'https://pic.bstarstatic.com/ugc/f6c363659efd2eabe5683fbb906b1582.jpg',
'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
},
'params': {
'getcomments': True
@ -1929,10 +1937,12 @@ class BiliIntlIE(BiliIntlBaseIE):
# XXX: webpage metadata may not accurate, it just used to not crash when video_data not found
return merge_dicts(
self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id, fatal=False), {
'title': self._html_search_meta('og:title', webpage),
'description': self._html_search_meta('og:description', webpage)
})
self._parse_video_metadata(video_data), {
'title': get_element_by_class(
'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
'description': get_element_by_class(
'bstar-meta__desc', webpage) or self._html_search_meta('og:description'),
}, self._search_json_ld(webpage, video_id, default={}))
def _get_comments_reply(self, root_id, next_id=0, display_id=None):
comment_api_raw_data = self._download_json(
@ -2020,7 +2030,8 @@ class BiliIntlIE(BiliIntlBaseIE):
'formats': self._get_formats(ep_id=ep_id, aid=aid),
'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
'chapters': chapters,
'__post_extractor': self.extract_comments(video_id, ep_id)
'__post_extractor': self.extract_comments(video_id, ep_id),
'http_headers': self._HEADERS,
}

139
yt_dlp/extractor/chzzk.py Normal file
View File

@ -0,0 +1,139 @@
import functools
from .common import InfoExtractor
from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
parse_iso8601,
url_or_none,
)
from ..utils.traversal import traverse_obj
class CHZZKLiveIE(InfoExtractor):
    """Extractor for live broadcasts on chzzk.naver.com."""
    IE_NAME = 'chzzk:live'
    _VALID_URL = r'https?://chzzk\.naver\.com/live/(?P<id>[\da-f]+)'
    _TESTS = [{
        'url': 'https://chzzk.naver.com/live/c68b8ef525fb3d2fa146344d84991753',
        'info_dict': {
            'id': 'c68b8ef525fb3d2fa146344d84991753',
            'ext': 'mp4',
            'title': str,
            'channel': '진짜도현',
            'channel_id': 'c68b8ef525fb3d2fa146344d84991753',
            'channel_is_verified': False,
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1705510344,
            'upload_date': '20240117',
            'live_status': 'is_live',
            'view_count': int,
            'concurrent_view_count': int,
        },
        'skip': 'The channel is not currently live',
    }]

    def _real_extract(self, url):
        channel_id = self._match_id(url)
        live_detail = self._download_json(
            f'https://api.chzzk.naver.com/service/v2/channels/{channel_id}/live-detail', channel_id,
            note='Downloading channel info', errnote='Unable to download channel info')['content']

        if live_detail.get('status') == 'CLOSE':
            raise ExtractorError('The channel is not currently live', expected=True)

        # Playback details are delivered as a JSON string inside the JSON response
        playback = self._parse_json(live_detail['livePlaybackJson'], channel_id)

        # Snapshot thumbnails: one URL template with a "{type}" placeholder
        # that is substituted with each advertised width
        thumbnails = []
        template = traverse_obj(
            playback, ('thumbnail', 'snapshotThumbnailTemplate', {url_or_none}))
        if template and '{type}' in template:
            thumbnails = [{
                'id': width,
                'url': template.replace('{type}', width),
                'width': int_or_none(width),
            } for width in traverse_obj(playback, ('thumbnail', 'types', ..., {str}))]

        formats, subtitles = [], {}
        for media in traverse_obj(playback, ('media', lambda _, v: url_or_none(v['path']))):
            low_latency = media.get('mediaId') == 'LLHLS'
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                media['path'], channel_id, 'mp4', fatal=False, live=True,
                m3u8_id='hls-ll' if low_latency else 'hls')
            for fmt in fmts:
                # De-prioritize low-latency variants and the low-bitrate audio rendition
                if low_latency:
                    fmt['source_preference'] = -2
                if '-afragalow.stream-audio.stream' in fmt['format_id']:
                    fmt['quality'] = -2
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        return {
            'id': channel_id,
            'is_live': True,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            **traverse_obj(live_detail, {
                'title': ('liveTitle', {str}),
                'timestamp': ('openDate', {functools.partial(parse_iso8601, delimiter=' ')}),
                'concurrent_view_count': ('concurrentUserCount', {int_or_none}),
                'view_count': ('accumulateCount', {int_or_none}),
                'channel': ('channel', 'channelName', {str}),
                'channel_id': ('channel', 'channelId', {str}),
                'channel_is_verified': ('channel', 'verifiedMark', {bool}),
            }),
        }
class CHZZKVideoIE(InfoExtractor):
    """Extractor for recorded videos on chzzk.naver.com."""
    IE_NAME = 'chzzk:video'
    _VALID_URL = r'https?://chzzk\.naver\.com/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://chzzk.naver.com/video/1754',
        'md5': 'b0c0c1bb888d913b93d702b1512c7f06',
        'info_dict': {
            'id': '1754',
            'ext': 'mp4',
            'title': '치지직 테스트 방송',
            'channel': '침착맨',
            'channel_id': 'bb382c2c0cc9fa7c86ab3b037fb5799c',
            'channel_is_verified': False,
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 15577,
            'timestamp': 1702970505.417,
            'upload_date': '20231219',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        meta = self._download_json(
            f'https://api.chzzk.naver.com/service/v2/videos/{video_id}', video_id,
            note='Downloading video info', errnote='Unable to download video info')['content']

        # Playback manifests live on a separate Naver player service and are
        # keyed by the API-provided videoId/inKey pair
        playback_query = {
            'key': meta['inKey'],
            'env': 'real',
            'lc': 'en_US',
            'cpl': 'en_US',
        }
        formats, subtitles = self._extract_mpd_formats_and_subtitles(
            f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{meta["videoId"]}', video_id,
            query=playback_query, note='Downloading video playback',
            errnote='Unable to download video playback')

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **traverse_obj(meta, {
                'title': ('videoTitle', {str}),
                'thumbnail': ('thumbnailImageUrl', {url_or_none}),
                'timestamp': ('publishDateAt', {functools.partial(float_or_none, scale=1000)}),
                'view_count': ('readCount', {int_or_none}),
                'duration': ('duration', {int_or_none}),
                'channel': ('channel', 'channelName', {str}),
                'channel_id': ('channel', 'channelId', {str}),
                'channel_is_verified': ('channel', 'verifiedMark', {bool}),
            }),
        }

View File

@ -46,15 +46,18 @@ class CloudflareStreamIE(InfoExtractor):
video_id.split('.')[1] + '==='), video_id)['sub']
manifest_base_url = base_url + 'manifest/video.'
formats = self._extract_m3u8_formats(
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
manifest_base_url + 'm3u8', video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False)
formats.extend(self._extract_mpd_formats(
manifest_base_url + 'mpd', video_id, mpd_id='dash', fatal=False))
fmts, subs = self._extract_mpd_formats_and_subtitles(
manifest_base_url + 'mpd', video_id, mpd_id='dash', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
return {
'id': video_id,
'title': video_id,
'thumbnail': base_url + 'thumbnails/thumbnail.jpg',
'formats': formats,
'subtitles': subtitles,
}

View File

@ -0,0 +1,72 @@
import re
from .common import InfoExtractor
from .vimeo import VimeoIE
from .youtube import YoutubeIE
from ..utils import unescapeHTML, url_or_none
from ..utils.traversal import traverse_obj
class ElementorEmbedIE(InfoExtractor):
    """Embed extractor for Elementor video/video-playlist widgets found in webpages."""
    _VALID_URL = False
    _WEBPAGE_TESTS = [{
        'url': 'https://capitaltv.cy/2023/12/14/υγεια-και-ζωη-14-12-2023-δρ-ξενια-κωσταντινιδο/',
        'info_dict': {
            'id': 'KgzuxwuQwM4',
            'ext': 'mp4',
            'title': 'ΥΓΕΙΑ ΚΑΙ ΖΩΗ 14 12 2023 ΔΡ ΞΕΝΙΑ ΚΩΣΤΑΝΤΙΝΙΔΟΥ',
            'thumbnail': 'https://i.ytimg.com/vi/KgzuxwuQwM4/maxresdefault.jpg',
            'playable_in_embed': True,
            'tags': 'count:16',
            'like_count': int,
            'channel': 'Capital TV Cyprus',
            'channel_id': 'UCR8LwVKTLGEXt4ZAErpCMrg',
            'availability': 'public',
            'description': 'md5:7a3308a22881aea4612358c4ba121f77',
            'duration': 2891,
            'upload_date': '20231214',
            'uploader_id': '@capitaltvcyprus6389',
            'live_status': 'not_live',
            'channel_url': 'https://www.youtube.com/channel/UCR8LwVKTLGEXt4ZAErpCMrg',
            'uploader_url': 'https://www.youtube.com/@capitaltvcyprus6389',
            'uploader': 'Capital TV Cyprus',
            'age_limit': 0,
            'categories': ['News & Politics'],
            'view_count': int,
            'channel_follower_count': int,
        },
    }, {
        'url': 'https://elementor.com/academy/theme-builder-collection/?playlist=76011151&video=9e59909',
        'info_dict': {
            'id': '?playlist=76011151&video=9e59909',
            'title': 'Theme Builder Collection - Academy',
            'age_limit': 0,
            'timestamp': 1702196984.0,
            'upload_date': '20231210',
            'description': 'md5:7f52c52715ee9e54fd7f82210511673d',
            'thumbnail': 'https://elementor.com/academy/wp-content/uploads/2021/07/Theme-Builder-1.png',
        },
        'playlist_count': 11,
        'params': {
            'skip_download': True,
        },
    }]
    _WIDGET_REGEX = r'<div[^>]+class="[^"]*elementor-widget-video(?:-playlist)?[^"]*"[^>]*data-settings="([^"]*)"'

    def _extract_from_webpage(self, url, webpage):
        for data_settings in re.findall(self._WIDGET_REGEX, webpage):
            # Widget settings are HTML-escaped JSON inside the data attribute
            data = self._parse_json(data_settings, None, fatal=False, transform_source=unescapeHTML)

            widget_youtube_url = traverse_obj(data, ('youtube_url', {url_or_none}))
            if widget_youtube_url:
                yield self.url_result(widget_youtube_url, ie=YoutubeIE)

            # Playlist widgets carry one entry per tab
            for video in traverse_obj(data, ('tabs', lambda _, v: v['_id'], {dict})):
                tab_youtube_url = traverse_obj(video, ('youtube_url', {url_or_none}))
                if tab_youtube_url:
                    yield self.url_result(tab_youtube_url, ie=YoutubeIE)
                tab_vimeo_url = traverse_obj(video, ('vimeo_url', {url_or_none}))
                if tab_vimeo_url:
                    yield self.url_result(tab_vimeo_url, ie=VimeoIE)
                for direct_url in traverse_obj(video, (('hosted_url', 'external_url'), 'url', {url_or_none})):
                    yield {
                        'id': video['_id'],
                        'url': direct_url,
                        'title': video.get('title'),
                    }

View File

@ -57,7 +57,7 @@ class FacebookIE(InfoExtractor):
)|
facebook:
)
(?P<id>[0-9]+)
(?P<id>pfbid[A-Za-z0-9]+|\d+)
'''
_EMBED_REGEX = [
r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1',
@ -247,6 +247,24 @@ class FacebookIE(InfoExtractor):
'thumbnail': r're:^https?://.*',
'duration': 148.435,
},
}, {
'url': 'https://www.facebook.com/attn/posts/pfbid0j1Czf2gGDVqeQ8KiMLFm3pWN8GxsQmeRrVhimWDzMuKQoR8r4b1knNsejELmUgyhl',
'info_dict': {
'id': '6968553779868435',
'ext': 'mp4',
'description': 'md5:2f2fcf93e97ac00244fe64521bbdb0cb',
'uploader': 'ATTN:',
'upload_date': '20231207',
'title': 'ATTN:',
'duration': 132.675,
'uploader_id': '100064451419378',
'view_count': int,
'thumbnail': r're:^https?://.*',
'timestamp': 1701975646,
},
}, {
'url': 'https://www.facebook.com/story.php?story_fbid=pfbid0Fnzhm8UuzjBYpPMNFzaSpFE9UmLdU4fJN8qTANi1Dmtj5q7DNrL5NERXfsAzDEV7l&id=100073071055552',
'only_matching': True,
}, {
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
'only_matching': True,

View File

@ -0,0 +1,179 @@
import re
import time
import urllib.parse
from .common import InfoExtractor
from ..utils import ExtractorError, int_or_none, url_or_none, urlencode_postdata
from ..utils.traversal import traverse_obj
class GetCourseRuPlayerIE(InfoExtractor):
    """Extractor for the GetCourse signed-player pages (player02.getcourse.ru)."""
    _VALID_URL = r'https?://player02\.getcourse\.ru/sign-player/?\?(?:[^#]+&)?json=[^#&]+'
    _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL}[^\'"]*)']
    _TESTS = [{
        'url': 'http://player02.getcourse.ru/sign-player/?json=eyJ2aWRlb19oYXNoIjoiMTkwYmRmOTNmMWIyOTczNTMwOTg1M2E3YTE5ZTI0YjMiLCJ1c2VyX2lkIjozNTk1MjUxODMsInN1Yl9sb2dpbl91c2VyX2lkIjpudWxsLCJsZXNzb25faWQiOm51bGwsImlwIjoiNDYuMTQyLjE4Mi4yNDciLCJnY19ob3N0IjoiYWNhZGVteW1lbC5vbmxpbmUiLCJ0aW1lIjoxNzA1NDQ5NjQyLCJwYXlsb2FkIjoidV8zNTk1MjUxODMiLCJ1aV9sYW5ndWFnZSI6InJ1IiwiaXNfaGF2ZV9jdXN0b21fc3R5bGUiOnRydWV9&s=354ad2c993d95d5ac629e3133d6cefea&vh-static-feature=zigzag',
        'info_dict': {
            'id': '513573381',
            'title': '190bdf93f1b29735309853a7a19e24b3',
            'ext': 'mp4',
            'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80',
            'duration': 1693
        },
        'skip': 'JWT expired',
    }]

    def _real_extract(self, url):
        webpage = self._download_webpage(url, None, 'Downloading player page')
        # The player embeds its whole configuration as a JS object literal
        configs = self._search_json(
            r'window\.configs\s*=', webpage, 'config', None)
        video_id = str(configs['gcFileId'])

        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            configs['masterPlaylistUrl'], video_id)

        info = traverse_obj(configs, {
            'title': ('videoHash', {str}),
            'thumbnail': ('previewUrl', {url_or_none}),
            'duration': ('videoDuration', {int_or_none}),
        })
        info.update({
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
        })
        return info
class GetCourseRuIE(InfoExtractor):
    """Extractor for GetCourse-hosted lessons (getcourse.ru/.io and white-label domains)."""
    _NETRC_MACHINE = 'getcourseru'
    # Known white-label domains running on the GetCourse platform
    _DOMAINS = [
        'academymel.online',
        'marafon.mani-beauty.com',
        'on.psbook.ru'
    ]
    _BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})'
    _VALID_URL = [
        rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P<id>[^?#]+)',
        # BUG FIX: was "(:?pl/)?" -- a *capturing* group that also matched
        # ":pl/"; "(?:pl/)?" is the intended optional non-capturing "pl/" prefix
        rf'{_BASE_URL_RE}/(?:pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
    ]
    _TESTS = [{
        'url': 'http://academymel.online/3video_1',
        'info_dict': {
            'id': '3059742',
            'display_id': '3video_1',
            'title': 'Промоуроки Академии МЕЛ',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '513573381',
                'ext': 'mp4',
                'title': 'Промоуроки Академии МЕЛ',
                'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80',
                'duration': 1693
            },
        }]
    }, {
        'url': 'https://academymel.getcourse.ru/3video_1',
        'info_dict': {
            'id': '3059742',
            'display_id': '3video_1',
            'title': 'Промоуроки Академии МЕЛ',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '513573381',
                'ext': 'mp4',
                'title': 'Промоуроки Академии МЕЛ',
                'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80',
                'duration': 1693
            },
        }]
    }, {
        'url': 'https://academymel.getcourse.ru/pl/teach/control/lesson/view?id=319141781&editMode=0',
        'info_dict': {
            'id': '319141781',
            'title': '1. Разминка у стены',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '4919601',
                'ext': 'mp4',
                'title': '1. Разминка у стены',
                'thumbnail': 'https://preview-htz.vhcdn.com/preview/5a521788e7dc25b4f70c3dff6512d90e/preview.jpg?version=1703223532&host=vh-81',
                'duration': 704
            },
        }],
        'skip': 'paid lesson'
    }, {
        'url': 'https://manibeauty.getcourse.ru/pl/teach/control/lesson/view?id=272499894',
        'info_dict': {
            'id': '272499894',
            'title': 'Мотивация к тренировкам',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '447479687',
                'ext': 'mp4',
                'title': 'Мотивация к тренировкам',
                'thumbnail': 'https://preview-htz.vhcdn.com/preview/70ed5b9f489dd03b4aff55bfdff71a26/preview.jpg?version=1685115787&host=vh-71',
                'duration': 30
            },
        }],
        'skip': 'paid lesson'
    }, {
        'url': 'https://gaismasmandalas.getcourse.io/ATLAUTSEVBUT',
        'only_matching': True,
    }]
    _LOGIN_URL_PATH = '/cms/system/login'

    def _login(self, hostname, username, password):
        """Log in via the CMS login form unless a session cookie already exists."""
        if self._get_cookies(f'https://{hostname}').get('PHPSESSID5'):
            return  # already logged in on this domain
        login_url = f'https://{hostname}{self._LOGIN_URL_PATH}'
        webpage = self._download_webpage(login_url, None)

        self._request_webpage(
            login_url, None, 'Logging in', 'Failed to log in',
            data=urlencode_postdata({
                'action': 'processXdget',
                'xdgetId': self._html_search_regex(
                    r'<form[^>]+\bclass="[^"]*\bstate-login[^"]*"[^>]+\bdata-xdget-id="([^"]+)"',
                    webpage, 'xdgetId'),
                'params[action]': 'login',
                'params[url]': login_url,
                'params[object_type]': 'cms_page',
                'params[object_id]': -1,
                'params[email]': username,
                'params[password]': password,
                'requestTime': int(time.time()),
                # BUG FIX: the dot was unescaped ("window.requestSimpleSign"),
                # matching any character; escape it to match a literal dot
                'requestSimpleSign': self._html_search_regex(
                    r'window\.requestSimpleSign\s*=\s*"([\da-f]+)"', webpage, 'simple sign'),
            }))

    def _real_extract(self, url):
        hostname = urllib.parse.urlparse(url).hostname
        username, password = self._get_login_info(netrc_machine=hostname)
        if username:
            self._login(hostname, username, password)

        display_id = self._match_id(url)
        # NB: 404 is returned due to yt-dlp not properly following redirects #9020
        webpage, urlh = self._download_webpage_handle(url, display_id, expected_status=404)
        if self._LOGIN_URL_PATH in urlh.url or urlh.status == 404:
            raise ExtractorError(
                f'This video is only available for registered users. {self._login_hint("any", netrc=hostname)}',
                expected=True)

        playlist_id = self._search_regex(
            r'window\.(?:lessonId|gcsObjectId)\s*=\s*(\d+)', webpage, 'playlist id', default=display_id)
        title = self._og_search_title(webpage) or self._html_extract_title(webpage)

        # Lesson pages embed one or more player02 iframes; delegate each to the player IE
        return self.playlist_from_matches(
            re.findall(GetCourseRuPlayerIE._EMBED_REGEX[0], webpage),
            playlist_id, title, display_id=display_id, ie=GetCourseRuPlayerIE, video_kwargs={
                'url_transparent': True,
                'title': title,
            })

View File

@ -19,9 +19,9 @@ class GoogleDriveIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
(?:
(?:docs|drive)\.google\.com/
(?:docs|drive|drive\.usercontent)\.google\.com/
(?:
(?:uc|open)\?.*?id=|
(?:uc|open|download)\?.*?id=|
file/d/
)|
video\.google\.com/get_player\?.*?docid=
@ -53,6 +53,9 @@ class GoogleDriveIE(InfoExtractor):
}, {
'url': 'https://drive.google.com/uc?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
'only_matching': True,
}, {
'url': 'https://drive.usercontent.google.com/download?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
'only_matching': True,
}]
_FORMATS_EXT = {
'5': 'flv',
@ -205,9 +208,10 @@ class GoogleDriveIE(InfoExtractor):
formats.append(f)
source_url = update_url_query(
'https://drive.google.com/uc', {
'https://drive.usercontent.google.com/download', {
'id': video_id,
'export': 'download',
'confirm': 't',
})
def request_source_file(source_url, kind, data=None):

View File

@ -57,8 +57,8 @@ class GoProIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
metadata = self._parse_json(
self._html_search_regex(r'window\.__reflectData\s*=\s*([^;]+)', webpage, 'metadata'), video_id)
metadata = self._search_json(
r'window\.__reflectData\s*=', webpage, 'metadata', video_id)
video_info = metadata['collectionMedia'][0]
media_data = self._download_json(
@ -99,7 +99,7 @@ class GoProIE(InfoExtractor):
'duration': int_or_none(
video_info.get('source_duration')),
'artist': str_or_none(
video_info.get('music_track_artist')),
video_info.get('music_track_artist')) or None,
'track': str_or_none(
video_info.get('music_track_name')),
video_info.get('music_track_name')) or None,
}

View File

@ -0,0 +1,69 @@
import functools
from .common import InfoExtractor
from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
url_or_none,
urlencode_postdata,
)
from ..utils.traversal import traverse_obj
class IlPostIE(InfoExtractor):
    """Extractor for podcast episodes on ilpost.it."""
    _VALID_URL = r'https?://(?:www\.)?ilpost\.it/episodes/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.ilpost.it/episodes/1-avis-akvasas-ka/',
        'md5': '43649f002d85e1c2f319bb478d479c40',
        'info_dict': {
            'id': '2972047',
            'ext': 'mp3',
            'display_id': '1-avis-akvasas-ka',
            'title': '1. Avis akvasas ka',
            'url': 'https://www.ilpost.it/wp-content/uploads/2023/12/28/1703781217-l-invasione-pt1-v6.mp3',
            'timestamp': 1703835014,
            'upload_date': '20231229',
            'duration': 2495.0,
            'availability': 'public',
            'series_id': '235598',
            'description': '',
        }
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The page carries the AJAX endpoint plus auth cookie in a JS object
        endpoint_metadata = self._search_json(
            r'var\s+ilpostpodcast\s*=', webpage, 'metadata', display_id)
        episode_id = endpoint_metadata['post_id']
        podcast_id = endpoint_metadata['podcast_id']

        post_data = urlencode_postdata({
            'action': 'checkpodcast',
            'cookie': endpoint_metadata['cookie'],
            'post_id': episode_id,
            'podcast_id': podcast_id,
        })
        podcast_metadata = self._download_json(
            endpoint_metadata['ajax_url'], display_id, data=post_data)

        # Pick this episode out of the podcast's full episode list
        episode = traverse_obj(podcast_metadata, (
            'data', 'postcastList', lambda _, v: str(v['id']) == episode_id, {dict}), get_all=False)
        if not episode:
            raise ExtractorError('Episode could not be extracted')

        return {
            'id': episode_id,
            'display_id': display_id,
            'series_id': podcast_id,
            'vcodec': 'none',
            **traverse_obj(episode, {
                'title': ('title', {str}),
                'description': ('description', {str}),
                'url': ('podcast_raw_url', {url_or_none}),
                'thumbnail': ('image', {url_or_none}),
                'timestamp': ('timestamp', {int_or_none}),
                'duration': ('milliseconds', {functools.partial(float_or_none, scale=1000)}),
                'availability': ('free', {lambda v: 'public' if v else 'subscriber_only'}),
            }),
        }

View File

@ -1,5 +1,6 @@
from .common import InfoExtractor
from ..utils import (
int_or_none,
js_to_json,
url_or_none,
urlencode_postdata,
@ -20,39 +21,64 @@ class JioSaavnSongIE(JioSaavnBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?:jiosaavn\.com/song/[^/?#]+/|saavn\.com/s/song/(?:[^/?#]+/){3})(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.jiosaavn.com/song/leja-re/OQsEfQFVUXk',
'md5': '7b1f70de088ede3a152ea34aece4df42',
'md5': '3b84396d15ed9e083c3106f1fa589c04',
'info_dict': {
'id': 'OQsEfQFVUXk',
'ext': 'mp3',
'ext': 'mp4',
'title': 'Leja Re',
'album': 'Leja Re',
'thumbnail': 'https://c.saavncdn.com/258/Leja-Re-Hindi-2018-20181124024539-500x500.jpg',
'duration': 205,
'view_count': int,
'release_year': 2018,
},
}, {
'url': 'https://www.saavn.com/s/song/hindi/Saathiya/O-Humdum-Suniyo-Re/KAMiazoCblU',
'only_matching': True,
}]
_VALID_BITRATES = ('16', '32', '64', '128', '320')
def _real_extract(self, url):
audio_id = self._match_id(url)
extract_bitrates = self._configuration_arg('bitrate', ['128', '320'], ie_key='JioSaavn')
if invalid_bitrates := [br for br in extract_bitrates if br not in self._VALID_BITRATES]:
raise ValueError(
f'Invalid bitrate(s): {", ".join(invalid_bitrates)}. '
+ f'Valid bitrates are: {", ".join(self._VALID_BITRATES)}')
song_data = self._extract_initial_data(url, audio_id)['song']['song']
media_data = self._download_json(
'https://www.jiosaavn.com/api.php', audio_id, data=urlencode_postdata({
'__call': 'song.generateAuthToken',
'_format': 'json',
'bitrate': '128',
'url': song_data['encrypted_media_url'],
}))
formats = []
for bitrate in extract_bitrates:
media_data = self._download_json(
'https://www.jiosaavn.com/api.php', audio_id, f'Downloading format info for {bitrate}',
fatal=False, data=urlencode_postdata({
'__call': 'song.generateAuthToken',
'_format': 'json',
'bitrate': bitrate,
'url': song_data['encrypted_media_url'],
}))
if not media_data.get('auth_url'):
self.report_warning(f'Unable to extract format info for {bitrate}')
continue
formats.append({
'url': media_data['auth_url'],
'ext': media_data.get('type'),
'format_id': bitrate,
'abr': int(bitrate),
'vcodec': 'none',
})
return {
'id': audio_id,
'url': media_data['auth_url'],
'ext': media_data.get('type'),
'vcodec': 'none',
'formats': formats,
**traverse_obj(song_data, {
'title': ('title', 'text'),
'album': ('album', 'text'),
'thumbnail': ('image', 0, {url_or_none}),
'duration': ('duration', {int_or_none}),
'view_count': ('play_count', {int_or_none}),
'release_year': ('year', {int_or_none}),
}),
}

View File

@ -0,0 +1,140 @@
import urllib.parse
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
filter_dict,
get_element_by_id,
int_or_none,
join_nonempty,
js_to_json,
qualities,
url_or_none,
urljoin,
)
from ..utils.traversal import traverse_obj
class KukuluLiveIE(InfoExtractor):
    """Extractor for live.erinn.biz (Kukulu Live): live streams and timeshift VODs."""
    _VALID_URL = r'https?://live\.erinn\.biz/live\.php\?h(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://live.erinn.biz/live.php?h675134569',
        'md5': 'e380fa6a47fc703d91cea913ab44ec2e',
        'info_dict': {
            'id': '675134569',
            'ext': 'mp4',
            'title': 'プロセカ',
            'description': 'テストも兼ねたプロセカ配信。',
            'timestamp': 1702689148,
            'upload_date': '20231216',
            'thumbnail': r're:^https?://.*',
        },
    }, {
        'url': 'https://live.erinn.biz/live.php?h102338092',
        'md5': 'dcf5167a934b1c60333461e13a81a6e2',
        'info_dict': {
            'id': '102338092',
            'ext': 'mp4',
            'title': 'Among Usで遊びます',
            'description': 'VTuberになりましたねんねこ㌨ですよろしくお願いします',
            'timestamp': 1704603118,
            'upload_date': '20240107',
            'thumbnail': r're:^https?://.*',
        },
    }, {
        'url': 'https://live.erinn.biz/live.php?h878049531',
        'only_matching': True,
    }]

    def _get_quality_meta(self, video_id, desc, code, force_h264=None):
        """Fetch metadata for one quality variant; the endpoint responds with a
        URL-encoded query string, returned here parsed via parse_qs (so every
        value in the result is a list)."""
        desc += ' (force_h264)' if force_h264 else ''
        qs = self._download_webpage(
            'https://live.erinn.biz/live.player.fplayer.php', video_id,
            f'Downloading {desc} quality metadata', f'Unable to download {desc} quality metadata',
            query=filter_dict({
                'hash': video_id,
                'action': f'get{code}liveByAjax',
                'force_h264': force_h264,
            }))
        return urllib.parse.parse_qs(qs)

    def _add_quality_formats(self, formats, quality_meta):
        """Append the HLS video and audio-only formats described by
        quality_meta (a parse_qs mapping) to ``formats`` in place."""
        vcodec = traverse_obj(quality_meta, ('vcodec', 0, {str}))
        quality = traverse_obj(quality_meta, ('now_quality', 0, {str}))
        # Ranking: 'high' > 'h264' (forced-H.264 fallback) > 'low'
        quality_priority = qualities(('low', 'h264', 'high'))(quality)
        if traverse_obj(quality_meta, ('hlsaddr', 0, {url_or_none})):
            formats.append({
                'format_id': quality,
                'url': quality_meta['hlsaddr'][0],
                'ext': 'mp4',
                'vcodec': vcodec,
                'quality': quality_priority,
            })
        if traverse_obj(quality_meta, ('hlsaddr_audioonly', 0, {url_or_none})):
            formats.append({
                'format_id': join_nonempty(quality, 'audioonly'),
                'url': quality_meta['hlsaddr_audioonly'][0],
                'ext': 'm4a',
                'vcodec': 'none',
                'quality': quality_priority,
            })

    def _real_extract(self, url):
        video_id = self._match_id(url)
        html = self._download_webpage(url, video_id)

        # Page-level marker meaning "timeshift not found" (expired recording)
        if '>タイムシフトが見つかりませんでした。<' in html:
            raise ExtractorError('This stream has expired', expected=True)

        # The title element's tag casing is inconsistent; normalize <SPAN> so
        # get_element_by_id can locate it
        title = clean_html(
            get_element_by_id('livetitle', html.replace('<SPAN', '<span').replace('SPAN>', 'span>')))
        description = self._html_search_meta('Description', html)
        thumbnail = self._html_search_meta(['og:image', 'twitter:image'], html)

        if self._search_regex(r'(var\s+timeshift\s*=\s*false)', html, 'is livestream', default=False):
            # Live broadcast: collect the 'high' and 'low' variants; when the
            # high variant is HEVC, also request an H.264 fallback rendition
            formats = []
            for (desc, code) in [('high', 'Z'), ('low', 'ForceLow')]:
                quality_meta = self._get_quality_meta(video_id, desc, code)
                self._add_quality_formats(formats, quality_meta)
                if desc == 'high' and traverse_obj(quality_meta, ('vcodec', 0)) == 'HEVC':
                    self._add_quality_formats(
                        formats, self._get_quality_meta(video_id, desc, code, force_h264='1'))

            return {
                'id': video_id,
                'title': title,
                'description': description,
                'thumbnail': thumbnail,
                'is_live': True,
                'formats': formats,
            }

        # VOD extraction
        player_html = self._download_webpage(
            'https://live.erinn.biz/live.timeshift.fplayer.php', video_id,
            'Downloading player html', 'Unable to download player html', query={'hash': video_id})

        # fplayer_source is a JS array literal; keep only entries with a 'file'
        sources = traverse_obj(self._search_json(
            r'var\s+fplayer_source\s*=', player_html, 'stream data', video_id,
            contains_pattern=r'\[(?s:.+)\]', transform_source=js_to_json), lambda _, v: v['file'])

        def entries(segments, playlist=True):
            # A timeshift may be split into several segments; with more than
            # one, they are exposed as a multi_video playlist of parts
            for i, segment in enumerate(segments, 1):
                yield {
                    'id': f'{video_id}_{i}' if playlist else video_id,
                    'title': f'{title} (Part {i})' if playlist else title,
                    'description': description,
                    'timestamp': traverse_obj(segment, ('time_start', {int_or_none})),
                    'thumbnail': thumbnail,
                    'formats': [{
                        'url': urljoin('https://live.erinn.biz', segment['file']),
                        'ext': 'mp4',
                        'protocol': 'm3u8_native',
                    }],
                }

        if len(sources) == 1:
            return next(entries(sources, playlist=False))
        return self.playlist_result(entries(sources), video_id, title, description, multi_video=True)

View File

@ -0,0 +1,62 @@
from .common import InfoExtractor
from ..utils import ExtractorError, int_or_none, join_nonempty, url_or_none
from ..utils.traversal import traverse_obj
class MagentaMusikIE(InfoExtractor):
    """Extractor for magentamusik.de concert/event VODs (Deutsche Telekom)."""
    _VALID_URL = r'https?://(?:www\.)?magentamusik\.de/(?P<id>[^/?#]+)'

    _TESTS = [{
        'url': 'https://www.magentamusik.de/marty-friedman-woa-2023-9208205928595409235',
        'md5': 'd82dd4748f55fc91957094546aaf8584',
        'info_dict': {
            'id': '9208205928595409235',
            'display_id': 'marty-friedman-woa-2023-9208205928595409235',
            'ext': 'mp4',
            'title': 'Marty Friedman: W:O:A 2023',
            'alt_title': 'Konzert vom: 05.08.2023 13:00',
            'duration': 2760,
            'categories': ['Musikkonzert'],
            'release_year': 2023,
            'location': 'Deutschland',
        }
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # The player config JSON immediately follows this data attribute in the markup
        player_config = self._search_json(
            r'data-js-element="o-video-player__config">', webpage, 'player config', display_id, fatal=False)
        if not player_config:
            raise ExtractorError('No video found', expected=True)
        asset_id = player_config['assetId']
        asset_details = self._download_json(
            f'https://wcps.t-online.de/cvss/magentamusic/vodclient/v2/assetdetails/58938/{asset_id}',
            display_id, note='Downloading asset details')
        # The partner reference is the canonical video id accepted by the VOD player API
        video_id = traverse_obj(
            asset_details, ('content', 'partnerInformation', ..., 'reference', {str}), get_all=False)
        if not video_id:
            raise ExtractorError('Unable to extract video id')
        vod_data = self._download_json(
            f'https://wcps.t-online.de/cvss/magentamusic/vodclient/v2/player/58935/{video_id}/Main%20Movie', video_id)
        smil_url = traverse_obj(
            vod_data, ('content', 'feature', 'representations', ...,
                       'contentPackages', ..., 'media', 'href', {url_or_none}), get_all=False)

        return {
            'id': video_id,
            'display_id': display_id,
            'formats': self._extract_smil_formats(smil_url, video_id),
            **traverse_obj(vod_data, ('content', 'feature', 'metadata', {
                'title': 'title',
                'alt_title': 'originalTitle',
                'description': 'longDescription',
                'duration': ('runtimeInSeconds', {int_or_none}),
                'location': ('countriesOfProduction', {list}, {lambda x: join_nonempty(*x, delim=', ')}),
                'release_year': ('yearOfProduction', {int_or_none}),
                'categories': ('mainGenre', {str}, {lambda x: x and [x]}),
            })),
        }

View File

@ -1,58 +0,0 @@
from .common import InfoExtractor
class MagentaMusik360IE(InfoExtractor):
    """Extractor for the legacy magenta-musik-360.de concert portal."""
    _VALID_URL = r'https?://(?:www\.)?magenta-musik-360\.de/([a-z0-9-]+-(?P<id>[0-9]+)|festivals/.+)'
    _TESTS = [{
        'url': 'https://www.magenta-musik-360.de/within-temptation-wacken-2019-1-9208205928595185932',
        'md5': '65b6f060b40d90276ec6fb9b992c1216',
        'info_dict': {
            'id': '9208205928595185932',
            'ext': 'm3u8',
            'title': 'WITHIN TEMPTATION',
            'description': 'Robert Westerholt und Sharon Janny den Adel gründeten die Symphonic Metal-Band. Privat sind die Niederländer ein Paar und haben zwei Kinder. Die Single Ice Queen brachte ihnen Platin und Gold und verhalf 2002 zum internationalen Durchbruch. Charakteristisch für die Band war Anfangs der hohe Gesang von Frontfrau Sharon. Stilistisch fing die Band im Gothic Metal an. Mit neuem Sound, schnellen Gitarrenriffs und Gitarrensoli, avancierte Within Temptation zur erfolgreichen Rockband. Auch dieses Jahr wird die Band ihre Fangemeinde wieder mitreißen.',
        }
    }, {
        'url': 'https://www.magenta-musik-360.de/festivals/wacken-world-wide-2020-body-count-feat-ice-t',
        'md5': '81010d27d7cab3f7da0b0f681b983b7e',
        'info_dict': {
            'id': '9208205928595231363',
            'ext': 'm3u8',
            'title': 'Body Count feat. Ice-T',
            'description': 'Body Count feat. Ice-T konnten bereits im vergangenen Jahr auf dem „Holy Ground“ in Wacken überzeugen. 2020 gehen die Crossover-Metaller aus einem Club in Los Angeles auf Sendung und bringen mit ihrer Mischung aus Metal und Hip-Hop Abwechslung und ordentlich Alarm zum WWW. Bereits seit 1990 stehen die beiden Gründer Ice-T (Gesang) und Ernie C (Gitarre) auf der Bühne. Sieben Studioalben hat die Gruppe bis jetzt veröffentlicht, darunter das Debüt „Body Count“ (1992) mit dem kontroversen Track „Cop Killer“.',
        }
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        # _match_id casts to string, but since "None" is not a valid video_id
        # for magenta there is no risk for confusion
        if video_id == "None":
            # Festival URLs carry no numeric id; recover it from the page markup.
            webpage = self._download_webpage(url, video_id)
            video_id = self._html_search_regex(r'data-asset-id="([^"]+)"', webpage, 'video_id')

        player_data = self._download_json(
            "https://wcps.t-online.de/cvss/magentamusic/vodplayer/v3/player/58935/%s/Main%%20Movie" % video_id,
            video_id)
        feature = player_data['content']['feature']
        # The SMIL-style manifest location lives in the first representation's
        # first content package.
        xml_url = feature['representations'][0]['contentPackages'][0]['media']['href']
        metadata = feature.get('metadata')

        title = description = duration = None
        thumbnails = []
        if metadata:
            title = metadata.get('title')
            description = metadata.get('fullDescription')
            duration = metadata.get('runtimeInSeconds')
            thumbnails = [
                {'url': metadata[img_key].get('href')}
                for img_key in ('teaserImageWide', 'smallCoverImage')
                if img_key in metadata]

        manifest = self._download_xml(xml_url, video_id)
        # First <video>-like node of the manifest tree carries the stream URL.
        final_url = manifest[0][0][0].attrib['src']
        return {
            'id': video_id,
            'title': title,
            'description': description,
            'url': final_url,
            'duration': duration,
            'thumbnails': thumbnails
        }

View File

@ -355,11 +355,11 @@ class MLBArticleIE(InfoExtractor):
'info_dict': {
'id': '36db7394-343c-4ea3-b8ca-ead2e61bca9a',
'title': 'Machado\'s grab draws hilarious irate reaction',
'modified_timestamp': 1650130737,
'modified_timestamp': 1675888370,
'description': 'md5:a19d4eb0487b2cb304e9a176f6b67676',
'modified_date': '20220416',
'modified_date': '20230208',
},
'playlist_count': 2,
'playlist_mincount': 2,
}]
def _real_extract(self, url):
@ -367,15 +367,13 @@ class MLBArticleIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
apollo_cache_json = self._search_json(r'window\.initState\s*=', webpage, 'window.initState', display_id)['apolloCache']
content_data_id = traverse_obj(
apollo_cache_json, ('ROOT_QUERY', lambda k, _: k.startswith('getForgeContent'), 'id'), get_all=False)
content_real_info = apollo_cache_json[content_data_id]
content_real_info = traverse_obj(
apollo_cache_json, ('ROOT_QUERY', lambda k, _: k.startswith('getArticle')), get_all=False)
return self.playlist_from_matches(
traverse_obj(content_real_info, ('parts', lambda _, v: v['typename'] == 'Video', 'id')),
getter=lambda x: f'https://www.mlb.com/video/{apollo_cache_json[x]["slug"]}',
ie=MLBVideoIE, playlist_id=content_real_info.get('_translationId'),
traverse_obj(content_real_info, ('parts', lambda _, v: v['__typename'] == 'Video' or v['type'] == 'video')),
getter=lambda x: f'https://www.mlb.com/video/{x["slug"]}',
ie=MLBVideoIE, playlist_id=content_real_info.get('translationId'),
title=self._html_search_meta('og:title', webpage),
description=content_real_info.get('summary'),
modified_timestamp=parse_iso8601(content_real_info.get('lastUpdatedDate')))

View File

@ -177,6 +177,7 @@ class MotherlessIE(InfoExtractor):
class MotherlessPaginatedIE(InfoExtractor):
_EXTRA_QUERY = {}
_PAGE_SIZE = 60
def _correct_path(self, url, item_id):
@ -199,7 +200,7 @@ class MotherlessPaginatedIE(InfoExtractor):
def get_page(idx):
page = idx + 1
current_page = webpage if not idx else self._download_webpage(
real_url, item_id, note=f'Downloading page {page}', query={'page': page})
real_url, item_id, note=f'Downloading page {page}', query={'page': page, **self._EXTRA_QUERY})
yield from self._extract_entries(current_page, real_url)
return self.playlist_result(
@ -213,7 +214,7 @@ class MotherlessGroupIE(MotherlessPaginatedIE):
'url': 'http://motherless.com/gv/movie_scenes',
'info_dict': {
'id': 'movie_scenes',
'title': 'Movie Scenes',
'title': 'Movie Scenes - Videos - Hot and sexy scenes from "regular" movies... Beautiful actresses fully',
},
'playlist_mincount': 540,
}, {
@ -244,7 +245,7 @@ class MotherlessGalleryIE(MotherlessPaginatedIE):
'id': '338999F',
'title': 'Random',
},
'playlist_mincount': 190,
'playlist_mincount': 171,
}, {
'url': 'https://motherless.com/GVABD6213',
'info_dict': {
@ -270,3 +271,27 @@ class MotherlessGalleryIE(MotherlessPaginatedIE):
def _correct_path(self, url, item_id):
return urllib.parse.urljoin(url, f'/GV{item_id}')
class MotherlessUploaderIE(MotherlessPaginatedIE):
    """Paginated listing of a single uploader's videos (motherless.com/u/<name>)."""
    _VALID_URL = r'https?://(?:www\.)?motherless\.com/u/(?P<id>\w+)/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://motherless.com/u/Mrgo4hrs2023',
        'info_dict': {
            'id': 'Mrgo4hrs2023',
            'title': "Mrgo4hrs2023's Uploads - Videos",
        },
        'playlist_mincount': 32,
    }, {
        'url': 'https://motherless.com/u/Happy_couple?t=v',
        'info_dict': {
            'id': 'Happy_couple',
            'title': "Happy_couple's Uploads - Videos",
        },
        'playlist_mincount': 8,
    }]

    # 't=v' restricts the listing to videos; merged into every page request
    # by the paginated base class.
    _EXTRA_QUERY = {'t': 'v'}

    def _correct_path(self, url, item_id):
        # Canonical path for an uploader's video listing
        return urllib.parse.urljoin(url, f'/u/{item_id}?t=v')

171
yt_dlp/extractor/mx3.py Normal file
View File

@ -0,0 +1,171 @@
import re
from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import (
get_element_by_class,
int_or_none,
try_call,
url_or_none,
urlhandle_detect_ext,
)
from ..utils.traversal import traverse_obj
class Mx3BaseIE(InfoExtractor):
    """Shared logic for the mx3.ch family of Swiss music portals.

    Subclasses set only ``_DOMAIN``; ``_VALID_URL`` is built from the template.
    """
    _VALID_URL_TMPL = r'https?://(?:www\.)?%s/t/(?P<id>\w+)'
    # Candidate format endpoints, probed per track; higher 'quality' wins.
    _FORMATS = [{
        'url': 'player_asset',
        'format_id': 'default',
        'quality': 0,
    }, {
        'url': 'player_asset?quality=hd',
        'format_id': 'hd',
        'quality': 1,
    }, {
        'url': 'download',
        'format_id': 'download',
        'quality': 2,
    }, {
        'url': 'player_asset?quality=source',
        'format_id': 'source',
        'quality': 2,
    }]

    def _extract_formats(self, track_id):
        """Probe each candidate endpoint with a HEAD request; keep those answering 200."""
        formats = []
        for fmt in self._FORMATS:
            format_url = f'https://{self._DOMAIN}/tracks/{track_id}/{fmt["url"]}'
            # 404 is the expected "this format is not offered" response,
            # so it must not abort extraction.
            urlh = self._request_webpage(
                HEADRequest(format_url), track_id, fatal=False, expected_status=404,
                note=f'Checking for format {fmt["format_id"]}')
            if urlh and urlh.status == 200:
                formats.append({
                    **fmt,
                    'url': format_url,
                    'ext': urlhandle_detect_ext(urlh),
                    'filesize': int_or_none(urlh.headers.get('Content-Length')),
                })
        return formats

    def _real_extract(self, url):
        track_id = self._match_id(url)
        webpage = self._download_webpage(url, track_id)
        more_info = get_element_by_class('single-more-info', webpage)
        data = self._download_json(f'https://{self._DOMAIN}/t/{track_id}.json', track_id, fatal=False)

        def get_info_field(name):
            # The "more info" section is a <dt>/<dd> definition list keyed by
            # the (English) field label.
            return self._html_search_regex(
                rf'<dt[^>]*>\s*{name}\s*</dt>\s*<dd[^>]*>(.*?)</dd>',
                more_info, name, default=None, flags=re.DOTALL)

        return {
            'id': track_id,
            'formats': self._extract_formats(track_id),
            'genre': self._html_search_regex(
                r'<div\b[^>]+class="single-band-genre"[^>]*>([^<]+)</div>', webpage, 'genre', default=None),
            'release_year': int_or_none(get_info_field('Year of creation')),
            'description': get_info_field('Description'),
            # Falls back to [] (via the `list` callable) when no Tag field exists.
            'tags': try_call(lambda: get_info_field('Tag').split(', '), list),
            **traverse_obj(data, {
                'title': ('title', {str}),
                'artist': (('performer_name', 'artist'), {str}),
                'album_artist': ('artist', {str}),
                'composer': ('composer_name', {str}),
                'thumbnail': (('picture_url_xlarge', 'picture_url'), {url_or_none}),
            }, get_all=False),
        }
class Mx3IE(Mx3BaseIE):
    """Tracks on the main mx3.ch portal; all behavior lives in Mx3BaseIE."""
    _DOMAIN = 'mx3.ch'
    _VALID_URL = Mx3BaseIE._VALID_URL_TMPL % re.escape(_DOMAIN)
    _TESTS = [{
        'url': 'https://mx3.ch/t/1Cru',
        'md5': '7ba09e9826b4447d4e1ce9d69e0e295f',
        'info_dict': {
            'id': '1Cru',
            'ext': 'wav',
            'artist': 'Godina',
            'album_artist': 'Tortue Tortue',
            'composer': 'Olivier Godinat',
            'genre': 'Rock',
            'thumbnail': 'https://mx3.ch/pictures/mx3/file/0101/4643/square_xlarge/1-s-envoler-1.jpg?1630272813',
            'title': "S'envoler",
            'release_year': 2021,
            'tags': [],
        }
    }, {
        'url': 'https://mx3.ch/t/1LIY',
        'md5': '48293cb908342547827f963a5a2e9118',
        'info_dict': {
            'id': '1LIY',
            'ext': 'mov',
            'artist': 'Tania Kimfumu',
            'album_artist': 'The Broots',
            'composer': 'Emmanuel Diserens',
            'genre': 'Electro',
            'thumbnail': 'https://mx3.ch/pictures/mx3/file/0110/0003/video_xlarge/frame_0000.png?1686963670',
            'title': 'The Broots-Larytta remix "Begging For Help"',
            'release_year': 2023,
            'tags': ['the broots', 'cassata records', 'larytta'],
            'description': '"Begging for Help" Larytta Remix Official Video\nRealized By Kali Donkilie in 2023',
        }
    }, {
        'url': 'https://mx3.ch/t/1C6E',
        'md5': '1afcd578493ddb8e5008e94bb6d97e25',
        'info_dict': {
            'id': '1C6E',
            'ext': 'wav',
            'artist': 'Alien Bubblegum',
            'album_artist': 'Alien Bubblegum',
            'composer': 'Alien Bubblegum',
            'genre': 'Punk',
            'thumbnail': 'https://mx3.ch/pictures/mx3/file/0101/1551/square_xlarge/pandora-s-box-cover-with-title.png?1627054733',
            'title': 'Wide Awake',
            'release_year': 2021,
            'tags': ['alien bubblegum', 'bubblegum', 'alien', 'pop punk', 'poppunk'],
        }
    }]
class Mx3NeoIE(Mx3BaseIE):
    """Tracks on neo.mx3.ch (contemporary/classical portal); logic in Mx3BaseIE."""
    _DOMAIN = 'neo.mx3.ch'
    _VALID_URL = Mx3BaseIE._VALID_URL_TMPL % re.escape(_DOMAIN)
    _TESTS = [{
        'url': 'https://neo.mx3.ch/t/1hpd',
        'md5': '6d9986bbae5cac3296ec8813bf965eb2',
        'info_dict': {
            'id': '1hpd',
            'ext': 'wav',
            'artist': 'Baptiste Lopez',
            'album_artist': 'Kammerorchester Basel',
            'composer': 'Jannik Giger',
            'genre': 'Composition, Orchestra',
            'title': 'Troisième œil. Für Kammerorchester (2023)',
            'thumbnail': 'https://neo.mx3.ch/pictures/neo/file/0000/0241/square_xlarge/kammerorchester-basel-group-photo-2_c_-lukasz-rajchert.jpg?1560341252',
            'release_year': 2023,
            'tags': [],
        }
    }]
class Mx3VolksmusikIE(Mx3BaseIE):
    """Tracks on volksmusik.mx3.ch (folk-music portal); logic in Mx3BaseIE."""
    _DOMAIN = 'volksmusik.mx3.ch'
    _VALID_URL = Mx3BaseIE._VALID_URL_TMPL % re.escape(_DOMAIN)
    _TESTS = [{
        'url': 'https://volksmusik.mx3.ch/t/Zx',
        'md5': 'dd967a7b0c1ef898f3e072cf9c2eae3c',
        'info_dict': {
            'id': 'Zx',
            'ext': 'mp3',
            'artist': 'Ländlerkapelle GrischArt',
            'album_artist': 'Ländlerkapelle GrischArt',
            'composer': 'Urs Glauser',
            'genre': 'Instrumental, Graubünden',
            'title': 'Chämilouf',
            'thumbnail': 'https://volksmusik.mx3.ch/pictures/vxm/file/0000/3815/square_xlarge/grischart1.jpg?1450530120',
            'release_year': 2012,
            'tags': [],
        }
    }]

View File

@ -1,20 +1,25 @@
import base64
import hashlib
import hmac
import itertools
import json
import re
from urllib.parse import urlparse, parse_qs
import time
from urllib.parse import parse_qs, urlparse
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
dict_get,
int_or_none,
join_nonempty,
merge_dicts,
parse_duration,
parse_iso8601,
traverse_obj,
try_get,
unified_timestamp,
update_url_query,
url_or_none,
)
@ -110,6 +115,18 @@ class NaverBaseIE(InfoExtractor):
**self.process_subtitles(video_data, get_subs),
}
def _call_api(self, path, video_id):
    """Call the now_web API, signing the request the way the web player does."""
    api_endpoint = f'https://apis.naver.com/now_web2/now_web_api/v1{path}'
    # Static signing key — presumably lifted from the Naver web client; if
    # requests start failing with auth errors, check whether it rotated.
    key = b'nbxvs5nwNG9QKEWK0ADjYA4JZoujF4gHcIwvoCxFTPAeamq5eemvt5IWAYXxrbYM'
    msgpad = int(time.time() * 1000)  # request timestamp in milliseconds
    # HMAC-SHA1 over (at most) the first 255 chars of the URL plus the timestamp
    md = base64.b64encode(hmac.HMAC(
        key, f'{api_endpoint[:255]}{msgpad}'.encode(), digestmod=hashlib.sha1).digest()).decode()
    return self._download_json(api_endpoint, video_id=video_id, headers=self.geo_verification_headers(), query={
        'msgpad': msgpad,
        'md': md,
    })['result']
class NaverIE(NaverBaseIE):
_VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/(?:v|embed)/(?P<id>\d+)'
@ -125,21 +142,32 @@ class NaverIE(NaverBaseIE):
'upload_date': '20130903',
'uploader': '메가스터디, 합격불변의 법칙',
'uploader_id': 'megastudy',
'uploader_url': 'https://tv.naver.com/megastudy',
'view_count': int,
'like_count': int,
'comment_count': int,
'duration': 2118,
'thumbnail': r're:^https?://.*\.jpg',
},
}, {
'url': 'http://tv.naver.com/v/395837',
'md5': '8a38e35354d26a17f73f4e90094febd3',
'md5': '7791205fa89dbed2f5e3eb16d287ff05',
'info_dict': {
'id': '395837',
'ext': 'mp4',
'title': '9년이 지나도 아픈 기억, 전효성의 아버지',
'description': 'md5:eb6aca9d457b922e43860a2a2b1984d3',
'description': 'md5:c76be23e21403a6473d8119678cdb5cb',
'timestamp': 1432030253,
'upload_date': '20150519',
'uploader': '4가지쇼 시즌2',
'uploader_id': 'wrappinguser29',
'uploader': '4가지쇼',
'uploader_id': '4show',
'uploader_url': 'https://tv.naver.com/4show',
'view_count': int,
'like_count': int,
'comment_count': int,
'duration': 277,
'thumbnail': r're:^https?://.*\.jpg',
},
'skip': 'Georestricted',
}, {
'url': 'http://tvcast.naver.com/v/81652',
'only_matching': True,
@ -147,56 +175,63 @@ class NaverIE(NaverBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)
content = self._download_json(
'https://tv.naver.com/api/json/v/' + video_id,
video_id, headers=self.geo_verification_headers())
player_info_json = content.get('playerInfoJson') or {}
current_clip = player_info_json.get('currentClip') or {}
data = self._call_api(f'/clips/{video_id}/play-info', video_id)
vid = current_clip.get('videoId')
in_key = current_clip.get('inKey')
vid = traverse_obj(data, ('clip', 'videoId', {str}))
in_key = traverse_obj(data, ('play', 'inKey', {str}))
if not vid or not in_key:
player_auth = try_get(player_info_json, lambda x: x['playerOption']['auth'])
if player_auth == 'notCountry':
self.raise_geo_restricted(countries=['KR'])
elif player_auth == 'notLogin':
self.raise_login_required()
raise ExtractorError('couldn\'t extract vid and key')
raise ExtractorError('Unable to extract video info')
info = self._extract_video_info(video_id, vid, in_key)
info.update({
'description': clean_html(current_clip.get('description')),
'timestamp': int_or_none(current_clip.get('firstExposureTime'), 1000),
'duration': parse_duration(current_clip.get('displayPlayTime')),
'like_count': int_or_none(current_clip.get('recommendPoint')),
'age_limit': 19 if current_clip.get('adult') else None,
})
info.update(traverse_obj(data, ('clip', {
'title': 'title',
'description': 'description',
'timestamp': ('firstExposureDatetime', {parse_iso8601}),
'duration': ('playTime', {int_or_none}),
'like_count': ('likeItCount', {int_or_none}),
'view_count': ('playCount', {int_or_none}),
'comment_count': ('commentCount', {int_or_none}),
'thumbnail': ('thumbnailImageUrl', {url_or_none}),
'uploader': 'channelName',
'uploader_id': 'channelId',
'uploader_url': ('channelUrl', {url_or_none}),
'age_limit': ('adultVideo', {lambda x: 19 if x else None}),
})))
return info
class NaverLiveIE(InfoExtractor):
class NaverLiveIE(NaverBaseIE):
IE_NAME = 'Naver:live'
_VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/l/(?P<id>\d+)'
_GEO_BYPASS = False
_TESTS = [{
'url': 'https://tv.naver.com/l/52010',
'url': 'https://tv.naver.com/l/127062',
'info_dict': {
'id': '52010',
'id': '127062',
'ext': 'mp4',
'title': '[LIVE] 뉴스특보 : "수도권 거리두기, 2주간 2단계로 조정"',
'description': 'md5:df7f0c237a5ed5e786ce5c91efbeaab3',
'channel_id': 'NTV-ytnnews24-0',
'start_time': 1597026780000,
'live_status': 'is_live',
'channel': '뉴스는 YTN',
'channel_id': 'ytnnews24',
'title': 're:^대한민국 24시간 뉴스 채널 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:f938b5956711beab6f882314ffadf4d5',
'start_time': 1677752280,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
'like_count': int,
},
}, {
'url': 'https://tv.naver.com/l/51549',
'url': 'https://tv.naver.com/l/140535',
'info_dict': {
'id': '51549',
'id': '140535',
'ext': 'mp4',
'title': '연합뉴스TV - 코로나19 뉴스특보',
'description': 'md5:c655e82091bc21e413f549c0eaccc481',
'channel_id': 'NTV-yonhapnewstv-0',
'start_time': 1596406380000,
'live_status': 'is_live',
'channel': 'KBS뉴스',
'channel_id': 'kbsnews',
'start_time': 1696867320,
'title': 're:^언제 어디서나! KBS 뉴스 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:6ad419c0bf2f332829bda3f79c295284',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
'like_count': int,
},
}, {
'url': 'https://tv.naver.com/l/54887',
@ -205,55 +240,27 @@ class NaverLiveIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
page = self._download_webpage(url, video_id, 'Downloading Page', 'Unable to download Page')
secure_url = self._search_regex(r'sApiF:\s+(?:"|\')([^"\']+)', page, 'secureurl')
info = self._extract_video_info(video_id, secure_url)
info.update({
'description': self._og_search_description(page)
})
return info
def _extract_video_info(self, video_id, url):
video_data = self._download_json(url, video_id, headers=self.geo_verification_headers())
meta = video_data.get('meta')
status = meta.get('status')
data = self._call_api(f'/live-end/normal/{video_id}/play-info?renewLastPlayDate=true', video_id)
status = traverse_obj(data, ('live', 'liveStatus'))
if status == 'CLOSED':
raise ExtractorError('Stream is offline.', expected=True)
elif status != 'OPENED':
raise ExtractorError('Unknown status %s' % status)
title = meta.get('title')
stream_list = video_data.get('streams')
if stream_list is None:
raise ExtractorError('Could not get stream data.', expected=True)
formats = []
for quality in stream_list:
if not quality.get('url'):
continue
prop = quality.get('property')
if prop.get('abr'): # This abr doesn't mean Average audio bitrate.
continue
formats.extend(self._extract_m3u8_formats(
quality.get('url'), video_id, 'mp4',
m3u8_id=quality.get('qualityId'), live=True
))
raise ExtractorError(f'Unknown status {status!r}')
return {
'id': video_id,
'title': title,
'formats': formats,
'channel_id': meta.get('channelId'),
'channel_url': meta.get('channelUrl'),
'thumbnail': meta.get('imgUrl'),
'start_time': meta.get('startTime'),
'categories': [meta.get('categoryId')],
'formats': self._extract_m3u8_formats(
traverse_obj(data, ('playbackBody', {json.loads}, 'media', 0, 'path')), video_id, live=True),
**traverse_obj(data, ('live', {
'title': 'title',
'channel': 'channelName',
'channel_id': 'channelId',
'description': 'description',
'like_count': (('likeCount', 'likeItCount'), {int_or_none}),
'thumbnail': ('thumbnailImageUrl', {url_or_none}),
'start_time': (('startTime', 'startDateTime', 'startYmdt'), {parse_iso8601}),
}), get_all=False),
'is_live': True
}

View File

@ -0,0 +1,72 @@
from .common import InfoExtractor
from .brightcove import BrightcoveNewIE
from ..utils import ExtractorError
from ..utils.traversal import traverse_obj
class NineNewsIE(InfoExtractor):
    """Extractor for 9news.com.au pages; videos are delegated to Brightcove."""
    IE_NAME = '9News'
    _VALID_URL = r'https?://(?:www\.)?9news\.com\.au/(?:[\w-]+/){2,3}(?P<id>[\w-]+)/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://www.9news.com.au/videos/national/fair-trading-pulls-dozens-of-toys-from-shelves/clqgc7dvj000y0jnvfism0w5m',
        'md5': 'd1a65b2e9d126e5feb9bc5cb96e62c80',
        'info_dict': {
            'id': '6343717246112',
            'ext': 'mp4',
            'title': 'Fair Trading pulls dozens of toys from shelves',
            'description': 'Fair Trading Australia have been forced to pull dozens of toys from shelves over hazard fears.',
            'thumbnail': 'md5:bdbe44294e2323b762d97acf8843f66c',
            'duration': 93.44,
            'timestamp': 1703231748,
            'upload_date': '20231222',
            'uploader_id': '664969388001',
            'tags': ['networkclip', 'aunews_aunationalninenews', 'christmas presents', 'toys', 'fair trading', 'au_news'],
        }
    }, {
        'url': 'https://www.9news.com.au/world/tape-reveals-donald-trump-pressured-michigan-officials-not-to-certify-2020-vote-a-new-report-says/0b8b880e-7d3c-41b9-b2bd-55bc7e492259',
        'md5': 'a885c44d20898c3e70e9a53e8188cea1',
        'info_dict': {
            'id': '6343587450112',
            'ext': 'mp4',
            'title': 'Trump found ineligible to run for president by state court',
            'description': 'md5:40e6e7db7a4ac6be0e960569a5af6066',
            'thumbnail': 'md5:3e132c48c186039fd06c10787de9bff2',
            'duration': 104.64,
            'timestamp': 1703058034,
            'upload_date': '20231220',
            'uploader_id': '664969388001',
            'tags': ['networkclip', 'aunews_aunationalninenews', 'ineligible', 'presidential candidate', 'donald trump', 'au_news'],
        }
    }, {
        'url': 'https://www.9news.com.au/national/outrage-as-parents-banned-from-giving-gifts-to-kindergarten-teachers/e19b49d4-a1a4-4533-9089-6e10e2d9386a',
        'info_dict': {
            'id': '6343716797112',
            'ext': 'mp4',
            'title': 'Outrage as parents banned from giving gifts to kindergarten teachers',
            'description': 'md5:7a8b0ed2f9e08875fd9a3e86e462bc46',
            'thumbnail': 'md5:5ee4d66717bdd0dee9fc9a705ef041b8',
            'duration': 91.307,
            'timestamp': 1703229584,
            'upload_date': '20231222',
            'uploader_id': '664969388001',
            'tags': ['networkclip', 'aunews_aunationalninenews', 'presents', 'teachers', 'kindergarten', 'au_news'],
        },
    }]

    def _real_extract(self, url):
        article_id = self._match_id(url)
        webpage = self._download_webpage(url, article_id)
        initial_state = self._search_json(
            r'var\s+__INITIAL_STATE__\s*=', webpage, 'initial state', article_id)
        # Video pages expose the Brightcove id directly; article pages embed it
        # in the article's media list instead — try both shapes, first hit wins.
        video_id = traverse_obj(
            initial_state, ('videoIndex', 'currentVideo', 'brightcoveId', {str}),
            ('article', ..., 'media', lambda _, v: v['type'] == 'video', 'urn', {str}), get_all=False)
        account = traverse_obj(initial_state, (
            'videoIndex', 'config', (None, 'video'), 'account', {str}), get_all=False)
        if not video_id or not account:
            raise ExtractorError('Unable to get the required video data')

        # Delegate playback to the Brightcove extractor via the standard player URL
        return self.url_result(
            f'https://players.brightcove.net/{account}/default_default/index.html?videoId={video_id}',
            BrightcoveNewIE, video_id)

View File

@ -12,7 +12,7 @@ from ..utils import (
class PiaproIE(InfoExtractor):
_NETRC_MACHINE = 'piapro'
_VALID_URL = r'https?://piapro\.jp/(?:t|content)/(?P<id>\w+)/?'
_VALID_URL = r'https?://piapro\.jp/(?:t|content)/(?P<id>[\w-]+)/?'
_TESTS = [{
'url': 'https://piapro.jp/t/NXYR',
'md5': 'f7c0f760913fb1d44a1c45a4af793909',
@ -49,6 +49,9 @@ class PiaproIE(InfoExtractor):
}, {
'url': 'https://piapro.jp/content/hcw0z3a169wtemz6',
'only_matching': True
}, {
'url': 'https://piapro.jp/t/-SO-',
'only_matching': True
}]
_login_status = False

135
yt_dlp/extractor/redge.py Normal file
View File

@ -0,0 +1,135 @@
import functools
from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import (
float_or_none,
int_or_none,
join_nonempty,
parse_qs,
update_url_query,
)
from ..utils.traversal import traverse_obj
class RedCDNLivxIE(InfoExtractor):
    """Extractor for RedCDN .livx archived transmissions (Polish parliament CCTV etc.)."""
    _VALID_URL = r'https?://[^.]+\.(?:dcs\.redcdn|atmcdn)\.pl/(?:live(?:dash|hls|ss)|nvr)/o2/(?P<tenant>[^/?#]+)/(?P<id>[^?#]+)\.livx'
    IE_NAME = 'redcdnlivx'

    _TESTS = [{
        'url': 'https://r.dcs.redcdn.pl/livedash/o2/senat/ENC02/channel.livx?indexMode=true&startTime=638272860000&stopTime=638292544000',
        'info_dict': {
            'id': 'ENC02-638272860000-638292544000',
            'ext': 'mp4',
            'title': 'ENC02',
            'duration': 19683.982,
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://r.dcs.redcdn.pl/livedash/o2/sejm/ENC18/live.livx?indexMode=true&startTime=722333096000&stopTime=722335562000',
        'info_dict': {
            'id': 'ENC18-722333096000-722335562000',
            'ext': 'mp4',
            'title': 'ENC18',
            'duration': 2463.995,
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://r.dcs.redcdn.pl/livehls/o2/sportevolution/live/triathlon2018/warsaw.livx/playlist.m3u8?startTime=550305000000&stopTime=550327620000',
        'info_dict': {
            'id': 'triathlon2018-warsaw-550305000000-550327620000',
            'ext': 'mp4',
            'title': 'triathlon2018/warsaw',
            'duration': 22619.98,
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://n-25-12.dcs.redcdn.pl/nvr/o2/sejm/Migacz-ENC01/1.livx?startTime=722347200000&stopTime=722367345000',
        'only_matching': True,
    }, {
        'url': 'https://redir.atmcdn.pl/nvr/o2/sejm/ENC08/1.livx?startTime=503831270000&stopTime=503840040000',
        'only_matching': True,
    }]

    """
    Known methods (first in url path):
    - `livedash` - DASH MPD
    - `livehls` - HTTP Live Streaming
    - `livess` - IIS Smooth Streaming
    - `nvr` - CCTV mode, directly returns a file, typically flv, avc1, aac
    - `sc` - shoutcast/icecast (audio streams, like radio)
    """

    def _real_extract(self, url):
        tenant, path = self._match_valid_url(url).group('tenant', 'id')
        qs = parse_qs(url)
        start_time = traverse_obj(qs, ('startTime', 0, {int_or_none}))
        stop_time = traverse_obj(qs, ('stopTime', 0, {int_or_none}))

        def livx_mode(mode):
            # Build the URL for one delivery method, carrying over the time
            # window and the per-method path suffix / query expectations.
            suffix = ''
            if mode == 'livess':
                suffix = '/manifest'
            elif mode == 'livehls':
                suffix = '/playlist.m3u8'
            file_qs = {}
            if start_time:
                file_qs['startTime'] = start_time
            if stop_time:
                file_qs['stopTime'] = stop_time
            if mode == 'nvr':
                file_qs['nolimit'] = 1
            elif mode != 'sc':
                file_qs['indexMode'] = 'true'
            return update_url_query(f'https://r.dcs.redcdn.pl/{mode}/o2/{tenant}/{path}.livx{suffix}', file_qs)

        # no id or title for a transmission. making ones up.
        title = path \
            .replace('/live', '').replace('live/', '') \
            .replace('/channel', '').replace('channel/', '') \
            .strip('/')
        video_id = join_nonempty(title.replace('/', '-'), start_time, stop_time)

        formats = []
        # downloading the manifest separately here instead of _extract_ism_formats
        # to also get some stream metadata (TimeScale/Duration/IsLive below)
        ism_res = self._download_xml_handle(
            livx_mode('livess'), video_id,
            note='Downloading ISM manifest',
            errnote='Failed to download ISM manifest',
            fatal=False)
        ism_doc = None
        if ism_res is not False:
            ism_doc, ism_urlh = ism_res
            formats, _ = self._parse_ism_formats_and_subtitles(ism_doc, ism_urlh.url, 'ss')

        # The nvr endpoint redirects to a direct file (typically flv); a HEAD
        # request follows the redirect without downloading the payload.
        nvr_urlh = self._request_webpage(
            HEADRequest(livx_mode('nvr')), video_id, 'Follow flv file redirect', fatal=False,
            expected_status=lambda _: True)
        if nvr_urlh and nvr_urlh.status == 200:
            formats.append({
                'url': nvr_urlh.url,
                'ext': 'flv',
                'format_id': 'direct-0',
                'preference': -1,  # might be slow
            })

        formats.extend(self._extract_mpd_formats(livx_mode('livedash'), video_id, mpd_id='dash', fatal=False))
        formats.extend(self._extract_m3u8_formats(
            livx_mode('livehls'), video_id, m3u8_id='hls', ext='mp4', fatal=False))

        # Duration is expressed in ISM TimeScale units (default 10 MHz ticks)
        time_scale = traverse_obj(ism_doc, ('@TimeScale', {int_or_none})) or 10000000
        duration = traverse_obj(
            ism_doc, ('@Duration', {functools.partial(float_or_none, scale=time_scale)})) or None

        live_status = None
        if traverse_obj(ism_doc, '@IsLive') == 'TRUE':
            live_status = 'is_live'
        elif duration:
            live_status = 'was_live'

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'duration': duration,
            'live_status': live_status,
        }

View File

@ -1,8 +1,34 @@
from .common import InfoExtractor
from ..utils import format_field, parse_iso8601
from ..utils import (
MEDIA_EXTENSIONS,
determine_ext,
parse_iso8601,
traverse_obj,
url_or_none,
)
class RinseFMIE(InfoExtractor):
class RinseFMBaseIE(InfoExtractor):
    """Shared entry-parsing logic for Rinse FM episode and show extractors."""

    @staticmethod
    def _parse_entry(entry):
        # Map a Next.js "entry" object onto a yt-dlp info dict. Episodes are
        # audio-only (vcodec 'none'); extractor fields are pinned to RinseFMIE
        # so playlist entries are attributed to the episode extractor.
        return {
            **traverse_obj(entry, {
                'id': ('id', {str}),
                'title': ('title', {str}),
                'url': ('fileUrl', {url_or_none}),
                'release_timestamp': ('episodeDate', {parse_iso8601}),
                'thumbnail': ('featuredImage', 0, 'filename', {str},
                              {lambda x: x and f'https://rinse.imgix.net/media/{x}'}),
                'webpage_url': ('slug', {str},
                                {lambda x: x and f'https://rinse.fm/episodes/{x}'}),
            }),
            'vcodec': 'none',
            'extractor_key': RinseFMIE.ie_key(),
            'extractor': RinseFMIE.IE_NAME,
        }
class RinseFMIE(RinseFMBaseIE):
_VALID_URL = r'https?://(?:www\.)?rinse\.fm/episodes/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://rinse.fm/episodes/club-glow-15-12-2023-2000/',
@ -22,12 +48,42 @@ class RinseFMIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
entry = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['entry']
return {
'id': entry['id'],
'title': entry.get('title'),
'url': entry['fileUrl'],
'vcodec': 'none',
'release_timestamp': parse_iso8601(entry.get('episodeDate')),
'thumbnail': format_field(
entry, [('featuredImage', 0, 'filename')], 'https://rinse.imgix.net/media/%s', default=None),
}
return self._parse_entry(entry)
class RinseFMArtistPlaylistIE(RinseFMBaseIE):
    """Playlist extractor for a Rinse FM show page (all of its episodes)."""
    _VALID_URL = r'https?://(?:www\.)?rinse\.fm/shows/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://rinse.fm/shows/resources/',
        'info_dict': {
            'id': 'resources',
            'title': '[re]sources',
            'description': '[re]sources est un label parisien piloté par le DJ et producteur Tommy Kid.'
        },
        'playlist_mincount': 40
    }, {
        'url': 'https://rinse.fm/shows/ivy/',
        'info_dict': {
            'id': 'ivy',
            'title': '[IVY]',
            'description': 'A dedicated space for DNB/Turbo House and 4x4.'
        },
        'playlist_mincount': 7
    }]

    def _entries(self, data):
        # Only yield episodes whose fileUrl has a recognized audio extension;
        # entries without a playable audio file are silently skipped.
        for episode in traverse_obj(data, (
                'props', 'pageProps', 'episodes', lambda _, v: determine_ext(v['fileUrl']) in MEDIA_EXTENSIONS.audio)
        ):
            yield self._parse_entry(episode)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        title = self._og_search_title(webpage) or self._html_search_meta('title', webpage)
        description = self._og_search_description(webpage) or self._html_search_meta(
            'description', webpage)
        # Episode metadata lives in the embedded Next.js data blob
        data = self._search_nextjs_data(webpage, playlist_id)

        return self.playlist_result(
            self._entries(data), playlist_id, title, description=description)

View File

@ -1,7 +1,20 @@
import re
from ..utils import parse_duration, unescapeHTML
from .common import InfoExtractor
from ..utils import (
clean_html,
extract_attributes,
get_element_by_attribute,
get_element_by_class,
get_element_html_by_class,
get_elements_by_class,
int_or_none,
join_nonempty,
parse_count,
parse_duration,
unescapeHTML,
)
from ..utils.traversal import traverse_obj
class Rule34VideoIE(InfoExtractor):
@ -17,7 +30,16 @@ class Rule34VideoIE(InfoExtractor):
'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065157/preview.jpg',
'duration': 347.0,
'age_limit': 18,
'tags': 'count:14'
'view_count': int,
'like_count': int,
'comment_count': int,
'timestamp': 1639872000,
'description': 'https://discord.gg/aBqPrHSHvv',
'upload_date': '20211219',
'uploader': 'Sweet HMV',
'uploader_url': 'https://rule34video.com/members/22119/',
'categories': ['3D', 'MMD', 'iwara'],
'tags': 'mincount:10'
}
},
{
@ -30,7 +52,17 @@ class Rule34VideoIE(InfoExtractor):
'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065296/preview.jpg',
'duration': 938.0,
'age_limit': 18,
'tags': 'count:50'
'view_count': int,
'like_count': int,
'comment_count': int,
'timestamp': 1640131200,
'description': '',
'creator': 'WildeerStudio',
'upload_date': '20211222',
'uploader': 'CerZule',
'uploader_url': 'https://rule34video.com/members/36281/',
'categories': ['3D', 'Tomb Raider'],
'tags': 'mincount:40'
}
},
]
@ -49,17 +81,44 @@ class Rule34VideoIE(InfoExtractor):
'quality': quality,
})
title = self._html_extract_title(webpage)
thumbnail = self._html_search_regex(r'preview_url:\s+\'([^\']+)\'', webpage, 'thumbnail', default=None)
duration = self._html_search_regex(r'"icon-clock"></i>\s+<span>((?:\d+:?)+)', webpage, 'duration', default=None)
categories, creator, uploader, uploader_url = [None] * 4
for col in get_elements_by_class('col', webpage):
label = clean_html(get_element_by_class('label', col))
if label == 'Categories:':
categories = list(map(clean_html, get_elements_by_class('item', col)))
elif label == 'Artist:':
creator = join_nonempty(*map(clean_html, get_elements_by_class('item', col)), delim=', ')
elif label == 'Uploaded By:':
uploader = clean_html(get_element_by_class('name', col))
uploader_url = extract_attributes(get_element_html_by_class('name', col) or '').get('href')
return {
**traverse_obj(self._search_json_ld(webpage, video_id, default={}), ({
'title': 'title',
'view_count': 'view_count',
'like_count': 'like_count',
'duration': 'duration',
'timestamp': 'timestamp',
'description': 'description',
'thumbnail': ('thumbnails', 0, 'url'),
})),
'id': video_id,
'formats': formats,
'title': title,
'thumbnail': thumbnail,
'duration': parse_duration(duration),
'title': self._html_extract_title(webpage),
'thumbnail': self._html_search_regex(
r'preview_url:\s+\'([^\']+)\'', webpage, 'thumbnail', default=None),
'duration': parse_duration(self._html_search_regex(
r'"icon-clock"></i>\s+<span>((?:\d+:?)+)', webpage, 'duration', default=None)),
'view_count': int_or_none(self._html_search_regex(
r'"icon-eye"></i>\s+<span>([ \d]+)', webpage, 'views', default='').replace(' ', '')),
'like_count': parse_count(get_element_by_class('voters count', webpage)),
'comment_count': int_or_none(self._search_regex(
r'[^(]+\((\d+)\)', get_element_by_attribute('href', '#tab_comments', webpage), 'comment count', fatal=False)),
'age_limit': 18,
'creator': creator,
'uploader': uploader,
'uploader_url': uploader_url,
'categories': categories,
'tags': list(map(unescapeHTML, re.findall(
r'<a class="tag_item"[^>]+\bhref="https://rule34video\.com/tags/\d+/"[^>]*>(?P<tag>[^>]*)</a>', webpage))),
}

218
yt_dlp/extractor/sejmpl.py Normal file
View File

@ -0,0 +1,218 @@
import datetime
from .common import InfoExtractor
from .redge import RedCDNLivxIE
from ..utils import (
clean_html,
join_nonempty,
js_to_json,
strip_or_none,
update_url_query,
)
from ..utils.traversal import traverse_obj
def is_dst(date):
    """Rough EU DST check for a naive (Polish local time) datetime.

    True between the last Sunday of March, 02:00, and the last Sunday of
    October, 03:00 — the CEST window — inclusive on both ends.
    """
    def last_sunday(month):
        month_end = datetime.datetime(date.year, month, 31)
        return month_end - datetime.timedelta(days=month_end.isoweekday() % 7)

    return last_sunday(3).replace(hour=2) <= date <= last_sunday(10).replace(hour=3)


def rfc3339_to_atende(date):
    """Convert an RFC 3339 timestamp string to the "atende" time format:
    milliseconds since 2001-01-01 (978307200 is that date's Unix timestamp),
    shifted forward one hour while DST is in effect.
    """
    parsed = datetime.datetime.fromisoformat(date)
    if is_dst(parsed):
        parsed += datetime.timedelta(hours=1)
    return int((parsed.timestamp() - 978307200) * 1000)
class SejmIE(InfoExtractor):
    """Live and archived transmissions of the Polish Sejm (lower house of
    parliament). A transmission may expose several cameras; the result is a
    playlist with one entry per camera (plus an optional sign-language feed).
    """
    _VALID_URL = (
        r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp(?:\?[^#]*)?#(?P<id>[\dA-F]+)',
        r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp\?(?:[^#]+&)?unid=(?P<id>[\dA-F]+)',
        r'https?://sejm-embed\.redcdn\.pl/[Ss]ejm(?P<term>\d+)\.nsf/VideoFrame\.xsp/(?P<id>[\dA-F]+)',
    )
    IE_NAME = 'sejm'

    _TESTS = [{
        # multiple cameras, Polish sign-language interpreter
        'url': 'https://www.sejm.gov.pl/Sejm10.nsf/transmisje_arch.xsp#6181EF1AD9CEEBB5C1258A6D006452B5',
        'info_dict': {
            'id': '6181EF1AD9CEEBB5C1258A6D006452B5',
            'title': '1. posiedzenie Sejmu X kadencji',
            'duration': 20145,
            'live_status': 'was_live',
            'location': 'Sala Posiedzeń',
        },
        'playlist': [{
            'info_dict': {
                'id': 'ENC01-722340000000-722360145000',
                'ext': 'mp4',
                'duration': 20145,
                'title': '1. posiedzenie Sejmu X kadencji - ENC01',
                'live_status': 'was_live',
            },
        }, {
            'info_dict': {
                'id': 'ENC30-722340000000-722360145000',
                'ext': 'mp4',
                'duration': 20145,
                'title': '1. posiedzenie Sejmu X kadencji - ENC30',
                'live_status': 'was_live',
            },
        }, {
            'info_dict': {
                'id': 'ENC31-722340000000-722360145000',
                'ext': 'mp4',
                'duration': 20145,
                'title': '1. posiedzenie Sejmu X kadencji - ENC31',
                'live_status': 'was_live',
            },
        }, {
            'info_dict': {
                'id': 'ENC32-722340000000-722360145000',
                'ext': 'mp4',
                'duration': 20145,
                'title': '1. posiedzenie Sejmu X kadencji - ENC32',
                'live_status': 'was_live',
            },
        }, {
            # sign lang interpreter
            'info_dict': {
                'id': 'Migacz-ENC01-1-722340000000-722360145000',
                'ext': 'mp4',
                'duration': 20145,
                'title': '1. posiedzenie Sejmu X kadencji - Migacz-ENC01',
                'live_status': 'was_live',
            },
        }],
    }, {
        'url': 'https://www.sejm.gov.pl/Sejm8.nsf/transmisje.xsp?unid=9377A9D65518E9A5C125808E002E9FF2',
        'info_dict': {
            'id': '9377A9D65518E9A5C125808E002E9FF2',
            'title': 'Debata "Lepsza Polska: obywatelska"',
            'description': 'KP .Nowoczesna',
            'duration': 8770,
            'live_status': 'was_live',
            'location': 'sala kolumnowa im. Kazimierza Pużaka (bud. C-D)',
        },
        'playlist': [{
            'info_dict': {
                'id': 'ENC08-1-503831270000-503840040000',
                'ext': 'mp4',
                'duration': 8770,
                'title': 'Debata "Lepsza Polska: obywatelska" - ENC08',
                'live_status': 'was_live',
            },
        }],
    }, {
        # 7th term is very special, since it does not use redcdn livx
        'url': 'https://www.sejm.gov.pl/sejm7.nsf/transmisje_arch.xsp?rok=2015&month=11#A6E6D475ECCC6FE5C1257EF90034817F',
        'info_dict': {
            'id': 'A6E6D475ECCC6FE5C1257EF90034817F',
            'title': 'Konferencja prasowa - Stanowisko SLD ws. składu nowego rządu',
            'description': 'SLD - Biuro Prasowe Klubu',
            'duration': 514,
            'location': 'sala 101/bud. C',
            'live_status': 'was_live',
        },
        'playlist': [{
            'info_dict': {
                'id': 'A6E6D475ECCC6FE5C1257EF90034817F',
                'ext': 'mp4',
                'title': 'Konferencja prasowa - Stanowisko SLD ws. składu nowego rządu',
                'duration': 514,
            },
        }],
    }, {
        'url': 'https://sejm-embed.redcdn.pl/Sejm10.nsf/VideoFrame.xsp/FED58EABB97FBD53C1258A7400386492',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        term, video_id = self._match_valid_url(url).group('term', 'id')
        # The embed frame contains the JS `cameras` array parsed further down
        frame = self._download_webpage(
            f'https://sejm-embed.redcdn.pl/Sejm{term}.nsf/VideoFrame.xsp/{video_id}',
            video_id)
        # despite it says "transmisje_arch", it works for live streams too!
        data = self._download_json(
            f'https://www.sejm.gov.pl/Sejm{term}.nsf/transmisje_arch.xsp/json/{video_id}',
            video_id)
        params = data['params']

        title = strip_or_none(data.get('title'))

        if data.get('status') == 'VIDEO_ENDED':
            live_status = 'was_live'
        elif data.get('status') == 'VIDEO_PLAYING':
            live_status = 'is_live'
        else:
            live_status = None
            self.report_warning(f'unknown status: {data.get("status")}')

        # "atende" epoch milliseconds (see rfc3339_to_atende above)
        start_time = rfc3339_to_atende(params['start'])
        # current streams have a stop time of *expected* end of session, but actual times
        # can change during the transmission. setting a stop_time would artificially
        # end the stream at that time, while the session actually keeps going.
        if live_status == 'was_live':
            stop_time = rfc3339_to_atende(params['stop'])
            duration = (stop_time - start_time) // 1000
        else:
            stop_time, duration = None, None

        entries = []

        def add_entry(file, legacy_file=False):
            # Append one camera stream to `entries`. Legacy (7th-term) files
            # are used directly; newer ones are RedCDN Livx URLs that get the
            # start/stop window attached as query parameters.
            if not file:
                return
            file = self._proto_relative_url(file)
            if not legacy_file:
                file = update_url_query(file, {'startTime': start_time})
                if stop_time is not None:
                    file = update_url_query(file, {'stopTime': stop_time})
                stream_id = self._search_regex(r'/o2/sejm/([^/]+)/[^./]+\.livx', file, 'stream id')
            common_info = {
                'url': file,
                'duration': duration,
            }
            if legacy_file:
                entries.append({
                    **common_info,
                    'id': video_id,
                    'title': title,
                })
            else:
                entries.append({
                    **common_info,
                    '_type': 'url_transparent',
                    'ie_key': RedCDNLivxIE.ie_key(),
                    'id': stream_id,
                    'title': join_nonempty(title, stream_id, delim=' - '),
                })

        cameras = self._search_json(
            r'var\s+cameras\s*=', frame, 'camera list', video_id,
            contains_pattern=r'\[(?s:.+)\]', transform_source=js_to_json,
            fatal=False) or []
        for camera_file in traverse_obj(cameras, (..., 'file', {dict})):
            if camera_file.get('flv'):
                add_entry(camera_file['flv'])
            elif camera_file.get('mp4'):
                # this is only a thing in 7th term. no streams before, and starting 8th it's redcdn livx
                add_entry(camera_file['mp4'], legacy_file=True)
            else:
                self.report_warning('Unknown camera stream type found')

        # `mig` flags the availability of a sign-language interpreter stream
        if params.get('mig'):
            add_entry(self._search_regex(r"var sliUrl\s*=\s*'([^']+)'", frame, 'sign language interpreter url', fatal=False))

        return {
            '_type': 'playlist',
            'entries': entries,
            'id': video_id,
            'title': title,
            'description': clean_html(data.get('desc')) or None,
            'duration': duration,
            'live_status': live_status,
            'location': strip_or_none(data.get('location')),
        }

View File

@ -0,0 +1,101 @@
from .common import InfoExtractor
from ..utils import ExtractorError, determine_ext, parse_iso8601, url_or_none
from ..utils.traversal import traverse_obj
class TrtWorldIE(InfoExtractor):
    _VALID_URL = r'https?://www\.trtworld\.com/video/[\w-]+/[\w-]+-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.trtworld.com/video/news/turkiye-switches-to-sustainable-tourism-16067690',
        'info_dict': {
            'id': '16067690',
            'ext': 'mp4',
            'title': 'Türkiye switches to sustainable tourism',
            'release_timestamp': 1701529569,
            'release_date': '20231202',
            'thumbnail': 'https://cdn-i.pr.trt.com.tr/trtworld/17647563_0-0-1920-1080.jpeg',
            'description': 'md5:0a975c04257fb529c8f99c7b76a2cf12',
        }
    }, {
        'url': 'https://www.trtworld.com/video/one-offs/frames-from-anatolia-recreating-a-james-bond-scene-in-istanbuls-grand-bazaar-14541780',
        'info_dict': {
            'id': '14541780',
            'ext': 'mp4',
            'title': 'Frames From Anatolia: Recreating a James Bond Scene in Istanbuls Grand Bazaar',
            'release_timestamp': 1692440844,
            'release_date': '20230819',
            'thumbnail': 'https://cdn-i.pr.trt.com.tr/trtworld/16939810_0-0-1920-1080.jpeg',
            'description': 'md5:4050e21570cc3c40b6c9badae800a94f',
        }
    }, {
        'url': 'https://www.trtworld.com/video/the-newsmakers/can-sudan-find-peace-amidst-failed-transition-to-democracy-12904760',
        'info_dict': {
            'id': '12904760',
            'ext': 'mp4',
            'title': 'Can Sudan find peace amidst failed transition to democracy?',
            'release_timestamp': 1681972747,
            'release_date': '20230420',
            'thumbnail': 'http://cdni0.trtworld.com/w768/q70/154214_NMYOUTUBETEMPLATE1_1681833018736.jpg'
        }
    }, {
        'url': 'https://www.trtworld.com/video/africa-matters/locals-learning-to-cope-with-rising-tides-of-kenyas-great-lakes-16059545',
        'info_dict': {
            'id': 'zEns2dWl00w',
            'ext': 'mp4',
            'title': "Locals learning to cope with rising tides of Kenya's Great Lakes",
            'thumbnail': 'https://i.ytimg.com/vi/zEns2dWl00w/maxresdefault.jpg',
            'description': 'md5:3ad9d7c5234d752a4ead4340c79c6b8d',
            'channel_id': 'UC7fWeaHhqgM4Ry-RMpM2YYw',
            'channel_url': 'https://www.youtube.com/channel/UC7fWeaHhqgM4Ry-RMpM2YYw',
            'duration': 210,
            'view_count': int,
            'age_limit': 0,
            'webpage_url': 'https://www.youtube.com/watch?v=zEns2dWl00w',
            'categories': ['News & Politics'],
            'channel': 'TRT World',
            'channel_follower_count': int,
            'channel_is_verified': True,
            'uploader': 'TRT World',
            'uploader_id': '@trtworld',
            'uploader_url': 'https://www.youtube.com/@trtworld',
            'upload_date': '20231202',
            'availability': 'public',
            'comment_count': int,
            'playable_in_embed': True,
            'tags': [],
            'live_status': 'not_live',
            'like_count': int,
        }
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        platforms = self._search_nuxt_data(webpage, display_id)['videoData']['content']['platforms']

        formats = []
        candidate_urls = traverse_obj(platforms, (
            ('website', 'ott'), 'metadata', ('hls_url', 'url'), {url_or_none}))
        for fmt_url in candidate_urls:
            # NB: Website sometimes serves mp4 files under `hls_url` key
            if determine_ext(fmt_url) != 'm3u8':
                formats.append({
                    'format_id': 'http',
                    'url': fmt_url,
                })
            else:
                formats.extend(self._extract_m3u8_formats(fmt_url, display_id, fatal=False))

        if not formats:
            # Some pages only host the video on YouTube; delegate in that case
            youtube_id = traverse_obj(platforms, ('youtube', 'metadata', 'youtubeId'))
            if not youtube_id:
                raise ExtractorError('No video found', expected=True)
            return self.url_result(youtube_id, 'Youtube')

        metadata = traverse_obj(platforms, (('website', 'ott'), {
            'title': ('fields', 'title', 'text', {str}),
            'description': ('fields', 'description', 'text', {str}),
            'thumbnail': ('fields', 'thumbnail', 'url', {url_or_none}),
            'release_timestamp': ('published', 'date', {parse_iso8601}),
        }), get_all=False)
        return {
            'id': display_id,
            'formats': formats,
            **metadata,
        }

View File

@ -8,6 +8,7 @@ import warnings
from ..dependencies import brotli, requests, urllib3
from ..utils import bug_reports_message, int_or_none, variadic
from ..utils.networking import normalize_url
if requests is None:
raise ImportError('requests module is not installed')
@ -199,6 +200,10 @@ class RequestsSession(requests.sessions.Session):
prepared_request.method = new_method
# Requests fails to resolve dot segments on absolute redirect locations
# See: https://github.com/yt-dlp/yt-dlp/issues/9020
prepared_request.url = normalize_url(prepared_request.url)
def rebuild_auth(self, prepared_request, response):
# HACK: undo status code change from rebuild_method, if applicable.
# rebuild_auth runs after requests would remove headers/body based on status code

View File

@ -1,9 +1,8 @@
from __future__ import annotations
import typing
import urllib.error
from ..utils import YoutubeDLError, deprecation_warning
from ..utils import YoutubeDLError
if typing.TYPE_CHECKING:
from .common import RequestHandler, Response
@ -101,117 +100,4 @@ class ProxyError(TransportError):
pass
class _CompatHTTPError(urllib.error.HTTPError, HTTPError):
    """
    Provides backwards compatibility with urllib.error.HTTPError.
    Do not use this class directly, use HTTPError instead.
    """

    def __init__(self, http_error: HTTPError):
        # Populate the urllib.error.HTTPError base from the wrapped error so
        # isinstance checks and legacy attribute access keep working
        super().__init__(
            url=http_error.response.url,
            code=http_error.status,
            msg=http_error.msg,
            hdrs=http_error.response.headers,
            fp=http_error.response
        )
        self._closer.close_called = True  # Disable auto close
        self._http_error = http_error
        HTTPError.__init__(self, http_error.response, redirect_loop=http_error.redirect_loop)

    # `status`/`reason` delegate silently (no deprecation warning); their
    # setters intentionally ignore assignment so the wrapped error stays
    # authoritative
    @property
    def status(self):
        return self._http_error.status

    @status.setter
    def status(self, value):
        return

    @property
    def reason(self):
        return self._http_error.reason

    @reason.setter
    def reason(self, value):
        return

    # The accessors below are deprecated urllib-era API: each emits a
    # deprecation warning and forwards to the wrapped error/response
    @property
    def headers(self):
        deprecation_warning('HTTPError.headers is deprecated, use HTTPError.response.headers instead')
        return self._http_error.response.headers

    @headers.setter
    def headers(self, value):
        return

    def info(self):
        deprecation_warning('HTTPError.info() is deprecated, use HTTPError.response.headers instead')
        return self.response.headers

    def getcode(self):
        deprecation_warning('HTTPError.getcode is deprecated, use HTTPError.status instead')
        return self.status

    def geturl(self):
        deprecation_warning('HTTPError.geturl is deprecated, use HTTPError.response.url instead')
        return self.response.url

    @property
    def code(self):
        deprecation_warning('HTTPError.code is deprecated, use HTTPError.status instead')
        return self.status

    @code.setter
    def code(self, value):
        return

    @property
    def url(self):
        deprecation_warning('HTTPError.url is deprecated, use HTTPError.response.url instead')
        return self.response.url

    @url.setter
    def url(self, value):
        return

    @property
    def hdrs(self):
        deprecation_warning('HTTPError.hdrs is deprecated, use HTTPError.response.headers instead')
        return self.response.headers

    @hdrs.setter
    def hdrs(self, value):
        return

    @property
    def filename(self):
        deprecation_warning('HTTPError.filename is deprecated, use HTTPError.response.url instead')
        return self.response.url

    @filename.setter
    def filename(self, value):
        return

    def __getattr__(self, name):
        # File operations are passed through the response.
        # Warn for some commonly used ones
        passthrough_warnings = {
            'read': 'response.read()',
            # technically possibly due to passthrough, but we should discourage this
            'get_header': 'response.get_header()',
            'readable': 'response.readable()',
            'closed': 'response.closed',
            'tell': 'response.tell()',
        }
        if name in passthrough_warnings:
            deprecation_warning(f'HTTPError.{name} is deprecated, use HTTPError.{passthrough_warnings[name]} instead')
        return super().__getattr__(name)

    def __str__(self):
        return str(self._http_error)

    def __repr__(self):
        return repr(self._http_error)


# Tuple grouping the networking-related exception classes for catch-all use
network_exceptions = (HTTPError, TransportError)