mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-07-06 02:43:37 +00:00
Merge remote-tracking branch 'upstream/master'
This commit is contained in:
commit
d8d31be98e
5
.github/workflows/core.yml
vendored
5
.github/workflows/core.yml
vendored
|
@ -13,13 +13,16 @@ jobs:
|
||||||
matrix:
|
matrix:
|
||||||
os: [ubuntu-latest]
|
os: [ubuntu-latest]
|
||||||
# CPython 3.11 is in quick-test
|
# CPython 3.11 is in quick-test
|
||||||
python-version: ['3.8', '3.9', '3.10', pypy-3.7, pypy-3.8]
|
python-version: ['3.8', '3.9', '3.10', '3.12-dev', pypy-3.7, pypy-3.8, pypy-3.10]
|
||||||
run-tests-ext: [sh]
|
run-tests-ext: [sh]
|
||||||
include:
|
include:
|
||||||
# atleast one of each CPython/PyPy tests must be in windows
|
# atleast one of each CPython/PyPy tests must be in windows
|
||||||
- os: windows-latest
|
- os: windows-latest
|
||||||
python-version: '3.7'
|
python-version: '3.7'
|
||||||
run-tests-ext: bat
|
run-tests-ext: bat
|
||||||
|
- os: windows-latest
|
||||||
|
python-version: '3.12-dev'
|
||||||
|
run-tests-ext: bat
|
||||||
- os: windows-latest
|
- os: windows-latest
|
||||||
python-version: pypy-3.9
|
python-version: pypy-3.9
|
||||||
run-tests-ext: bat
|
run-tests-ext: bat
|
||||||
|
|
|
@ -76,7 +76,7 @@
|
||||||
|
|
||||||
# NEW FEATURES
|
# NEW FEATURES
|
||||||
|
|
||||||
* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@42f2d4**](https://github.com/ytdl-org/youtube-dl/commit/07af47960f3bb262ead02490ce65c8c45c01741e) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))
|
* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@66ab08**](https://github.com/ytdl-org/youtube-dl/commit/66ab0814c4baa2dc79c2dd5287bc0ad61a37c5b9) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))
|
||||||
|
|
||||||
* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API
|
* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API
|
||||||
|
|
||||||
|
@ -1854,7 +1854,7 @@ #### rokfinchannel
|
||||||
* `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`
|
* `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`
|
||||||
|
|
||||||
#### twitter
|
#### twitter
|
||||||
* `legacy_api`: Force usage of the legacy Twitter API instead of the GraphQL API for tweet extraction. Has no effect if login cookies are passed
|
* `api`: Select one of `graphql` (default), `legacy` or `syndication` as the API for tweet extraction. Has no effect if logged in
|
||||||
|
|
||||||
#### stacommu, wrestleuniverse
|
#### stacommu, wrestleuniverse
|
||||||
* `device_id`: UUID value assigned by the website and used to enforce device limits for paid livestream content. Can be found in browser local storage
|
* `device_id`: UUID value assigned by the website and used to enforce device limits for paid livestream content. Can be found in browser local storage
|
||||||
|
|
|
@ -68,6 +68,25 @@
|
||||||
{
|
{
|
||||||
"action": "change",
|
"action": "change",
|
||||||
"when": "b03fa7834579a01cc5fba48c0e73488a16683d48",
|
"when": "b03fa7834579a01cc5fba48c0e73488a16683d48",
|
||||||
"short": "[ie/twitter] Revert 92315c03774cfabb3a921884326beb4b981f786b"
|
"short": "[ie/twitter] Revert 92315c03774cfabb3a921884326beb4b981f786b",
|
||||||
|
"authors": ["pukkandan"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action": "change",
|
||||||
|
"when": "fcd6a76adc49d5cd8783985c7ce35384b72e545f",
|
||||||
|
"short": "[test] Add tests for socks proxies (#7908)",
|
||||||
|
"authors": ["coletdjnz"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action": "change",
|
||||||
|
"when": "4bf912282a34b58b6b35d8f7e6be535770c89c76",
|
||||||
|
"short": "[rh:urllib] Remove dot segments during URL normalization (#7662)",
|
||||||
|
"authors": ["coletdjnz"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action": "change",
|
||||||
|
"when": "59e92b1f1833440bb2190f847eb735cf0f90bc85",
|
||||||
|
"short": "[rh:urllib] Simplify gzip decoding (#7611)",
|
||||||
|
"authors": ["Grub4K"]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
|
@ -31,35 +31,27 @@ class CommitGroup(enum.Enum):
|
||||||
EXTRACTOR = 'Extractor'
|
EXTRACTOR = 'Extractor'
|
||||||
DOWNLOADER = 'Downloader'
|
DOWNLOADER = 'Downloader'
|
||||||
POSTPROCESSOR = 'Postprocessor'
|
POSTPROCESSOR = 'Postprocessor'
|
||||||
|
NETWORKING = 'Networking'
|
||||||
MISC = 'Misc.'
|
MISC = 'Misc.'
|
||||||
|
|
||||||
@classmethod
|
|
||||||
@property
|
|
||||||
def ignorable_prefixes(cls):
|
|
||||||
return ('core', 'downloader', 'extractor', 'misc', 'postprocessor', 'upstream')
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@lru_cache
|
@lru_cache
|
||||||
def commit_lookup(cls):
|
def subgroup_lookup(cls):
|
||||||
return {
|
return {
|
||||||
name: group
|
name: group
|
||||||
for group, names in {
|
for group, names in {
|
||||||
cls.PRIORITY: {'priority'},
|
|
||||||
cls.CORE: {
|
cls.CORE: {
|
||||||
'aes',
|
'aes',
|
||||||
'cache',
|
'cache',
|
||||||
'compat_utils',
|
'compat_utils',
|
||||||
'compat',
|
'compat',
|
||||||
'cookies',
|
'cookies',
|
||||||
'core',
|
|
||||||
'dependencies',
|
'dependencies',
|
||||||
'formats',
|
'formats',
|
||||||
'jsinterp',
|
'jsinterp',
|
||||||
'networking',
|
|
||||||
'outtmpl',
|
'outtmpl',
|
||||||
'plugins',
|
'plugins',
|
||||||
'update',
|
'update',
|
||||||
'upstream',
|
|
||||||
'utils',
|
'utils',
|
||||||
},
|
},
|
||||||
cls.MISC: {
|
cls.MISC: {
|
||||||
|
@ -67,23 +59,40 @@ def commit_lookup(cls):
|
||||||
'cleanup',
|
'cleanup',
|
||||||
'devscripts',
|
'devscripts',
|
||||||
'docs',
|
'docs',
|
||||||
'misc',
|
|
||||||
'test',
|
'test',
|
||||||
},
|
},
|
||||||
cls.EXTRACTOR: {'extractor', 'ie'},
|
cls.NETWORKING: {
|
||||||
cls.DOWNLOADER: {'downloader', 'fd'},
|
'rh',
|
||||||
cls.POSTPROCESSOR: {'postprocessor', 'pp'},
|
},
|
||||||
}.items()
|
}.items()
|
||||||
for name in names
|
for name in names
|
||||||
}
|
}
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get(cls, value):
|
@lru_cache
|
||||||
result = cls.commit_lookup().get(value)
|
def group_lookup(cls):
|
||||||
if result:
|
result = {
|
||||||
logger.debug(f'Mapped {value!r} => {result.name}')
|
'fd': cls.DOWNLOADER,
|
||||||
|
'ie': cls.EXTRACTOR,
|
||||||
|
'pp': cls.POSTPROCESSOR,
|
||||||
|
'upstream': cls.CORE,
|
||||||
|
}
|
||||||
|
result.update({item.name.lower(): item for item in iter(cls)})
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get(cls, value: str) -> tuple[CommitGroup | None, str | None]:
|
||||||
|
group, _, subgroup = (group.strip().lower() for group in value.partition('/'))
|
||||||
|
|
||||||
|
result = cls.group_lookup().get(group)
|
||||||
|
if not result:
|
||||||
|
if subgroup:
|
||||||
|
return None, value
|
||||||
|
subgroup = group
|
||||||
|
result = cls.subgroup_lookup().get(subgroup)
|
||||||
|
|
||||||
|
return result, subgroup or None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Commit:
|
class Commit:
|
||||||
|
@ -198,19 +207,23 @@ def _prepare_cleanup_misc_items(self, items):
|
||||||
for commit_infos in cleanup_misc_items.values():
|
for commit_infos in cleanup_misc_items.values():
|
||||||
sorted_items.append(CommitInfo(
|
sorted_items.append(CommitInfo(
|
||||||
'cleanup', ('Miscellaneous',), ', '.join(
|
'cleanup', ('Miscellaneous',), ', '.join(
|
||||||
self._format_message_link(None, info.commit.hash).strip()
|
self._format_message_link(None, info.commit.hash)
|
||||||
for info in sorted(commit_infos, key=lambda item: item.commit.hash or '')),
|
for info in sorted(commit_infos, key=lambda item: item.commit.hash or '')),
|
||||||
[], Commit(None, '', commit_infos[0].commit.authors), []))
|
[], Commit(None, '', commit_infos[0].commit.authors), []))
|
||||||
|
|
||||||
return sorted_items
|
return sorted_items
|
||||||
|
|
||||||
def format_single_change(self, info):
|
def format_single_change(self, info: CommitInfo):
|
||||||
message = self._format_message_link(info.message, info.commit.hash)
|
message, sep, rest = info.message.partition('\n')
|
||||||
|
if '[' not in message:
|
||||||
|
# If the message doesn't already contain markdown links, try to add a link to the commit
|
||||||
|
message = self._format_message_link(message, info.commit.hash)
|
||||||
|
|
||||||
if info.issues:
|
if info.issues:
|
||||||
message = message.replace('\n', f' ({self._format_issues(info.issues)})\n', 1)
|
message = f'{message} ({self._format_issues(info.issues)})'
|
||||||
|
|
||||||
if info.commit.authors:
|
if info.commit.authors:
|
||||||
message = message.replace('\n', f' by {self._format_authors(info.commit.authors)}\n', 1)
|
message = f'{message} by {self._format_authors(info.commit.authors)}'
|
||||||
|
|
||||||
if info.fixes:
|
if info.fixes:
|
||||||
fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes)
|
fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes)
|
||||||
|
@ -219,16 +232,14 @@ def format_single_change(self, info):
|
||||||
if authors != info.commit.authors:
|
if authors != info.commit.authors:
|
||||||
fix_message = f'{fix_message} by {self._format_authors(authors)}'
|
fix_message = f'{fix_message} by {self._format_authors(authors)}'
|
||||||
|
|
||||||
message = message.replace('\n', f' (With fixes in {fix_message})\n', 1)
|
message = f'{message} (With fixes in {fix_message})'
|
||||||
|
|
||||||
return message[:-1]
|
return message if not sep else f'{message}{sep}{rest}'
|
||||||
|
|
||||||
def _format_message_link(self, message, hash):
|
def _format_message_link(self, message, hash):
|
||||||
assert message or hash, 'Improperly defined commit message or override'
|
assert message or hash, 'Improperly defined commit message or override'
|
||||||
message = message if message else hash[:HASH_LENGTH]
|
message = message if message else hash[:HASH_LENGTH]
|
||||||
if not hash:
|
return f'[{message}]({self.repo_url}/commit/{hash})' if hash else message
|
||||||
return f'{message}\n'
|
|
||||||
return f'[{message}\n'.replace('\n', f']({self.repo_url}/commit/{hash})\n', 1)
|
|
||||||
|
|
||||||
def _format_issues(self, issues):
|
def _format_issues(self, issues):
|
||||||
return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues)
|
return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues)
|
||||||
|
@ -318,7 +329,7 @@ def _get_commits_and_fixes(self, default_author):
|
||||||
for commitish, revert_commit in reverts.items():
|
for commitish, revert_commit in reverts.items():
|
||||||
reverted = commits.pop(commitish, None)
|
reverted = commits.pop(commitish, None)
|
||||||
if reverted:
|
if reverted:
|
||||||
logger.debug(f'{commit} fully reverted {reverted}')
|
logger.debug(f'{commitish} fully reverted {reverted}')
|
||||||
else:
|
else:
|
||||||
commits[revert_commit.hash] = revert_commit
|
commits[revert_commit.hash] = revert_commit
|
||||||
|
|
||||||
|
@ -337,7 +348,7 @@ def apply_overrides(self, overrides):
|
||||||
for override in overrides:
|
for override in overrides:
|
||||||
when = override.get('when')
|
when = override.get('when')
|
||||||
if when and when not in self and when != self._start:
|
if when and when not in self and when != self._start:
|
||||||
logger.debug(f'Ignored {when!r}, not in commits {self._start!r}')
|
logger.debug(f'Ignored {when!r} override')
|
||||||
continue
|
continue
|
||||||
|
|
||||||
override_hash = override.get('hash') or when
|
override_hash = override.get('hash') or when
|
||||||
|
@ -365,7 +376,7 @@ def groups(self):
|
||||||
for commit in self:
|
for commit in self:
|
||||||
upstream_re = self.UPSTREAM_MERGE_RE.search(commit.short)
|
upstream_re = self.UPSTREAM_MERGE_RE.search(commit.short)
|
||||||
if upstream_re:
|
if upstream_re:
|
||||||
commit.short = f'[core/upstream] Merged with youtube-dl {upstream_re.group(1)}'
|
commit.short = f'[upstream] Merged with youtube-dl {upstream_re.group(1)}'
|
||||||
|
|
||||||
match = self.MESSAGE_RE.fullmatch(commit.short)
|
match = self.MESSAGE_RE.fullmatch(commit.short)
|
||||||
if not match:
|
if not match:
|
||||||
|
@ -410,25 +421,20 @@ def details_from_prefix(prefix):
|
||||||
if not prefix:
|
if not prefix:
|
||||||
return CommitGroup.CORE, None, ()
|
return CommitGroup.CORE, None, ()
|
||||||
|
|
||||||
prefix, _, details = prefix.partition('/')
|
prefix, *sub_details = prefix.split(':')
|
||||||
prefix = prefix.strip()
|
|
||||||
details = details.strip()
|
|
||||||
|
|
||||||
group = CommitGroup.get(prefix.lower())
|
group, details = CommitGroup.get(prefix)
|
||||||
if group is CommitGroup.PRIORITY:
|
if group is CommitGroup.PRIORITY and details:
|
||||||
prefix, _, details = details.partition('/')
|
details = details.partition('/')[2].strip()
|
||||||
|
|
||||||
if not details and prefix and prefix not in CommitGroup.ignorable_prefixes:
|
if details and '/' in details:
|
||||||
logger.debug(f'Replaced details with {prefix!r}')
|
logger.error(f'Prefix is overnested, using first part: {prefix}')
|
||||||
details = prefix or None
|
details = details.partition('/')[0].strip()
|
||||||
|
|
||||||
if details == 'common':
|
if details == 'common':
|
||||||
details = None
|
details = None
|
||||||
|
elif group is CommitGroup.NETWORKING and details == 'rh':
|
||||||
if details:
|
details = 'Request Handler'
|
||||||
details, *sub_details = details.split(':')
|
|
||||||
else:
|
|
||||||
sub_details = []
|
|
||||||
|
|
||||||
return group, details, sub_details
|
return group, details, sub_details
|
||||||
|
|
||||||
|
|
|
@ -10,14 +10,14 @@
|
||||||
import argparse
|
import argparse
|
||||||
import contextlib
|
import contextlib
|
||||||
import sys
|
import sys
|
||||||
from datetime import datetime
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
from devscripts.utils import read_version, run_process, write_file
|
from devscripts.utils import read_version, run_process, write_file
|
||||||
|
|
||||||
|
|
||||||
def get_new_version(version, revision):
|
def get_new_version(version, revision):
|
||||||
if not version:
|
if not version:
|
||||||
version = datetime.utcnow().strftime('%Y.%m.%d')
|
version = datetime.now(timezone.utc).strftime('%Y.%m.%d')
|
||||||
|
|
||||||
if revision:
|
if revision:
|
||||||
assert revision.isdigit(), 'Revision must be a number'
|
assert revision.isdigit(), 'Revision must be a number'
|
||||||
|
|
|
@ -281,17 +281,13 @@ def test_socks4_auth(self, handler, ctx):
|
||||||
rh, proxies={'all': f'socks4://user:@{server_address}'})
|
rh, proxies={'all': f'socks4://user:@{server_address}'})
|
||||||
assert response['version'] == 4
|
assert response['version'] == 4
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler,ctx', [
|
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||||
pytest.param('Urllib', 'http', marks=pytest.mark.xfail(
|
|
||||||
reason='socks4a implementation currently broken when destination is not a domain name'))
|
|
||||||
], indirect=True)
|
|
||||||
def test_socks4a_ipv4_target(self, handler, ctx):
|
def test_socks4a_ipv4_target(self, handler, ctx):
|
||||||
with ctx.socks_server(Socks4ProxyHandler) as server_address:
|
with ctx.socks_server(Socks4ProxyHandler) as server_address:
|
||||||
with handler(proxies={'all': f'socks4a://{server_address}'}) as rh:
|
with handler(proxies={'all': f'socks4a://{server_address}'}) as rh:
|
||||||
response = ctx.socks_info_request(rh, target_domain='127.0.0.1')
|
response = ctx.socks_info_request(rh, target_domain='127.0.0.1')
|
||||||
assert response['version'] == 4
|
assert response['version'] == 4
|
||||||
assert response['ipv4_address'] == '127.0.0.1'
|
assert (response['ipv4_address'] == '127.0.0.1') != (response['domain_address'] == '127.0.0.1')
|
||||||
assert response['domain_address'] is None
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||||
def test_socks4a_domain_target(self, handler, ctx):
|
def test_socks4a_domain_target(self, handler, ctx):
|
||||||
|
@ -302,10 +298,7 @@ def test_socks4a_domain_target(self, handler, ctx):
|
||||||
assert response['ipv4_address'] is None
|
assert response['ipv4_address'] is None
|
||||||
assert response['domain_address'] == 'localhost'
|
assert response['domain_address'] == 'localhost'
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler,ctx', [
|
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||||
pytest.param('Urllib', 'http', marks=pytest.mark.xfail(
|
|
||||||
reason='source_address is not yet supported for socks4 proxies'))
|
|
||||||
], indirect=True)
|
|
||||||
def test_ipv4_client_source_address(self, handler, ctx):
|
def test_ipv4_client_source_address(self, handler, ctx):
|
||||||
with ctx.socks_server(Socks4ProxyHandler) as server_address:
|
with ctx.socks_server(Socks4ProxyHandler) as server_address:
|
||||||
source_address = f'127.0.0.{random.randint(5, 255)}'
|
source_address = f'127.0.0.{random.randint(5, 255)}'
|
||||||
|
@ -327,10 +320,7 @@ def test_socks4_errors(self, handler, ctx, reply_code):
|
||||||
with pytest.raises(ProxyError):
|
with pytest.raises(ProxyError):
|
||||||
ctx.socks_info_request(rh)
|
ctx.socks_info_request(rh)
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler,ctx', [
|
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||||
pytest.param('Urllib', 'http', marks=pytest.mark.xfail(
|
|
||||||
reason='IPv6 socks4 proxies are not yet supported'))
|
|
||||||
], indirect=True)
|
|
||||||
def test_ipv6_socks4_proxy(self, handler, ctx):
|
def test_ipv6_socks4_proxy(self, handler, ctx):
|
||||||
with ctx.socks_server(Socks4ProxyHandler, bind_ip='::1') as server_address:
|
with ctx.socks_server(Socks4ProxyHandler, bind_ip='::1') as server_address:
|
||||||
with handler(proxies={'all': f'socks4://{server_address}'}) as rh:
|
with handler(proxies={'all': f'socks4://{server_address}'}) as rh:
|
||||||
|
@ -342,7 +332,7 @@ def test_ipv6_socks4_proxy(self, handler, ctx):
|
||||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||||
def test_timeout(self, handler, ctx):
|
def test_timeout(self, handler, ctx):
|
||||||
with ctx.socks_server(Socks4ProxyHandler, sleep=2) as server_address:
|
with ctx.socks_server(Socks4ProxyHandler, sleep=2) as server_address:
|
||||||
with handler(proxies={'all': f'socks4://{server_address}'}, timeout=1) as rh:
|
with handler(proxies={'all': f'socks4://{server_address}'}, timeout=0.5) as rh:
|
||||||
with pytest.raises(TransportError):
|
with pytest.raises(TransportError):
|
||||||
ctx.socks_info_request(rh)
|
ctx.socks_info_request(rh)
|
||||||
|
|
||||||
|
@ -383,7 +373,7 @@ def test_socks5_domain_target(self, handler, ctx):
|
||||||
with ctx.socks_server(Socks5ProxyHandler) as server_address:
|
with ctx.socks_server(Socks5ProxyHandler) as server_address:
|
||||||
with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
|
with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
|
||||||
response = ctx.socks_info_request(rh, target_domain='localhost')
|
response = ctx.socks_info_request(rh, target_domain='localhost')
|
||||||
assert response['ipv4_address'] == '127.0.0.1'
|
assert (response['ipv4_address'] == '127.0.0.1') != (response['ipv6_address'] == '::1')
|
||||||
assert response['version'] == 5
|
assert response['version'] == 5
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||||
|
@ -404,22 +394,15 @@ def test_socks5h_ip_target(self, handler, ctx):
|
||||||
assert response['domain_address'] is None
|
assert response['domain_address'] is None
|
||||||
assert response['version'] == 5
|
assert response['version'] == 5
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler,ctx', [
|
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||||
pytest.param('Urllib', 'http', marks=pytest.mark.xfail(
|
|
||||||
reason='IPv6 destination addresses are not yet supported'))
|
|
||||||
], indirect=True)
|
|
||||||
def test_socks5_ipv6_destination(self, handler, ctx):
|
def test_socks5_ipv6_destination(self, handler, ctx):
|
||||||
with ctx.socks_server(Socks5ProxyHandler) as server_address:
|
with ctx.socks_server(Socks5ProxyHandler) as server_address:
|
||||||
with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
|
with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
|
||||||
response = ctx.socks_info_request(rh, target_domain='[::1]')
|
response = ctx.socks_info_request(rh, target_domain='[::1]')
|
||||||
assert response['ipv6_address'] == '::1'
|
assert response['ipv6_address'] == '::1'
|
||||||
assert response['port'] == 80
|
|
||||||
assert response['version'] == 5
|
assert response['version'] == 5
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler,ctx', [
|
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||||
pytest.param('Urllib', 'http', marks=pytest.mark.xfail(
|
|
||||||
reason='IPv6 socks5 proxies are not yet supported'))
|
|
||||||
], indirect=True)
|
|
||||||
def test_ipv6_socks5_proxy(self, handler, ctx):
|
def test_ipv6_socks5_proxy(self, handler, ctx):
|
||||||
with ctx.socks_server(Socks5ProxyHandler, bind_ip='::1') as server_address:
|
with ctx.socks_server(Socks5ProxyHandler, bind_ip='::1') as server_address:
|
||||||
with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
|
with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
|
||||||
|
@ -430,10 +413,7 @@ def test_ipv6_socks5_proxy(self, handler, ctx):
|
||||||
|
|
||||||
# XXX: is there any feasible way of testing IPv6 source addresses?
|
# XXX: is there any feasible way of testing IPv6 source addresses?
|
||||||
# Same would go for non-proxy source_address test...
|
# Same would go for non-proxy source_address test...
|
||||||
@pytest.mark.parametrize('handler,ctx', [
|
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||||
pytest.param('Urllib', 'http', marks=pytest.mark.xfail(
|
|
||||||
reason='source_address is not yet supported for socks5 proxies'))
|
|
||||||
], indirect=True)
|
|
||||||
def test_ipv4_client_source_address(self, handler, ctx):
|
def test_ipv4_client_source_address(self, handler, ctx):
|
||||||
with ctx.socks_server(Socks5ProxyHandler) as server_address:
|
with ctx.socks_server(Socks5ProxyHandler) as server_address:
|
||||||
source_address = f'127.0.0.{random.randint(5, 255)}'
|
source_address = f'127.0.0.{random.randint(5, 255)}'
|
||||||
|
|
|
@ -2591,7 +2591,7 @@ def _fill_common_fields(self, info_dict, final=True):
|
||||||
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
|
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
|
||||||
# see http://bugs.python.org/issue1646728)
|
# see http://bugs.python.org/issue1646728)
|
||||||
with contextlib.suppress(ValueError, OverflowError, OSError):
|
with contextlib.suppress(ValueError, OverflowError, OSError):
|
||||||
upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
|
upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc)
|
||||||
info_dict[date_key] = upload_date.strftime('%Y%m%d')
|
info_dict[date_key] = upload_date.strftime('%Y%m%d')
|
||||||
|
|
||||||
live_keys = ('is_live', 'was_live')
|
live_keys = ('is_live', 'was_live')
|
||||||
|
|
|
@ -15,7 +15,7 @@ def get_package_info(module):
|
||||||
name=getattr(module, '_yt_dlp__identifier', module.__name__),
|
name=getattr(module, '_yt_dlp__identifier', module.__name__),
|
||||||
version=str(next(filter(None, (
|
version=str(next(filter(None, (
|
||||||
getattr(module, attr, None)
|
getattr(module, attr, None)
|
||||||
for attr in ('__version__', 'version_string', 'version')
|
for attr in ('_yt_dlp__version', '__version__', 'version_string', 'version')
|
||||||
)), None)))
|
)), None)))
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -43,6 +43,8 @@
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
# We need to get the underlying `sqlite` version, see https://github.com/yt-dlp/yt-dlp/issues/8152
|
||||||
|
sqlite3._yt_dlp__version = sqlite3.sqlite_version
|
||||||
except ImportError:
|
except ImportError:
|
||||||
# although sqlite3 is part of the standard library, it is possible to compile python without
|
# although sqlite3 is part of the standard library, it is possible to compile python without
|
||||||
# sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544
|
# sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544
|
||||||
|
|
|
@ -122,7 +122,6 @@
|
||||||
from .archiveorg import (
|
from .archiveorg import (
|
||||||
ArchiveOrgIE,
|
ArchiveOrgIE,
|
||||||
YoutubeWebArchiveIE,
|
YoutubeWebArchiveIE,
|
||||||
VLiveWebArchiveIE,
|
|
||||||
)
|
)
|
||||||
from .arcpublishing import ArcPublishingIE
|
from .arcpublishing import ArcPublishingIE
|
||||||
from .arkena import ArkenaIE
|
from .arkena import ArkenaIE
|
||||||
|
@ -165,6 +164,7 @@
|
||||||
AWAANLiveIE,
|
AWAANLiveIE,
|
||||||
AWAANSeasonIE,
|
AWAANSeasonIE,
|
||||||
)
|
)
|
||||||
|
from .axs import AxsIE
|
||||||
from .azmedien import AZMedienIE
|
from .azmedien import AZMedienIE
|
||||||
from .baidu import BaiduVideoIE
|
from .baidu import BaiduVideoIE
|
||||||
from .banbye import (
|
from .banbye import (
|
||||||
|
@ -223,7 +223,11 @@
|
||||||
BiliBiliPlayerIE,
|
BiliBiliPlayerIE,
|
||||||
BilibiliSpaceVideoIE,
|
BilibiliSpaceVideoIE,
|
||||||
BilibiliSpaceAudioIE,
|
BilibiliSpaceAudioIE,
|
||||||
BilibiliSpacePlaylistIE,
|
BilibiliCollectionListIE,
|
||||||
|
BilibiliSeriesListIE,
|
||||||
|
BilibiliFavoritesListIE,
|
||||||
|
BilibiliWatchlaterIE,
|
||||||
|
BilibiliPlaylistIE,
|
||||||
BiliIntlIE,
|
BiliIntlIE,
|
||||||
BiliIntlSeriesIE,
|
BiliIntlSeriesIE,
|
||||||
BiliLiveIE,
|
BiliLiveIE,
|
||||||
|
@ -292,9 +296,11 @@
|
||||||
from .camsoda import CamsodaIE
|
from .camsoda import CamsodaIE
|
||||||
from .camtasia import CamtasiaEmbedIE
|
from .camtasia import CamtasiaEmbedIE
|
||||||
from .camwithher import CamWithHerIE
|
from .camwithher import CamWithHerIE
|
||||||
|
from .canal1 import Canal1IE
|
||||||
from .canalalpha import CanalAlphaIE
|
from .canalalpha import CanalAlphaIE
|
||||||
from .canalplus import CanalplusIE
|
from .canalplus import CanalplusIE
|
||||||
from .canalc2 import Canalc2IE
|
from .canalc2 import Canalc2IE
|
||||||
|
from .caracoltv import CaracolTvPlayIE
|
||||||
from .carambatv import (
|
from .carambatv import (
|
||||||
CarambaTVIE,
|
CarambaTVIE,
|
||||||
CarambaTVPageIE,
|
CarambaTVPageIE,
|
||||||
|
@ -561,6 +567,7 @@
|
||||||
EpiconIE,
|
EpiconIE,
|
||||||
EpiconSeriesIE,
|
EpiconSeriesIE,
|
||||||
)
|
)
|
||||||
|
from .eplus import EplusIbIE
|
||||||
from .epoch import EpochIE
|
from .epoch import EpochIE
|
||||||
from .eporner import EpornerIE
|
from .eporner import EpornerIE
|
||||||
from .eroprofile import (
|
from .eroprofile import (
|
||||||
|
@ -1501,6 +1508,7 @@
|
||||||
from .popcorntimes import PopcorntimesIE
|
from .popcorntimes import PopcorntimesIE
|
||||||
from .popcorntv import PopcornTVIE
|
from .popcorntv import PopcornTVIE
|
||||||
from .porn91 import Porn91IE
|
from .porn91 import Porn91IE
|
||||||
|
from .pornbox import PornboxIE
|
||||||
from .porncom import PornComIE
|
from .porncom import PornComIE
|
||||||
from .pornflip import PornFlipIE
|
from .pornflip import PornFlipIE
|
||||||
from .pornhd import PornHdIE
|
from .pornhd import PornHdIE
|
||||||
|
@ -1519,7 +1527,7 @@
|
||||||
PuhuTVIE,
|
PuhuTVIE,
|
||||||
PuhuTVSerieIE,
|
PuhuTVSerieIE,
|
||||||
)
|
)
|
||||||
from .pr0gramm import Pr0grammStaticIE, Pr0grammIE
|
from .pr0gramm import Pr0grammIE
|
||||||
from .prankcast import PrankCastIE
|
from .prankcast import PrankCastIE
|
||||||
from .premiershiprugby import PremiershipRugbyIE
|
from .premiershiprugby import PremiershipRugbyIE
|
||||||
from .presstv import PressTVIE
|
from .presstv import PressTVIE
|
||||||
|
@ -1555,7 +1563,14 @@
|
||||||
from .radiode import RadioDeIE
|
from .radiode import RadioDeIE
|
||||||
from .radiojavan import RadioJavanIE
|
from .radiojavan import RadioJavanIE
|
||||||
from .radiobremen import RadioBremenIE
|
from .radiobremen import RadioBremenIE
|
||||||
from .radiofrance import FranceCultureIE, RadioFranceIE
|
from .radiofrance import (
|
||||||
|
FranceCultureIE,
|
||||||
|
RadioFranceIE,
|
||||||
|
RadioFranceLiveIE,
|
||||||
|
RadioFrancePodcastIE,
|
||||||
|
RadioFranceProfileIE,
|
||||||
|
RadioFranceProgramScheduleIE,
|
||||||
|
)
|
||||||
from .radiozet import RadioZetPodcastIE
|
from .radiozet import RadioZetPodcastIE
|
||||||
from .radiokapital import (
|
from .radiokapital import (
|
||||||
RadioKapitalIE,
|
RadioKapitalIE,
|
||||||
|
@ -1586,6 +1601,7 @@
|
||||||
from .rbgtum import (
|
from .rbgtum import (
|
||||||
RbgTumIE,
|
RbgTumIE,
|
||||||
RbgTumCourseIE,
|
RbgTumCourseIE,
|
||||||
|
RbgTumNewCourseIE,
|
||||||
)
|
)
|
||||||
from .rcs import (
|
from .rcs import (
|
||||||
RCSIE,
|
RCSIE,
|
||||||
|
@ -1710,7 +1726,10 @@
|
||||||
RuvIE,
|
RuvIE,
|
||||||
RuvSpilaIE
|
RuvSpilaIE
|
||||||
)
|
)
|
||||||
from .s4c import S4CIE
|
from .s4c import (
|
||||||
|
S4CIE,
|
||||||
|
S4CSeriesIE
|
||||||
|
)
|
||||||
from .safari import (
|
from .safari import (
|
||||||
SafariIE,
|
SafariIE,
|
||||||
SafariApiIE,
|
SafariApiIE,
|
||||||
|
@ -1791,7 +1810,10 @@
|
||||||
from .slutload import SlutloadIE
|
from .slutload import SlutloadIE
|
||||||
from .smotrim import SmotrimIE
|
from .smotrim import SmotrimIE
|
||||||
from .snotr import SnotrIE
|
from .snotr import SnotrIE
|
||||||
from .sohu import SohuIE
|
from .sohu import (
|
||||||
|
SohuIE,
|
||||||
|
SohuVIE,
|
||||||
|
)
|
||||||
from .sonyliv import (
|
from .sonyliv import (
|
||||||
SonyLIVIE,
|
SonyLIVIE,
|
||||||
SonyLIVSeriesIE,
|
SonyLIVSeriesIE,
|
||||||
|
@ -2354,7 +2376,8 @@
|
||||||
)
|
)
|
||||||
from .weibo import (
|
from .weibo import (
|
||||||
WeiboIE,
|
WeiboIE,
|
||||||
WeiboMobileIE
|
WeiboVideoIE,
|
||||||
|
WeiboUserIE,
|
||||||
)
|
)
|
||||||
from .weiqitv import WeiqiTVIE
|
from .weiqitv import WeiqiTVIE
|
||||||
from .weverse import (
|
from .weverse import (
|
||||||
|
|
|
@ -12,7 +12,7 @@
|
||||||
import urllib.request
|
import urllib.request
|
||||||
import urllib.response
|
import urllib.response
|
||||||
import uuid
|
import uuid
|
||||||
|
from ..utils.networking import clean_proxies
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..aes import aes_ecb_decrypt
|
from ..aes import aes_ecb_decrypt
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
@ -35,7 +35,10 @@ def add_opener(ydl, handler): # FIXME: Create proper API in .networking
|
||||||
rh = ydl._request_director.handlers['Urllib']
|
rh = ydl._request_director.handlers['Urllib']
|
||||||
if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
|
if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
|
||||||
return
|
return
|
||||||
opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=ydl.proxies)
|
headers = ydl.params['http_headers'].copy()
|
||||||
|
proxies = ydl.proxies.copy()
|
||||||
|
clean_proxies(proxies, headers)
|
||||||
|
opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=proxies)
|
||||||
assert isinstance(opener, urllib.request.OpenerDirector)
|
assert isinstance(opener, urllib.request.OpenerDirector)
|
||||||
opener.add_handler(handler)
|
opener.add_handler(handler)
|
||||||
rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')
|
rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')
|
||||||
|
|
|
@ -22,8 +22,11 @@ def _call_api(self, asin, data=None, note=None):
|
||||||
|
|
||||||
resp = self._download_json(
|
resp = self._download_json(
|
||||||
f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}',
|
f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}',
|
||||||
asin, note=note, headers={'Content-Type': 'application/json'},
|
asin, note=note, headers={
|
||||||
data=json.dumps(data).encode() if data else None,
|
'Content-Type': 'application/json',
|
||||||
|
'currentpageurl': '/',
|
||||||
|
'currentplatform': 'dWeb'
|
||||||
|
}, data=json.dumps(data).encode() if data else None,
|
||||||
query=None if data else {
|
query=None if data else {
|
||||||
'deviceType': 'A1WMMUXPCUJL4N',
|
'deviceType': 'A1WMMUXPCUJL4N',
|
||||||
'contentId': asin,
|
'contentId': asin,
|
||||||
|
@ -46,7 +49,7 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'May I Kiss You?',
|
'title': 'May I Kiss You?',
|
||||||
'language': 'Hindi',
|
'language': 'Hindi',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||||
'description': 'md5:a549bfc747973e04feb707833474e59d',
|
'description': 'md5:a549bfc747973e04feb707833474e59d',
|
||||||
'release_timestamp': 1644710400,
|
'release_timestamp': 1644710400,
|
||||||
'release_date': '20220213',
|
'release_date': '20220213',
|
||||||
|
@ -68,7 +71,7 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Jahaan',
|
'title': 'Jahaan',
|
||||||
'language': 'Hindi',
|
'language': 'Hindi',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||||
'description': 'md5:05eb765a77bf703f322f120ec6867339',
|
'description': 'md5:05eb765a77bf703f322f120ec6867339',
|
||||||
'release_timestamp': 1647475200,
|
'release_timestamp': 1647475200,
|
||||||
'release_date': '20220317',
|
'release_date': '20220317',
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .naver import NaverBaseIE
|
|
||||||
from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
|
from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
|
||||||
from ..compat import compat_urllib_parse_unquote
|
from ..compat import compat_urllib_parse_unquote
|
||||||
from ..networking import HEADRequest
|
from ..networking import HEADRequest
|
||||||
|
@ -947,237 +946,3 @@ def _real_extract(self, url):
|
||||||
if not info.get('title'):
|
if not info.get('title'):
|
||||||
info['title'] = video_id
|
info['title'] = video_id
|
||||||
return info
|
return info
|
||||||
|
|
||||||
|
|
||||||
class VLiveWebArchiveIE(InfoExtractor):
    # Extracts VLive videos from Wayback Machine captures: the archived video
    # page, its main script, the inkey endpoint and the Naver VOD API response
    # are all fetched through web.archive.org rather than from vlive.tv itself.
    IE_NAME = 'web.archive:vlive'
    IE_DESC = 'web.archive.org saved vlive videos'
    _VALID_URL = r'''(?x)
            (?:https?://)?web\.archive\.org/
            (?:web/)?(?:(?P<date>[0-9]{14})?[0-9A-Za-z_*]*/)?  # /web and the version index is optional
            (?:https?(?::|%3[Aa])//)?(?:
                (?:(?:www|m)\.)?vlive\.tv(?::(?:80|443))?/(?:video|embed)/(?P<id>[0-9]+)  # VLive URL
            )
        '''
    _TESTS = [{
        'url': 'https://web.archive.org/web/20221221144331/http://www.vlive.tv/video/1326',
        'md5': 'cc7314812855ce56de70a06a27314983',
        'info_dict': {
            'id': '1326',
            'ext': 'mp4',
            'title': "Girl's Day's Broadcast",
            'creator': "Girl's Day",
            'view_count': int,
            'uploader_id': 'muploader_a',
            'uploader_url': None,
            'uploader': None,
            'upload_date': '20150817',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
            'timestamp': 1439816449,
            'like_count': int,
            'channel': 'Girl\'s Day',
            'channel_id': 'FDF27',
            'comment_count': int,
            'release_timestamp': 1439818140,
            'release_date': '20150817',
            'duration': 1014,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://web.archive.org/web/20221221182103/http://www.vlive.tv/video/16937',
        'info_dict': {
            'id': '16937',
            'ext': 'mp4',
            'title': '첸백시 걍방',
            'creator': 'EXO',
            'view_count': int,
            'subtitles': 'mincount:12',
            'uploader_id': 'muploader_j',
            'uploader_url': 'http://vlive.tv',
            'uploader': None,
            'upload_date': '20161112',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
            'timestamp': 1478923074,
            'like_count': int,
            'channel': 'EXO',
            'channel_id': 'F94BD',
            'comment_count': int,
            'release_timestamp': 1478924280,
            'release_date': '20161112',
            'duration': 906,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870',
        'info_dict': {
            'id': '101870',
            'ext': 'mp4',
            'title': '[ⓓ xV] “레벨이들 매력에 반해? 안 반해?” 움직이는 HD 포토 (레드벨벳:Red Velvet)',
            'creator': 'Dispatch',
            'view_count': int,
            'subtitles': 'mincount:6',
            'uploader_id': 'V__FRA08071',
            'uploader_url': 'http://vlive.tv',
            'uploader': None,
            'upload_date': '20181130',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
            'timestamp': 1543601327,
            'like_count': int,
            'channel': 'Dispatch',
            'channel_id': 'C796F3',
            'comment_count': int,
            'release_timestamp': 1543601040,
            'release_date': '20181130',
            'duration': 279,
        },
        'params': {
            'skip_download': True,
        },
    }]

    # The wayback machine has special timestamp and "mode" values:
    # timestamp:
    #   1 = the first capture
    #   2 = the last capture
    # mode:
    #   id_ = Identity - perform no alterations of the original resource, return it as it was archived.
    _WAYBACK_BASE_URL = 'https://web.archive.org/web/2id_/'

    def _download_archived_page(self, url, video_id, *, timestamp='2', **kwargs):
        # Download the unaltered (`id_`) capture of `url` for the given
        # `timestamp`, retrying through RetryManager. An HTTP 404 is reported
        # as an expected "not archived" error instead of being retried.
        # Extra keyword arguments are forwarded to `_download_webpage`.
        for retry in self.RetryManager():
            try:
                return self._download_webpage(
                    f'https://web.archive.org/web/{timestamp}id_/{url}', video_id, **kwargs)
            except ExtractorError as e:
                if isinstance(e.cause, HTTPError) and e.cause.status == 404:
                    raise ExtractorError('Page was not archived', expected=True)
                retry.error = e
                continue

    def _download_archived_json(self, url, video_id, **kwargs):
        # Same as `_download_archived_page`, but parses the body as JSON.
        # A falsy page (e.g. a failed non-fatal download) is always treated
        # as "not archived", so this never returns None.
        page = self._download_archived_page(url, video_id, **kwargs)
        if not page:
            raise ExtractorError('Page was not archived', expected=True)
        return self._parse_json(page, video_id)

    def _extract_formats_from_m3u8(self, m3u8_url, params, video_id):
        # Returns a list of formats parsed from the archived playlist, or None
        # when the playlist or its first segment is not available.
        m3u8_doc = self._download_archived_page(
            m3u8_url, video_id, note='Downloading m3u8', query=params, fatal=False)
        if not m3u8_doc:
            return None

        # M3U8 document should be changed to archive domain
        m3u8_doc = m3u8_doc.splitlines()
        url_base = m3u8_url.rsplit('/', 1)[0]
        encoded_params = urllib.parse.urlencode(params)  # loop-invariant
        first_segment = None
        for i, line in enumerate(m3u8_doc):
            # Blank lines carry no URI; rewriting them would produce bogus
            # segment URLs (and a bogus availability probe below)
            if not line.strip() or line.startswith('#'):
                continue
            m3u8_doc[i] = f'{self._WAYBACK_BASE_URL}{url_base}/{line}?{encoded_params}'
            first_segment = first_segment or m3u8_doc[i]

        if not first_segment:
            # Playlist without any segment/URI line: nothing to probe or download
            return None

        # Segments may not have been archived. See https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870
        urlh = self._request_webpage(
            HEADRequest(first_segment), video_id, errnote=False,
            fatal=False, note='Check first segment availablity')
        if not urlh:
            return None

        formats, subtitles = self._parse_m3u8_formats_and_subtitles(
            '\n'.join(m3u8_doc), ext='mp4', video_id=video_id)
        if subtitles:
            self._report_ignoring_subs('m3u8')
        return formats

    # Closely follows the logic of the ArchiveTeam grab script
    # See: https://github.com/ArchiveTeam/vlive-grab/blob/master/vlive.lua
    def _real_extract(self, url):
        video_id, url_date = self._match_valid_url(url).group('id', 'date')

        # The date is optional in _VALID_URL; fall back to '2' (the last
        # capture) instead of requesting the literal timestamp "None"
        webpage = self._download_archived_page(
            f'https://www.vlive.tv/video/{video_id}', video_id, timestamp=url_date or '2')

        player_info = self._search_json(r'__PRELOADED_STATE__\s*=', webpage, 'player info', video_id)
        user_country = traverse_obj(player_info, ('common', 'userCountry'))

        main_script_url = self._search_regex(
            r'<script\s+src="([^"]+/js/main\.[^"]+\.js)"', webpage, 'main script url')
        main_script = self._download_archived_page(main_script_url, video_id, note='Downloading main script')
        app_id = self._search_regex(r'appId\s*=\s*"([^"]+)"', main_script, 'app id')

        inkey = self._download_archived_json(
            f'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/{video_id}/inkey', video_id, note='Fetching inkey', query={
                'appId': app_id,
                'platformType': 'PC',
                'gcc': user_country,
                'locale': 'en_US',
            }, fatal=False)

        vod_id = traverse_obj(player_info, ('postDetail', 'post', 'officialVideo', 'vodId'))

        vod_data = self._download_archived_json(
            f'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{vod_id}', video_id, note='Fetching vod data', query={
                'key': inkey.get('inkey'),
                'pid': 'rmcPlayer_16692457559726800',  # partially unix time and partially random. Fixed value used by archiveteam project
                'sid': '2024',
                'ver': '2.0',
                'devt': 'html5_pc',
                'doct': 'json',
                'ptc': 'https',
                'sptc': 'https',
                'cpt': 'vtt',
                'ctls': '%7B%22visible%22%3A%7B%22fullscreen%22%3Atrue%2C%22logo%22%3Afalse%2C%22playbackRate%22%3Afalse%2C%22scrap%22%3Afalse%2C%22playCount%22%3Atrue%2C%22commentCount%22%3Atrue%2C%22title%22%3Atrue%2C%22writer%22%3Atrue%2C%22expand%22%3Afalse%2C%22subtitles%22%3Atrue%2C%22thumbnails%22%3Atrue%2C%22quality%22%3Atrue%2C%22setting%22%3Atrue%2C%22script%22%3Afalse%2C%22logoDimmed%22%3Atrue%2C%22badge%22%3Atrue%2C%22seekingTime%22%3Atrue%2C%22muted%22%3Atrue%2C%22muteButton%22%3Afalse%2C%22viewerNotice%22%3Afalse%2C%22linkCount%22%3Afalse%2C%22createTime%22%3Afalse%2C%22thumbnail%22%3Atrue%7D%2C%22clicked%22%3A%7B%22expand%22%3Afalse%2C%22subtitles%22%3Afalse%7D%7D',
                'pv': '4.26.9',
                'dr': '1920x1080',
                'cpl': 'en_US',
                'lc': 'en_US',
                'adi': '%5B%7B%22type%22%3A%22pre%22%2C%22exposure%22%3Afalse%2C%22replayExposure%22%3Afalse%7D%5D',
                'adu': '%2F',
                'videoId': vod_id,
                'cc': user_country,
            })

        formats = []

        streams = traverse_obj(vod_data, ('streams', ...))
        if len(streams) > 1:
            self.report_warning('Multiple streams found. Only the first stream will be downloaded.')
        # There may be no HLS stream at all (see the MP4 handling below);
        # use an empty mapping rather than failing on streams[0]
        stream = streams[0] if streams else {}

        max_stream = max(
            stream.get('videos') or [],
            key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
        stream_source = max_stream.get('source') if max_stream is not None else None
        if stream_source:
            params = {arg.get('name'): arg.get('value') for arg in stream.get('keys', []) if arg.get('type') == 'param'}
            formats = self._extract_formats_from_m3u8(stream_source, params, video_id) or []

        # For parts of the project MP4 files were archived
        max_video = max(
            traverse_obj(vod_data, ('videos', 'list', ...)),
            key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
        video_source = max_video.get('source') if max_video is not None else None
        if video_source:
            video_url = self._WAYBACK_BASE_URL + video_source
            urlh = self._request_webpage(
                HEADRequest(video_url), video_id, errnote=False,
                fatal=False, note='Check video availablity')
            if urlh:
                formats.append({'url': video_url})

        return {
            'id': video_id,
            'formats': formats,
            **traverse_obj(player_info, ('postDetail', 'post', {
                'title': ('officialVideo', 'title', {str}),
                'creator': ('author', 'nickname', {str}),
                'channel': ('channel', 'channelName', {str}),
                'channel_id': ('channel', 'channelCode', {str}),
                'duration': ('officialVideo', 'playTime', {int_or_none}),
                'view_count': ('officialVideo', 'playCount', {int_or_none}),
                'like_count': ('officialVideo', 'likeCount', {int_or_none}),
                'comment_count': ('officialVideo', 'commentCount', {int_or_none}),
                'timestamp': ('officialVideo', 'createdAt', {lambda x: int_or_none(x, scale=1000)}),
                'release_timestamp': ('officialVideo', 'willStartAt', {lambda x: int_or_none(x, scale=1000)}),
            })),
            **traverse_obj(vod_data, ('meta', {
                'uploader_id': ('user', 'id', {str}),
                'uploader': ('user', 'name', {str}),
                'uploader_url': ('user', 'url', {url_or_none}),
                'thumbnail': ('cover', 'source', {url_or_none}),
            }), expected_type=lambda x: x or None),
            **NaverBaseIE.process_subtitles(vod_data, lambda x: [self._WAYBACK_BASE_URL + x]),
        }
|
|
||||||
|
|
|
@ -12,7 +12,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
|
||||||
|
|
||||||
def _aws_execute_api(self, aws_dict, video_id, query=None):
|
def _aws_execute_api(self, aws_dict, video_id, query=None):
|
||||||
query = query or {}
|
query = query or {}
|
||||||
amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
|
amz_date = datetime.datetime.now(datetime.timezone.utc).strftime('%Y%m%dT%H%M%SZ')
|
||||||
date = amz_date[:8]
|
date = amz_date[:8]
|
||||||
headers = {
|
headers = {
|
||||||
'Accept': 'application/json',
|
'Accept': 'application/json',
|
||||||
|
|
87
yt_dlp/extractor/axs.py
Normal file
87
yt_dlp/extractor/axs.py
Normal file
|
@ -0,0 +1,87 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
|
js_to_json,
|
||||||
|
parse_iso8601,
|
||||||
|
traverse_obj,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AxsIE(InfoExtractor):
    # Extracts a single video from axs.tv. The watch page embeds a `mountObj`
    # JavaScript object holding the video and company IDs; those are used to
    # query the myspotlight.tv player API for the HLS manifest and metadata.
    IE_NAME = 'axs.tv'
    _VALID_URL = r'https?://(?:www\.)?axs\.tv/(?:channel/(?:[^/?#]+/)+)?video/(?P<id>[^/?#]+)'

    _TESTS = [{
        'url': 'https://www.axs.tv/video/5f4dc776b70e4f1c194f22ef/',
        'md5': '8d97736ae8e50c64df528e5e676778cf',
        'info_dict': {
            'id': '5f4dc776b70e4f1c194f22ef',
            'title': 'Small Town',
            'ext': 'mp4',
            'description': 'md5:e314d28bfaa227a4d7ec965fae19997f',
            'upload_date': '20230602',
            'timestamp': 1685729564,
            'duration': 1284.216,
            'series': 'Rock & Roll Road Trip with Sammy Hagar',
            'season_number': 2,
            'episode': '3',
            'thumbnail': 'https://images.dotstudiopro.com/5f4e9d330a0c3b295a7e8394',
        },
    }, {
        'url': 'https://www.axs.tv/channel/rock-star-interview/video/daryl-hall',
        'md5': '300ae795cd8f9984652c0949734ffbdc',
        'info_dict': {
            'id': '5f488148b70e4f392572977c',
            'display_id': 'daryl-hall',
            'title': 'Daryl Hall',
            'ext': 'mp4',
            'description': 'md5:e54ecaa0f4b5683fc9259e9e4b196628',
            'upload_date': '20230214',
            'timestamp': 1676403615,
            'duration': 2570.668,
            'series': 'The Big Interview with Dan Rather',
            'season_number': 3,
            'episode': '5',
            'thumbnail': 'https://images.dotstudiopro.com/5f4d1901f340b50d937cec32',
        },
    }]

    def _real_extract(self, url):
        # The URL only carries a display ID (either a hex ID or a slug); the
        # canonical video ID comes from the page's `mountObj` data.
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        webpage_json_data = self._search_json(
            r'mountObj\s*=', webpage, 'video ID data', display_id,
            transform_source=js_to_json)
        video_id = webpage_json_data['video_id']
        company_id = webpage_json_data['company_id']

        meta = self._download_json(
            f'https://api.myspotlight.tv/dotplayer/video/{company_id}/{video_id}',
            video_id, query={'device_type': 'desktop_web'})['video']

        formats = self._extract_m3u8_formats(
            meta['video_m3u8'], video_id, 'mp4', m3u8_id='hls')

        # Only closed-caption entries with a valid URL are kept; entries
        # without a language code are filed under 'en'
        subtitles = {}
        for cc in traverse_obj(meta, ('closeCaption', lambda _, v: url_or_none(v['srtPath']))):
            subtitles.setdefault(cc.get('srtShortLang') or 'en', []).append(
                {'ext': cc.get('srtExt'), 'url': cc['srtPath']})

        return {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            **traverse_obj(meta, {
                'title': ('title', {str}),
                'description': ('description', {str}),
                'series': ('seriestitle', {str}),
                # The API's `season` is an integer, so it belongs in
                # `season_number`; `season` is reserved for the season title
                'season_number': ('season', {int}),
                'episode': ('episode', {str}),
                'duration': ('duration', {float_or_none}),
                'timestamp': ('updated_at', {parse_iso8601}),
                'thumbnail': ('thumb', {url_or_none}),
            }),
            'subtitles': subtitles,
        }
|
|
@ -31,7 +31,7 @@ def _extract_playlist(self, playlist_id):
|
||||||
|
|
||||||
|
|
||||||
class BanByeIE(BanByeBaseIE):
|
class BanByeIE(BanByeBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?watch/(?P<id>\w+)'
|
_VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?watch/(?P<id>[\w-]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://banbye.com/watch/v_ytfmvkVYLE8T',
|
'url': 'https://banbye.com/watch/v_ytfmvkVYLE8T',
|
||||||
'md5': '2f4ea15c5ca259a73d909b2cfd558eb5',
|
'md5': '2f4ea15c5ca259a73d909b2cfd558eb5',
|
||||||
|
@ -59,7 +59,27 @@ class BanByeIE(BanByeBaseIE):
|
||||||
'title': 'Krzysztof Karoń',
|
'title': 'Krzysztof Karoń',
|
||||||
'id': 'p_Ld82N6gBw_OJ',
|
'id': 'p_Ld82N6gBw_OJ',
|
||||||
},
|
},
|
||||||
'playlist_count': 9,
|
'playlist_mincount': 9,
|
||||||
|
}, {
|
||||||
|
'url': 'https://banbye.com/watch/v_kb6_o1Kyq-CD',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'v_kb6_o1Kyq-CD',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Co tak naprawdę dzieje się we Francji?! Czy Warszawa a potem cała Polska będzie drugim Paryżem?!🤔🇵🇱',
|
||||||
|
'description': 'md5:82be4c0e13eae8ea1ca8b9f2e07226a8',
|
||||||
|
'uploader': 'Marcin Rola - MOIM ZDANIEM!🇵🇱',
|
||||||
|
'channel_id': 'ch_QgWnHvDG2fo5',
|
||||||
|
'channel_url': 'https://banbye.com/channel/ch_QgWnHvDG2fo5',
|
||||||
|
'duration': 597,
|
||||||
|
'timestamp': 1688642656,
|
||||||
|
'upload_date': '20230706',
|
||||||
|
'thumbnail': 'https://cdn.banbye.com/video/v_kb6_o1Kyq-CD/96.webp',
|
||||||
|
'tags': ['Paryż', 'Francja', 'Polska', 'Imigranci', 'Morawiecki', 'Tusk'],
|
||||||
|
'like_count': int,
|
||||||
|
'dislike_count': int,
|
||||||
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -15,11 +15,13 @@
|
||||||
float_or_none,
|
float_or_none,
|
||||||
get_element_by_class,
|
get_element_by_class,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
parse_qs,
|
parse_qs,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
|
traverse_obj,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
|
@ -41,7 +43,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||||
iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
|
iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
|
||||||
music/(?:clips|audiovideo/popular)[/#]|
|
music/(?:clips|audiovideo/popular)[/#]|
|
||||||
radio/player/|
|
radio/player/|
|
||||||
sounds/play/|
|
|
||||||
events/[^/]+/play/[^/]+/
|
events/[^/]+/play/[^/]+/
|
||||||
)
|
)
|
||||||
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
|
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
|
||||||
|
@ -218,20 +219,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||||
# rtmp download
|
# rtmp download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}, {
|
|
||||||
'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb',
|
|
||||||
'note': 'Audio',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'm0007jz9',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra',
|
|
||||||
'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.",
|
|
||||||
'duration': 9840,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
|
||||||
}
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
|
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -844,6 +831,20 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||||
'upload_date': '20190604',
|
'upload_date': '20190604',
|
||||||
'categories': ['Psychology'],
|
'categories': ['Psychology'],
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# BBC Sounds
|
||||||
|
'url': 'https://www.bbc.co.uk/sounds/play/m001q78b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'm001q789',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'The Night Tracks Mix - Music for the darkling hour',
|
||||||
|
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0c00hym.jpg',
|
||||||
|
'chapters': 'count:8',
|
||||||
|
'description': 'md5:815fb51cbdaa270040aab8145b3f1d67',
|
||||||
|
'uploader': 'Radio 3',
|
||||||
|
'duration': 1800,
|
||||||
|
'uploader_id': 'bbc_radio_three',
|
||||||
|
},
|
||||||
}, { # onion routes
|
}, { # onion routes
|
||||||
'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
|
'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -1128,6 +1129,13 @@ def _real_extract(self, url):
|
||||||
'uploader_id': network.get('id'),
|
'uploader_id': network.get('id'),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
|
'chapters': traverse_obj(preload_state, (
|
||||||
|
'tracklist', 'tracks', lambda _, v: float_or_none(v['offset']['start']), {
|
||||||
|
'title': ('titles', {lambda x: join_nonempty(
|
||||||
|
'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
|
||||||
|
'start_time': ('offset', 'start', {float_or_none}),
|
||||||
|
'end_time': ('offset', 'end', {float_or_none}),
|
||||||
|
})) or None,
|
||||||
}
|
}
|
||||||
|
|
||||||
bbc3_config = self._parse_json(
|
bbc3_config = self._parse_json(
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
traverse_obj,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -8,7 +9,8 @@
|
||||||
class BildIE(InfoExtractor):
|
class BildIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+)(?:,auto=true)?\.bild\.html'
|
_VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+)(?:,auto=true)?\.bild\.html'
|
||||||
IE_DESC = 'Bild.de'
|
IE_DESC = 'Bild.de'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
|
'note': 'static MP4 only',
|
||||||
'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html',
|
'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html',
|
||||||
'md5': 'dd495cbd99f2413502a1713a1156ac8a',
|
'md5': 'dd495cbd99f2413502a1713a1156ac8a',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -19,7 +21,19 @@ class BildIE(InfoExtractor):
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'duration': 196,
|
'duration': 196,
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'note': 'static MP4 and HLS',
|
||||||
|
'url': 'https://www.bild.de/video/clip/news-ausland/deftiger-abgang-vom-10m-turm-bademeister-sorgt-fuer-skandal-85158620.bild.html',
|
||||||
|
'md5': 'fb0ed4f09c495d4ba7ce2eee0bb90de1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '85158620',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Der Sprungturm-Skandal',
|
||||||
|
'description': 'md5:709b543c24dc31bbbffee73bccda34ad',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'duration': 69,
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
@ -27,11 +41,23 @@ def _real_extract(self, url):
|
||||||
video_data = self._download_json(
|
video_data = self._download_json(
|
||||||
url.split('.bild.html')[0] + ',view=json.bild.html', video_id)
|
url.split('.bild.html')[0] + ',view=json.bild.html', video_id)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for src in traverse_obj(video_data, ('clipList', 0, 'srces', lambda _, v: v['src'])):
|
||||||
|
src_type = src.get('type')
|
||||||
|
if src_type == 'application/x-mpegURL':
|
||||||
|
formats.extend(
|
||||||
|
self._extract_m3u8_formats(
|
||||||
|
src['src'], video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||||
|
elif src_type == 'video/mp4':
|
||||||
|
formats.append({'url': src['src'], 'format_id': 'http-mp4'})
|
||||||
|
else:
|
||||||
|
self.report_warning(f'Skipping unsupported format type: "{src_type}"')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': unescapeHTML(video_data['title']).strip(),
|
'title': unescapeHTML(video_data['title']).strip(),
|
||||||
'description': unescapeHTML(video_data.get('description')),
|
'description': unescapeHTML(video_data.get('description')),
|
||||||
'url': video_data['clipList'][0]['srces'][0]['src'],
|
'formats': formats,
|
||||||
'thumbnail': video_data.get('poster'),
|
'thumbnail': video_data.get('poster'),
|
||||||
'duration': int_or_none(video_data.get('durationSec')),
|
'duration': int_or_none(video_data.get('durationSec')),
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
import hashlib
|
import hashlib
|
||||||
import itertools
|
import itertools
|
||||||
import math
|
import math
|
||||||
|
import re
|
||||||
import time
|
import time
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
|
@ -14,6 +15,7 @@
|
||||||
GeoRestrictedError,
|
GeoRestrictedError,
|
||||||
InAdvancePagedList,
|
InAdvancePagedList,
|
||||||
OnDemandPagedList,
|
OnDemandPagedList,
|
||||||
|
bool_or_none,
|
||||||
filter_dict,
|
filter_dict,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
format_field,
|
format_field,
|
||||||
|
@ -34,27 +36,31 @@
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
|
variadic,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class BilibiliBaseIE(InfoExtractor):
|
class BilibiliBaseIE(InfoExtractor):
|
||||||
|
_FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
|
||||||
|
|
||||||
def extract_formats(self, play_info):
|
def extract_formats(self, play_info):
|
||||||
format_names = {
|
format_names = {
|
||||||
r['quality']: traverse_obj(r, 'new_description', 'display_desc')
|
r['quality']: traverse_obj(r, 'new_description', 'display_desc')
|
||||||
for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
|
for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
|
||||||
}
|
}
|
||||||
|
|
||||||
audios = traverse_obj(play_info, ('dash', 'audio', ...))
|
audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
|
||||||
flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
|
flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
|
||||||
if flac_audio:
|
if flac_audio:
|
||||||
audios.append(flac_audio)
|
audios.append(flac_audio)
|
||||||
formats = [{
|
formats = [{
|
||||||
'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
|
'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
|
||||||
'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
|
'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
|
||||||
'acodec': audio.get('codecs'),
|
'acodec': traverse_obj(audio, ('codecs', {str.lower})),
|
||||||
'vcodec': 'none',
|
'vcodec': 'none',
|
||||||
'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
|
'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
|
||||||
'filesize': int_or_none(audio.get('size'))
|
'filesize': int_or_none(audio.get('size')),
|
||||||
|
'format_id': str_or_none(audio.get('id')),
|
||||||
} for audio in audios]
|
} for audio in audios]
|
||||||
|
|
||||||
formats.extend({
|
formats.extend({
|
||||||
|
@ -65,9 +71,13 @@ def extract_formats(self, play_info):
|
||||||
'height': int_or_none(video.get('height')),
|
'height': int_or_none(video.get('height')),
|
||||||
'vcodec': video.get('codecs'),
|
'vcodec': video.get('codecs'),
|
||||||
'acodec': 'none' if audios else None,
|
'acodec': 'none' if audios else None,
|
||||||
|
'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
|
||||||
'tbr': float_or_none(video.get('bandwidth'), scale=1000),
|
'tbr': float_or_none(video.get('bandwidth'), scale=1000),
|
||||||
'filesize': int_or_none(video.get('size')),
|
'filesize': int_or_none(video.get('size')),
|
||||||
'quality': int_or_none(video.get('id')),
|
'quality': int_or_none(video.get('id')),
|
||||||
|
'format_id': traverse_obj(
|
||||||
|
video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
|
||||||
|
('id', {str_or_none}), get_all=False),
|
||||||
'format': format_names.get(video.get('id')),
|
'format': format_names.get(video.get('id')),
|
||||||
} for video in traverse_obj(play_info, ('dash', 'video', ...)))
|
} for video in traverse_obj(play_info, ('dash', 'video', ...)))
|
||||||
|
|
||||||
|
@ -149,7 +159,7 @@ def _get_episodes_from_season(self, ss_id, url):
|
||||||
|
|
||||||
|
|
||||||
class BiliBiliIE(BilibiliBaseIE):
|
class BiliBiliIE(BilibiliBaseIE):
|
||||||
_VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.bilibili.com/video/BV13x41117TL',
|
'url': 'https://www.bilibili.com/video/BV13x41117TL',
|
||||||
|
@ -245,7 +255,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
|
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
|
||||||
'duration': 313.557,
|
'duration': 313.557,
|
||||||
'upload_date': '20220709',
|
'upload_date': '20220709',
|
||||||
'uploader': '小夫Tech',
|
'uploader': '小夫太渴',
|
||||||
'timestamp': 1657347907,
|
'timestamp': 1657347907,
|
||||||
'uploader_id': '1326814124',
|
'uploader_id': '1326814124',
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
|
@ -502,7 +512,7 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
|
|
||||||
class BiliBiliBangumiMediaIE(BilibiliBaseIE):
|
class BiliBiliBangumiMediaIE(BilibiliBaseIE):
|
||||||
_VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.bilibili.com/bangumi/media/md24097891',
|
'url': 'https://www.bilibili.com/bangumi/media/md24097891',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -521,7 +531,7 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
|
|
||||||
class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
|
class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
|
||||||
_VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/ss(?P<id>\d+)'
|
_VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.bilibili.com/bangumi/play/ss26801',
|
'url': 'https://www.bilibili.com/bangumi/play/ss26801',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -672,13 +682,35 @@ def get_entries(page_data):
|
||||||
return self.playlist_result(paged_list, playlist_id)
|
return self.playlist_result(paged_list, playlist_id)
|
||||||
|
|
||||||
|
|
||||||
class BilibiliSpacePlaylistIE(BilibiliSpaceBaseIE):
|
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
|
||||||
_VALID_URL = r'https?://space.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail\?sid=(?P<sid>\d+)'
|
def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
    """Yield a ``url_result`` pointing at BiliBiliIE for each video ID in ``page_data``.

    ``bvid_keys`` is one key or a sequence of keys locating the collection of
    items; ``ending_key`` is the field read from every item of that collection.
    """
    # A lone key is wrapped so that it can be splatted into the path below
    container_path = variadic(bvid_keys, (str, bytes, dict, set))
    id_path = (*container_path, ..., ending_key, {str})
    for video_id in traverse_obj(page_data, id_path):
        yield self.url_result(f'https://www.bilibili.com/video/{video_id}', BiliBiliIE, video_id)
|
||||||
|
|
||||||
|
def _get_uploader(self, uid, playlist_id):
    """Return the uploader name scraped from the user's space page, or None.

    The space page is requested with ``fatal=False`` so that a failed
    download does not abort playlist extraction. When no page text comes
    back, the title search is skipped rather than run on a non-string.
    """
    webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
    if not isinstance(webpage, str):
        # NOTE(review): a non-fatal download presumably hands back a falsy
        # placeholder instead of text - confirm against the base class.
        # re.search() raises TypeError on anything that is not str/bytes.
        return None
    return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)
|
||||||
|
|
||||||
|
def _extract_playlist(self, fetch_page, get_metadata, get_entries):
    """Run the parent playlist extraction, then drop the paging bookkeeping.

    ``page_count`` and ``page_size`` are removed from the metadata (when
    present), leaving only fields that callers forward to ``playlist_result``
    as keyword arguments.
    """
    extracted = super()._extract_playlist(fetch_page, get_metadata, get_entries)
    metadata, page_list = extracted
    for paging_key in ('page_count', 'page_size'):
        metadata.pop(paging_key, None)
    return metadata, page_list
|
||||||
|
|
||||||
|
|
||||||
|
class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
|
||||||
|
_VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
|
'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2142762_57445',
|
'id': '2142762_57445',
|
||||||
'title': '《底特律 变人》'
|
'title': '【完结】《底特律 变人》全结局流程解说',
|
||||||
|
'description': '',
|
||||||
|
'uploader': '老戴在此',
|
||||||
|
'uploader_id': '2142762',
|
||||||
|
'timestamp': int,
|
||||||
|
'upload_date': str,
|
||||||
|
'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 31,
|
'playlist_mincount': 31,
|
||||||
}]
|
}]
|
||||||
|
@ -699,22 +731,251 @@ def get_metadata(page_data):
|
||||||
return {
|
return {
|
||||||
'page_count': math.ceil(entry_count / page_size),
|
'page_count': math.ceil(entry_count / page_size),
|
||||||
'page_size': page_size,
|
'page_size': page_size,
|
||||||
'title': traverse_obj(page_data, ('meta', 'name'))
|
'uploader': self._get_uploader(mid, playlist_id),
|
||||||
|
**traverse_obj(page_data, {
|
||||||
|
'title': ('meta', 'name', {str}),
|
||||||
|
'description': ('meta', 'description', {str}),
|
||||||
|
'uploader_id': ('meta', 'mid', {str_or_none}),
|
||||||
|
'timestamp': ('meta', 'ptime', {int_or_none}),
|
||||||
|
'thumbnail': ('meta', 'cover', {url_or_none}),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_entries(page_data):
|
def get_entries(page_data):
|
||||||
for entry in page_data.get('archives', []):
|
return self._get_entries(page_data, 'archives')
|
||||||
yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}',
|
|
||||||
BiliBiliIE, entry['bvid'])
|
|
||||||
|
|
||||||
metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
|
metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
|
||||||
return self.playlist_result(paged_list, playlist_id, metadata['title'])
|
return self.playlist_result(paged_list, playlist_id, **metadata)
|
||||||
|
|
||||||
|
|
||||||
|
class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
|
||||||
|
_VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1958703906_547718',
|
||||||
|
'title': '直播回放',
|
||||||
|
'description': '直播回放',
|
||||||
|
'uploader': '靡烟miya',
|
||||||
|
'uploader_id': '1958703906',
|
||||||
|
'timestamp': 1637985853,
|
||||||
|
'upload_date': '20211127',
|
||||||
|
'modified_timestamp': int,
|
||||||
|
'modified_date': str,
|
||||||
|
},
|
||||||
|
'playlist_mincount': 513,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    """Extract a space "series" list addressed by ``mid`` (user) and ``sid`` (series)."""
    url_match = self._match_valid_url(url)
    mid, sid = url_match.group('mid'), url_match.group('sid')
    playlist_id = f'{mid}_{sid}'

    # Series-level metadata is requested non-fatally; the entry pages below are not
    series_info = self._download_json(
        f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False)
    playlist_meta = traverse_obj(series_info, {
        'title': ('data', 'meta', 'name', {str}),
        'description': ('data', 'meta', 'description', {str}),
        'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
        'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
        'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
    })

    def fetch_page(page_idx):
        # The API page number ("pn") is the given index shifted by one
        api_query = {'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30}
        response = self._download_json(
            'https://api.bilibili.com/x/series/archives',
            playlist_id, note=f'Downloading page {page_idx}', query=api_query)
        return response['data']

    def get_metadata(page_data):
        paging = page_data['page']
        page_size = paging['size']
        entry_count = paging['total']
        page_count = math.ceil(entry_count / page_size)
        return {
            'page_count': page_count,
            'page_size': page_size,
            'uploader': self._get_uploader(mid, playlist_id),
            **playlist_meta
        }

    def get_entries(page_data):
        return self._get_entries(page_data, 'archives')

    metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
    return self.playlist_result(paged_list, playlist_id, **metadata)
|
||||||
|
|
||||||
|
|
||||||
|
class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
|
||||||
|
_VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1103407912',
|
||||||
|
'title': '【V2】(旧)',
|
||||||
|
'description': '',
|
||||||
|
'uploader': '晓月春日',
|
||||||
|
'uploader_id': '84912',
|
||||||
|
'timestamp': 1604905176,
|
||||||
|
'upload_date': '20201109',
|
||||||
|
'modified_timestamp': int,
|
||||||
|
'modified_date': str,
|
||||||
|
'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
},
|
||||||
|
'playlist_mincount': 22,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    """Extract a favorites list: metadata from the list API, entries from the ids API."""
    fid = self._match_id(url)

    list_info = self._download_json(
        f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
        fid, note='Downloading favlist metadata')
    is_private = list_info['code'] == -403
    if is_private:
        self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')

    # Fetched before the metadata is assembled, exactly once
    ids_response = self._download_json(
        f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
        fid, note='Download favlist entries')
    entries = self._get_entries(ids_response, 'data')

    list_metadata = traverse_obj(list_info, ('data', 'info', {
        'title': ('title', {str}),
        'description': ('intro', {str}),
        'uploader': ('upper', 'name', {str}),
        'uploader_id': ('upper', 'mid', {str_or_none}),
        'timestamp': ('ctime', {int_or_none}),
        'modified_timestamp': ('mtime', {int_or_none}),
        'thumbnail': ('cover', {url_or_none}),
        'view_count': ('cnt_info', 'play', {int_or_none}),
        'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
    }))
    return self.playlist_result(entries, fid, **list_metadata)
|
||||||
|
|
||||||
|
|
||||||
|
class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.bilibili.com/watchlater/#/list',
|
||||||
|
'info_dict': {'id': 'watchlater'},
|
||||||
|
'playlist_mincount': 0,
|
||||||
|
'skip': 'login required',
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    """Extract the watch-later list.

    The playlist ID is the value of the ``DedeUserID`` cookie when one is
    set, otherwise the literal ``'watchlater'``. An API code of -101 is
    reported as "login required".
    """
    user_cookie = self._get_cookies(url).get('DedeUserID')
    list_id = user_cookie.value if hasattr(user_cookie, 'value') else 'watchlater'

    watchlater_info = self._download_json(
        'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
    if watchlater_info['code'] == -101:
        self.raise_login_required(msg='You need to login to access your watchlater list')

    return self.playlist_result(
        self._get_entries(watchlater_info, ('data', 'list')),
        id=list_id, title='稍后再看')
|
||||||
|
|
||||||
|
|
||||||
|
class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5_547718',
|
||||||
|
'title': '直播回放',
|
||||||
|
'uploader': '靡烟miya',
|
||||||
|
'uploader_id': '1958703906',
|
||||||
|
'timestamp': 1637985853,
|
||||||
|
'upload_date': '20211127',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 513,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5_547718',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 513,
|
||||||
|
'skip': 'redirect url',
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.bilibili.com/list/ml1103407912',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3_1103407912',
|
||||||
|
'title': '【V2】(旧)',
|
||||||
|
'uploader': '晓月春日',
|
||||||
|
'uploader_id': '84912',
|
||||||
|
'timestamp': 1604905176,
|
||||||
|
'upload_date': '20201109',
|
||||||
|
'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
|
||||||
|
},
|
||||||
|
'playlist_mincount': 22,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3_1103407912',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 22,
|
||||||
|
'skip': 'redirect url',
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.bilibili.com/list/watchlater',
|
||||||
|
'info_dict': {'id': 'watchlater'},
|
||||||
|
'playlist_mincount': 0,
|
||||||
|
'skip': 'login required',
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.bilibili.com/medialist/play/watchlater',
|
||||||
|
'info_dict': {'id': 'watchlater'},
|
||||||
|
'playlist_mincount': 0,
|
||||||
|
'skip': 'login required',
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _extract_medialist(self, query, list_id):
    """Yield entries from the medialist API, requesting pages until ``has_more`` is falsy.

    ``query`` is mutated in place: after every page its ``oid`` is set to the
    ID of the last item of that page before the next request is made.
    """
    page_num = 0
    has_more = True
    while has_more:
        page_num += 1
        response = self._download_json(
            'https://api.bilibili.com/x/v2/medialist/resource/list',
            list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}')
        page_data = response['data']
        yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
        query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
        has_more = bool(page_data.get('has_more', False))
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    """Extract a medialist/list page using the ``window.__INITIAL_STATE__`` JSON.

    Any ``error.code`` other than 200 aborts extraction: known ``trueCode``
    values are mapped to a login request or an expected error, everything
    else to a generic ExtractorError.
    """
    list_id = self._match_id(url)
    webpage = self._download_webpage(url, list_id)
    initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)

    status = traverse_obj(initial_state, ('error', 'code', {int_or_none}))
    if status != 200:
        error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
        error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
        if error_code == -400 and list_id == 'watchlater':
            self.raise_login_required('You need to login to access your watchlater playlist')
        if error_code == -403:
            self.raise_login_required('This is a private playlist. You need to login as its owner')
        if error_code == 11010:
            raise ExtractorError('Playlist is no longer available', expected=True)
        raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')

    # Fixed paging parameters first; values read from the page state may override them
    query = {'ps': 20, 'with_current': False}
    query.update(traverse_obj(initial_state, {
        'type': ('playlist', 'type', {int_or_none}),
        'biz_id': ('playlist', 'id', {int_or_none}),
        'tid': ('tid', {int_or_none}),
        'sort_field': ('sortFiled', {int_or_none}),
        'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
    }))

    metadata = {'id': f'{query["type"]}_{query["biz_id"]}'}
    metadata.update(traverse_obj(initial_state, ('mediaListInfo', {
        'title': ('title', {str}),
        'uploader': ('upper', 'name', {str}),
        'uploader_id': ('upper', 'mid', {str_or_none}),
        'timestamp': ('ctime', {int_or_none}),
        'thumbnail': ('cover', {url_or_none}),
    })))

    entries = self._extract_medialist(query, list_id)
    return self.playlist_result(entries, **metadata)
|
||||||
|
|
||||||
|
|
||||||
class BilibiliCategoryIE(InfoExtractor):
|
class BilibiliCategoryIE(InfoExtractor):
|
||||||
IE_NAME = 'Bilibili category extractor'
|
IE_NAME = 'Bilibili category extractor'
|
||||||
_MAX_RESULTS = 1000000
|
_MAX_RESULTS = 1000000
|
||||||
_VALID_URL = r'https?://www\.bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
|
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.bilibili.com/v/kichiku/mad',
|
'url': 'https://www.bilibili.com/v/kichiku/mad',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -1399,7 +1660,7 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
|
|
||||||
class BiliLiveIE(InfoExtractor):
|
class BiliLiveIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://live.bilibili.com/(?:blanc/)?(?P<id>\d+)'
|
_VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://live.bilibili.com/196',
|
'url': 'https://live.bilibili.com/196',
|
||||||
|
|
|
@ -1,56 +1,170 @@
|
||||||
|
import functools
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
extract_attributes,
|
||||||
|
get_element_text_and_html_by_tag,
|
||||||
|
get_elements_by_class,
|
||||||
|
join_nonempty,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
determine_ext,
|
mimetype2ext,
|
||||||
|
unified_strdate,
|
||||||
|
url_or_none,
|
||||||
|
urljoin,
|
||||||
|
variadic,
|
||||||
)
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
def html_get_element(tag=None, cls=None):
    """Build a one-argument callable that pulls the first matching element out of HTML.

    When ``cls`` is given the lookup goes through ``get_elements_by_class``
    (with ``tag`` passed along as a keyword); otherwise it goes through
    ``get_element_text_and_html_by_tag``. At least one of the two arguments
    is required.
    """
    assert tag or cls, 'One of tag or class is required'

    finder = (
        functools.partial(get_elements_by_class, cls, tag=tag) if cls
        else functools.partial(get_element_text_and_html_by_tag, tag))

    def html_get_element_wrapper(html):
        # Normalise the lookup result to a sequence and keep its first item
        matches = variadic(finder(html))
        return matches[0]

    return html_get_element_wrapper
|
||||||
|
|
||||||
|
|
||||||
class BpbIE(InfoExtractor):
|
class BpbIE(InfoExtractor):
|
||||||
IE_DESC = 'Bundeszentrale für politische Bildung'
|
IE_DESC = 'Bundeszentrale für politische Bildung'
|
||||||
_VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P<id>[0-9]+)/'
|
_VALID_URL = r'https?://(?:www\.|m\.)?bpb\.de/(?:[^/?#]+/)*(?P<id>\d+)(?:[/?#]|$)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
|
'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
|
||||||
'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '297',
|
'id': '297',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
'creator': 'Kooperative Berlin',
|
||||||
|
'description': 'md5:f4f75885ba009d3e2b156247a8941ce6',
|
||||||
|
'release_date': '20160115',
|
||||||
|
'series': 'Interview auf dem Geschichtsforum 1989 | 2009',
|
||||||
|
'tags': ['Friedliche Revolution', 'Erinnerungskultur', 'Vergangenheitspolitik', 'DDR 1949 - 1990', 'Freiheitsrecht', 'BStU', 'Deutschland'],
|
||||||
|
'thumbnail': 'https://www.bpb.de/cache/images/7/297_teaser_16x9_1240.jpg?8839D',
|
||||||
'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
|
'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
|
||||||
'description': 'Joachim Gauck, erster Beauftragter für die Stasi-Unterlagen, spricht auf dem Geschichtsforum über die friedliche Revolution 1989 und eine "gewisse Traurigkeit" im Umgang mit der DDR-Vergangenheit.'
|
'uploader': 'Bundeszentrale für politische Bildung',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.bpb.de/mediathek/video/522184/krieg-flucht-und-falschmeldungen-wirstattdesinformation-2/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '522184',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
|
||||||
|
'description': 'md5:f83c795ff8f825a69456a9e51fc15903',
|
||||||
|
'release_date': '20230621',
|
||||||
|
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
|
||||||
|
'thumbnail': 'https://www.bpb.de/cache/images/4/522184_teaser_16x9_1240.png?EABFB',
|
||||||
|
'title': 'md5:9b01ccdbf58dbf9e5c9f6e771a803b1c',
|
||||||
|
'uploader': 'Bundeszentrale für politische Bildung',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.bpb.de/lernen/bewegtbild-und-politische-bildung/webvideo/518789/krieg-flucht-und-falschmeldungen-wirstattdesinformation-1/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '518789',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
|
||||||
|
'description': 'md5:85228aed433e84ff0ff9bc582abd4ea8',
|
||||||
|
'release_date': '20230302',
|
||||||
|
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
|
||||||
|
'thumbnail': 'https://www.bpb.de/cache/images/9/518789_teaser_16x9_1240.jpeg?56D0D',
|
||||||
|
'title': 'md5:3e956f264bb501f6383f10495a401da4',
|
||||||
|
'uploader': 'Bundeszentrale für politische Bildung',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.bpb.de/mediathek/podcasts/apuz-podcast/539727/apuz-20-china/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.bpb.de/mediathek/audio/315813/folge-1-eine-einfuehrung/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '315813',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'creator': 'Axel Schröder',
|
||||||
|
'description': 'md5:eda9d1af34e5912efef5baf54fba4427',
|
||||||
|
'release_date': '20200921',
|
||||||
|
'series': 'Auf Endlagersuche. Der deutsche Weg zu einem sicheren Atommülllager',
|
||||||
|
'tags': ['Atomenergie', 'Endlager', 'hoch-radioaktiver Abfall', 'Endlagersuche', 'Atommüll', 'Atomendlager', 'Gorleben', 'Deutschland'],
|
||||||
|
'thumbnail': 'https://www.bpb.de/cache/images/3/315813_teaser_16x9_1240.png?92A94',
|
||||||
|
'title': 'Folge 1: Eine Einführung',
|
||||||
|
'uploader': 'Bundeszentrale für politische Bildung',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.bpb.de/517806/die-weltanschauung-der-neuen-rechten/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '517806',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'creator': 'Bundeszentrale für politische Bildung',
|
||||||
|
'description': 'md5:594689600e919912aade0b2871cc3fed',
|
||||||
|
'release_date': '20230127',
|
||||||
|
'series': 'Vorträge des Fachtags "Modernisierer. Grenzgänger. Anstifter. Sechs Jahrzehnte \'Neue Rechte\'"',
|
||||||
|
'tags': ['Rechtsextremismus', 'Konservatismus', 'Konservativismus', 'neue Rechte', 'Rechtspopulismus', 'Schnellroda', 'Deutschland'],
|
||||||
|
'thumbnail': 'https://www.bpb.de/cache/images/6/517806_teaser_16x9_1240.png?7A7A0',
|
||||||
|
'title': 'Die Weltanschauung der "Neuen Rechten"',
|
||||||
|
'uploader': 'Bundeszentrale für politische Bildung',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.bpb.de/mediathek/reihen/zahlen-und-fakten-soziale-situation-filme/520153/zahlen-und-fakten-die-soziale-situation-in-deutschland-migration/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
_TITLE_RE = re.compile('(?P<title>[^<]*)<[^>]+>(?P<series>[^<]*)')
|
||||||
|
|
||||||
|
def _parse_vue_attributes(self, name, string, video_id):
    """Return the attributes of the first ``<name ...>`` tag found in ``string``.

    Attributes whose key starts with ``:`` have their value replaced by the
    result of a non-fatal JSON parse (after ``js_to_json`` conversion); all
    other attributes are returned as extracted.
    """
    opening_tag = self._search_regex(rf'(<{name}(?:"[^"]*?"|[^>])*>)', string, name)
    attributes = extract_attributes(opening_tag)

    bound_keys = [key for key in attributes if key.startswith(':')]
    for key in bound_keys:
        attributes[key] = self._parse_json(
            attributes[key], video_id, transform_source=js_to_json, fatal=False)

    return attributes
|
||||||
|
|
||||||
|
@staticmethod
def _process_source(source):
    """Turn one player source entry into a format dict, or None without a usable URL.

    For sources whose type starts with ``video`` the format note is the text
    between the last ``_`` and the last ``.`` of the URL; a note of ``high``
    gets quality 10, anything else quality 0.
    """
    media_url = url_or_none(source['src'])
    if not media_url:
        return None

    source_type = source.get('type', '')
    extension = mimetype2ext(source_type)
    is_video = source_type.startswith('video')

    if is_video:
        url_stem = media_url.rpartition('.')[0]
        note = url_stem.rpartition('_')[2]
    else:
        note = None

    return {
        'url': media_url,
        'ext': extension,
        'vcodec': None if is_video else 'none',
        'quality': 10 if note == 'high' else 0,
        'format_note': note,
        'format_id': join_nonempty(extension, note),
    }
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title_result = traverse_obj(webpage, ({html_get_element(cls='opening-header__title')}, {self._TITLE_RE.match}))
|
||||||
r'<h2 class="white">(.*?)</h2>', webpage, 'title')
|
json_lds = list(self._yield_json_ld(webpage, video_id, fatal=False))
|
||||||
video_info_dicts = re.findall(
|
|
||||||
r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage)
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for video_info in video_info_dicts:
|
|
||||||
video_info = self._parse_json(
|
|
||||||
video_info, video_id, transform_source=js_to_json, fatal=False)
|
|
||||||
if not video_info:
|
|
||||||
continue
|
|
||||||
video_url = video_info.get('src')
|
|
||||||
if not video_url:
|
|
||||||
continue
|
|
||||||
quality = 'high' if '_high' in video_url else 'low'
|
|
||||||
formats.append({
|
|
||||||
'url': video_url,
|
|
||||||
'quality': 10 if quality == 'high' else 0,
|
|
||||||
'format_note': quality,
|
|
||||||
'format_id': '%s-%s' % (quality, determine_ext(video_url)),
|
|
||||||
})
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'title': traverse_obj(title_result, ('title', {str.strip})) or None,
|
||||||
'title': title,
|
# This metadata could be interpreted otherwise, but it fits "series" the most
|
||||||
'description': self._og_search_description(webpage),
|
'series': traverse_obj(title_result, ('series', {str.strip})) or None,
|
||||||
|
'description': join_nonempty(*traverse_obj(webpage, [(
|
||||||
|
{html_get_element(cls='opening-intro')},
|
||||||
|
[{html_get_element(tag='bpb-accordion-item')}, {html_get_element(cls='text-content')}],
|
||||||
|
), {clean_html}]), delim='\n\n') or None,
|
||||||
|
'creator': self._html_search_meta('author', webpage),
|
||||||
|
'uploader': self._html_search_meta('publisher', webpage),
|
||||||
|
'release_date': unified_strdate(self._html_search_meta('date', webpage)),
|
||||||
|
'tags': traverse_obj(json_lds, (..., 'keywords', {lambda x: x.split(',')}, ...)),
|
||||||
|
**traverse_obj(self._parse_vue_attributes('bpb-player', webpage, video_id), {
|
||||||
|
'formats': (':sources', ..., {self._process_source}),
|
||||||
|
'thumbnail': ('poster', {lambda x: urljoin(url, x)}),
|
||||||
|
}),
|
||||||
}
|
}
|
||||||
|
|
39
yt_dlp/extractor/canal1.py
Normal file
39
yt_dlp/extractor/canal1.py
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class Canal1IE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.|noticias\.)?canal1\.com\.co/(?:[^?#&])+/(?P<id>[\w-]+)'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://canal1.com.co/noticias/napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '63b39f6b354977084b85ab54',
|
||||||
|
'display_id': 'napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco',
|
||||||
|
'title': 'Ñapa I Una cadena de producción de arroz que se quedó en veremos y abandonada en el departamento del Chocó',
|
||||||
|
'description': 'md5:bc49c6d64d20610ea1e7daf079a0d013',
|
||||||
|
'thumbnail': r're:^https?://[^?#]+63b39f6b354977084b85ab54',
|
||||||
|
'ext': 'mp4',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://noticias.canal1.com.co/noticias/tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '63b39e93f5fd223aa32250fb',
|
||||||
|
'display_id': 'tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter',
|
||||||
|
'title': 'Tres I El triste récord que impuso Elon Musk, el dueño de Tesla y de Twitter',
|
||||||
|
'description': 'md5:d9f691f131a21ce6767ca6c05d17d791',
|
||||||
|
'thumbnail': r're:^https?://[^?#]+63b39e93f5fd223aa32250fb',
|
||||||
|
'ext': 'mp4',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# Geo-restricted to Colombia
|
||||||
|
'url': 'https://canal1.com.co/programas/guerreros-canal-1/video-inedito-guerreros-despedida-kewin-zarate/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    """Hand extraction over to the player URL found in the page's ``"embedUrl"`` field.

    The result is URL-transparent and carries the slug from the page URL as
    ``display_id``.
    """
    display_id = self._match_id(url)
    webpage = self._download_webpage(url, display_id)
    embed_url = self._search_regex(r'"embedUrl"\s*:\s*"([^"]+)', webpage, 'embed url')

    return self.url_result(embed_url, display_id=display_id, url_transparent=True)
|
136
yt_dlp/extractor/caracoltv.py
Normal file
136
yt_dlp/extractor/caracoltv.py
Normal file
|
@ -0,0 +1,136 @@
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
traverse_obj,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CaracolTvPlayIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://play\.caracoltv\.com/videoDetails/(?P<id>[^/?#]+)'
|
||||||
|
_NETRC_MACHINE = 'caracoltv-play'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://play.caracoltv.com/videoDetails/OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
|
||||||
|
'title': 'La teoría del promedio',
|
||||||
|
'description': 'md5:1cdd6d2c13f19ef0d9649ab81a023ac3',
|
||||||
|
},
|
||||||
|
'playlist_count': 6,
|
||||||
|
}, {
|
||||||
|
'url': 'https://play.caracoltv.com/videoDetails/OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==/ella?season=0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==',
|
||||||
|
'title': 'Ella',
|
||||||
|
'description': 'md5:a639b1feb5ddcc0cff92a489b4e544b8',
|
||||||
|
},
|
||||||
|
'playlist_count': 10,
|
||||||
|
}, {
|
||||||
|
'url': 'https://play.caracoltv.com/videoDetails/OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==/la-vuelta-al-mundo-en-80-risas-2022?season=0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==',
|
||||||
|
'title': 'La vuelta al mundo en 80 risas 2022',
|
||||||
|
'description': 'md5:e97aac36106e5c37ebf947b3350106a4',
|
||||||
|
},
|
||||||
|
'playlist_count': 17,
|
||||||
|
}, {
|
||||||
|
'url': 'https://play.caracoltv.com/videoDetails/MzoxX3BwbjRmNjB1',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
_USER_TOKEN = None
|
||||||
|
|
||||||
|
def _extract_app_token(self, webpage):
    """Build the base64 ``key:secret`` application token for the Basic auth header.

    The key and secret are read from the ``mediation`` object of the site's
    ``coreConfig.js`` when it can be located and parsed; otherwise the
    hard-coded fallback values below are used. Every lookup is best-effort:
    a missing landing page or config script leads to the fallbacks, not to
    an error.
    """
    config_js_path = None
    # The caller downloads the landing page non-fatally, so ``webpage`` is
    # not guaranteed to be text; searching a non-string would raise TypeError.
    if isinstance(webpage, str):
        config_js_path = self._search_regex(
            r'<script[^>]+src\s*=\s*"([^"]+coreConfig\.js[^"]+)', webpage, 'config js url', fatal=False)

    mediation_config = {}
    if config_js_path:
        config_js = self._download_webpage(
            urljoin('https://play.caracoltv.com/', config_js_path), None, fatal=False, note='Extracting JS config')
        # Same guard for the (also non-fatal) config script download
        if isinstance(config_js, str):
            mediation_config = self._search_json(
                r'mediation\s*:', config_js, 'mediation_config', None, transform_source=js_to_json, fatal=False)

    key = traverse_obj(
        mediation_config, ('live', 'key')) or '795cd9c089a1fc48094524a5eba85a3fca1331817c802f601735907c8bbb4f50'
    secret = traverse_obj(
        mediation_config, ('live', 'secret')) or '64dec00a6989ba83d087621465b5e5d38bdac22033b0613b659c442c78976fa0'

    return base64.b64encode(f'{key}:{secret}'.encode()).decode()
|
||||||
|
|
||||||
|
def _perform_login(self, email, password):
|
||||||
|
webpage = self._download_webpage('https://play.caracoltv.com/', None, fatal=False)
|
||||||
|
app_token = self._extract_app_token(webpage)
|
||||||
|
|
||||||
|
bearer_token = self._download_json(
|
||||||
|
'https://eu-gateway.inmobly.com/applications/oauth', None, data=b'', note='Retrieving bearer token',
|
||||||
|
headers={'Authorization': f'Basic {app_token}'})['token']
|
||||||
|
|
||||||
|
self._USER_TOKEN = self._download_json(
|
||||||
|
'https://eu-gateway.inmobly.com/user/login', None, note='Performing login', headers={
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'Authorization': f'Bearer {bearer_token}',
|
||||||
|
}, data=json.dumps({
|
||||||
|
'device_data': {
|
||||||
|
'device_id': str(uuid.uuid4()),
|
||||||
|
'device_token': '',
|
||||||
|
'device_type': 'web'
|
||||||
|
},
|
||||||
|
'login_data': {
|
||||||
|
'enabled': True,
|
||||||
|
'email': email,
|
||||||
|
'password': password,
|
||||||
|
}
|
||||||
|
}).encode())['user_token']
|
||||||
|
|
||||||
|
def _extract_video(self, video_data, series_id=None, season_id=None, season_number=None):
    """Convert one feed item into an info dict.

    @param video_data     Feed item; must contain 'id' and 'stream_url'
    @param series_id      ID of the containing series, if any
    @param season_id      ID of the containing season, if any
    @param season_number  Season order as given by the API (coerced with int_or_none)
    @returns              Info dict with HLS formats, subtitles and series metadata
    @raises KeyError      If 'id' or 'stream_url' is missing from video_data
    """
    video_id = video_data['id']
    # Label the manifest request with the video's own ID. series_id is None
    # for standalone videos and is shared by every episode of a series.
    formats, subtitles = self._extract_m3u8_formats_and_subtitles(
        video_data['stream_url'], video_id, 'mp4')

    return {
        'id': video_id,
        'title': video_data.get('name'),
        'description': video_data.get('description'),
        'formats': formats,
        'subtitles': subtitles,
        'thumbnails': traverse_obj(
            video_data, ('extra_thumbs', ..., {'url': 'thumb_url', 'height': 'height', 'width': 'width'})),
        'series_id': series_id,
        'season_id': season_id,
        'season_number': int_or_none(season_number),
        'episode_number': int_or_none(video_data.get('item_order')),
        # NOTE(review): entry_type 3 is treated as a live entry — confirm against the API
        'is_live': video_data.get('entry_type') == 3,
    }
|
||||||
|
|
||||||
|
def _extract_series_seasons(self, seasons, series_id):
|
||||||
|
for season in seasons:
|
||||||
|
api_response = self._download_json(
|
||||||
|
'https://eu-gateway.inmobly.com/feed', series_id, query={'season_id': season['id']},
|
||||||
|
headers={'Authorization': f'Bearer {self._USER_TOKEN}'})
|
||||||
|
|
||||||
|
season_number = season.get('order')
|
||||||
|
for episode in api_response['items']:
|
||||||
|
yield self._extract_video(episode, series_id, season['id'], season_number)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
series_id = self._match_id(url)
|
||||||
|
|
||||||
|
if self._USER_TOKEN is None:
|
||||||
|
self._perform_login('guest@inmobly.com', 'Test@gus1')
|
||||||
|
|
||||||
|
api_response = self._download_json(
|
||||||
|
'https://eu-gateway.inmobly.com/feed', series_id, query={'include_ids': series_id},
|
||||||
|
headers={'Authorization': f'Bearer {self._USER_TOKEN}'})['items'][0]
|
||||||
|
|
||||||
|
if not api_response.get('seasons'):
|
||||||
|
return self._extract_video(api_response)
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
self._extract_series_seasons(api_response['seasons'], series_id),
|
||||||
|
series_id, **traverse_obj(api_response, {
|
||||||
|
'title': 'name',
|
||||||
|
'description': 'description',
|
||||||
|
}))
|
|
@ -339,12 +339,12 @@ def _new_claims_token(self, email, password):
|
||||||
data = json.dumps({'jwt': sig}).encode()
|
data = json.dumps({'jwt': sig}).encode()
|
||||||
headers = {'content-type': 'application/json', 'ott-device-type': 'web'}
|
headers = {'content-type': 'application/json', 'ott-device-type': 'web'}
|
||||||
resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/token',
|
resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/token',
|
||||||
None, data=data, headers=headers)
|
None, data=data, headers=headers, expected_status=426)
|
||||||
cbc_access_token = resp['accessToken']
|
cbc_access_token = resp['accessToken']
|
||||||
|
|
||||||
headers = {'content-type': 'application/json', 'ott-device-type': 'web', 'ott-access-token': cbc_access_token}
|
headers = {'content-type': 'application/json', 'ott-device-type': 'web', 'ott-access-token': cbc_access_token}
|
||||||
resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/profile',
|
resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/profile',
|
||||||
None, headers=headers)
|
None, headers=headers, expected_status=426)
|
||||||
return resp['claimsToken']
|
return resp['claimsToken']
|
||||||
|
|
||||||
def _get_claims_token_expiry(self):
|
def _get_claims_token_expiry(self):
|
||||||
|
|
|
@ -90,10 +90,17 @@ class CCCPlaylistIE(InfoExtractor):
|
||||||
'id': '30c3',
|
'id': '30c3',
|
||||||
},
|
},
|
||||||
'playlist_count': 135,
|
'playlist_count': 135,
|
||||||
|
}, {
|
||||||
|
'url': 'https://media.ccc.de/c/DS2023',
|
||||||
|
'info_dict': {
|
||||||
|
'title': 'Datenspuren 2023',
|
||||||
|
'id': 'DS2023',
|
||||||
|
},
|
||||||
|
'playlist_count': 37
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id = self._match_id(url).lower()
|
playlist_id = self._match_id(url)
|
||||||
|
|
||||||
conf = self._download_json(
|
conf = self._download_json(
|
||||||
'https://media.ccc.de/public/conferences/' + playlist_id,
|
'https://media.ccc.de/public/conferences/' + playlist_id,
|
||||||
|
|
|
@ -1,31 +1,72 @@
|
||||||
import time
|
import time
|
||||||
import hashlib
|
import hashlib
|
||||||
import re
|
|
||||||
import urllib
|
import urllib
|
||||||
|
import uuid
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from .openload import PhantomJSwrapper
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
UserNotLive,
|
||||||
|
determine_ext,
|
||||||
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
parse_resolution,
|
||||||
|
str_or_none,
|
||||||
|
traverse_obj,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
url_or_none,
|
||||||
|
urlencode_postdata,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class DouyuTVIE(InfoExtractor):
|
class DouyuBaseIE(InfoExtractor):
|
||||||
IE_DESC = '斗鱼'
|
def _download_cryptojs_md5(self, video_id):
|
||||||
|
for url in [
|
||||||
|
'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
|
||||||
|
'https://cdn.bootcdn.net/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
|
||||||
|
]:
|
||||||
|
js_code = self._download_webpage(
|
||||||
|
url, video_id, note='Downloading signing dependency', fatal=False)
|
||||||
|
if js_code:
|
||||||
|
self.cache.store('douyu', 'crypto-js-md5', js_code)
|
||||||
|
return js_code
|
||||||
|
raise ExtractorError('Unable to download JS dependency (crypto-js/md5)')
|
||||||
|
|
||||||
|
def _get_cryptojs_md5(self, video_id):
|
||||||
|
return self.cache.load('douyu', 'crypto-js-md5') or self._download_cryptojs_md5(video_id)
|
||||||
|
|
||||||
|
def _calc_sign(self, sign_func, video_id, a):
    """Run the site's JS signing function and return its output as a flat dict.

    @param sign_func  JS source that defines ub98484234()
    @param video_id   ID used for logging and for fetching the MD5 dependency
    @param a          First argument passed to ub98484234()
    @returns          dict mapping each query-string key printed by the script to its first value
    """
    b = uuid.uuid4().hex
    c = round(time.time())
    md5_js = self._get_cryptojs_md5(video_id)
    # The script prints the signature as a query string on stdout
    js_script = f'{md5_js};{sign_func};console.log(ub98484234("{a}","{b}","{c}"))'
    output = PhantomJSwrapper(self).execute(
        js_script, video_id, note='Executing JS signing script').strip()

    signed_params = {}
    for param, values in urllib.parse.parse_qs(output).items():
        # Only the first value of a repeated key is kept
        signed_params[param] = values[0]
    return signed_params
|
||||||
|
|
||||||
|
def _search_js_sign_func(self, webpage, fatal=True):
|
||||||
|
# The greedy look-behind ensures last possible script tag is matched
|
||||||
|
return self._search_regex(
|
||||||
|
r'(?:<script.*)?<script[^>]*>(.*?ub98484234.*?)</script>', webpage, 'JS sign func', fatal=fatal)
|
||||||
|
|
||||||
|
|
||||||
|
class DouyuTVIE(DouyuBaseIE):
|
||||||
|
IE_DESC = '斗鱼直播'
|
||||||
_VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(topic/\w+\?rid=|(?:[^/]+/))*(?P<id>[A-Za-z0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(topic/\w+\?rid=|(?:[^/]+/))*(?P<id>[A-Za-z0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.douyutv.com/iseven',
|
'url': 'https://www.douyu.com/pigff',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '17732',
|
'id': '24422',
|
||||||
'display_id': 'iseven',
|
'display_id': 'pigff',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
'title': 're:^【PIGFF】.* [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
'description': r're:.*m7show@163\.com.*',
|
'description': r'≥15级牌子看鱼吧置顶帖进粉丝vx群',
|
||||||
'thumbnail': r're:^https?://.*\.png',
|
'thumbnail': str,
|
||||||
'uploader': '7师傅',
|
'uploader': 'pigff',
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
|
'live_status': 'is_live',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
|
@ -85,15 +126,43 @@ class DouyuTVIE(InfoExtractor):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _get_sign_func(self, room_id, video_id):
|
||||||
|
return self._download_json(
|
||||||
|
f'https://www.douyu.com/swf_api/homeH5Enc?rids={room_id}', video_id,
|
||||||
|
note='Getting signing script')['data'][f'room{room_id}']
|
||||||
|
|
||||||
|
def _extract_stream_formats(self, stream_formats):
    """Build the format list from a list of getH5Play API responses.

    Responses whose 'data' yields no stream URL are skipped.
    """
    formats = []
    for stream_info in traverse_obj(stream_formats, (..., 'data')):
        base_url = traverse_obj(stream_info, 'rtmp_url')
        stream_path = traverse_obj(stream_info, 'rtmp_live')
        stream_url = urljoin(base_url, stream_path)
        if not stream_url:
            continue

        rate_id = traverse_obj(stream_info, ('rate', {int_or_none}))
        # Entry of 'multirates' describing the rate that was actually served
        rate_info = traverse_obj(stream_info, ('multirates', lambda _, v: v['rate'] == rate_id), get_all=False)
        ext = determine_ext(stream_url)
        is_hls = ext == 'm3u8'
        formats.append({
            'url': stream_url,
            'format_id': str_or_none(rate_id),
            'ext': 'mp4' if is_hls else ext,
            'protocol': 'm3u8_native' if is_hls else 'https',
            # Python's modulo takes the divisor's sign: 0 stays 0, while rates
            # 1..9999 become negative, so rate 0 sorts above every other rate
            'quality': rate_id % -10000 if rate_id is not None else None,
            **traverse_obj(rate_info, {
                'format': ('name', {str_or_none}),
                'tbr': ('bit', {int_or_none}),
            }),
        })
    return formats
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
if video_id.isdigit():
|
webpage = self._download_webpage(url, video_id)
|
||||||
room_id = video_id
|
room_id = self._search_regex(r'\$ROOM\.room_id\s*=\s*(\d+)', webpage, 'room id')
|
||||||
else:
|
|
||||||
page = self._download_webpage(url, video_id)
|
if self._search_regex(r'"videoLoop"\s*:\s*(\d+)', webpage, 'loop', default='') == '1':
|
||||||
room_id = self._html_search_regex(
|
raise UserNotLive('The channel is auto-playing VODs', video_id=video_id)
|
||||||
r'"room_id\\?"\s*:\s*(\d+),', page, 'room id')
|
if self._search_regex(r'\$ROOM\.show_status\s*=\s*(\d+)', webpage, 'status', default='') == '2':
|
||||||
|
raise UserNotLive(video_id=video_id)
|
||||||
|
|
||||||
# Grab metadata from API
|
# Grab metadata from API
|
||||||
params = {
|
params = {
|
||||||
|
@ -102,110 +171,136 @@ def _real_extract(self, url):
|
||||||
'time': int(time.time()),
|
'time': int(time.time()),
|
||||||
}
|
}
|
||||||
params['auth'] = hashlib.md5(
|
params['auth'] = hashlib.md5(
|
||||||
f'room/{video_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest()
|
f'room/{room_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest()
|
||||||
room = self._download_json(
|
room = traverse_obj(self._download_json(
|
||||||
f'http://www.douyutv.com/api/v1/room/{room_id}', video_id,
|
f'http://www.douyutv.com/api/v1/room/{room_id}', video_id,
|
||||||
note='Downloading room info', query=params)['data']
|
note='Downloading room info', query=params, fatal=False), 'data')
|
||||||
|
|
||||||
# 1 = live, 2 = offline
|
# 1 = live, 2 = offline
|
||||||
if room.get('show_status') == '2':
|
if traverse_obj(room, 'show_status') == '2':
|
||||||
raise ExtractorError('Live stream is offline', expected=True)
|
raise UserNotLive(video_id=video_id)
|
||||||
|
|
||||||
video_url = urljoin('https://hls3-akm.douyucdn.cn/', self._search_regex(r'(live/.*)', room['hls_url'], 'URL'))
|
js_sign_func = self._search_js_sign_func(webpage, fatal=False) or self._get_sign_func(room_id, video_id)
|
||||||
formats, subs = self._extract_m3u8_formats_and_subtitles(video_url, room_id)
|
form_data = {
|
||||||
|
'rate': 0,
|
||||||
|
**self._calc_sign(js_sign_func, video_id, room_id),
|
||||||
|
}
|
||||||
|
stream_formats = [self._download_json(
|
||||||
|
f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
|
||||||
|
video_id, note="Downloading livestream format",
|
||||||
|
data=urlencode_postdata(form_data))]
|
||||||
|
|
||||||
title = unescapeHTML(room['room_name'])
|
for rate_id in traverse_obj(stream_formats[0], ('data', 'multirates', ..., 'rate')):
|
||||||
description = room.get('show_details')
|
if rate_id != traverse_obj(stream_formats[0], ('data', 'rate')):
|
||||||
thumbnail = room.get('room_src')
|
form_data['rate'] = rate_id
|
||||||
uploader = room.get('nickname')
|
stream_formats.append(self._download_json(
|
||||||
|
f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
|
||||||
|
video_id, note=f'Downloading livestream format {rate_id}',
|
||||||
|
data=urlencode_postdata(form_data)))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': room_id,
|
'id': room_id,
|
||||||
'display_id': video_id,
|
'formats': self._extract_stream_formats(stream_formats),
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'uploader': uploader,
|
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
'subtitles': subs,
|
**traverse_obj(room, {
|
||||||
'formats': formats,
|
'display_id': ('url', {str}, {lambda i: i[1:]}),
|
||||||
|
'title': ('room_name', {unescapeHTML}),
|
||||||
|
'description': ('show_details', {str}),
|
||||||
|
'uploader': ('nickname', {str}),
|
||||||
|
'thumbnail': ('room_src', {url_or_none}),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class DouyuShowIE(InfoExtractor):
|
class DouyuShowIE(DouyuBaseIE):
|
||||||
_VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)'
|
_VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://v.douyu.com/show/rjNBdvnVXNzvE2yw',
|
'url': 'https://v.douyu.com/show/mPyq7oVNe5Yv1gLY',
|
||||||
'md5': '0c2cfd068ee2afe657801269b2d86214',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'rjNBdvnVXNzvE2yw',
|
'id': 'mPyq7oVNe5Yv1gLY',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '陈一发儿:砒霜 我有个室友系列!04-01 22点场',
|
'title': '四川人小时候的味道“蒜苗回锅肉”,传统菜不能丢,要常做来吃',
|
||||||
'duration': 7150.08,
|
'duration': 633,
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': str,
|
||||||
'uploader': '陈一发儿',
|
'uploader': '美食作家王刚V',
|
||||||
'uploader_id': 'XrZwYelr5wbK',
|
'uploader_id': 'OVAO4NVx1m7Q',
|
||||||
'uploader_url': 'https://v.douyu.com/author/XrZwYelr5wbK',
|
'timestamp': 1661850002,
|
||||||
'upload_date': '20170402',
|
'upload_date': '20220830',
|
||||||
|
'view_count': int,
|
||||||
|
'tags': ['美食', '美食综合'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw',
|
'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
_FORMATS = {
|
||||||
|
'super': '原画',
|
||||||
|
'high': '超清',
|
||||||
|
'normal': '高清',
|
||||||
|
}
|
||||||
|
|
||||||
|
_QUALITIES = {
|
||||||
|
'super': -1,
|
||||||
|
'high': -2,
|
||||||
|
'normal': -3,
|
||||||
|
}
|
||||||
|
|
||||||
|
_RESOLUTIONS = {
|
||||||
|
'super': '1920x1080',
|
||||||
|
'high': '1280x720',
|
||||||
|
'normal': '852x480',
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
url = url.replace('vmobile.', 'v.')
|
url = url.replace('vmobile.', 'v.')
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
room_info = self._parse_json(self._search_regex(
|
video_info = self._search_json(
|
||||||
r'var\s+\$ROOM\s*=\s*({.+});', webpage, 'room info'), video_id)
|
r'<script>\s*window\.\$DATA\s*=', webpage,
|
||||||
|
'video info', video_id, transform_source=js_to_json)
|
||||||
|
|
||||||
video_info = None
|
js_sign_func = self._search_js_sign_func(webpage)
|
||||||
|
form_data = {
|
||||||
|
'vid': video_id,
|
||||||
|
**self._calc_sign(js_sign_func, video_id, video_info['ROOM']['point_id']),
|
||||||
|
}
|
||||||
|
url_info = self._download_json(
|
||||||
|
'https://v.douyu.com/api/stream/getStreamUrl', video_id,
|
||||||
|
data=urlencode_postdata(form_data), note="Downloading video formats")
|
||||||
|
|
||||||
for trial in range(5):
|
formats = []
|
||||||
# Sometimes Douyu rejects our request. Let's try it more times
|
for name, url in traverse_obj(url_info, ('data', 'thumb_video', {dict.items}, ...)):
|
||||||
try:
|
video_url = traverse_obj(url, ('url', {url_or_none}))
|
||||||
video_info = self._download_json(
|
if video_url:
|
||||||
'https://vmobile.douyu.com/video/getInfo', video_id,
|
ext = determine_ext(video_url)
|
||||||
query={'vid': video_id},
|
formats.append({
|
||||||
headers={
|
'format': self._FORMATS.get(name),
|
||||||
'Referer': url,
|
'format_id': name,
|
||||||
'x-requested-with': 'XMLHttpRequest',
|
'url': video_url,
|
||||||
})
|
'quality': self._QUALITIES.get(name),
|
||||||
break
|
'ext': 'mp4' if ext == 'm3u8' else ext,
|
||||||
except ExtractorError:
|
'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
|
||||||
self._sleep(1, video_id)
|
**parse_resolution(self._RESOLUTIONS.get(name))
|
||||||
|
})
|
||||||
if not video_info:
|
else:
|
||||||
raise ExtractorError('Can\'t fetch video info')
|
self.to_screen(
|
||||||
|
f'"{self._FORMATS.get(name, name)}" format may require logging in. {self._login_hint()}')
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
video_info['data']['video_url'], video_id,
|
|
||||||
entry_protocol='m3u8_native', ext='mp4')
|
|
||||||
|
|
||||||
upload_date = unified_strdate(self._html_search_regex(
|
|
||||||
r'<em>上传时间:</em><span>([^<]+)</span>', webpage,
|
|
||||||
'upload date', fatal=False))
|
|
||||||
|
|
||||||
uploader = uploader_id = uploader_url = None
|
|
||||||
mobj = re.search(
|
|
||||||
r'(?m)<a[^>]+href="/author/([0-9a-zA-Z]+)".+?<strong[^>]+title="([^"]+)"',
|
|
||||||
webpage)
|
|
||||||
if mobj:
|
|
||||||
uploader_id, uploader = mobj.groups()
|
|
||||||
uploader_url = urljoin(url, '/author/' + uploader_id)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': room_info['name'],
|
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'duration': room_info.get('duration'),
|
**traverse_obj(video_info, ('DATA', {
|
||||||
'thumbnail': room_info.get('pic'),
|
'title': ('content', 'title', {str}),
|
||||||
'upload_date': upload_date,
|
'uploader': ('content', 'author', {str}),
|
||||||
'uploader': uploader,
|
'uploader_id': ('content', 'up_id', {str_or_none}),
|
||||||
'uploader_id': uploader_id,
|
'duration': ('content', 'video_duration', {int_or_none}),
|
||||||
'uploader_url': uploader_url,
|
'thumbnail': ('content', 'video_pic', {url_or_none}),
|
||||||
|
'timestamp': ('content', 'create_time', {int_or_none}),
|
||||||
|
'view_count': ('content', 'view_num', {int_or_none}),
|
||||||
|
'tags': ('videoTag', ..., 'tagName', {str}),
|
||||||
|
}))
|
||||||
}
|
}
|
||||||
|
|
96
yt_dlp/extractor/eplus.py
Normal file
96
yt_dlp/extractor/eplus.py
Normal file
|
@ -0,0 +1,96 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
try_call,
|
||||||
|
unified_timestamp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class EplusIbIE(InfoExtractor):
    # Extractor for e+ "inbound" player pages (live.eplus.jp/ex/player?ib=...)
    IE_NAME = 'eplus:inbound'
    IE_DESC = 'e+ (イープラス) overseas'
    _VALID_URL = r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)'
    _TESTS = [{
        'url': 'https://live.eplus.jp/ex/player?ib=YEFxb3Vyc2Dombnjg7blkrLlrablnJLjgrnjgq%2Fjg7zjg6vjgqLjgqTjg4njg6vlkIzlpb3kvJpgTGllbGxhIQ%3D%3D',
        'info_dict': {
            'id': '354502-0001-002',
            'title': 'LoveLive!Series Presents COUNTDOWN LoveLive! 2021→2022~LIVE with a smile!~【Streaming+(配信)】',
            'live_status': 'was_live',
            'release_date': '20211231',
            'release_timestamp': 1640952000,
            'description': str,
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
        'expected_warnings': [
            'Could not find the playlist URL. This event may not be accessible',
            'No video formats found!',
            'Requested format is not available',
        ],
    }]

    def _real_extract(self, url):
        """Extract event metadata and, when the stream is available, its HLS formats."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Event metadata is embedded in the page as "var app = {...}"
        data_json = self._search_json(r'<script>\s*var app\s*=', webpage, 'data json', video_id)

        delivery_status = data_json.get('delivery_status')
        archive_mode = data_json.get('archive_mode')
        # 32400 s = 9 h; presumably converts the JST event time to UTC — confirm
        release_timestamp = try_call(lambda: unified_timestamp(data_json['event_datetime']) - 32400)
        release_timestamp_str = data_json.get('event_datetime_text')  # JST

        self.write_debug(f'delivery_status = {delivery_status}, archive_mode = {archive_mode}')

        # A stopped event is only extractable when an archive will be published
        if delivery_status == 'STOPPED' and archive_mode != 'ON':
            raise ExtractorError(
                'This event has ended and there is no archive for this event', expected=True)

        if delivery_status == 'PREPARING':
            live_status = 'is_upcoming'
        elif delivery_status == 'STARTED':
            live_status = 'is_live'
        elif delivery_status in ('STOPPED', 'WAIT_CONFIRM_ARCHIVED'):
            live_status = 'post_live'
        elif delivery_status == 'CONFIRMED_ARCHIVE':
            live_status = 'was_live'
        else:
            self.report_warning(f'Unknown delivery_status {delivery_status}, treat it as a live')
            live_status = 'is_live'

        formats = []

        m3u8_playlist_urls = self._search_json(
            r'var listChannels\s*=', webpage, 'hls URLs', video_id, contains_pattern=r'\[.+\]', default=[])

        # Work out why there is nothing to download, if that is the case
        if not m3u8_playlist_urls:
            if live_status == 'is_upcoming':
                no_formats_msg = f'Could not find the playlist URL. This live event will begin at {release_timestamp_str} JST'
            else:
                no_formats_msg = 'Could not find the playlist URL. This event may not be accessible'
        elif live_status == 'is_upcoming':
            no_formats_msg = f'This live event will begin at {release_timestamp_str} JST'
        elif live_status == 'post_live':
            no_formats_msg = 'This event has ended, and the archive will be available shortly'
        else:
            no_formats_msg = None

        if no_formats_msg is not None:
            self.raise_no_formats(no_formats_msg, expected=True)
        else:
            for m3u8_playlist_url in m3u8_playlist_urls:
                formats.extend(self._extract_m3u8_formats(m3u8_playlist_url, video_id))
            # FIXME: HTTP request headers need to be updated to continue download
            warning = 'Due to technical limitations, the download will be interrupted after one hour'
            if live_status == 'is_live':
                self.report_warning(warning)
            elif live_status == 'was_live':
                self.report_warning(f'{warning}. You can restart to continue the download')

        return {
            'id': data_json['app_id'],
            'title': data_json.get('app_name'),
            'formats': formats,
            'live_status': live_status,
            'description': data_json.get('content'),
            'release_timestamp': release_timestamp,
        }
|
|
@ -11,8 +11,8 @@ class ExpressenIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:www\.)?(?:expressen|di)\.se/
|
(?:www\.)?(?:expressen|di)\.se/
|
||||||
(?:(?:tvspelare/video|videoplayer/embed)/)?
|
(?:(?:tvspelare/video|video-?player/embed)/)?
|
||||||
tv/(?:[^/]+/)*
|
(?:tv|nyheter)/(?:[^/?#]+/)*
|
||||||
(?P<id>[^/?#&]+)
|
(?P<id>[^/?#&]+)
|
||||||
'''
|
'''
|
||||||
_EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1']
|
_EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1']
|
||||||
|
@ -42,6 +42,12 @@ class ExpressenIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
|
'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.expressen.se/video-player/embed/tv/nyheter/ekero-fodda-olof-gustafsson-forvaltar-knarkbaronen-pablo-escobars-namn',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.expressen.se/nyheter/efter-egna-telefonbluffen-escobar-stammer-klarna/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -74,6 +74,22 @@ class FacebookIE(InfoExtractor):
|
||||||
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'
|
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
'url': 'https://www.facebook.com/radiokicksfm/videos/3676516585958356/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3676516585958356',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'dr Adam Przygoda',
|
||||||
|
'description': 'md5:34675bda53336b1d16400265c2bb9b3b',
|
||||||
|
'uploader': 'RADIO KICKS FM',
|
||||||
|
'upload_date': '20230818',
|
||||||
|
'timestamp': 1692346159,
|
||||||
|
'thumbnail': r're:^https?://.*',
|
||||||
|
'uploader_id': '100063551323670',
|
||||||
|
'duration': 3132.184,
|
||||||
|
'view_count': int,
|
||||||
|
'concurrent_view_count': 0,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
|
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
|
||||||
'md5': '6a40d33c0eccbb1af76cf0485a052659',
|
'md5': '6a40d33c0eccbb1af76cf0485a052659',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -97,7 +113,7 @@ class FacebookIE(InfoExtractor):
|
||||||
'upload_date': '20140506',
|
'upload_date': '20140506',
|
||||||
'timestamp': 1399398998,
|
'timestamp': 1399398998,
|
||||||
'thumbnail': r're:^https?://.*',
|
'thumbnail': r're:^https?://.*',
|
||||||
'uploader_id': 'pfbid04scW44U4P9iTyLZAGy8y8W3pR3i2VugvHCimiRudUAVbN3MPp9eXBaYFcgVworZwl',
|
'uploader_id': 'pfbid028wxorhX2ErLFJ578N6P3crHD3PHmXTCqCvfBpsnbSLmbokwSY75p5hWBjHGkG4zxl',
|
||||||
'duration': 131.03,
|
'duration': 131.03,
|
||||||
'concurrent_view_count': int,
|
'concurrent_view_count': int,
|
||||||
},
|
},
|
||||||
|
@ -179,7 +195,7 @@ class FacebookIE(InfoExtractor):
|
||||||
'timestamp': 1486648217,
|
'timestamp': 1486648217,
|
||||||
'upload_date': '20170209',
|
'upload_date': '20170209',
|
||||||
'uploader': 'Yaroslav Korpan',
|
'uploader': 'Yaroslav Korpan',
|
||||||
'uploader_id': 'pfbid029y8j22EwH3ikeqgH3SEP9G3CAi9kmWKgXJJG9s5geV7mo3J2bvURqHCdgucRgAyhl',
|
'uploader_id': 'pfbid06AScABAWcW91qpiuGrLt99Ef9tvwHoXP6t8KeFYEqkSfreMtfa9nTveh8b2ZEVSWl',
|
||||||
'concurrent_view_count': int,
|
'concurrent_view_count': int,
|
||||||
'thumbnail': r're:^https?://.*',
|
'thumbnail': r're:^https?://.*',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
|
@ -274,7 +290,7 @@ class FacebookIE(InfoExtractor):
|
||||||
'title': 'Josef',
|
'title': 'Josef',
|
||||||
'thumbnail': r're:^https?://.*',
|
'thumbnail': r're:^https?://.*',
|
||||||
'concurrent_view_count': int,
|
'concurrent_view_count': int,
|
||||||
'uploader_id': 'pfbid02gXHbDwxumkaKJQaTGUf3znYfYzTuidGEWawiramNx4YamSj2afwYSRkpcjtHtMRJl',
|
'uploader_id': 'pfbid0cibUN6tV7DYgdbJdsUFN46wc4jKpVSPAvJQhFofGqBGmVn3V3JtAs2tfUwziw2hUl',
|
||||||
'timestamp': 1549275572,
|
'timestamp': 1549275572,
|
||||||
'duration': 3.413,
|
'duration': 3.413,
|
||||||
'uploader': 'Josef Novak',
|
'uploader': 'Josef Novak',
|
||||||
|
@ -401,9 +417,9 @@ def _extract_from_url(self, url, video_id):
|
||||||
|
|
||||||
def extract_metadata(webpage):
|
def extract_metadata(webpage):
|
||||||
post_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall(
|
post_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall(
|
||||||
r'handleWithCustomApplyEach\(\s*ScheduledApplyEach\s*,\s*(\{.+?\})\s*\);', webpage)]
|
r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage)]
|
||||||
post = traverse_obj(post_data, (
|
post = traverse_obj(post_data, (
|
||||||
..., 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
|
..., 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
|
||||||
media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: (
|
media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: (
|
||||||
k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
|
k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
|
||||||
title = get_first(media, ('title', 'text'))
|
title = get_first(media, ('title', 'text'))
|
||||||
|
@ -489,18 +505,17 @@ def process_formats(info):
|
||||||
# with non-browser User-Agent.
|
# with non-browser User-Agent.
|
||||||
for f in info['formats']:
|
for f in info['formats']:
|
||||||
f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
|
f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
|
||||||
info['_format_sort_fields'] = ('res', 'quality')
|
|
||||||
|
|
||||||
def extract_relay_data(_filter):
|
def extract_relay_data(_filter):
|
||||||
return self._parse_json(self._search_regex(
|
return self._parse_json(self._search_regex(
|
||||||
r'handleWithCustomApplyEach\([^,]+,\s*({.*?%s.*?})\);' % _filter,
|
r'data-sjs>({.*?%s.*?})</script>' % _filter,
|
||||||
webpage, 'replay data', default='{}'), video_id, fatal=False) or {}
|
webpage, 'replay data', default='{}'), video_id, fatal=False) or {}
|
||||||
|
|
||||||
def extract_relay_prefetched_data(_filter):
|
def extract_relay_prefetched_data(_filter):
|
||||||
replay_data = extract_relay_data(_filter)
|
return traverse_obj(extract_relay_data(_filter), (
|
||||||
for require in (replay_data.get('require') or []):
|
'require', (None, (..., ..., ..., '__bbox', 'require')),
|
||||||
if require[0] == 'RelayPrefetchedStreamCache':
|
lambda _, v: 'RelayPrefetchedStreamCache' in v, ..., ...,
|
||||||
return try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
|
'__bbox', 'result', 'data', {dict}), get_all=False) or {}
|
||||||
|
|
||||||
if not video_data:
|
if not video_data:
|
||||||
server_js_data = self._parse_json(self._search_regex([
|
server_js_data = self._parse_json(self._search_regex([
|
||||||
|
@ -511,7 +526,7 @@ def extract_relay_prefetched_data(_filter):
|
||||||
|
|
||||||
if not video_data:
|
if not video_data:
|
||||||
data = extract_relay_prefetched_data(
|
data = extract_relay_prefetched_data(
|
||||||
r'"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+"')
|
r'"(?:dash_manifest|playable_url(?:_quality_hd)?)')
|
||||||
if data:
|
if data:
|
||||||
entries = []
|
entries = []
|
||||||
|
|
||||||
|
@ -526,7 +541,8 @@ def parse_graphql_video(video):
|
||||||
formats = []
|
formats = []
|
||||||
q = qualities(['sd', 'hd'])
|
q = qualities(['sd', 'hd'])
|
||||||
for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
|
for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
|
||||||
('playable_url_dash', '')):
|
('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
|
||||||
|
('browser_native_sd_url', 'sd')):
|
||||||
playable_url = video.get(key)
|
playable_url = video.get(key)
|
||||||
if not playable_url:
|
if not playable_url:
|
||||||
continue
|
continue
|
||||||
|
@ -535,7 +551,8 @@ def parse_graphql_video(video):
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'quality': q(format_id),
|
# sd, hd formats w/o resolution info should be deprioritized below DASH
|
||||||
|
'quality': q(format_id) - 3,
|
||||||
'url': playable_url,
|
'url': playable_url,
|
||||||
})
|
})
|
||||||
extract_dash_manifest(video, formats)
|
extract_dash_manifest(video, formats)
|
||||||
|
@ -702,9 +719,11 @@ def parse_attachment(attachment, key='media'):
|
||||||
for src_type in ('src', 'src_no_ratelimit'):
|
for src_type in ('src', 'src_no_ratelimit'):
|
||||||
src = f[0].get('%s_%s' % (quality, src_type))
|
src = f[0].get('%s_%s' % (quality, src_type))
|
||||||
if src:
|
if src:
|
||||||
preference = -10 if format_id == 'progressive' else -1
|
# sd, hd formats w/o resolution info should be deprioritized below DASH
|
||||||
|
# TODO: investigate if progressive or src formats still exist
|
||||||
|
preference = -10 if format_id == 'progressive' else -3
|
||||||
if quality == 'hd':
|
if quality == 'hd':
|
||||||
preference += 5
|
preference += 1
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': '%s_%s_%s' % (format_id, quality, src_type),
|
'format_id': '%s_%s_%s' % (format_id, quality, src_type),
|
||||||
'url': src,
|
'url': src,
|
||||||
|
|
|
@ -60,6 +60,7 @@ class Funker530IE(InfoExtractor):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
info = {}
|
||||||
rumble_url = list(RumbleEmbedIE._extract_embed_urls(url, webpage))
|
rumble_url = list(RumbleEmbedIE._extract_embed_urls(url, webpage))
|
||||||
if rumble_url:
|
if rumble_url:
|
||||||
info = {'url': rumble_url[0], 'ie_key': RumbleEmbedIE.ie_key()}
|
info = {'url': rumble_url[0], 'ie_key': RumbleEmbedIE.ie_key()}
|
||||||
|
|
|
@ -2370,7 +2370,7 @@ def _extract_kvs(self, url, webpage, video_id):
|
||||||
'id': flashvars['video_id'],
|
'id': flashvars['video_id'],
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': urljoin(url, thumbnail),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -66,7 +66,7 @@ def _entries(self, file_id):
|
||||||
query_params = {
|
query_params = {
|
||||||
'contentId': file_id,
|
'contentId': file_id,
|
||||||
'token': self._TOKEN,
|
'token': self._TOKEN,
|
||||||
'websiteToken': 12345,
|
'websiteToken': '7fd94ds12fds4', # From https://gofile.io/dist/js/alljs.js
|
||||||
}
|
}
|
||||||
password = self.get_param('videopassword')
|
password = self.get_param('videopassword')
|
||||||
if password:
|
if password:
|
||||||
|
|
|
@ -383,9 +383,9 @@ def __get_current_timestamp():
|
||||||
months = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
|
months = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
|
||||||
days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
|
days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
|
||||||
|
|
||||||
time_now = datetime.datetime.utcnow()
|
time_now = datetime.datetime.now(datetime.timezone.utc)
|
||||||
format_string = "{} {} {} %H:%M:%S UTC %Y".format(days[time_now.weekday()], months[time_now.month], time_now.day)
|
format_string = "{} {} {} %H:%M:%S UTC %Y".format(days[time_now.weekday()], months[time_now.month], time_now.day)
|
||||||
time_string = datetime.datetime.utcnow().strftime(format_string)
|
time_string = time_now.strftime(format_string)
|
||||||
return time_string
|
return time_string
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_age_limit,
|
parse_age_limit,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
time_seconds,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -11,15 +11,14 @@
|
||||||
class IndavideoEmbedIE(InfoExtractor):
|
class IndavideoEmbedIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)'
|
_VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)'
|
||||||
# Some example URLs covered by generic extractor:
|
# Some example URLs covered by generic extractor:
|
||||||
# http://indavideo.hu/video/Vicces_cica_1
|
# https://indavideo.hu/video/Vicces_cica_1
|
||||||
# http://index.indavideo.hu/video/2015_0728_beregszasz
|
# https://index.indavideo.hu/video/Hod_Nemetorszagban
|
||||||
# http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
|
# https://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
|
||||||
# http://erotika.indavideo.hu/video/Amator_tini_punci
|
# https://film.indavideo.hu/video/f_farkaslesen
|
||||||
# http://film.indavideo.hu/video/f_hrom_nagymamm_volt
|
# https://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
|
||||||
# http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
|
_EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)//embed\.indavideo\.hu/player/video/[\da-f]+)']
|
||||||
_EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//embed\.indavideo\.hu/player/video/[\da-f]+)']
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://indavideo.hu/player/video/1bdc3c6d80/',
|
'url': 'https://indavideo.hu/player/video/1bdc3c6d80/',
|
||||||
'md5': 'c8a507a1c7410685f83a06eaeeaafeab',
|
'md5': 'c8a507a1c7410685f83a06eaeeaafeab',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1837039',
|
'id': '1837039',
|
||||||
|
@ -36,21 +35,33 @@ class IndavideoEmbedIE(InfoExtractor):
|
||||||
'tags': ['tánc', 'cica', 'cuki', 'cukiajanlo', 'newsroom'],
|
'tags': ['tánc', 'cica', 'cuki', 'cukiajanlo', 'newsroom'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1',
|
'url': 'https://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1',
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://assets.indavideo.hu/swf/player.swf?v=fe25e500&vID=1bdc3c6d80&autostart=1&hide=1&i=1',
|
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
_WEBPAGE_TESTS = [{
|
||||||
|
'url': 'https://indavideo.hu/video/Vicces_cica_1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1335611',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Vicces cica',
|
||||||
|
'description': 'Játszik a tablettel. :D',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'uploader': 'Jet_Pack',
|
||||||
|
'uploader_id': '491217',
|
||||||
|
'timestamp': 1390821212,
|
||||||
|
'upload_date': '20140127',
|
||||||
|
'duration': 7,
|
||||||
|
'age_limit': 0,
|
||||||
|
'tags': ['cica', 'Jet_Pack'],
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
video = self._download_json(
|
video = self._download_json(
|
||||||
'https://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id,
|
f'https://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/{video_id}/',
|
||||||
video_id)['data']
|
video_id, query={'_': time_seconds()})['data']
|
||||||
|
|
||||||
title = video['title']
|
|
||||||
|
|
||||||
video_urls = []
|
video_urls = []
|
||||||
|
|
||||||
|
@ -60,33 +71,21 @@ def _real_extract(self, url):
|
||||||
elif isinstance(video_files, dict):
|
elif isinstance(video_files, dict):
|
||||||
video_urls.extend(video_files.values())
|
video_urls.extend(video_files.values())
|
||||||
|
|
||||||
video_file = video.get('video_file')
|
|
||||||
if video:
|
|
||||||
video_urls.append(video_file)
|
|
||||||
video_urls = list(set(video_urls))
|
video_urls = list(set(video_urls))
|
||||||
|
|
||||||
video_prefix = video_urls[0].rsplit('/', 1)[0]
|
filesh = video.get('filesh') or {}
|
||||||
|
|
||||||
for flv_file in video.get('flv_files', []):
|
|
||||||
flv_url = '%s/%s' % (video_prefix, flv_file)
|
|
||||||
if flv_url not in video_urls:
|
|
||||||
video_urls.append(flv_url)
|
|
||||||
|
|
||||||
filesh = video.get('filesh')
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for video_url in video_urls:
|
for video_url in video_urls:
|
||||||
height = int_or_none(self._search_regex(
|
height = int_or_none(self._search_regex(
|
||||||
r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None))
|
r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None))
|
||||||
if filesh:
|
if not height and len(filesh) == 1:
|
||||||
if not height:
|
height = int_or_none(list(filesh.keys())[0])
|
||||||
continue
|
token = filesh.get(str(height))
|
||||||
token = filesh.get(compat_str(height))
|
if token is None:
|
||||||
if token is None:
|
continue
|
||||||
continue
|
|
||||||
video_url = update_url_query(video_url, {'token': token})
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
'url': update_url_query(video_url, {'token': token}),
|
||||||
'height': height,
|
'height': height,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
@ -103,7 +102,7 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video.get('id') or video_id,
|
'id': video.get('id') or video_id,
|
||||||
'title': title,
|
'title': video.get('title'),
|
||||||
'description': video.get('description'),
|
'description': video.get('description'),
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'uploader': video.get('user_name'),
|
'uploader': video.get('user_name'),
|
||||||
|
|
|
@ -57,8 +57,8 @@ class LecturioIE(LecturioBaseIE):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https://
|
https://
|
||||||
(?:
|
(?:
|
||||||
app\.lecturio\.com/([^/]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))|
|
app\.lecturio\.com/([^/?#]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))|
|
||||||
(?:www\.)?lecturio\.de/[^/]+/(?P<nt_de>[^/?#&]+)\.vortrag
|
(?:www\.)?lecturio\.de/(?:[^/?#]+/)+(?P<nt_de>[^/?#&]+)\.vortrag
|
||||||
)
|
)
|
||||||
'''
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
@ -73,6 +73,9 @@ class LecturioIE(LecturioBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag',
|
'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.lecturio.de/jura/oeffentliches-recht-at-1-staatsexamen/oeffentliches-recht-staatsexamen.vortrag',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://app.lecturio.com/#/lecture/c/6434/39634',
|
'url': 'https://app.lecturio.com/#/lecture/c/6434/39634',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
|
|
@ -17,11 +17,12 @@ class MassengeschmackTVIE(InfoExtractor):
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'https://massengeschmack.tv/play/fktv202',
|
'url': 'https://massengeschmack.tv/play/fktv202',
|
||||||
'md5': 'a9e054db9c2b5a08f0a0527cc201e8d3',
|
'md5': '9996f314994a49fefe5f39aa1b07ae21',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'fktv202',
|
'id': 'fktv202',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Fernsehkritik-TV - Folge 202',
|
'title': 'Fernsehkritik-TV #202',
|
||||||
|
'thumbnail': 'https://cache.massengeschmack.tv/img/mag/fktv202.jpg'
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -29,9 +30,6 @@ def _real_extract(self, url):
|
||||||
episode = self._match_id(url)
|
episode = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, episode)
|
webpage = self._download_webpage(url, episode)
|
||||||
title = clean_html(self._html_search_regex(
|
|
||||||
'<h3>([^<]+)</h3>', webpage, 'title'))
|
|
||||||
thumbnail = self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)
|
|
||||||
sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json)
|
sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
@ -67,7 +65,8 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': episode,
|
'id': episode,
|
||||||
'title': title,
|
'title': clean_html(self._html_search_regex(
|
||||||
|
r'<span[^>]+\bid=["\']clip-title["\'][^>]*>([^<]+)', webpage, 'title', fatal=False)),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False),
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,8 @@
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
unified_strdate
|
ExtractorError,
|
||||||
|
traverse_obj,
|
||||||
|
unified_strdate,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
@ -15,7 +18,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||||
(?P<id>[^/#?_]+)'''
|
(?P<id>[^/#?_]+)'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# mediaklikk. date in html.
|
# (old) mediaklikk. date in html.
|
||||||
'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/',
|
'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4754129',
|
'id': '4754129',
|
||||||
|
@ -23,9 +26,21 @@ class MediaKlikkIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'upload_date': '20210901',
|
'upload_date': '20210901',
|
||||||
'thumbnail': 'http://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg'
|
'thumbnail': 'http://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg'
|
||||||
|
},
|
||||||
|
'skip': 'Webpage redirects to 404 page',
|
||||||
|
}, {
|
||||||
|
# mediaklikk. date in html.
|
||||||
|
'url': 'https://mediaklikk.hu/video/hazajaro-fabova-hegyseg-kishont-koronaja/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6696133',
|
||||||
|
'title': 'Hazajáró, Fabova-hegység - Kishont koronája',
|
||||||
|
'display_id': 'hazajaro-fabova-hegyseg-kishont-koronaja',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20230903',
|
||||||
|
'thumbnail': 'https://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg'
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# m4sport
|
# (old) m4sport
|
||||||
'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/',
|
'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4754999',
|
'id': '4754999',
|
||||||
|
@ -33,6 +48,18 @@ class MediaKlikkIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'upload_date': '20210830',
|
'upload_date': '20210830',
|
||||||
'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/08/vlcsnap-2021-08-30-18h21m20s10-1024x576.jpg'
|
'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/08/vlcsnap-2021-08-30-18h21m20s10-1024x576.jpg'
|
||||||
|
},
|
||||||
|
'skip': 'Webpage redirects to 404 page',
|
||||||
|
}, {
|
||||||
|
# m4sport
|
||||||
|
'url': 'https://m4sport.hu/sportkozvetitesek/video/2023/09/08/atletika-gyemant-liga-brusszel/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6711136',
|
||||||
|
'title': 'Atlétika – Gyémánt Liga, Brüsszel',
|
||||||
|
'display_id': 'atletika-gyemant-liga-brusszel',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20230908',
|
||||||
|
'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-08-22h43m18s691.jpg'
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# m4sport with *video/ url and no date
|
# m4sport with *video/ url and no date
|
||||||
|
@ -40,20 +67,33 @@ class MediaKlikkIE(InfoExtractor):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4492099',
|
'id': '4492099',
|
||||||
'title': 'Real Madrid - Chelsea 1-1',
|
'title': 'Real Madrid - Chelsea 1-1',
|
||||||
|
'display_id': 'real-madrid-chelsea-1-1',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png'
|
'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png'
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# hirado
|
# (old) hirado
|
||||||
'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/',
|
'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4760120',
|
'id': '4760120',
|
||||||
'title': 'Feltételeket szabott a főváros',
|
'title': 'Feltételeket szabott a főváros',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'thumbnail': 'http://hirado.hu/wp-content/uploads/sites/4/2021/09/vlcsnap-2021-09-01-20h20m37s165.jpg'
|
'thumbnail': 'http://hirado.hu/wp-content/uploads/sites/4/2021/09/vlcsnap-2021-09-01-20h20m37s165.jpg'
|
||||||
|
},
|
||||||
|
'skip': 'Webpage redirects to video list page',
|
||||||
|
}, {
|
||||||
|
# hirado
|
||||||
|
'url': 'https://hirado.hu/belfold/video/2023/09/11/marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6716068',
|
||||||
|
'title': 'Marad az éves elszámolás a napelemekre beruházó családoknál',
|
||||||
|
'display_id': 'marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20230911',
|
||||||
|
'thumbnail': 'https://hirado.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-11-09h16m09s882.jpg'
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# petofilive
|
# (old) petofilive
|
||||||
'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/',
|
'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4571948',
|
'id': '4571948',
|
||||||
|
@ -61,6 +101,18 @@ class MediaKlikkIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'upload_date': '20210607',
|
'upload_date': '20210607',
|
||||||
'thumbnail': 'http://petofilive.hu/wp-content/uploads/sites/4/2021/06/vlcsnap-2021-06-07-22h14m23s915-1024x576.jpg'
|
'thumbnail': 'http://petofilive.hu/wp-content/uploads/sites/4/2021/06/vlcsnap-2021-06-07-22h14m23s915-1024x576.jpg'
|
||||||
|
},
|
||||||
|
'skip': 'Webpage redirects to empty page',
|
||||||
|
}, {
|
||||||
|
# petofilive
|
||||||
|
'url': 'https://petofilive.hu/video/2023/09/09/futball-fesztival-a-margitszigeten/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6713233',
|
||||||
|
'title': 'Futball Fesztivál a Margitszigeten',
|
||||||
|
'display_id': 'futball-fesztival-a-margitszigeten',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20230909',
|
||||||
|
'thumbnail': 'https://petofilive.hu/wp-content/uploads/sites/4/2023/09/Clipboard11-2.jpg'
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@ -84,8 +136,12 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
player_data['video'] = player_data.pop('token')
|
player_data['video'] = player_data.pop('token')
|
||||||
player_page = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', video_id, query=player_data)
|
player_page = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', video_id, query=player_data)
|
||||||
playlist_url = self._proto_relative_url(compat_urllib_parse_unquote(
|
player_json = self._search_json(
|
||||||
self._html_search_regex(r'\"file\":\s*\"(\\?/\\?/.*playlist\.m3u8)\"', player_page, 'playlist_url')).replace('\\/', '/'))
|
r'\bpl\.setup\s*\(', player_page, 'player json', video_id, end_pattern=r'\);')
|
||||||
|
playlist_url = traverse_obj(
|
||||||
|
player_json, ('playlist', lambda _, v: v['type'] == 'hls', 'file', {url_or_none}), get_all=False)
|
||||||
|
if not playlist_url:
|
||||||
|
raise ExtractorError('Unable to extract playlist url')
|
||||||
|
|
||||||
formats = self._extract_wowza_formats(
|
formats = self._extract_wowza_formats(
|
||||||
playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])
|
playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])
|
||||||
|
|
|
@ -14,7 +14,7 @@ class MediaStreamBaseIE(InfoExtractor):
|
||||||
_BASE_URL_RE = r'https?://mdstrm\.com/(?:embed|live-stream)'
|
_BASE_URL_RE = r'https?://mdstrm\.com/(?:embed|live-stream)'
|
||||||
|
|
||||||
def _extract_mediastream_urls(self, webpage):
|
def _extract_mediastream_urls(self, webpage):
|
||||||
yield from traverse_obj(list(self._yield_json_ld(webpage, None)), (
|
yield from traverse_obj(list(self._yield_json_ld(webpage, None, fatal=False)), (
|
||||||
lambda _, v: v['@type'] == 'VideoObject', ('embedUrl', 'contentUrl'),
|
lambda _, v: v['@type'] == 'VideoObject', ('embedUrl', 'contentUrl'),
|
||||||
{lambda x: x if re.match(rf'{self._BASE_URL_RE}/\w+', x) else None}))
|
{lambda x: x if re.match(rf'{self._BASE_URL_RE}/\w+', x) else None}))
|
||||||
|
|
||||||
|
@ -106,8 +106,12 @@ def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
if 'Debido a tu ubicación no puedes ver el contenido' in webpage:
|
for message in [
|
||||||
self.raise_geo_restricted()
|
'Debido a tu ubicación no puedes ver el contenido',
|
||||||
|
'You are not allowed to watch this video: Geo Fencing Restriction'
|
||||||
|
]:
|
||||||
|
if message in webpage:
|
||||||
|
self.raise_geo_restricted()
|
||||||
|
|
||||||
player_config = self._search_json(r'window\.MDSTRM\.OPTIONS\s*=', webpage, 'metadata', video_id)
|
player_config = self._search_json(r'window\.MDSTRM\.OPTIONS\s*=', webpage, 'metadata', video_id)
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,7 @@ class MixcloudBaseIE(InfoExtractor):
|
||||||
def _call_api(self, object_type, object_fields, display_id, username, slug=None):
|
def _call_api(self, object_type, object_fields, display_id, username, slug=None):
|
||||||
lookup_key = object_type + 'Lookup'
|
lookup_key = object_type + 'Lookup'
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
'https://www.mixcloud.com/graphql', display_id, query={
|
'https://app.mixcloud.com/graphql', display_id, query={
|
||||||
'query': '''{
|
'query': '''{
|
||||||
%s(lookup: {username: "%s"%s}) {
|
%s(lookup: {username: "%s"%s}) {
|
||||||
%s
|
%s
|
||||||
|
@ -46,7 +46,15 @@ class MixcloudIE(MixcloudBaseIE):
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'timestamp': 1321359578,
|
'timestamp': 1321359578,
|
||||||
'upload_date': '20111115',
|
'upload_date': '20111115',
|
||||||
|
'uploader_url': 'https://www.mixcloud.com/dholbach/',
|
||||||
|
'artist': 'Submorphics & Chino , Telekinesis, Porter Robinson, Enei, Breakage ft Jess Mills',
|
||||||
|
'duration': 3723,
|
||||||
|
'tags': [],
|
||||||
|
'comment_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
'like_count': int,
|
||||||
},
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
|
'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -60,7 +68,14 @@ class MixcloudIE(MixcloudBaseIE):
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'timestamp': 1422987057,
|
'timestamp': 1422987057,
|
||||||
'upload_date': '20150203',
|
'upload_date': '20150203',
|
||||||
|
'uploader_url': 'https://www.mixcloud.com/gillespeterson/',
|
||||||
|
'duration': 2992,
|
||||||
|
'tags': [],
|
||||||
|
'comment_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
'like_count': int,
|
||||||
},
|
},
|
||||||
|
'params': {'skip_download': '404 playback error on site'},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
|
'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -259,9 +274,9 @@ def _real_extract(self, url):
|
||||||
cloudcast_url = cloudcast.get('url')
|
cloudcast_url = cloudcast.get('url')
|
||||||
if not cloudcast_url:
|
if not cloudcast_url:
|
||||||
continue
|
continue
|
||||||
slug = try_get(cloudcast, lambda x: x['slug'], compat_str)
|
item_slug = try_get(cloudcast, lambda x: x['slug'], compat_str)
|
||||||
owner_username = try_get(cloudcast, lambda x: x['owner']['username'], compat_str)
|
owner_username = try_get(cloudcast, lambda x: x['owner']['username'], compat_str)
|
||||||
video_id = '%s_%s' % (owner_username, slug) if slug and owner_username else None
|
video_id = f'{owner_username}_{item_slug}' if item_slug and owner_username else None
|
||||||
entries.append(self.url_result(
|
entries.append(self.url_result(
|
||||||
cloudcast_url, MixcloudIE.ie_key(), video_id))
|
cloudcast_url, MixcloudIE.ie_key(), video_id))
|
||||||
|
|
||||||
|
@ -284,7 +299,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'dholbach_uploads',
|
'id': 'dholbach_uploads',
|
||||||
'title': 'Daniel Holbach (uploads)',
|
'title': 'Daniel Holbach (uploads)',
|
||||||
'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
|
'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 36,
|
'playlist_mincount': 36,
|
||||||
}, {
|
}, {
|
||||||
|
@ -292,7 +307,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'dholbach_uploads',
|
'id': 'dholbach_uploads',
|
||||||
'title': 'Daniel Holbach (uploads)',
|
'title': 'Daniel Holbach (uploads)',
|
||||||
'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
|
'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 36,
|
'playlist_mincount': 36,
|
||||||
}, {
|
}, {
|
||||||
|
@ -300,7 +315,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'dholbach_favorites',
|
'id': 'dholbach_favorites',
|
||||||
'title': 'Daniel Holbach (favorites)',
|
'title': 'Daniel Holbach (favorites)',
|
||||||
'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
|
'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
|
||||||
},
|
},
|
||||||
# 'params': {
|
# 'params': {
|
||||||
# 'playlist_items': '1-100',
|
# 'playlist_items': '1-100',
|
||||||
|
@ -323,9 +338,9 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'FirstEar_stream',
|
'id': 'FirstEar_stream',
|
||||||
'title': 'First Ear (stream)',
|
'title': 'First Ear (stream)',
|
||||||
'description': 'Curators of good music\r\n\r\nfirstearmusic.com',
|
'description': 'we maraud for ears',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 271,
|
'playlist_mincount': 269,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_TITLE_KEY = 'displayName'
|
_TITLE_KEY = 'displayName'
|
||||||
|
|
|
@ -151,7 +151,7 @@ def _real_extract(self, url):
|
||||||
'd': 'days',
|
'd': 'days',
|
||||||
}
|
}
|
||||||
kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
|
kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
|
||||||
upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
|
upload_date = (datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
|
||||||
|
|
||||||
comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage))
|
comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage))
|
||||||
uploader_id = self._html_search_regex(
|
uploader_id = self._html_search_regex(
|
||||||
|
|
|
@ -33,7 +33,7 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
class N1InfoIIE(InfoExtractor):
|
class N1InfoIIE(InfoExtractor):
|
||||||
IE_NAME = 'N1Info:article'
|
IE_NAME = 'N1Info:article'
|
||||||
_VALID_URL = r'https?://(?:(?:(?:ba|rs|hr)\.)?n1info\.(?:com|si)|nova\.rs)/(?:[^/]+/){1,2}(?P<id>[^/]+)'
|
_VALID_URL = r'https?://(?:(?:\w+\.)?n1info\.\w+|nova\.rs)/(?:[^/?#]+/){1,2}(?P<id>[^/?#]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# Youtube embedded
|
# Youtube embedded
|
||||||
'url': 'https://rs.n1info.com/sport-klub/tenis/kako-je-djokovic-propustio-istorijsku-priliku-video/',
|
'url': 'https://rs.n1info.com/sport-klub/tenis/kako-je-djokovic-propustio-istorijsku-priliku-video/',
|
||||||
|
@ -94,6 +94,16 @@ class N1InfoIIE(InfoExtractor):
|
||||||
'upload_date': '20211102',
|
'upload_date': '20211102',
|
||||||
'timestamp': 1635861677,
|
'timestamp': 1635861677,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://n1info.rs/vesti/cuta-biti-u-kosovskoj-mitrovici-znaci-da-te-docekaju-eksplozivnim-napravama/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1332368',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Ćuta: Biti u Kosovskoj Mitrovici znači da te dočekaju eksplozivnim napravama',
|
||||||
|
'upload_date': '20230620',
|
||||||
|
'timestamp': 1687290536,
|
||||||
|
'thumbnail': 'https://cdn.brid.tv/live/partners/26827/snapshot/1332368_th_6492013a8356f_1687290170.jpg'
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://hr.n1info.com/vijesti/pravobraniteljica-o-ubojstvu-u-zagrebu-radi-se-o-doista-nezapamcenoj-situaciji/',
|
'url': 'https://hr.n1info.com/vijesti/pravobraniteljica-o-ubojstvu-u-zagrebu-radi-se-o-doista-nezapamcenoj-situaciji/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -105,19 +115,35 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
title = self._html_search_regex(r'<h1[^>]+>(.+?)</h1>', webpage, 'title')
|
title = self._html_search_regex(r'<h1[^>]+>(.+?)</h1>', webpage, 'title')
|
||||||
timestamp = unified_timestamp(self._html_search_meta('article:published_time', webpage))
|
timestamp = unified_timestamp(self._html_search_meta('article:published_time', webpage))
|
||||||
|
plugin_data = self._html_search_meta('BridPlugin', webpage)
|
||||||
videos = re.findall(r'(?m)(<video[^>]+>)', webpage)
|
|
||||||
entries = []
|
entries = []
|
||||||
for video in videos:
|
if plugin_data:
|
||||||
video_data = extract_attributes(video)
|
site_id = self._html_search_regex(r'site:(\d+)', webpage, 'site id')
|
||||||
entries.append({
|
for video_data in re.findall(r'\$bp\("Brid_\d+", (.+)\);', webpage):
|
||||||
'_type': 'url_transparent',
|
video_id = self._parse_json(video_data, title)['video']
|
||||||
'url': video_data.get('data-url'),
|
entries.append({
|
||||||
'id': video_data.get('id'),
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': video_data.get('data-thumbnail'),
|
'timestamp': timestamp,
|
||||||
'timestamp': timestamp,
|
'thumbnail': self._html_search_meta('thumbnailURL', webpage),
|
||||||
'ie_key': 'N1InfoAsset'})
|
'formats': self._extract_m3u8_formats(
|
||||||
|
f'https://cdn-uc.brid.tv/live/partners/{site_id}/streaming/{video_id}/{video_id}.m3u8',
|
||||||
|
video_id, fatal=False),
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
# Old player still present in older articles
|
||||||
|
videos = re.findall(r'(?m)(<video[^>]+>)', webpage)
|
||||||
|
for video in videos:
|
||||||
|
video_data = extract_attributes(video)
|
||||||
|
entries.append({
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': video_data.get('data-url'),
|
||||||
|
'id': video_data.get('id'),
|
||||||
|
'title': title,
|
||||||
|
'thumbnail': video_data.get('data-thumbnail'),
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'ie_key': 'N1InfoAsset',
|
||||||
|
})
|
||||||
|
|
||||||
embedded_videos = re.findall(r'(<iframe[^>]+>)', webpage)
|
embedded_videos = re.findall(r'(<iframe[^>]+>)', webpage)
|
||||||
for embedded_video in embedded_videos:
|
for embedded_video in embedded_videos:
|
||||||
|
|
|
@ -21,7 +21,7 @@
|
||||||
class NaverBaseIE(InfoExtractor):
|
class NaverBaseIE(InfoExtractor):
|
||||||
_CAPTION_EXT_RE = r'\.(?:ttml|vtt)'
|
_CAPTION_EXT_RE = r'\.(?:ttml|vtt)'
|
||||||
|
|
||||||
@staticmethod # NB: Used in VLiveWebArchiveIE, WeverseIE
|
@staticmethod # NB: Used in WeverseIE
|
||||||
def process_subtitles(vod_data, process_url):
|
def process_subtitles(vod_data, process_url):
|
||||||
ret = {'subtitles': {}, 'automatic_captions': {}}
|
ret = {'subtitles': {}, 'automatic_captions': {}}
|
||||||
for caption in traverse_obj(vod_data, ('captions', 'list', ...)):
|
for caption in traverse_obj(vod_data, ('captions', 'list', ...)):
|
||||||
|
|
|
@ -265,6 +265,26 @@ class NitterIE(InfoExtractor):
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
}
|
}
|
||||||
|
}, { # no OpenGraph title
|
||||||
|
'url': f'https://{current_instance}/LocalBateman/status/1678455464038735895#m',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1678455464038735895',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Your Typical Local Man - Local man, what did Romanians ever do to you?',
|
||||||
|
'description': 'Local man, what did Romanians ever do to you?',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'uploader': 'Your Typical Local Man',
|
||||||
|
'uploader_id': 'LocalBateman',
|
||||||
|
'uploader_url': f'https://{current_instance}/LocalBateman',
|
||||||
|
'upload_date': '20230710',
|
||||||
|
'timestamp': 1689009900,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
},
|
||||||
|
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -292,7 +312,7 @@ def _real_extract(self, url):
|
||||||
'ext': ext
|
'ext': ext
|
||||||
}]
|
}]
|
||||||
|
|
||||||
title = description = self._og_search_description(full_webpage) or self._html_search_regex(
|
title = description = self._og_search_description(full_webpage, default=None) or self._html_search_regex(
|
||||||
r'<div class="tweet-content[^>]+>([^<]+)</div>', webpage, 'title', fatal=False)
|
r'<div class="tweet-content[^>]+>([^<]+)</div>', webpage, 'title', fatal=False)
|
||||||
|
|
||||||
uploader_id = self._html_search_regex(
|
uploader_id = self._html_search_regex(
|
||||||
|
|
|
@ -6,7 +6,6 @@
|
||||||
determine_ext,
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
qualities,
|
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
@ -49,77 +48,52 @@ def _real_extract(self, url):
|
||||||
duration = None
|
duration = None
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
player = self._parse_json(
|
def process_format_list(format_list, format_id=""):
|
||||||
self._search_regex(
|
nonlocal formats, has_drm
|
||||||
(r'(?:(?:replacePlaceholders|processAdTagModifier).*?:\s*)?(?:replacePlaceholders|processAdTagModifier)\s*\(\s*(?P<json>{.*?})\s*\)(?:\s*\))?\s*,',
|
if not isinstance(format_list, list):
|
||||||
r'Player\.init\s*\([^,]+,(?P<cndn>\s*\w+\s*\?)?\s*(?P<json>{(?(cndn).+?|.+)})\s*(?(cndn):|,\s*{.+?}\s*\)\s*;)'),
|
format_list = [format_list]
|
||||||
webpage, 'player', default='{}', group='json'), video_id, fatal=False)
|
for format_dict in format_list:
|
||||||
if player:
|
if not isinstance(format_dict, dict):
|
||||||
for format_id, format_list in player['tracks'].items():
|
continue
|
||||||
if not isinstance(format_list, list):
|
if (not self.get_param('allow_unplayable_formats')
|
||||||
format_list = [format_list]
|
and traverse_obj(format_dict, ('drm', 'keySystem'))):
|
||||||
for format_dict in format_list:
|
has_drm = True
|
||||||
if not isinstance(format_dict, dict):
|
continue
|
||||||
continue
|
format_url = url_or_none(format_dict.get('src'))
|
||||||
if (not self.get_param('allow_unplayable_formats')
|
format_type = format_dict.get('type')
|
||||||
and traverse_obj(format_dict, ('drm', 'keySystem'))):
|
ext = determine_ext(format_url)
|
||||||
has_drm = True
|
if (format_type == 'application/x-mpegURL'
|
||||||
continue
|
or format_id == 'HLS' or ext == 'm3u8'):
|
||||||
format_url = url_or_none(format_dict.get('src'))
|
formats.extend(self._extract_m3u8_formats(
|
||||||
format_type = format_dict.get('type')
|
format_url, video_id, 'mp4',
|
||||||
ext = determine_ext(format_url)
|
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||||
if (format_type == 'application/x-mpegURL'
|
fatal=False))
|
||||||
or format_id == 'HLS' or ext == 'm3u8'):
|
elif (format_type == 'application/dash+xml'
|
||||||
formats.extend(self._extract_m3u8_formats(
|
or format_id == 'DASH' or ext == 'mpd'):
|
||||||
format_url, video_id, 'mp4',
|
formats.extend(self._extract_mpd_formats(
|
||||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
format_url, video_id, mpd_id='dash', fatal=False))
|
||||||
fatal=False))
|
else:
|
||||||
elif (format_type == 'application/dash+xml'
|
formats.append({
|
||||||
or format_id == 'DASH' or ext == 'mpd'):
|
|
||||||
formats.extend(self._extract_mpd_formats(
|
|
||||||
format_url, video_id, mpd_id='dash', fatal=False))
|
|
||||||
else:
|
|
||||||
formats.append({
|
|
||||||
'url': format_url,
|
|
||||||
})
|
|
||||||
duration = int_or_none(player.get('duration'))
|
|
||||||
else:
|
|
||||||
# Old path, not actual as of 08.04.2020
|
|
||||||
bitrates = self._parse_json(
|
|
||||||
self._search_regex(
|
|
||||||
r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
|
|
||||||
video_id, transform_source=js_to_json)
|
|
||||||
|
|
||||||
QUALITIES = ('lq', 'mq', 'hq', 'hd')
|
|
||||||
quality_key = qualities(QUALITIES)
|
|
||||||
|
|
||||||
for format_id, format_list in bitrates.items():
|
|
||||||
if not isinstance(format_list, list):
|
|
||||||
format_list = [format_list]
|
|
||||||
for format_url in format_list:
|
|
||||||
format_url = url_or_none(format_url)
|
|
||||||
if not format_url:
|
|
||||||
continue
|
|
||||||
if format_id == 'hls':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
format_url, video_id, ext='mp4',
|
|
||||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
|
||||||
fatal=False))
|
|
||||||
continue
|
|
||||||
f = {
|
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
}
|
})
|
||||||
f_id = format_id
|
|
||||||
for quality in QUALITIES:
|
player = self._search_json(
|
||||||
if '%s.mp4' % quality in format_url:
|
r'player:', webpage, 'player', video_id, fatal=False, end_pattern=r';\s*</script>')
|
||||||
f_id += '-%s' % quality
|
if player:
|
||||||
f.update({
|
for src in traverse_obj(player, ('lib', 'source', 'sources', ...)):
|
||||||
'quality': quality_key(quality),
|
process_format_list(src)
|
||||||
'format_note': quality.upper(),
|
duration = traverse_obj(player, ('sourceInfo', 'duration', {int_or_none}))
|
||||||
})
|
if not formats and not has_drm:
|
||||||
break
|
# older code path, in use before August 2023
|
||||||
f['format_id'] = f_id
|
player = self._parse_json(
|
||||||
formats.append(f)
|
self._search_regex(
|
||||||
|
(r'(?:(?:replacePlaceholders|processAdTagModifier).*?:\s*)?(?:replacePlaceholders|processAdTagModifier)\s*\(\s*(?P<json>{.*?})\s*\)(?:\s*\))?\s*,',
|
||||||
|
r'Player\.init\s*\([^,]+,(?P<cndn>\s*\w+\s*\?)?\s*(?P<json>{(?(cndn).+?|.+)})\s*(?(cndn):|,\s*{.+?}\s*\)\s*;)'),
|
||||||
|
webpage, 'player', group='json'), video_id)
|
||||||
|
if player:
|
||||||
|
for format_id, format_list in player['tracks'].items():
|
||||||
|
process_format_list(format_list, format_id)
|
||||||
|
duration = int_or_none(player.get('duration'))
|
||||||
|
|
||||||
if not formats and has_drm:
|
if not formats and has_drm:
|
||||||
self.report_drm(video_id)
|
self.report_drm(video_id)
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import calendar
|
import calendar
|
||||||
import json
|
import json
|
||||||
import functools
|
import functools
|
||||||
from datetime import datetime
|
from datetime import datetime, timezone
|
||||||
from random import random
|
from random import random
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
@ -243,7 +243,7 @@ def _mark_watched(self, base_url, video_id, delivery_info):
|
||||||
invocation_id = delivery_info.get('InvocationId')
|
invocation_id = delivery_info.get('InvocationId')
|
||||||
stream_id = traverse_obj(delivery_info, ('Delivery', 'Streams', ..., 'PublicID'), get_all=False, expected_type=str)
|
stream_id = traverse_obj(delivery_info, ('Delivery', 'Streams', ..., 'PublicID'), get_all=False, expected_type=str)
|
||||||
if invocation_id and stream_id and duration:
|
if invocation_id and stream_id and duration:
|
||||||
timestamp_str = f'/Date({calendar.timegm(datetime.utcnow().timetuple())}000)/'
|
timestamp_str = f'/Date({calendar.timegm(datetime.now(timezone.utc).timetuple())}000)/'
|
||||||
data = {
|
data = {
|
||||||
'streamRequests': [
|
'streamRequests': [
|
||||||
{
|
{
|
||||||
|
|
113
yt_dlp/extractor/pornbox.py
Normal file
113
yt_dlp/extractor/pornbox.py
Normal file
|
@ -0,0 +1,113 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import functools
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
parse_iso8601,
|
||||||
|
qualities,
|
||||||
|
str_or_none,
|
||||||
|
traverse_obj,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PornboxIE(InfoExtractor):
    """Extractor for Pornbox watch pages (``/application/watch-page/<id>``).

    Scene metadata is read from the ``/contents/<id>`` JSON endpoint. Formats
    are only requested (from ``/media/<media_id>/stream``) when the scene is
    reported as purchased *and* as available for free; otherwise a login
    requirement is signalled and the metadata alone is returned.
    """

    _VALID_URL = r'https?://(?:www\.)?pornbox\.com/application/watch-page/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://pornbox.com/application/watch-page/212108',
        'md5': '3ff6b6e206f263be4c5e987a3162ac6e',
        'info_dict': {
            'id': '212108',
            'ext': 'mp4',
            'title': 'md5:ececc5c6e6c9dd35d290c45fed05fd49',
            'uploader': 'Lily Strong',
            'timestamp': 1665871200,
            'upload_date': '20221015',
            'age_limit': 18,
            'availability': 'needs_auth',
            'duration': 1505,
            'cast': ['Lily Strong', 'John Strong'],
            'tags': 'count:11',
            'description': 'md5:589c7f33e183aa8aa939537300efb859',
            'thumbnail': r're:^https?://cdn-image\.gtflixtv\.com.*\.jpg.*$'
        }
    }, {
        'url': 'https://pornbox.com/application/watch-page/216045',
        'info_dict': {
            'id': '216045',
            'title': 'md5:3e48528e73a9a2b12f7a2772ed0b26a2',
            'description': 'md5:3e631dcaac029f15ed434e402d1b06c7',
            'uploader': 'VK Studio',
            'timestamp': 1618264800,
            'upload_date': '20210412',
            'age_limit': 18,
            'availability': 'premium_only',
            'duration': 2710,
            'cast': 'count:3',
            'tags': 'count:29',
            'thumbnail': r're:^https?://cdn-image\.gtflixtv\.com.*\.jpg.*$',
            'subtitles': 'count:6'
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True
        },
        'expected_warnings': [
            'You are either not logged in or do not have access to this scene',
            'No video formats found', 'Requested format is not available']
    }]

    def _real_extract(self, url):
        """Return the info dict for the scene addressed by ``url``."""
        video_id = self._match_id(url)
        scene = self._download_json(f'https://pornbox.com/contents/{video_id}', video_id)

        # One SRT track per language/country code listed by the API
        subtitles = {}
        for code in traverse_obj(scene, ('subtitles', ..., {str})):
            subtitles[code] = [{
                'url': f'https://pornbox.com/contents/{video_id}/subtitles/{code}',
                'ext': 'srt'
            }]

        is_free_scene = traverse_obj(
            scene, ('price', 'is_available_for_free', {bool}), default=False)

        metadata = {'id': video_id}
        metadata.update(traverse_obj(scene, {
            'title': ('scene_name', {str.strip}),
            'description': ('small_description', {str.strip}),
            'uploader': 'studio',
            'duration': ('runtime', {parse_duration}),
            'cast': (('models', 'male_models'), ..., 'model_name'),
            'thumbnail': ('player_poster', {url_or_none}),
            'tags': ('niches', ..., 'niche'),
        }))
        metadata['age_limit'] = 18
        # Prefer the studio release date, fall back to the publish date
        metadata['timestamp'] = parse_iso8601(traverse_obj(
            scene, ('studios', 'release_date'), 'publish_date'))
        metadata['availability'] = self._availability(
            needs_auth=True, needs_premium=not is_free_scene)
        metadata['subtitles'] = subtitles

        # NOTE(review): formats are only fetched when the scene is BOTH purchased and
        # free; a purchased premium scene is rejected here as well - confirm intended
        if not (scene.get('is_purchased') and is_free_scene):
            self.raise_login_required(
                'You are either not logged in or do not have access to this scene', metadata_available=True)
            return metadata

        media_id = traverse_obj(scene, (
            'medias', lambda _, v: v['title'] == 'Full video', 'media_id', {int}), get_all=False)
        if not media_id:
            self.raise_no_formats('Could not find stream id', video_id=video_id)

        stream_data = self._download_json(
            f'https://pornbox.com/media/{media_id}/stream', video_id=video_id, note='Getting manifest urls')

        # Quality labels ordered from worst to best
        rank_quality = qualities(['web', 'vga', 'hd', '1080p', '4k', '8k'])
        to_kbps = functools.partial(int_or_none, scale=1000)
        metadata['formats'] = traverse_obj(stream_data, ('qualities', lambda _, v: v['src'], {
            'url': 'src',
            'vbr': ('bitrate', {to_kbps}),
            'format_id': ('quality', {str_or_none}),
            'quality': ('quality', {rank_quality}),
            # Drops the trailing unit character of 'size' before converting
            # NOTE(review): presumably a width value - verify against the API
            'width': ('size', {lambda x: int(x[:-1])}),
        }))

        return metadata
|
|
@ -1,97 +1,155 @@
|
||||||
import re
|
import json
|
||||||
|
from datetime import date
|
||||||
|
from urllib.parse import unquote
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import merge_dicts
|
from ..compat import functools
|
||||||
|
from ..utils import ExtractorError, make_archive_id, urljoin
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class Pr0grammStaticIE(InfoExtractor):
|
|
||||||
# Possible urls:
|
|
||||||
# https://pr0gramm.com/static/5466437
|
|
||||||
_VALID_URL = r'https?://pr0gramm\.com/static/(?P<id>[0-9]+)'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'https://pr0gramm.com/static/5466437',
|
|
||||||
'md5': '52fa540d70d3edc286846f8ca85938aa',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '5466437',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'pr0gramm-5466437 by g11st',
|
|
||||||
'uploader': 'g11st',
|
|
||||||
'upload_date': '20221221',
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
# Fetch media sources
|
|
||||||
entries = self._parse_html5_media_entries(url, webpage, video_id)
|
|
||||||
media_info = entries[0]
|
|
||||||
|
|
||||||
# Fetch author
|
|
||||||
uploader = self._html_search_regex(r'by\W+([\w-]+)\W+', webpage, 'uploader')
|
|
||||||
|
|
||||||
# Fetch approx upload timestamp from filename
|
|
||||||
# Have None-defaults in case the extraction fails
|
|
||||||
uploadDay = None
|
|
||||||
uploadMon = None
|
|
||||||
uploadYear = None
|
|
||||||
uploadTimestr = None
|
|
||||||
# (//img.pr0gramm.com/2022/12/21/62ae8aa5e2da0ebf.mp4)
|
|
||||||
m = re.search(r'//img\.pr0gramm\.com/(?P<year>[\d]+)/(?P<mon>[\d]+)/(?P<day>[\d]+)/\w+\.\w{,4}', webpage)
|
|
||||||
|
|
||||||
if (m):
|
|
||||||
# Up to a day of accuracy should suffice...
|
|
||||||
uploadDay = m.groupdict().get('day')
|
|
||||||
uploadMon = m.groupdict().get('mon')
|
|
||||||
uploadYear = m.groupdict().get('year')
|
|
||||||
uploadTimestr = uploadYear + uploadMon + uploadDay
|
|
||||||
|
|
||||||
return merge_dicts({
|
|
||||||
'id': video_id,
|
|
||||||
'title': 'pr0gramm-%s%s' % (video_id, (' by ' + uploader) if uploader else ''),
|
|
||||||
'uploader': uploader,
|
|
||||||
'upload_date': uploadTimestr
|
|
||||||
}, media_info)
|
|
||||||
|
|
||||||
|
|
||||||
# This extractor is for the primary url (used for sharing, and appears in the
|
|
||||||
# location bar) Since this page loads the DOM via JS, yt-dl can't find any
|
|
||||||
# video information here. So let's redirect to a compatibility version of
|
|
||||||
# the site, which does contain the <video>-element by itself, without requiring
|
|
||||||
# js to be ran.
|
|
||||||
class Pr0grammIE(InfoExtractor):
    """Extractor for pr0gramm.com posts, backed by the site's ``/api/items`` endpoints.

    Tags are only requested when a login cookie is present. The content flags
    sent with the request are derived from the account cookies, see
    ``_maximum_flags``.
    """

    _VALID_URL = r'https?://pr0gramm\.com\/(?:[^/?#]+/)+(?P<id>[\d]+)(?:[/?#:]|$)'
    _TESTS = [{
        # Tags require account
        'url': 'https://pr0gramm.com/new/video/5466437',
        'info_dict': {
            'id': '5466437',
            'ext': 'mp4',
            'title': 'pr0gramm-5466437 by g11st',
            'tags': ['Neon Genesis Evangelion', 'Touhou Project', 'Fly me to the Moon', 'Marisad', 'Marisa Kirisame', 'video', 'sound', 'Marisa', 'Anime'],
            'uploader': 'g11st',
            'uploader_id': 394718,
            'timestamp': 1671590240,
            'upload_date': '20221221',
            'like_count': int,
            'dislike_count': int,
            'age_limit': 0,
            'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
        },
    }, {
        # Tags require account
        'url': 'https://pr0gramm.com/new/3052805:comment28391322',
        'info_dict': {
            'id': '3052805',
            'ext': 'mp4',
            'title': 'pr0gramm-3052805 by Hansking1',
            'tags': 'count:15',
            'uploader': 'Hansking1',
            'uploader_id': 385563,
            'timestamp': 1552930408,
            'upload_date': '20190318',
            'like_count': int,
            'dislike_count': int,
            'age_limit': 0,
            'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
        },
    }, {
        # Requires verified account
        'url': 'https://pr0gramm.com/new/Gianna%20Michaels/5848332',
        'info_dict': {
            'id': '5848332',
            'ext': 'mp4',
            'title': 'pr0gramm-5848332 by erd0pfel',
            'tags': 'count:18',
            'uploader': 'erd0pfel',
            'uploader_id': 349094,
            'timestamp': 1694489652,
            'upload_date': '20230912',
            'like_count': int,
            'dislike_count': int,
            'age_limit': 18,
            'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
        },
    }, {
        'url': 'https://pr0gramm.com/static/5466437',
        'only_matching': True,
    }, {
        'url': 'https://pr0gramm.com/new/rowan%20atkinson%20herr%20bohne/3052805',
        'only_matching': True,
    }, {
        'url': 'https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290',
        'only_matching': True,
    }]

    BASE_URL = 'https://pr0gramm.com'

    @functools.cached_property
    def _is_logged_in(self):
        """Whether a 'pp' cookie is present for the site."""
        return 'pp' in self._get_cookies(self.BASE_URL)

    @functools.cached_property
    def _maximum_flags(self):
        """Bitmask of the content flags to send with the 'get' API request."""
        # We need to guess the flags for the content otherwise the api will raise an error
        # We can guess the maximum allowed flags for the account from the cookies
        # Bitflags are (msbf): nsfp, nsfl, nsfw, sfw
        flags = 0b0001
        if self._is_logged_in:
            flags |= 0b1000
            cookies = self._get_cookies(self.BASE_URL)
            if 'me' not in cookies:
                self._download_webpage(self.BASE_URL, None, 'Refreshing verification information')
                # Look the cookies up again: the mapping above was obtained before the
                # refresh request, so it may not contain a newly set 'me' cookie
                cookies = self._get_cookies(self.BASE_URL)
            if traverse_obj(cookies, ('me', {lambda x: x.value}, {unquote}, {json.loads}, 'verified')):
                flags |= 0b0110

        return flags

    def _call_api(self, endpoint, video_id, query={}, note='Downloading API json'):
        """Query ``/api/items/<endpoint>`` and return the decoded JSON.

        HTTP 403 responses are accepted so that the error payload can be
        inspected. Raises ExtractorError (or the login-required error) when
        the payload carries an 'error' string.
        """
        data = self._download_json(
            f'{self.BASE_URL}/api/items/{endpoint}',
            video_id, note, query=query, expected_status=403)

        error = traverse_obj(data, ('error', {str}))
        if error in ('nsfwRequired', 'nsflRequired', 'nsfpRequired', 'verificationRequired'):
            if not self._is_logged_in:
                self.raise_login_required()
            raise ExtractorError(f'Unverified account cannot access NSFW/NSFL ({error})', expected=True)
        elif error:
            message = traverse_obj(data, ('msg', {str})) or error
            raise ExtractorError(f'API returned error: {message}', expected=True)

        return data

    def _real_extract(self, url):
        """Return the info dict for the post addressed by ``url``."""
        video_id = self._match_id(url)
        # Fall back to an empty dict so that a response without items ends in the
        # "Could not extract a video" error below instead of an AttributeError
        video_info = traverse_obj(
            self._call_api('get', video_id, {'id': video_id, 'flags': self._maximum_flags}),
            ('items', 0, {dict})) or {}

        source = urljoin('https://img.pr0gramm.com', video_info.get('image'))
        if not source or not source.endswith('mp4'):
            # A present but non-mp4 source is an expected condition (not a video post)
            self.raise_no_formats('Could not extract a video', expected=bool(source), video_id=video_id)

        tags = None
        if self._is_logged_in:
            metadata = self._call_api('info', video_id, {'itemId': video_id})
            tags = traverse_obj(metadata, ('tags', ..., 'tag', {str}))
            # Sorted by "confidence", higher confidence = earlier in list
            confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float})))
            if confidences:
                tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)]

        return {
            'id': video_id,
            'title': f'pr0gramm-{video_id} by {video_info.get("user")}',
            'formats': [{
                'url': source,
                'ext': 'mp4',
                **traverse_obj(video_info, {
                    'width': ('width', {int}),
                    'height': ('height', {int}),
                }),
            }],
            'tags': tags,
            # Any of the nsfw/nsfl bits marks the post as adult content
            'age_limit': 18 if traverse_obj(video_info, ('flags', {0b110.__and__})) else 0,
            '_old_archive_ids': [make_archive_id('Pr0grammStatic', video_id)],
            **traverse_obj(video_info, {
                'uploader': ('user', {str}),
                'uploader_id': ('userId', {int}),
                'like_count': ('up', {int}),
                'dislike_count': ('down', {int}),
                # 'timestamp' is the info-dict field for the upload time; 'upload_date'
                # is not set here because date.fromtimestamp() would use the local
                # timezone - it is left to be derived from 'timestamp' (in UTC)
                'timestamp': ('created', {int}),
                'thumbnail': ('thumb', {lambda x: urljoin('https://thumb.pr0gramm.com', x)})
            }),
        }
|
||||||
|
|
|
@ -1,7 +1,18 @@
|
||||||
|
import itertools
|
||||||
import re
|
import re
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import parse_duration, unified_strdate
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
|
js_to_json,
|
||||||
|
parse_duration,
|
||||||
|
strftime_or_none,
|
||||||
|
traverse_obj,
|
||||||
|
unified_strdate,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class RadioFranceIE(InfoExtractor):
|
class RadioFranceIE(InfoExtractor):
|
||||||
|
@ -56,8 +67,32 @@ def _real_extract(self, url):
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class FranceCultureIE(InfoExtractor):
|
class RadioFranceBaseIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?radiofrance\.fr/(?:franceculture|fip|francemusique|mouv|franceinter)/podcasts/(?:[^?#]+/)?(?P<display_id>[^?#]+)-(?P<id>\d+)($|[?#])'
|
_VALID_URL_BASE = r'https?://(?:www\.)?radiofrance\.fr'
|
||||||
|
|
||||||
|
_STATIONS_RE = '|'.join(map(re.escape, (
|
||||||
|
'franceculture',
|
||||||
|
'franceinfo',
|
||||||
|
'franceinter',
|
||||||
|
'francemusique',
|
||||||
|
'fip',
|
||||||
|
'mouv',
|
||||||
|
)))
|
||||||
|
|
||||||
|
def _extract_data_from_webpage(self, webpage, display_id, key):
    """Return the first dict stored under ``data -> key`` in the page's ``const data`` array.

    The array literal following ``const data =`` is located in ``webpage`` and
    converted with ``js_to_json`` before parsing. An empty dict is returned
    when no matching (non-empty) dict is found.
    """
    page_data = self._search_json(
        r'\bconst\s+data\s*=', webpage, key, display_id,
        contains_pattern=r'(\[\{.*?\}\]);', transform_source=js_to_json)
    # get_all=False: only the first entry that yields a dict is used
    found = traverse_obj(page_data, (..., 'data', key, {dict}), get_all=False)
    if found:
        return found
    return {}
|
||||||
|
|
||||||
|
|
||||||
|
class FranceCultureIE(RadioFranceBaseIE):
|
||||||
|
_VALID_URL = rf'''(?x)
|
||||||
|
{RadioFranceBaseIE._VALID_URL_BASE}
|
||||||
|
/(?:{RadioFranceBaseIE._STATIONS_RE})
|
||||||
|
/podcasts/(?:[^?#]+/)?(?P<display_id>[^?#]+)-(?P<id>\d{{6,}})(?:$|[?#])
|
||||||
|
'''
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'https://www.radiofrance.fr/franceculture/podcasts/science-en-questions/la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau-8440487',
|
'url': 'https://www.radiofrance.fr/franceculture/podcasts/science-en-questions/la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau-8440487',
|
||||||
|
@ -67,14 +102,30 @@ class FranceCultureIE(InfoExtractor):
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'La physique d’Einstein aiderait-elle à comprendre le cerveau ?',
|
'title': 'La physique d’Einstein aiderait-elle à comprendre le cerveau ?',
|
||||||
'description': 'Existerait-il un pont conceptuel entre la physique de l’espace-temps et les neurosciences ?',
|
'description': 'Existerait-il un pont conceptuel entre la physique de l’espace-temps et les neurosciences ?',
|
||||||
'thumbnail': 'https://cdn.radiofrance.fr/s3/cruiser-production/2022/05/d184e7a3-4827-4494-bf94-04ed7b120db4/1200x630_gettyimages-200171095-001.jpg',
|
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||||
'upload_date': '20220514',
|
'upload_date': '20220514',
|
||||||
'duration': 2750,
|
'duration': 2750,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://www.radiofrance.fr/franceinter/podcasts/le-7-9-30/le-7-9-30-du-vendredi-10-mars-2023-2107675',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2107675',
|
||||||
|
'display_id': 'le-7-9-30-du-vendredi-10-mars-2023',
|
||||||
|
'title': 'Inflation alimentaire : comment en sortir ? - Régis Debray et Claude Grange - Cybèle Idelot',
|
||||||
|
'description': 'md5:36ee74351ede77a314fdebb94026b916',
|
||||||
|
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||||
|
'upload_date': '20230310',
|
||||||
|
'duration': 8977,
|
||||||
|
'ext': 'mp3',
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
'url': 'https://www.radiofrance.fr/franceinter/podcasts/la-rafle-du-vel-d-hiv-une-affaire-d-etat/les-racines-du-crime-episode-1-3715507',
|
'url': 'https://www.radiofrance.fr/franceinter/podcasts/la-rafle-du-vel-d-hiv-une-affaire-d-etat/les-racines-du-crime-episode-1-3715507',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.radiofrance.fr/franceinfo/podcasts/le-billet-sciences/sante-bientot-un-vaccin-contre-l-asthme-allergique-3057200',
|
||||||
|
'only_matching': True,
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -89,7 +140,6 @@ def _real_extract(self, url):
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'url': video_data['contentUrl'],
|
'url': video_data['contentUrl'],
|
||||||
'ext': video_data.get('encodingFormat'),
|
|
||||||
'vcodec': 'none' if video_data.get('encodingFormat') == 'mp3' else None,
|
'vcodec': 'none' if video_data.get('encodingFormat') == 'mp3' else None,
|
||||||
'duration': parse_duration(video_data.get('duration')),
|
'duration': parse_duration(video_data.get('duration')),
|
||||||
'title': self._html_search_regex(r'(?s)<h1[^>]*itemprop="[^"]*name[^"]*"[^>]*>(.+?)</h1>',
|
'title': self._html_search_regex(r'(?s)<h1[^>]*itemprop="[^"]*name[^"]*"[^>]*>(.+?)</h1>',
|
||||||
|
@ -102,3 +152,322 @@ def _real_extract(self, url):
|
||||||
'upload_date': unified_strdate(self._search_regex(
|
'upload_date': unified_strdate(self._search_regex(
|
||||||
r'"datePublished"\s*:\s*"([^"]+)', webpage, 'timestamp', fatal=False))
|
r'"datePublished"\s*:\s*"([^"]+)', webpage, 'timestamp', fatal=False))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class RadioFranceLiveIE(RadioFranceBaseIE):
    # Live stream extractor for radiofrance.fr station pages and their
    # optional "radio-*" sub-station pages.
    _VALID_URL = rf'''(?x)
        https?://(?:www\.)?radiofrance\.fr
        /(?P<id>{RadioFranceBaseIE._STATIONS_RE})
        /?(?P<substation_id>radio-[\w-]+)?(?:[#?]|$)
    '''

    _TESTS = [{
        'url': 'https://www.radiofrance.fr/franceinter/',
        'info_dict': {
            'id': 'franceinter',
            'title': str,
            'live_status': 'is_live',
            'ext': 'aac',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://www.radiofrance.fr/franceculture',
        'info_dict': {
            'id': 'franceculture',
            'title': str,
            'live_status': 'is_live',
            'ext': 'aac',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://www.radiofrance.fr/mouv/radio-musique-kids-family',
        'info_dict': {
            'id': 'mouv-radio-musique-kids-family',
            'title': str,
            'live_status': 'is_live',
            'ext': 'aac',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://www.radiofrance.fr/mouv/radio-rnb-soul',
        'info_dict': {
            'id': 'mouv-radio-rnb-soul',
            'title': str,
            'live_status': 'is_live',
            'ext': 'aac',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://www.radiofrance.fr/mouv/radio-musique-mix',
        'info_dict': {
            'id': 'mouv-radio-musique-mix',
            'title': str,
            'live_status': 'is_live',
            'ext': 'aac',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://www.radiofrance.fr/fip/radio-rock',
        'info_dict': {
            'id': 'fip-radio-rock',
            'title': str,
            'live_status': 'is_live',
            'ext': 'aac',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://www.radiofrance.fr/mouv',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the live-stream info dict for a station or sub-station URL.

        Sub-station pages are read from the 'webRadioData' blob embedded in
        the webpage; plain station pages use the station's ``/api/live``
        JSON endpoint.
        """
        mobj = self._match_valid_url(url)
        station = mobj.group('id')
        substation = mobj.group('substation_id')

        if not substation:
            live_data = self._download_json(
                f'https://www.radiofrance.fr/{station}/api/live', station)
        else:
            page = self._download_webpage(url, station)
            live_data = self._extract_data_from_webpage(page, station, 'webRadioData')

        formats = []
        subtitles = {}
        # Sources are looked up both under 'now' and at the top level of the
        # response; entries without a truthy 'url' are filtered out.
        sources = traverse_obj(
            live_data, (('now', None), 'media', 'sources', lambda _, v: v['url']))
        for source in sources:
            source_url = source['url']
            if source.get('format') != 'hls':
                # Non-HLS sources are used directly as single formats
                formats.append({
                    'url': source_url,
                    'abr': source.get('bitrate'),
                })
                continue
            hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles(
                source_url, station, fatal=False)
            formats.extend(hls_formats)
            self._merge_subtitles(hls_subs, target=subtitles)

        # Prefer the visual legend; otherwise join the two "now playing" lines
        title = traverse_obj(live_data, ('visual', 'legend'))
        if not title:
            title = join_nonempty(
                ('now', 'firstLine', 'title'), ('now', 'secondLine', 'title'),
                from_dict=live_data, delim=' - ')

        return {
            'id': join_nonempty(station, substation),
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'is_live': True,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class RadioFrancePlaylistBase(RadioFranceBaseIE):
    """Shared logic for paginated radiofrance.fr playlists.

    Subclasses must set _METADATA_KEY and implement _call_api.
    """

    def _call_api(self, content_id, cursor, page_num):
        """Fetch the page identified by `cursor`; must be overridden."""
        raise NotImplementedError('This method must be implemented by subclasses')

    def _generate_playlist_entries(self, content_id, content_response):
        """Yield url_transparent results for every item, following cursors.

        `content_response` is the first page; further pages are requested
        through _call_api until no 'next' cursor is found.
        """
        page = content_response
        page_num = 2  # the first page is already in hand
        while True:
            for item in page['items']:
                item_url = f'https://www.radiofrance.fr/{item["path"]}'
                item_info = traverse_obj(item, {
                    'title': 'title',
                    'description': 'standFirst',
                    'timestamp': ('publishedDate', {int_or_none}),
                    'thumbnail': ('visual', 'src'),
                })
                yield self.url_result(item_url, url_transparent=True, **item_info)

            # The cursor may sit under 'pagination' or at the top level
            cursor = traverse_obj(page, (('pagination', None), 'next'), get_all=False)
            if not cursor:
                return

            page = self._call_api(content_id, cursor, page_num)
            page_num += 1

    def _real_extract(self, url):
        """Resolve the URL path via the API and return a playlist result."""
        display_id = self._match_id(url)
        url_path = urllib.parse.urlparse(url).path

        response = self._download_json(
            'https://www.radiofrance.fr/api/v2.1/path', display_id,
            query={'value': url_path})
        metadata = response['content']
        content_id = metadata['id']

        entries = self._generate_playlist_entries(content_id, metadata[self._METADATA_KEY])

        playlist_info = traverse_obj(metadata, {
            'title': 'title',
            'description': 'standFirst',
            'thumbnail': ('visual', 'src'),
        })
        # 'name' / 'role', when present, override 'title' / 'standFirst'
        playlist_info.update(traverse_obj(metadata, {
            'title': 'name',
            'description': 'role',
        }))

        return self.playlist_result(
            entries, content_id, display_id=display_id, **playlist_info)
|
||||||
|
|
||||||
|
|
||||||
|
class RadioFrancePodcastIE(RadioFrancePlaylistBase):
    # Playlist extractor for "/<station>/podcasts/<slug>" pages.
    _VALID_URL = rf'''(?x)
        {RadioFranceBaseIE._VALID_URL_BASE}
        /(?:{RadioFranceBaseIE._STATIONS_RE})
        /podcasts/(?P<id>[\w-]+)/?(?:[?#]|$)
    '''

    # Key of the first page of episodes inside the path-lookup metadata
    _METADATA_KEY = 'expressions'

    _TESTS = [{
        'url': 'https://www.radiofrance.fr/franceinfo/podcasts/le-billet-vert',
        'info_dict': {
            'id': 'eaf6ef81-a980-4f1c-a7d1-8a75ecd54b17',
            'display_id': 'le-billet-vert',
            'title': 'Le billet sciences',
            'description': 'md5:eb1007b34b0c0a680daaa71525bbd4c1',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://www.radiofrance.fr/franceinter/podcasts/jean-marie-le-pen-l-obsession-nationale',
        'info_dict': {
            'id': '566fd524-3074-4fbc-ac69-8696f2152a54',
            'display_id': 'jean-marie-le-pen-l-obsession-nationale',
            'title': 'Jean-Marie Le Pen, l\'obsession nationale',
            'description': 'md5:a07c0cfb894f6d07a62d0ad12c4b7d73',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_count': 7,
    }, {
        'url': 'https://www.radiofrance.fr/franceculture/podcasts/serie-thomas-grjebine',
        'info_dict': {
            'id': '63c1ddc9-9f15-457a-98b2-411bac63f48d',
            'display_id': 'serie-thomas-grjebine',
            'title': 'Thomas Grjebine',
        },
        'playlist_count': 1,
    }, {
        'url': 'https://www.radiofrance.fr/fip/podcasts/certains-l-aiment-fip',
        'info_dict': {
            'id': '143dff38-e956-4a5d-8576-1c0b7242b99e',
            'display_id': 'certains-l-aiment-fip',
            'title': 'Certains l’aiment Fip',
            'description': 'md5:ff974672ba00d4fd5be80fb001c5b27e',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 321,
    }, {
        'url': 'https://www.radiofrance.fr/franceinter/podcasts/le-7-9',
        'only_matching': True,
    }, {
        'url': 'https://www.radiofrance.fr/mouv/podcasts/dirty-mix',
        'only_matching': True,
    }]

    def _call_api(self, podcast_id, cursor, page_num):
        """Download one further page of the podcast's expressions."""
        api_url = f'https://www.radiofrance.fr/api/v2.1/concepts/{podcast_id}/expressions'
        return self._download_json(
            api_url, podcast_id,
            note=f'Downloading page {page_num}', query={'pageCursor': cursor})
|
||||||
|
|
||||||
|
|
||||||
|
class RadioFranceProfileIE(RadioFrancePlaylistBase):
    # Playlist extractor for "/personnes/<slug>" profile pages.
    _VALID_URL = rf'{RadioFranceBaseIE._VALID_URL_BASE}/personnes/(?P<id>[\w-]+)'

    # Key of the first page of documents inside the path-lookup metadata
    _METADATA_KEY = 'documents'

    _TESTS = [{
        'url': 'https://www.radiofrance.fr/personnes/thomas-pesquet?p=3',
        'info_dict': {
            'id': '86c62790-e481-11e2-9f7b-782bcb6744eb',
            'display_id': 'thomas-pesquet',
            'title': 'Thomas Pesquet',
            'description': 'Astronaute à l\'agence spatiale européenne',
        },
        'playlist_mincount': 212,
    }, {
        'url': 'https://www.radiofrance.fr/personnes/eugenie-bastie',
        'info_dict': {
            'id': '9593050b-0183-4972-a0b5-d8f699079e02',
            'display_id': 'eugenie-bastie',
            'title': 'Eugénie Bastié',
            'description': 'Journaliste et essayiste',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 39,
    }, {
        'url': 'https://www.radiofrance.fr/personnes/lea-salame',
        'only_matching': True,
    }]

    def _call_api(self, profile_id, cursor, page_num):
        """Download one further page of documents for the profile."""
        api_url = f'https://www.radiofrance.fr/api/v2.1/taxonomy/{profile_id}/documents'
        request_query = {
            'relation': 'personality',
            'cursor': cursor,
        }
        documents = self._download_json(
            api_url, profile_id, note=f'Downloading page {page_num}', query=request_query)

        # Mirror the nested pagination cursor onto a top-level 'next' key
        documents['next'] = traverse_obj(documents, ('pagination', 'next'))
        return documents
|
||||||
|
|
||||||
|
|
||||||
|
class RadioFranceProgramScheduleIE(RadioFranceBaseIE):
    # Playlist extractor for a station's "grille-programmes" (program grid)
    # page, optionally restricted to a day through the "date" query parameter.
    _VALID_URL = rf'''(?x)
        {RadioFranceBaseIE._VALID_URL_BASE}
        /(?P<station>{RadioFranceBaseIE._STATIONS_RE})
        /grille-programmes(?:\?date=(?P<date>[\d-]+))?
    '''

    _TESTS = [{
        'url': 'https://www.radiofrance.fr/franceinter/grille-programmes?date=17-02-2023',
        'info_dict': {
            'id': 'franceinter-program-20230217',
            'upload_date': '20230217',
        },
        'playlist_count': 25,
    }, {
        'url': 'https://www.radiofrance.fr/franceculture/grille-programmes?date=01-02-2023',
        'info_dict': {
            'id': 'franceculture-program-20230201',
            'upload_date': '20230201',
        },
        'playlist_count': 25,
    }, {
        'url': 'https://www.radiofrance.fr/mouv/grille-programmes?date=19-03-2023',
        'info_dict': {
            'id': 'mouv-program-20230319',
            'upload_date': '20230319',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.radiofrance.fr/francemusique/grille-programmes?date=18-03-2023',
        'info_dict': {
            'id': 'francemusique-program-20230318',
            'upload_date': '20230318',
        },
        'playlist_count': 15,
    }, {
        'url': 'https://www.radiofrance.fr/franceculture/grille-programmes',
        'only_matching': True,
    }]

    def _generate_playlist_entries(self, webpage_url, api_response):
        """Yield one url_transparent result per grid step with an expression path.

        Steps lacking a truthy ``expression.path`` are skipped by the
        traversal filter. Each result is delegated to FranceCultureIE.
        """
        for entry in traverse_obj(api_response, ('steps', lambda _, v: v['expression']['path'])):
            yield self.url_result(
                urljoin(webpage_url, f'/{entry["expression"]["path"]}'), ie=FranceCultureIE,
                url_transparent=True, **traverse_obj(entry, {
                    'title': ('expression', 'title'),
                    'thumbnail': ('expression', 'visual', 'src'),
                    'timestamp': ('startTime', {int_or_none}),
                    'series_id': ('concept', 'id'),
                    'series': ('concept', 'title'),
                }))

    def _real_extract(self, url):
        """Return the program grid of a station as a playlist.

        The playlist date comes from the 'date' field of the grid data
        embedded in the page, not from the URL, so only the station is read
        from the URL match.
        """
        station = self._match_valid_url(url).group('station')
        webpage = self._download_webpage(url, station)
        grid_data = self._extract_data_from_webpage(webpage, station, 'grid')
        upload_date = strftime_or_none(grid_data.get('date'), '%Y%m%d')

        return self.playlist_result(
            self._generate_playlist_entries(url, grid_data),
            join_nonempty(station, 'program', upload_date), upload_date=upload_date)
|
||||||
|
|
|
@ -1,10 +1,11 @@
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import parse_qs, remove_start, traverse_obj, ExtractorError
|
||||||
|
|
||||||
|
|
||||||
class RbgTumIE(InfoExtractor):
|
class RbgTumIE(InfoExtractor):
|
||||||
_VALID_URL = r'https://live\.rbg\.tum\.de/w/(?P<id>.+)'
|
_VALID_URL = r'https://(?:live\.rbg\.tum\.de|tum\.live)/w/(?P<id>[^?#]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# Combined view
|
# Combined view
|
||||||
'url': 'https://live.rbg.tum.de/w/cpp/22128',
|
'url': 'https://live.rbg.tum.de/w/cpp/22128',
|
||||||
|
@ -35,16 +36,18 @@ class RbgTumIE(InfoExtractor):
|
||||||
'title': 'Fachschaftsvollversammlung',
|
'title': 'Fachschaftsvollversammlung',
|
||||||
'series': 'Fachschaftsvollversammlung Informatik',
|
'series': 'Fachschaftsvollversammlung Informatik',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://tum.live/w/linalginfo/27102',
|
||||||
|
'only_matching': True,
|
||||||
}, ]
|
}, ]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
m3u8 = self._html_search_regex(r'(https://.+?\.m3u8)', webpage, 'm3u8')
|
m3u8 = self._html_search_regex(r'"(https://[^"]+\.m3u8[^"]*)', webpage, 'm3u8')
|
||||||
lecture_title = self._html_search_regex(r'(?si)<h1.*?>(.*)</h1>', webpage, 'title')
|
lecture_title = self._html_search_regex(r'<h1[^>]*>([^<]+)</h1>', webpage, 'title', fatal=False)
|
||||||
lecture_series_title = self._html_search_regex(
|
lecture_series_title = remove_start(self._html_extract_title(webpage), 'TUM-Live | ')
|
||||||
r'(?s)<title\b[^>]*>\s*(?:TUM-Live\s\|\s?)?([^:]+):?.*?</title>', webpage, 'series')
|
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(m3u8, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
|
formats = self._extract_m3u8_formats(m3u8, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
|
||||||
|
|
||||||
|
@ -57,9 +60,9 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
|
|
||||||
class RbgTumCourseIE(InfoExtractor):
|
class RbgTumCourseIE(InfoExtractor):
|
||||||
_VALID_URL = r'https://live\.rbg\.tum\.de/course/(?P<id>.+)'
|
_VALID_URL = r'https://(?P<hostname>(?:live\.rbg\.tum\.de|tum\.live))/old/course/(?P<id>(?P<year>\d+)/(?P<term>\w+)/(?P<slug>[^/?#]+))'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://live.rbg.tum.de/course/2022/S/fpv',
|
'url': 'https://live.rbg.tum.de/old/course/2022/S/fpv',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'Funktionale Programmierung und Verifikation (IN0003)',
|
'title': 'Funktionale Programmierung und Verifikation (IN0003)',
|
||||||
'id': '2022/S/fpv',
|
'id': '2022/S/fpv',
|
||||||
|
@ -69,7 +72,7 @@ class RbgTumCourseIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
'playlist_count': 13,
|
'playlist_count': 13,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://live.rbg.tum.de/course/2022/W/set',
|
'url': 'https://live.rbg.tum.de/old/course/2022/W/set',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'SET FSMPIC',
|
'title': 'SET FSMPIC',
|
||||||
'id': '2022/W/set',
|
'id': '2022/W/set',
|
||||||
|
@ -78,16 +81,62 @@ class RbgTumCourseIE(InfoExtractor):
|
||||||
'noplaylist': False,
|
'noplaylist': False,
|
||||||
},
|
},
|
||||||
'playlist_count': 6,
|
'playlist_count': 6,
|
||||||
|
}, {
|
||||||
|
'url': 'https://tum.live/old/course/2023/S/linalginfo',
|
||||||
|
'only_matching': True,
|
||||||
}, ]
|
}, ]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
course_id = self._match_id(url)
|
course_id, hostname, year, term, slug = self._match_valid_url(url).group('id', 'hostname', 'year', 'term', 'slug')
|
||||||
webpage = self._download_webpage(url, course_id)
|
meta = self._download_json(
|
||||||
|
f'https://{hostname}/api/courses/{slug}/', course_id, fatal=False,
|
||||||
|
query={'year': year, 'term': term}) or {}
|
||||||
|
lecture_series_title = meta.get('Name')
|
||||||
|
lectures = [self.url_result(f'https://{hostname}/w/{slug}/{stream_id}', RbgTumIE)
|
||||||
|
for stream_id in traverse_obj(meta, ('Streams', ..., 'ID'))]
|
||||||
|
|
||||||
lecture_series_title = self._html_search_regex(r'(?si)<h1.*?>(.*)</h1>', webpage, 'title')
|
if not lectures:
|
||||||
|
webpage = self._download_webpage(url, course_id)
|
||||||
|
lecture_series_title = remove_start(self._html_extract_title(webpage), 'TUM-Live | ')
|
||||||
|
lectures = [self.url_result(f'https://{hostname}{lecture_path}', RbgTumIE)
|
||||||
|
for lecture_path in re.findall(r'href="(/w/[^/"]+/[^/"]+)"', webpage)]
|
||||||
|
|
||||||
lecture_urls = []
|
return self.playlist_result(lectures, course_id, lecture_series_title)
|
||||||
for lecture_url in re.findall(r'(?i)href="/w/(.+)(?<!/cam)(?<!/pres)(?<!/chat)"', webpage):
|
|
||||||
lecture_urls.append(self.url_result('https://live.rbg.tum.de/w/' + lecture_url, ie=RbgTumIE.ie_key()))
|
|
||||||
|
|
||||||
return self.playlist_result(lecture_urls, course_id, lecture_series_title)
|
|
||||||
|
class RbgTumNewCourseIE(InfoExtractor):
    # Handles query-string course URLs ("/?year=…&term=…&slug=…") by
    # redirecting them to the equivalent "/old/course/…" URL.
    _VALID_URL = r'https://(?P<hostname>(?:live\.rbg\.tum\.de|tum\.live))/\?'
    _TESTS = [{
        'url': 'https://live.rbg.tum.de/?year=2022&term=S&slug=fpv&view=3',
        'info_dict': {
            'title': 'Funktionale Programmierung und Verifikation (IN0003)',
            'id': '2022/S/fpv',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 13,
    }, {
        'url': 'https://live.rbg.tum.de/?year=2022&term=W&slug=set&view=3',
        'info_dict': {
            'title': 'SET FSMPIC',
            'id': '2022/W/set',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 6,
    }, {
        'url': 'https://tum.live/?year=2023&term=S&slug=linalginfo&view=3',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Translate the query-string URL into an /old/course/ URL result.

        Raises ExtractorError when any of the 'year', 'term' or 'slug'
        query parameters is missing or empty.
        """
        required = ('year', 'term', 'slug')
        params = parse_qs(url)

        missing = list(filter(lambda name: not params.get(name), required))
        if missing:
            raise ExtractorError(f'Input URL is missing query parameters: {", ".join(missing)}')

        # Only the first value of each parameter is used
        year, term, slug = (params[name][0] for name in required)
        hostname = self._match_valid_url(url).group('hostname')

        course_url = f'https://{hostname}/old/course/{year}/{term}/{slug}'
        return self.url_result(course_url, RbgTumCourseIE)
|
||||||
|
|
|
@ -319,16 +319,20 @@ def add_thumbnail(src):
|
||||||
'format_id': 'fallback',
|
'format_id': 'fallback',
|
||||||
'format_note': 'DASH video, mp4_dash',
|
'format_note': 'DASH video, mp4_dash',
|
||||||
}]
|
}]
|
||||||
formats.extend(self._extract_m3u8_formats(
|
hls_fmts, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||||
hls_playlist_url, display_id, 'mp4', m3u8_id='hls', fatal=False))
|
hls_playlist_url, display_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||||
formats.extend(self._extract_mpd_formats(
|
formats.extend(hls_fmts)
|
||||||
dash_playlist_url, display_id, mpd_id='dash', fatal=False))
|
dash_fmts, dash_subs = self._extract_mpd_formats_and_subtitles(
|
||||||
|
dash_playlist_url, display_id, mpd_id='dash', fatal=False)
|
||||||
|
formats.extend(dash_fmts)
|
||||||
|
self._merge_subtitles(dash_subs, target=subtitles)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
**info,
|
**info,
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
'duration': int_or_none(reddit_video.get('duration')),
|
'duration': int_or_none(reddit_video.get('duration')),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
|
@ -25,7 +26,7 @@ class RTVSLOIE(InfoExtractor):
|
||||||
'url': 'https://www.rtvslo.si/rtv365/arhiv/174842550?s=tv',
|
'url': 'https://www.rtvslo.si/rtv365/arhiv/174842550?s=tv',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '174842550',
|
'id': '174842550',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'release_timestamp': 1643140032,
|
'release_timestamp': 1643140032,
|
||||||
'upload_date': '20220125',
|
'upload_date': '20220125',
|
||||||
'series': 'Dnevnik',
|
'series': 'Dnevnik',
|
||||||
|
@ -69,7 +70,21 @@ class RTVSLOIE(InfoExtractor):
|
||||||
'tbr': 128000,
|
'tbr': 128000,
|
||||||
'release_date': '20220201',
|
'release_date': '20220201',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://365.rtvslo.si/arhiv/razred-zase/148350750',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '148350750',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Prvi šolski dan, mozaična oddaja za mlade',
|
||||||
|
'series': 'Razred zase',
|
||||||
|
'series_id': '148185730',
|
||||||
|
'duration': 1481,
|
||||||
|
'upload_date': '20121019',
|
||||||
|
'timestamp': 1350672122,
|
||||||
|
'release_date': '20121019',
|
||||||
|
'release_timestamp': 1350672122,
|
||||||
|
'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/148185730/razred_zase_2014_logo_4d_wide2.jpg',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://4d.rtvslo.si/arhiv/dnevnik/174842550',
|
'url': 'https://4d.rtvslo.si/arhiv/dnevnik/174842550',
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
|
@ -98,13 +113,14 @@ def _real_extract(self, url):
|
||||||
media = self._download_json(self._API_BASE.format('getMedia', v_id), v_id, query={'jwt': jwt})['response']
|
media = self._download_json(self._API_BASE.format('getMedia', v_id), v_id, query={'jwt': jwt})['response']
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
skip_protocols = ['smil', 'f4m', 'dash']
|
||||||
adaptive_url = traverse_obj(media, ('addaptiveMedia', 'hls_sec'), expected_type=url_or_none)
|
adaptive_url = traverse_obj(media, ('addaptiveMedia', 'hls_sec'), expected_type=url_or_none)
|
||||||
if adaptive_url:
|
if adaptive_url:
|
||||||
formats = self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=['smil'])
|
formats = self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=skip_protocols)
|
||||||
|
|
||||||
adaptive_url = traverse_obj(media, ('addaptiveMedia_sl', 'hls_sec'), expected_type=url_or_none)
|
adaptive_url = traverse_obj(media, ('addaptiveMedia_sl', 'hls_sec'), expected_type=url_or_none)
|
||||||
if adaptive_url:
|
if adaptive_url:
|
||||||
for f in self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=['smil']):
|
for f in self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=skip_protocols):
|
||||||
formats.append({
|
formats.append({
|
||||||
**f,
|
**f,
|
||||||
'format_id': 'sign-' + f['format_id'],
|
'format_id': 'sign-' + f['format_id'],
|
||||||
|
@ -114,19 +130,19 @@ def _real_extract(self, url):
|
||||||
else f.get('language'))
|
else f.get('language'))
|
||||||
})
|
})
|
||||||
|
|
||||||
formats.extend(
|
for mediafile in traverse_obj(media, ('mediaFiles', lambda _, v: url_or_none(v['streams']['https']))):
|
||||||
{
|
formats.append(traverse_obj(mediafile, {
|
||||||
'url': f['streams'][strm],
|
'url': ('streams', 'https'),
|
||||||
'ext': traverse_obj(f, 'mediaType', expected_type=str.lower),
|
'ext': ('mediaType', {str.lower}),
|
||||||
'width': f.get('width'),
|
'width': ('width', {int_or_none}),
|
||||||
'height': f.get('height'),
|
'height': ('height', {int_or_none}),
|
||||||
'tbr': f.get('bitrate'),
|
'tbr': ('bitrate', {int_or_none}),
|
||||||
'filesize': f.get('filesize'),
|
'filesize': ('filesize', {int_or_none}),
|
||||||
}
|
}))
|
||||||
for strm in ('http', 'https')
|
|
||||||
for f in media.get('mediaFiles') or []
|
for mediafile in traverse_obj(media, ('mediaFiles', lambda _, v: url_or_none(v['streams']['hls_sec']))):
|
||||||
if traverse_obj(f, ('streams', strm))
|
formats.extend(self._extract_wowza_formats(
|
||||||
)
|
mediafile['streams']['hls_sec'], v_id, skip_protocols=skip_protocols))
|
||||||
|
|
||||||
if any('intermission.mp4' in x['url'] for x in formats):
|
if any('intermission.mp4' in x['url'] for x in formats):
|
||||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
|
self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from ..utils import parse_duration
|
from ..utils import parse_duration, unescapeHTML
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
@ -16,7 +16,8 @@ class Rule34VideoIE(InfoExtractor):
|
||||||
'title': 'Shot It-(mmd hmv)',
|
'title': 'Shot It-(mmd hmv)',
|
||||||
'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065157/preview.jpg',
|
'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065157/preview.jpg',
|
||||||
'duration': 347.0,
|
'duration': 347.0,
|
||||||
'age_limit': 18
|
'age_limit': 18,
|
||||||
|
'tags': 'count:14'
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -28,7 +29,8 @@ class Rule34VideoIE(InfoExtractor):
|
||||||
'title': 'Lara in Trouble Ep. 7 [WildeerStudio]',
|
'title': 'Lara in Trouble Ep. 7 [WildeerStudio]',
|
||||||
'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065296/preview.jpg',
|
'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065296/preview.jpg',
|
||||||
'duration': 938.0,
|
'duration': 938.0,
|
||||||
'age_limit': 18
|
'age_limit': 18,
|
||||||
|
'tags': 'count:50'
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
@ -57,5 +59,7 @@ def _real_extract(self, url):
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'duration': parse_duration(duration),
|
'duration': parse_duration(duration),
|
||||||
'age_limit': 18
|
'age_limit': 18,
|
||||||
|
'tags': list(map(unescapeHTML, re.findall(
|
||||||
|
r'<a class="tag_item"[^>]+\bhref="https://rule34video\.com/tags/\d+/"[^>]*>(?P<tag>[^>]*)</a>', webpage))),
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,7 +33,7 @@ class RumbleEmbedIE(InfoExtractor):
|
||||||
'upload_date': '20191020',
|
'upload_date': '20191020',
|
||||||
'channel_url': 'https://rumble.com/c/WMAR',
|
'channel_url': 'https://rumble.com/c/WMAR',
|
||||||
'channel': 'WMAR',
|
'channel': 'WMAR',
|
||||||
'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.OvCc-small-WMAR-2-News-Latest-Headline.jpg',
|
'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.qR4e-small-WMAR-2-News-Latest-Headline.jpg',
|
||||||
'duration': 234,
|
'duration': 234,
|
||||||
'uploader': 'WMAR',
|
'uploader': 'WMAR',
|
||||||
'live_status': 'not_live',
|
'live_status': 'not_live',
|
||||||
|
@ -84,7 +84,7 @@ class RumbleEmbedIE(InfoExtractor):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'v1essrt',
|
'id': 'v1essrt',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'startswith:lofi hip hop radio - beats to relax/study',
|
'title': 'startswith:lofi hip hop radio 📚 - beats to relax/study to',
|
||||||
'timestamp': 1661519399,
|
'timestamp': 1661519399,
|
||||||
'upload_date': '20220826',
|
'upload_date': '20220826',
|
||||||
'channel_url': 'https://rumble.com/c/LofiGirl',
|
'channel_url': 'https://rumble.com/c/LofiGirl',
|
||||||
|
@ -99,7 +99,7 @@ class RumbleEmbedIE(InfoExtractor):
|
||||||
'url': 'https://rumble.com/embed/v1amumr',
|
'url': 'https://rumble.com/embed/v1amumr',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'v1amumr',
|
'id': 'v1amumr',
|
||||||
'ext': 'webm',
|
'ext': 'mp4',
|
||||||
'fps': 60,
|
'fps': 60,
|
||||||
'title': 'Turning Point USA 2022 Student Action Summit DAY 1 - Rumble Exclusive Live',
|
'title': 'Turning Point USA 2022 Student Action Summit DAY 1 - Rumble Exclusive Live',
|
||||||
'timestamp': 1658518457,
|
'timestamp': 1658518457,
|
||||||
|
@ -129,7 +129,7 @@ class RumbleEmbedIE(InfoExtractor):
|
||||||
'duration': 92,
|
'duration': 92,
|
||||||
'title': '911 Audio From The Man Who Wanted To Kill Supreme Court Justice Kavanaugh',
|
'title': '911 Audio From The Man Who Wanted To Kill Supreme Court Justice Kavanaugh',
|
||||||
'channel_url': 'https://rumble.com/c/RichSementa',
|
'channel_url': 'https://rumble.com/c/RichSementa',
|
||||||
'thumbnail': 'https://sp.rmbl.ws/s8/1/P/j/f/A/PjfAe.OvCc-small-911-Audio-From-The-Man-Who-.jpg',
|
'thumbnail': 'https://sp.rmbl.ws/s8/1/P/j/f/A/PjfAe.qR4e-small-911-Audio-From-The-Man-Who-.jpg',
|
||||||
'timestamp': 1654892716,
|
'timestamp': 1654892716,
|
||||||
'uploader': 'Mr Producer Media',
|
'uploader': 'Mr Producer Media',
|
||||||
'upload_date': '20220610',
|
'upload_date': '20220610',
|
||||||
|
@ -144,7 +144,7 @@ def _extract_embed_urls(cls, url, webpage):
|
||||||
if embeds:
|
if embeds:
|
||||||
return embeds
|
return embeds
|
||||||
return [f'https://rumble.com/embed/{mobj.group("id")}' for mobj in re.finditer(
|
return [f'https://rumble.com/embed/{mobj.group("id")}' for mobj in re.finditer(
|
||||||
r'<script>[^<]*\bRumble\(\s*"play"\s*,\s*{\s*[\'"]?video[\'"]?\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)]
|
r'<script>[^<]*\bRumble\(\s*"play"\s*,\s*{[^}]*[\'"]?video[\'"]?\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
@ -236,7 +236,9 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
class RumbleIE(InfoExtractor):
|
class RumbleIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?rumble\.com/(?P<id>v(?!ideos)[\w.-]+)[^/]*$'
|
_VALID_URL = r'https?://(?:www\.)?rumble\.com/(?P<id>v(?!ideos)[\w.-]+)[^/]*$'
|
||||||
_EMBED_REGEX = [r'<a class=video-item--a href=(?P<url>/v[\w.-]+\.html)>']
|
_EMBED_REGEX = [
|
||||||
|
r'<a class=video-item--a href=(?P<url>/v[\w.-]+\.html)>',
|
||||||
|
r'<a[^>]+class="videostream__link link"[^>]+href=(?P<url>/v[\w.-]+\.html)[^>]*>']
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'add_ie': ['RumbleEmbed'],
|
'add_ie': ['RumbleEmbed'],
|
||||||
'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html',
|
'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html',
|
||||||
|
@ -254,6 +256,7 @@ class RumbleIE(InfoExtractor):
|
||||||
'thumbnail': r're:https://.+\.jpg',
|
'thumbnail': r're:https://.+\.jpg',
|
||||||
'duration': 103,
|
'duration': 103,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
|
'dislike_count': int,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'live_status': 'not_live',
|
'live_status': 'not_live',
|
||||||
}
|
}
|
||||||
|
@ -278,6 +281,9 @@ class RumbleIE(InfoExtractor):
|
||||||
'channel_url': 'https://rumble.com/c/Redacted',
|
'channel_url': 'https://rumble.com/c/Redacted',
|
||||||
'live_status': 'not_live',
|
'live_status': 'not_live',
|
||||||
'thumbnail': 'https://sp.rmbl.ws/s8/1/d/x/2/O/dx2Oi.qR4e-small-The-U.S.-CANNOT-hide-this-i.jpg',
|
'thumbnail': 'https://sp.rmbl.ws/s8/1/d/x/2/O/dx2Oi.qR4e-small-The-U.S.-CANNOT-hide-this-i.jpg',
|
||||||
|
'like_count': int,
|
||||||
|
'dislike_count': int,
|
||||||
|
'view_count': int,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://rumble.com/v2e7fju-the-covid-twitter-files-drop-protecting-fauci-while-censoring-the-truth-wma.html',
|
'url': 'https://rumble.com/v2e7fju-the-covid-twitter-files-drop-protecting-fauci-while-censoring-the-truth-wma.html',
|
||||||
|
@ -296,12 +302,15 @@ class RumbleIE(InfoExtractor):
|
||||||
'channel_url': 'https://rumble.com/c/KimIversen',
|
'channel_url': 'https://rumble.com/c/KimIversen',
|
||||||
'channel': 'Kim Iversen',
|
'channel': 'Kim Iversen',
|
||||||
'thumbnail': 'https://sp.rmbl.ws/s8/1/6/b/w/O/6bwOi.qR4e-small-The-Covid-Twitter-Files-Dro.jpg',
|
'thumbnail': 'https://sp.rmbl.ws/s8/1/6/b/w/O/6bwOi.qR4e-small-The-Covid-Twitter-Files-Dro.jpg',
|
||||||
|
'like_count': int,
|
||||||
|
'dislike_count': int,
|
||||||
|
'view_count': int,
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_WEBPAGE_TESTS = [{
|
_WEBPAGE_TESTS = [{
|
||||||
'url': 'https://rumble.com/videos?page=2',
|
'url': 'https://rumble.com/videos?page=2',
|
||||||
'playlist_count': 25,
|
'playlist_mincount': 24,
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'videos?page=2',
|
'id': 'videos?page=2',
|
||||||
'title': 'All videos',
|
'title': 'All videos',
|
||||||
|
@ -309,17 +318,16 @@ class RumbleIE(InfoExtractor):
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://rumble.com/live-videos',
|
'url': 'https://rumble.com/browse/live',
|
||||||
'playlist_mincount': 19,
|
'playlist_mincount': 25,
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'live-videos',
|
'id': 'live',
|
||||||
'title': 'Live Videos',
|
'title': 'Browse',
|
||||||
'description': 'Live videos on Rumble.com',
|
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://rumble.com/search/video?q=rumble&sort=views',
|
'url': 'https://rumble.com/search/video?q=rumble&sort=views',
|
||||||
'playlist_count': 24,
|
'playlist_mincount': 24,
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'video?q=rumble&sort=views',
|
'id': 'video?q=rumble&sort=views',
|
||||||
'title': 'Search results for: rumble',
|
'title': 'Search results for: rumble',
|
||||||
|
@ -334,19 +342,20 @@ def _real_extract(self, url):
|
||||||
if not url_info:
|
if not url_info:
|
||||||
raise UnsupportedError(url)
|
raise UnsupportedError(url)
|
||||||
|
|
||||||
release_ts_str = self._search_regex(
|
return {
|
||||||
r'(?:Livestream begins|Streamed on):\s+<time datetime="([^"]+)',
|
'_type': 'url_transparent',
|
||||||
webpage, 'release date', fatal=False, default=None)
|
'ie_key': url_info['ie_key'],
|
||||||
view_count_str = self._search_regex(r'<span class="media-heading-info">([\d,]+) Views',
|
'url': url_info['url'],
|
||||||
webpage, 'view count', fatal=False, default=None)
|
'release_timestamp': parse_iso8601(self._search_regex(
|
||||||
|
r'(?:Livestream begins|Streamed on):\s+<time datetime="([^"]+)', webpage, 'release date', default=None)),
|
||||||
return self.url_result(
|
'view_count': int_or_none(self._search_regex(
|
||||||
url_info['url'], ie_key=url_info['ie_key'], url_transparent=True,
|
r'"userInteractionCount"\s*:\s*(\d+)', webpage, 'view count', default=None)),
|
||||||
view_count=parse_count(view_count_str),
|
'like_count': parse_count(self._search_regex(
|
||||||
release_timestamp=parse_iso8601(release_ts_str),
|
r'<span data-js="rumbles_up_votes">\s*([\d,.KM]+)', webpage, 'like count', default=None)),
|
||||||
like_count=parse_count(get_element_by_class('rumbles-count', webpage)),
|
'dislike_count': parse_count(self._search_regex(
|
||||||
description=clean_html(get_element_by_class('media-description', webpage)),
|
r'<span data-js="rumbles_down_votes">\s*([\d,.KM]+)', webpage, 'dislike count', default=None)),
|
||||||
)
|
'description': clean_html(get_element_by_class('media-description', webpage))
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class RumbleChannelIE(InfoExtractor):
|
class RumbleChannelIE(InfoExtractor):
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import traverse_obj
|
from ..utils import traverse_obj, url_or_none
|
||||||
|
|
||||||
|
|
||||||
class S4CIE(InfoExtractor):
|
class S4CIE(InfoExtractor):
|
||||||
|
@ -11,7 +11,8 @@ class S4CIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Y Swn',
|
'title': 'Y Swn',
|
||||||
'description': 'md5:f7681a30e4955b250b3224aa9fe70cf0',
|
'description': 'md5:f7681a30e4955b250b3224aa9fe70cf0',
|
||||||
'duration': 5340
|
'duration': 5340,
|
||||||
|
'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Y_Swn_2023S4C_099_ii.jpg'
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.s4c.cymru/clic/programme/856636948',
|
'url': 'https://www.s4c.cymru/clic/programme/856636948',
|
||||||
|
@ -21,6 +22,7 @@ class S4CIE(InfoExtractor):
|
||||||
'title': 'Am Dro',
|
'title': 'Am Dro',
|
||||||
'duration': 2880,
|
'duration': 2880,
|
||||||
'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe',
|
'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe',
|
||||||
|
'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Am_Dro_2022-23S4C_P6_4005.jpg'
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@ -30,7 +32,7 @@ def _real_extract(self, url):
|
||||||
f'https://www.s4c.cymru/df/full_prog_details?lang=e&programme_id={video_id}',
|
f'https://www.s4c.cymru/df/full_prog_details?lang=e&programme_id={video_id}',
|
||||||
video_id, fatal=False)
|
video_id, fatal=False)
|
||||||
|
|
||||||
filename = self._download_json(
|
player_config = self._download_json(
|
||||||
'https://player-api.s4c-cdn.co.uk/player-configuration/prod', video_id, query={
|
'https://player-api.s4c-cdn.co.uk/player-configuration/prod', video_id, query={
|
||||||
'programme_id': video_id,
|
'programme_id': video_id,
|
||||||
'signed': '0',
|
'signed': '0',
|
||||||
|
@ -38,7 +40,13 @@ def _real_extract(self, url):
|
||||||
'mode': 'od',
|
'mode': 'od',
|
||||||
'appId': 'clic',
|
'appId': 'clic',
|
||||||
'streamName': '',
|
'streamName': '',
|
||||||
}, note='Downloading player config JSON')['filename']
|
}, note='Downloading player config JSON')
|
||||||
|
subtitles = {}
|
||||||
|
for sub in traverse_obj(player_config, ('subtitles', lambda _, v: url_or_none(v['0']))):
|
||||||
|
subtitles.setdefault(sub.get('3', 'en'), []).append({
|
||||||
|
'url': sub['0'],
|
||||||
|
'name': sub.get('1'),
|
||||||
|
})
|
||||||
m3u8_url = self._download_json(
|
m3u8_url = self._download_json(
|
||||||
'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={
|
'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={
|
||||||
'mode': 'od',
|
'mode': 'od',
|
||||||
|
@ -46,17 +54,52 @@ def _real_extract(self, url):
|
||||||
'region': 'WW',
|
'region': 'WW',
|
||||||
'extra': 'false',
|
'extra': 'false',
|
||||||
'thirdParty': 'false',
|
'thirdParty': 'false',
|
||||||
'filename': filename,
|
'filename': player_config['filename'],
|
||||||
}, note='Downloading streaming urls JSON')['hls']
|
}, note='Downloading streaming urls JSON')['hls']
|
||||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls'),
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
|
'thumbnail': url_or_none(player_config.get('poster')),
|
||||||
**traverse_obj(details, ('full_prog_details', 0, {
|
**traverse_obj(details, ('full_prog_details', 0, {
|
||||||
'title': (('programme_title', 'series_title'), {str}),
|
'title': (('programme_title', 'series_title'), {str}),
|
||||||
'description': ('full_billing', {str.strip}),
|
'description': ('full_billing', {str.strip}),
|
||||||
'duration': ('duration', {lambda x: int(x) * 60}),
|
'duration': ('duration', {lambda x: int(x) * 60}),
|
||||||
}), get_all=False),
|
}), get_all=False),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class S4CSeriesIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/series/(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.s4c.cymru/clic/series/864982911',
|
||||||
|
'playlist_mincount': 6,
|
||||||
|
'info_dict': {
|
||||||
|
'id': '864982911',
|
||||||
|
'title': 'Iaith ar Daith',
|
||||||
|
'description': 'md5:e878ebf660dce89bd2ef521d7ce06397'
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.s4c.cymru/clic/series/866852587',
|
||||||
|
'playlist_mincount': 8,
|
||||||
|
'info_dict': {
|
||||||
|
'id': '866852587',
|
||||||
|
'title': 'FFIT Cymru',
|
||||||
|
'description': 'md5:abcb3c129cb68dbb6cd304fd33b07e96'
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
series_id = self._match_id(url)
|
||||||
|
series_details = self._download_json(
|
||||||
|
'https://www.s4c.cymru/df/series_details', series_id, query={
|
||||||
|
'lang': 'e',
|
||||||
|
'series_id': series_id,
|
||||||
|
'show_prog_in_series': 'Y'
|
||||||
|
}, note='Downloading series details JSON')
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
[self.url_result(f'https://www.s4c.cymru/clic/programme/{episode_id}', S4CIE, episode_id)
|
||||||
|
for episode_id in traverse_obj(series_details, ('other_progs_in_series', ..., 'id'))],
|
||||||
|
series_id, traverse_obj(series_details, ('full_prog_details', 0, 'series_title', {str})))
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
import base64
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
@ -8,7 +9,12 @@
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
float_or_none,
|
||||||
|
url_or_none,
|
||||||
|
unified_timestamp,
|
||||||
try_get,
|
try_get,
|
||||||
|
urljoin,
|
||||||
|
traverse_obj,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -31,13 +37,20 @@ class SohuIE(InfoExtractor):
|
||||||
'id': '409385080',
|
'id': '409385080',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '《2015湖南卫视羊年元宵晚会》唐嫣《花好月圆》',
|
'title': '《2015湖南卫视羊年元宵晚会》唐嫣《花好月圆》',
|
||||||
}
|
},
|
||||||
|
'skip': 'no longer available',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
|
'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '78693464',
|
'id': '78693464',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '【爱范品】第31期:MWC见不到的奇葩手机',
|
'title': '【爱范品】第31期:MWC见不到的奇葩手机',
|
||||||
|
'uploader': '爱范儿视频',
|
||||||
|
'duration': 213,
|
||||||
|
'timestamp': 1425519600,
|
||||||
|
'upload_date': '20150305',
|
||||||
|
'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M10/83/FA/MTAuMTAuODguODA=/6_14cbccdde5eg104SysCutcloud_78693464_7_0b.jpg',
|
||||||
|
'tags': ['爱范儿', '爱范品', 'MWC', '手机'],
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'note': 'Multipart video',
|
'note': 'Multipart video',
|
||||||
|
@ -45,6 +58,12 @@ class SohuIE(InfoExtractor):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '78910339',
|
'id': '78910339',
|
||||||
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
||||||
|
'uploader': '小苍cany',
|
||||||
|
'duration': 744.0,
|
||||||
|
'timestamp': 1426269360,
|
||||||
|
'upload_date': '20150313',
|
||||||
|
'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M11/89/57/MTAuMTAuODguODA=/6_14cea022a1dg102SysCutcloud_78910339_8_0b.jpg',
|
||||||
|
'tags': ['小苍MM', '英雄联盟', '实战秘籍'],
|
||||||
},
|
},
|
||||||
'playlist': [{
|
'playlist': [{
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -75,6 +94,11 @@ class SohuIE(InfoExtractor):
|
||||||
'id': '78932792',
|
'id': '78932792',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'youtube-dl testing video',
|
'title': 'youtube-dl testing video',
|
||||||
|
'duration': 360,
|
||||||
|
'timestamp': 1426348620,
|
||||||
|
'upload_date': '20150314',
|
||||||
|
'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M02/8A/00/MTAuMTAuODguNzk=/6_14cee1be192g102SysCutcloud_78932792_7_7b.jpg',
|
||||||
|
'tags': [],
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True
|
'skip_download': True
|
||||||
|
@ -100,7 +124,7 @@ def _fetch_data(vid_id, mytv=False):
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
title = re.sub(r' - 搜狐视频$', '', self._og_search_title(webpage))
|
title = re.sub(r'( - 高清正版在线观看)? - 搜狐视频$', '', self._og_search_title(webpage))
|
||||||
|
|
||||||
vid = self._html_search_regex(
|
vid = self._html_search_regex(
|
||||||
r'var vid ?= ?["\'](\d+)["\']',
|
r'var vid ?= ?["\'](\d+)["\']',
|
||||||
|
@ -132,7 +156,9 @@ def _fetch_data(vid_id, mytv=False):
|
||||||
allot = format_data['allot']
|
allot = format_data['allot']
|
||||||
|
|
||||||
data = format_data['data']
|
data = format_data['data']
|
||||||
clips_url = data['clipsURL']
|
clip_url = traverse_obj(data, (('clipsURL', 'mp4PlayUrl'), i, {url_or_none}), get_all=False)
|
||||||
|
if not clip_url:
|
||||||
|
raise ExtractorError(f'Unable to extract url for clip {i}')
|
||||||
su = data['su']
|
su = data['su']
|
||||||
|
|
||||||
video_url = 'newflv.sohu.ccgslb.net'
|
video_url = 'newflv.sohu.ccgslb.net'
|
||||||
|
@ -142,9 +168,9 @@ def _fetch_data(vid_id, mytv=False):
|
||||||
while 'newflv.sohu.ccgslb.net' in video_url:
|
while 'newflv.sohu.ccgslb.net' in video_url:
|
||||||
params = {
|
params = {
|
||||||
'prot': 9,
|
'prot': 9,
|
||||||
'file': clips_url[i],
|
'file': clip_url,
|
||||||
'new': su[i],
|
'new': su[i],
|
||||||
'prod': 'flash',
|
'prod': 'h5n',
|
||||||
'rb': 1,
|
'rb': 1,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -193,6 +219,75 @@ def _fetch_data(vid_id, mytv=False):
|
||||||
'entries': playlist,
|
'entries': playlist,
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
'duration': traverse_obj(vid_data, ('data', 'totalDuration', {float_or_none})),
|
||||||
}
|
}
|
||||||
|
|
||||||
return info
|
if mytv:
|
||||||
|
publish_time = unified_timestamp(self._search_regex(
|
||||||
|
r'publishTime:\s*["\'](\d+-\d+-\d+ \d+:\d+)["\']', webpage, 'publish time', fatal=False))
|
||||||
|
else:
|
||||||
|
publish_time = traverse_obj(vid_data, ('tv_application_time', {unified_timestamp}))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'timestamp': publish_time - 8 * 3600 if publish_time else None,
|
||||||
|
**traverse_obj(vid_data, {
|
||||||
|
'alt_title': ('data', 'subName', {str}),
|
||||||
|
'uploader': ('wm_data', 'wm_username', {str}),
|
||||||
|
'thumbnail': ('data', 'coverImg', {url_or_none}),
|
||||||
|
'tags': ('data', 'tag', {str.split}),
|
||||||
|
}),
|
||||||
|
**info,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class SohuVIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://tv\.sohu\.com/v/(?P<id>[\w=-]+)\.html(?:$|[#?])'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'note': 'Multipart video',
|
||||||
|
'url': 'https://tv.sohu.com/v/MjAyMzA2MTQvbjYwMTMxNTE5Mi5zaHRtbA==.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '601315192',
|
||||||
|
'title': '《淬火丹心》第1集',
|
||||||
|
'alt_title': '“点天灯”发生事故',
|
||||||
|
'duration': 2701.692,
|
||||||
|
'timestamp': 1686758040,
|
||||||
|
'upload_date': '20230614',
|
||||||
|
'thumbnail': 'http://photocdn.tv.sohu.com/img/20230614/vrsa_hor_1686738763256_454010551.jpg',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 9,
|
||||||
|
'skip': 'Only available in China',
|
||||||
|
}, {
|
||||||
|
'url': 'https://tv.sohu.com/v/dXMvMjMyNzk5ODg5Lzc4NjkzNDY0LnNodG1s.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '78693464',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '【爱范品】第31期:MWC见不到的奇葩手机',
|
||||||
|
'uploader': '爱范儿视频',
|
||||||
|
'duration': 213,
|
||||||
|
'timestamp': 1425519600,
|
||||||
|
'upload_date': '20150305',
|
||||||
|
'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M10/83/FA/MTAuMTAuODguODA=/6_14cbccdde5eg104SysCutcloud_78693464_7_0b.jpg',
|
||||||
|
'tags': ['爱范儿', '爱范品', 'MWC', '手机'],
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'note': 'Multipart video',
|
||||||
|
'url': 'https://tv.sohu.com/v/dXMvMjQyNTYyMTYzLzc4OTEwMzM5LnNodG1s.html?src=pl',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '78910339',
|
||||||
|
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
||||||
|
'uploader': '小苍cany',
|
||||||
|
'duration': 744.0,
|
||||||
|
'timestamp': 1426269360,
|
||||||
|
'upload_date': '20150313',
|
||||||
|
'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M11/89/57/MTAuMTAuODguODA=/6_14cea022a1dg102SysCutcloud_78910339_8_0b.jpg',
|
||||||
|
'tags': ['小苍MM', '英雄联盟', '实战秘籍'],
|
||||||
|
},
|
||||||
|
'playlist_mincount': 3,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
encoded_id = self._match_id(url)
|
||||||
|
path = base64.urlsafe_b64decode(encoded_id).decode()
|
||||||
|
subdomain = 'tv' if re.match(r'\d+/n\d+\.shtml', path) else 'my.tv'
|
||||||
|
return self.url_result(urljoin(f'http://{subdomain}.sohu.com/', path), SohuIE)
|
||||||
|
|
|
@ -15,7 +15,6 @@
|
||||||
UserNotLive,
|
UserNotLive,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
format_field,
|
format_field,
|
||||||
get_element_by_id,
|
|
||||||
get_first,
|
get_first,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
join_nonempty,
|
join_nonempty,
|
||||||
|
@ -50,8 +49,9 @@ def _create_url(user_id, video_id):
|
||||||
return f'https://www.tiktok.com/@{user_id or "_"}/video/{video_id}'
|
return f'https://www.tiktok.com/@{user_id or "_"}/video/{video_id}'
|
||||||
|
|
||||||
def _get_sigi_state(self, webpage, display_id):
|
def _get_sigi_state(self, webpage, display_id):
|
||||||
return self._parse_json(get_element_by_id(
|
return self._search_json(
|
||||||
'SIGI_STATE|sigi-persisted-data', webpage, escape_value=False), display_id)
|
r'<script[^>]+\bid="(?:SIGI_STATE|sigi-persisted-data)"[^>]*>', webpage,
|
||||||
|
'sigi state', display_id, end_pattern=r'</script>')
|
||||||
|
|
||||||
def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True,
|
def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True,
|
||||||
note='Downloading API JSON', errnote='Unable to download API page'):
|
note='Downloading API JSON', errnote='Unable to download API page'):
|
||||||
|
|
|
@ -1,10 +1,14 @@
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
|
traverse_obj,
|
||||||
try_get,
|
try_get,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -12,6 +16,36 @@ class TV5MondePlusIE(InfoExtractor):
|
||||||
IE_DESC = 'TV5MONDE+'
|
IE_DESC = 'TV5MONDE+'
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:tv5mondeplus|revoir\.tv5monde)\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?:tv5mondeplus|revoir\.tv5monde)\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
# movie
|
||||||
|
'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/les-novices',
|
||||||
|
'md5': 'c86f60bf8b75436455b1b205f9745955',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'ZX0ipMyFQq_6D4BA7b',
|
||||||
|
'display_id': 'les-novices',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Les novices',
|
||||||
|
'description': 'md5:2e7c33ba3ad48dabfcc2a956b88bde2b',
|
||||||
|
'upload_date': '20230821',
|
||||||
|
'thumbnail': 'https://revoir.tv5monde.com/uploads/media/video_thumbnail/0738/60/01e952b7ccf36b7c6007ec9131588954ab651de9.jpeg',
|
||||||
|
'duration': 5177,
|
||||||
|
'episode': 'Les novices',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# series episode
|
||||||
|
'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/opj-les-dents-de-la-terre-2',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'wJ0eeEPozr_6D4BA7b',
|
||||||
|
'display_id': 'opj-les-dents-de-la-terre-2',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "OPJ - Les dents de la Terre (2)",
|
||||||
|
'description': 'md5:288f87fd68d993f814e66e60e5302d9d',
|
||||||
|
'upload_date': '20230823',
|
||||||
|
'series': 'OPJ',
|
||||||
|
'episode': 'Les dents de la Terre (2)',
|
||||||
|
'duration': 2877,
|
||||||
|
'thumbnail': 'https://dl-revoir.tv5monde.com/images/1a/5753448.jpg'
|
||||||
|
},
|
||||||
|
}, {
|
||||||
# movie
|
# movie
|
||||||
'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/ceux-qui-travaillent',
|
'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/ceux-qui-travaillent',
|
||||||
'md5': '32fa0cde16a4480d1251502a66856d5f',
|
'md5': '32fa0cde16a4480d1251502a66856d5f',
|
||||||
|
@ -23,6 +57,7 @@ class TV5MondePlusIE(InfoExtractor):
|
||||||
'description': 'md5:570e8bb688036ace873b2d50d24c026d',
|
'description': 'md5:570e8bb688036ace873b2d50d24c026d',
|
||||||
'upload_date': '20210819',
|
'upload_date': '20210819',
|
||||||
},
|
},
|
||||||
|
'skip': 'no longer available',
|
||||||
}, {
|
}, {
|
||||||
# series episode
|
# series episode
|
||||||
'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/vestiaires-caro-actrice',
|
'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/vestiaires-caro-actrice',
|
||||||
|
@ -39,6 +74,7 @@ class TV5MondePlusIE(InfoExtractor):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'no longer available',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/neuf-jours-en-hiver-neuf-jours-en-hiver',
|
'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/neuf-jours-en-hiver-neuf-jours-en-hiver',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -63,20 +99,45 @@ def _real_extract(self, url):
|
||||||
video_files = self._parse_json(
|
video_files = self._parse_json(
|
||||||
vpl_data['data-broadcast'], display_id)
|
vpl_data['data-broadcast'], display_id)
|
||||||
formats = []
|
formats = []
|
||||||
for video_file in video_files:
|
video_id = None
|
||||||
v_url = video_file.get('url')
|
|
||||||
if not v_url:
|
def process_video_files(v):
|
||||||
continue
|
nonlocal video_id
|
||||||
video_format = video_file.get('format') or determine_ext(v_url)
|
for video_file in v:
|
||||||
if video_format == 'm3u8':
|
v_url = video_file.get('url')
|
||||||
formats.extend(self._extract_m3u8_formats(
|
if not v_url:
|
||||||
v_url, display_id, 'mp4', 'm3u8_native',
|
continue
|
||||||
m3u8_id='hls', fatal=False))
|
if video_file.get('type') == 'application/deferred':
|
||||||
else:
|
d_param = urllib.parse.quote(v_url)
|
||||||
formats.append({
|
token = video_file.get('token')
|
||||||
'url': v_url,
|
if not token:
|
||||||
'format_id': video_format,
|
continue
|
||||||
})
|
deferred_json = self._download_json(
|
||||||
|
f'https://api.tv5monde.com/player/asset/{d_param}/resolve?condenseKS=true', display_id,
|
||||||
|
note='Downloading deferred info', headers={'Authorization': f'Bearer {token}'}, fatal=False)
|
||||||
|
v_url = traverse_obj(deferred_json, (0, 'url', {url_or_none}))
|
||||||
|
if not v_url:
|
||||||
|
continue
|
||||||
|
# data-guid from the webpage isn't stable, use the material id from the json urls
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'materials/([\da-zA-Z]{10}_[\da-fA-F]{7})/', v_url, 'video id', default=None)
|
||||||
|
process_video_files(deferred_json)
|
||||||
|
|
||||||
|
video_format = video_file.get('format') or determine_ext(v_url)
|
||||||
|
if video_format == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
v_url, display_id, 'mp4', 'm3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
elif video_format == 'mpd':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
v_url, display_id, fatal=False))
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'url': v_url,
|
||||||
|
'format_id': video_format,
|
||||||
|
})
|
||||||
|
|
||||||
|
process_video_files(video_files)
|
||||||
|
|
||||||
metadata = self._parse_json(
|
metadata = self._parse_json(
|
||||||
vpl_data['data-metadata'], display_id)
|
vpl_data['data-metadata'], display_id)
|
||||||
|
@ -100,10 +161,11 @@ def _real_extract(self, url):
|
||||||
if upload_date:
|
if upload_date:
|
||||||
upload_date = upload_date.replace('_', '')
|
upload_date = upload_date.replace('_', '')
|
||||||
|
|
||||||
video_id = self._search_regex(
|
if not video_id:
|
||||||
(r'data-guid=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
|
video_id = self._search_regex(
|
||||||
r'id_contenu["\']\s:\s*(\d+)'), webpage, 'video id',
|
(r'data-guid=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
|
||||||
default=display_id)
|
r'id_contenu["\']\s:\s*(\d+)'), webpage, 'video id',
|
||||||
|
default=display_id)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
|
|
@ -22,7 +22,7 @@
|
||||||
|
|
||||||
|
|
||||||
class TwitCastingIE(InfoExtractor):
|
class TwitCastingIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/(?:movie|twplayer)/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<uploader_id>[^/?#]+)/(?:movie|twplayer)/(?P<id>\d+)'
|
||||||
_M3U8_HEADERS = {
|
_M3U8_HEADERS = {
|
||||||
'Origin': 'https://twitcasting.tv',
|
'Origin': 'https://twitcasting.tv',
|
||||||
'Referer': 'https://twitcasting.tv/',
|
'Referer': 'https://twitcasting.tv/',
|
||||||
|
@ -231,7 +231,7 @@ def find_dmu(x):
|
||||||
|
|
||||||
|
|
||||||
class TwitCastingLiveIE(InfoExtractor):
|
class TwitCastingLiveIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<id>[^/]+)/?(?:[#?]|$)'
|
_VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/?(?:[#?]|$)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://twitcasting.tv/ivetesangalo',
|
'url': 'https://twitcasting.tv/ivetesangalo',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -265,8 +265,15 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
|
|
||||||
class TwitCastingUserIE(InfoExtractor):
|
class TwitCastingUserIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<id>[^/]+)/show/?(?:[#?]|$)'
|
_VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/(:?show|archive)/?(?:[#?]|$)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
'url': 'https://twitcasting.tv/natsuiromatsuri/archive/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'natsuiromatsuri',
|
||||||
|
'title': 'natsuiromatsuri - Live History',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 235,
|
||||||
|
}, {
|
||||||
'url': 'https://twitcasting.tv/noriyukicas/show',
|
'url': 'https://twitcasting.tv/noriyukicas/show',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
import functools
|
|
||||||
import json
|
import json
|
||||||
|
import random
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .periscope import PeriscopeBaseIE, PeriscopeIE
|
from .periscope import PeriscopeBaseIE, PeriscopeIE
|
||||||
|
from ..compat import functools # isort: split
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
|
@ -147,10 +148,14 @@ def _search_dimensions_in_video_url(a_format, video_url):
|
||||||
def is_logged_in(self):
|
def is_logged_in(self):
|
||||||
return bool(self._get_cookies(self._API_BASE).get('auth_token'))
|
return bool(self._get_cookies(self._API_BASE).get('auth_token'))
|
||||||
|
|
||||||
|
@functools.cached_property
|
||||||
|
def _selected_api(self):
|
||||||
|
return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]
|
||||||
|
|
||||||
def _fetch_guest_token(self, display_id):
|
def _fetch_guest_token(self, display_id):
|
||||||
guest_token = traverse_obj(self._download_json(
|
guest_token = traverse_obj(self._download_json(
|
||||||
f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token', data=b'',
|
f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token', data=b'',
|
||||||
headers=self._set_base_headers(legacy=display_id and self._configuration_arg('legacy_api'))),
|
headers=self._set_base_headers(legacy=display_id and self._selected_api == 'legacy')),
|
||||||
('guest_token', {str}))
|
('guest_token', {str}))
|
||||||
if not guest_token:
|
if not guest_token:
|
||||||
raise ExtractorError('Could not retrieve guest token')
|
raise ExtractorError('Could not retrieve guest token')
|
||||||
|
@ -295,7 +300,7 @@ def input_dict(subtask_id, text):
|
||||||
self.report_login()
|
self.report_login()
|
||||||
|
|
||||||
def _call_api(self, path, video_id, query={}, graphql=False):
|
def _call_api(self, path, video_id, query={}, graphql=False):
|
||||||
headers = self._set_base_headers(legacy=not graphql and self._configuration_arg('legacy_api'))
|
headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
|
||||||
headers.update({
|
headers.update({
|
||||||
'x-twitter-auth-type': 'OAuth2Session',
|
'x-twitter-auth-type': 'OAuth2Session',
|
||||||
'x-twitter-client-language': 'en',
|
'x-twitter-client-language': 'en',
|
||||||
|
@ -707,6 +712,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
|
'skip': 'This Tweet is unavailable',
|
||||||
}, {
|
}, {
|
||||||
# not available in Periscope
|
# not available in Periscope
|
||||||
'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
|
'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
|
||||||
|
@ -721,6 +727,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
},
|
},
|
||||||
'add_ie': ['TwitterBroadcast'],
|
'add_ie': ['TwitterBroadcast'],
|
||||||
|
'skip': 'Broadcast no longer exists',
|
||||||
}, {
|
}, {
|
||||||
# unified card
|
# unified card
|
||||||
'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
|
'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
|
||||||
|
@ -773,9 +780,9 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
|
'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1577719286659006464',
|
'id': '1577719286659006464',
|
||||||
'title': 'Ultima📛 | #вʟм - Test',
|
'title': 'Ultima📛| New Era - Test',
|
||||||
'description': 'Test https://t.co/Y3KEZD7Dad',
|
'description': 'Test https://t.co/Y3KEZD7Dad',
|
||||||
'uploader': 'Ultima📛 | #вʟм',
|
'uploader': 'Ultima📛| New Era',
|
||||||
'uploader_id': 'UltimaShadowX',
|
'uploader_id': 'UltimaShadowX',
|
||||||
'uploader_url': 'https://twitter.com/UltimaShadowX',
|
'uploader_url': 'https://twitter.com/UltimaShadowX',
|
||||||
'upload_date': '20221005',
|
'upload_date': '20221005',
|
||||||
|
@ -811,7 +818,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# Adult content, fails if not logged in (GraphQL)
|
# Adult content, fails if not logged in
|
||||||
'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
|
'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1575199163847000068',
|
'id': '1575199163847000068',
|
||||||
|
@ -831,9 +838,10 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
'tags': []
|
'tags': []
|
||||||
},
|
},
|
||||||
|
'params': {'skip_download': 'The media could not be played'},
|
||||||
'skip': 'Requires authentication',
|
'skip': 'Requires authentication',
|
||||||
}, {
|
}, {
|
||||||
# Playlist result only with auth
|
# Playlist result only with graphql API
|
||||||
'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
|
'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
|
||||||
'playlist_mincount': 2,
|
'playlist_mincount': 2,
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -898,7 +906,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'uploader_id': 'MoniqueCamarra',
|
'uploader_id': 'MoniqueCamarra',
|
||||||
'live_status': 'was_live',
|
'live_status': 'was_live',
|
||||||
'release_timestamp': 1658417414,
|
'release_timestamp': 1658417414,
|
||||||
'description': 'md5:4dc8e972f1d8b3c6580376fabb02a3ad',
|
'description': 'md5:acce559345fd49f129c20dbcda3f1201',
|
||||||
'timestamp': 1658407771,
|
'timestamp': 1658407771,
|
||||||
'release_date': '20220721',
|
'release_date': '20220721',
|
||||||
'upload_date': '20220721',
|
'upload_date': '20220721',
|
||||||
|
@ -1007,10 +1015,10 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
|
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'uploader': 'Mün The Friend Of YWAP',
|
'uploader': 'Mün',
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'upload_date': '20221206',
|
'upload_date': '20221206',
|
||||||
'title': 'Mün The Friend Of YWAP - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
|
'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'tags': [],
|
'tags': [],
|
||||||
|
@ -1019,7 +1027,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'timestamp': 1670306984.0,
|
'timestamp': 1670306984.0,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# url to retweet id w/ legacy api
|
# retweeted_status (private)
|
||||||
'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
|
'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1623274794488659969',
|
'id': '1623274794488659969',
|
||||||
|
@ -1039,32 +1047,84 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
},
|
},
|
||||||
'params': {'extractor_args': {'twitter': {'legacy_api': ['']}}},
|
|
||||||
'skip': 'Protected tweet',
|
'skip': 'Protected tweet',
|
||||||
}, {
|
}, {
|
||||||
# orig tweet w/ graphql
|
# retweeted_status
|
||||||
'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
|
'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1623274794488659969',
|
'id': '1694928337846538240',
|
||||||
'display_id': '1623739803874349067',
|
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '@selfisekai@hackerspace.pl 🐀 - RT @Johnnybull3ts: Me after going viral to over 30million people: Whoopsie-daisy',
|
'display_id': '1695424220702888009',
|
||||||
'description': 'md5:9258bdbb54793bdc124fe1cd47e96c6a',
|
'title': 'md5:e8daa9527bc2b947121395494f786d9d',
|
||||||
'uploader': '@selfisekai@hackerspace.pl 🐀',
|
'description': 'md5:004f2d37fd58737724ec75bc7e679938',
|
||||||
'uploader_id': 'liberdalau',
|
'uploader': 'Benny Johnson',
|
||||||
'uploader_url': 'https://twitter.com/liberdalau',
|
'uploader_id': 'bennyjohnson',
|
||||||
|
'uploader_url': 'https://twitter.com/bennyjohnson',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'duration': 8.033,
|
'duration': 45.001,
|
||||||
'timestamp': 1675964711.0,
|
'timestamp': 1692962814.0,
|
||||||
'upload_date': '20230209',
|
'upload_date': '20230825',
|
||||||
'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
|
'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
|
'view_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
},
|
},
|
||||||
'skip': 'Protected tweet',
|
}, {
|
||||||
|
# retweeted_status w/ legacy API
|
||||||
|
'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1694928337846538240',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'display_id': '1695424220702888009',
|
||||||
|
'title': 'md5:e8daa9527bc2b947121395494f786d9d',
|
||||||
|
'description': 'md5:004f2d37fd58737724ec75bc7e679938',
|
||||||
|
'uploader': 'Benny Johnson',
|
||||||
|
'uploader_id': 'bennyjohnson',
|
||||||
|
'uploader_url': 'https://twitter.com/bennyjohnson',
|
||||||
|
'age_limit': 0,
|
||||||
|
'tags': [],
|
||||||
|
'duration': 45.001,
|
||||||
|
'timestamp': 1692962814.0,
|
||||||
|
'upload_date': '20230825',
|
||||||
|
'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
|
||||||
|
'like_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
},
|
||||||
|
'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
|
||||||
|
}, {
|
||||||
|
# Broadcast embedded in tweet
|
||||||
|
'url': 'https://twitter.com/JessicaDobsonWX/status/1693057346933600402',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1yNGaNLjEblJj',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Jessica Dobson - WAVE Weather Now - Saturday 8/19/23 Update',
|
||||||
|
'uploader': 'Jessica Dobson',
|
||||||
|
'uploader_id': '1DZEoDwDovRQa',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
|
'view_count': int,
|
||||||
|
},
|
||||||
|
'add_ie': ['TwitterBroadcast'],
|
||||||
|
}, {
|
||||||
|
# Animated gif and quote tweet video, with syndication API
|
||||||
|
'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
|
||||||
|
'playlist_mincount': 2,
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1696256659889565950',
|
||||||
|
'title': 'BAKOON - https://t.co/zom968d0a0',
|
||||||
|
'description': 'https://t.co/zom968d0a0',
|
||||||
|
'tags': [],
|
||||||
|
'uploader': 'BAKOON',
|
||||||
|
'uploader_id': 'BAKKOOONN',
|
||||||
|
'uploader_url': 'https://twitter.com/BAKKOOONN',
|
||||||
|
'age_limit': 18,
|
||||||
|
'timestamp': 1693254077.0,
|
||||||
|
'upload_date': '20230828',
|
||||||
|
'like_count': int,
|
||||||
|
},
|
||||||
|
'params': {'extractor_args': {'twitter': {'api': ['syndication']}}},
|
||||||
|
'expected_warnings': ['Not all metadata'],
|
||||||
}, {
|
}, {
|
||||||
# onion route
|
# onion route
|
||||||
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
|
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
|
||||||
|
@ -1103,6 +1163,14 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
_MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
|
||||||
|
|
||||||
|
@property
|
||||||
|
def _GRAPHQL_ENDPOINT(self):
|
||||||
|
if self.is_logged_in:
|
||||||
|
return 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
|
||||||
|
return '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
|
||||||
|
|
||||||
def _graphql_to_legacy(self, data, twid):
|
def _graphql_to_legacy(self, data, twid):
|
||||||
result = traverse_obj(data, (
|
result = traverse_obj(data, (
|
||||||
'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
|
'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
|
||||||
|
@ -1130,9 +1198,14 @@ def _graphql_to_legacy(self, data, twid):
|
||||||
'user': ('core', 'user_results', 'result', 'legacy'),
|
'user': ('core', 'user_results', 'result', 'legacy'),
|
||||||
'card': ('card', 'legacy'),
|
'card': ('card', 'legacy'),
|
||||||
'quoted_status': ('quoted_status_result', 'result', 'legacy'),
|
'quoted_status': ('quoted_status_result', 'result', 'legacy'),
|
||||||
|
'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
|
||||||
}, expected_type=dict, default={}))
|
}, expected_type=dict, default={}))
|
||||||
|
|
||||||
# extra transformation is needed since result does not match legacy format
|
# extra transformations needed since result does not match legacy format
|
||||||
|
if status.get('retweeted_status'):
|
||||||
|
status['retweeted_status']['user'] = traverse_obj(status, (
|
||||||
|
'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict})) or {}
|
||||||
|
|
||||||
binding_values = {
|
binding_values = {
|
||||||
binding_value.get('key'): binding_value.get('value')
|
binding_value.get('key'): binding_value.get('value')
|
||||||
for binding_value in traverse_obj(status, ('card', 'binding_values', ..., {dict}))
|
for binding_value in traverse_obj(status, ('card', 'binding_values', ..., {dict}))
|
||||||
|
@ -1208,33 +1281,42 @@ def _build_graphql_query(self, media_id):
|
||||||
}
|
}
|
||||||
|
|
||||||
def _extract_status(self, twid):
|
def _extract_status(self, twid):
|
||||||
if self.is_logged_in:
|
if self.is_logged_in or self._selected_api == 'graphql':
|
||||||
return self._graphql_to_legacy(
|
status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
|
||||||
self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid), twid)
|
|
||||||
|
|
||||||
try:
|
elif self._selected_api == 'legacy':
|
||||||
if not self._configuration_arg('legacy_api'):
|
status = self._call_api(f'statuses/show/{twid}.json', twid, {
|
||||||
return self._graphql_to_legacy(
|
|
||||||
self._call_graphql_api('2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId', twid), twid)
|
|
||||||
return traverse_obj(self._call_api(f'statuses/show/{twid}.json', twid, {
|
|
||||||
'cards_platform': 'Web-12',
|
'cards_platform': 'Web-12',
|
||||||
'include_cards': 1,
|
'include_cards': 1,
|
||||||
'include_reply_count': 1,
|
'include_reply_count': 1,
|
||||||
'include_user_entities': 0,
|
'include_user_entities': 0,
|
||||||
'tweet_mode': 'extended',
|
'tweet_mode': 'extended',
|
||||||
}), 'retweeted_status', None)
|
})
|
||||||
|
|
||||||
except ExtractorError as e:
|
elif self._selected_api == 'syndication':
|
||||||
if e.expected:
|
|
||||||
raise
|
|
||||||
self.report_warning(
|
self.report_warning(
|
||||||
f'{e.orig_msg}. Falling back to syndication endpoint; some metadata may be missing', twid)
|
'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
|
||||||
|
status = self._download_json(
|
||||||
|
'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
|
||||||
|
headers={'User-Agent': 'Googlebot'}, query={
|
||||||
|
'id': twid,
|
||||||
|
# TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
|
||||||
|
'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
|
||||||
|
})
|
||||||
|
if not status:
|
||||||
|
raise ExtractorError('Syndication endpoint returned empty JSON response')
|
||||||
|
# Transform the result so its structure matches that of legacy/graphql
|
||||||
|
media = []
|
||||||
|
for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
|
||||||
|
detail['id_str'] = traverse_obj(detail, (
|
||||||
|
'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
|
||||||
|
media.append(detail)
|
||||||
|
status['extended_entities'] = {'media': media}
|
||||||
|
|
||||||
status = self._download_json(
|
else:
|
||||||
'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
|
raise ExtractorError(f'"{self._selected_api}" is not a valid API selection', expected=True)
|
||||||
headers={'User-Agent': 'Googlebot'}, query={'id': twid})
|
|
||||||
status['extended_entities'] = {'media': status.get('mediaDetails')}
|
return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
|
||||||
return status
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
twid, selected_index = self._match_valid_url(url).group('id', 'index')
|
twid, selected_index = self._match_valid_url(url).group('id', 'index')
|
||||||
|
@ -1266,10 +1348,7 @@ def _real_extract(self, url):
|
||||||
}
|
}
|
||||||
|
|
||||||
def extract_from_video_info(media):
|
def extract_from_video_info(media):
|
||||||
media_id = traverse_obj(media, 'id_str', 'id', (
|
media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
|
||||||
'video_info', 'variants', ..., 'url',
|
|
||||||
{functools.partial(re.search, r'_video/(\d+)/')}, 1
|
|
||||||
), get_all=False, expected_type=str_or_none) or twid
|
|
||||||
self.write_debug(f'Extracting from video info: {media_id}')
|
self.write_debug(f'Extracting from video info: {media_id}')
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
@ -1503,6 +1582,8 @@ def _real_extract(self, url):
|
||||||
broadcast = self._call_api(
|
broadcast = self._call_api(
|
||||||
'broadcasts/show.json', broadcast_id,
|
'broadcasts/show.json', broadcast_id,
|
||||||
{'ids': broadcast_id})['broadcasts'][broadcast_id]
|
{'ids': broadcast_id})['broadcasts'][broadcast_id]
|
||||||
|
if not broadcast:
|
||||||
|
raise ExtractorError('Broadcast no longer exists', expected=True)
|
||||||
info = self._parse_broadcast_data(broadcast, broadcast_id)
|
info = self._parse_broadcast_data(broadcast, broadcast_id)
|
||||||
media_key = broadcast['media_key']
|
media_key = broadcast['media_key']
|
||||||
source = self._call_api(
|
source = self._call_api(
|
||||||
|
|
|
@ -38,6 +38,7 @@ class VideaIE(InfoExtractor):
|
||||||
'title': 'Az őrült kígyász 285 kígyót enged szabadon',
|
'title': 'Az őrült kígyász 285 kígyót enged szabadon',
|
||||||
'thumbnail': r're:^https?://.*',
|
'thumbnail': r're:^https?://.*',
|
||||||
'duration': 21,
|
'duration': 21,
|
||||||
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
|
'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
|
||||||
|
@ -48,6 +49,7 @@ class VideaIE(InfoExtractor):
|
||||||
'title': 'Supercars előzés',
|
'title': 'Supercars előzés',
|
||||||
'thumbnail': r're:^https?://.*',
|
'thumbnail': r're:^https?://.*',
|
||||||
'duration': 64,
|
'duration': 64,
|
||||||
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ',
|
'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ',
|
||||||
|
@ -58,6 +60,7 @@ class VideaIE(InfoExtractor):
|
||||||
'title': 'Az őrült kígyász 285 kígyót enged szabadon',
|
'title': 'Az őrült kígyász 285 kígyót enged szabadon',
|
||||||
'thumbnail': r're:^https?://.*',
|
'thumbnail': r're:^https?://.*',
|
||||||
'duration': 21,
|
'duration': 21,
|
||||||
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
|
'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
|
||||||
|
@ -124,7 +127,7 @@ def _real_extract(self, url):
|
||||||
query['_t'] = result[:16]
|
query['_t'] = result[:16]
|
||||||
|
|
||||||
b64_info, handle = self._download_webpage_handle(
|
b64_info, handle = self._download_webpage_handle(
|
||||||
'http://videa.hu/videaplayer_get_xml.php', video_id, query=query)
|
'http://videa.hu/player/xml', video_id, query=query)
|
||||||
if b64_info.startswith('<?xml'):
|
if b64_info.startswith('<?xml'):
|
||||||
info = self._parse_xml(b64_info, video_id)
|
info = self._parse_xml(b64_info, video_id)
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -173,6 +173,7 @@ class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE
|
||||||
'skip': 'HTTP Error 404: Not Found',
|
'skip': 'HTTP Error 404: Not Found',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
# FIXME: Asset JSON is directly embedded in webpage
|
||||||
'url': 'http://www1.wdr.de/mediathek/video/live/index.html',
|
'url': 'http://www1.wdr.de/mediathek/video/live/index.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'mdb-2296252',
|
'id': 'mdb-2296252',
|
||||||
|
@ -221,6 +222,8 @@ class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE
|
||||||
'id': 'mdb-869971',
|
'id': 'mdb-869971',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': r're:^COSMO Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
'title': r're:^COSMO Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
|
'alt_title': 'COSMO Livestream',
|
||||||
|
'live_status': 'is_live',
|
||||||
'upload_date': '20160101',
|
'upload_date': '20160101',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
|
@ -248,6 +251,16 @@ class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE
|
||||||
'url': 'https://kinder.wdr.de/tv/die-sendung-mit-dem-elefanten/av/video-folge---astronaut-100.html',
|
'url': 'https://kinder.wdr.de/tv/die-sendung-mit-dem-elefanten/av/video-folge---astronaut-100.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://www1.wdr.de/mediathek/video/sendungen/rockpalast/video-baroness---freak-valley-festival--100.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'mdb-2741028',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Baroness - Freak Valley Festival 2022',
|
||||||
|
'alt_title': 'Rockpalast',
|
||||||
|
'upload_date': '20220725',
|
||||||
|
},
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -259,7 +272,7 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
# Article with several videos
|
# Article with several videos
|
||||||
|
|
||||||
# for wdr.de the data-extension is in a tag with the class "mediaLink"
|
# for wdr.de the data-extension-ard is in a tag with the class "mediaLink"
|
||||||
# for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn"
|
# for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn"
|
||||||
# for wdrmaus, in a tag with the class "videoButton" (previously a link
|
# for wdrmaus, in a tag with the class "videoButton" (previously a link
|
||||||
# to the page in a multiline "videoLink"-tag)
|
# to the page in a multiline "videoLink"-tag)
|
||||||
|
@ -268,7 +281,7 @@ def _real_extract(self, url):
|
||||||
(?:
|
(?:
|
||||||
(["\'])(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b.*?\1[^>]+|
|
(["\'])(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b.*?\1[^>]+|
|
||||||
(["\'])videoLink\b.*?\2[\s]*>\n[^\n]*
|
(["\'])videoLink\b.*?\2[\s]*>\n[^\n]*
|
||||||
)data-extension=(["\'])(?P<data>(?:(?!\3).)+)\3
|
)data-extension(?:-ard)?=(["\'])(?P<data>(?:(?!\3).)+)\3
|
||||||
''', webpage):
|
''', webpage):
|
||||||
media_link_obj = self._parse_json(
|
media_link_obj = self._parse_json(
|
||||||
mobj.group('data'), display_id, transform_source=js_to_json,
|
mobj.group('data'), display_id, transform_source=js_to_json,
|
||||||
|
@ -295,7 +308,7 @@ def _real_extract(self, url):
|
||||||
compat_urlparse.urljoin(url, mobj.group('href')),
|
compat_urlparse.urljoin(url, mobj.group('href')),
|
||||||
ie=WDRPageIE.ie_key())
|
ie=WDRPageIE.ie_key())
|
||||||
for mobj in re.finditer(
|
for mobj in re.finditer(
|
||||||
r'<a[^>]+\bhref=(["\'])(?P<href>(?:(?!\1).)+)\1[^>]+\bdata-extension=',
|
r'<a[^>]+\bhref=(["\'])(?P<href>(?:(?!\1).)+)\1[^>]+\bdata-extension(?:-ard)?=',
|
||||||
webpage) if re.match(self._PAGE_REGEX, mobj.group('href'))
|
webpage) if re.match(self._PAGE_REGEX, mobj.group('href'))
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
@ -1,134 +1,241 @@
|
||||||
from .common import InfoExtractor
|
|
||||||
|
|
||||||
import json
|
|
||||||
import random
|
import random
|
||||||
import re
|
import itertools
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from ..compat import (
|
from .common import InfoExtractor
|
||||||
compat_parse_qs,
|
|
||||||
compat_str,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
js_to_json,
|
int_or_none,
|
||||||
|
make_archive_id,
|
||||||
|
mimetype2ext,
|
||||||
|
parse_resolution,
|
||||||
|
str_or_none,
|
||||||
strip_jsonp,
|
strip_jsonp,
|
||||||
|
traverse_obj,
|
||||||
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class WeiboIE(InfoExtractor):
|
class WeiboBaseIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?weibo\.com/[0-9]+/(?P<id>[a-zA-Z0-9]+)'
|
def _update_visitor_cookies(self, video_id):
|
||||||
_TEST = {
|
visitor_data = self._download_json(
|
||||||
'url': 'https://weibo.com/6275294458/Fp6RGfbff?type=comment',
|
'https://passport.weibo.com/visitor/genvisitor', video_id,
|
||||||
'info_dict': {
|
note='Generating first-visit guest request',
|
||||||
'id': 'Fp6RGfbff',
|
transform_source=strip_jsonp,
|
||||||
'ext': 'mp4',
|
data=urlencode_postdata({
|
||||||
'title': 'You should have servants to massage you,... 来自Hosico_猫 - 微博',
|
'cb': 'gen_callback',
|
||||||
}
|
'fp': '{"os":"2","browser":"Gecko57,0,0,0","fonts":"undefined","screenInfo":"1440*900*24","plugins":""}',
|
||||||
}
|
}))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
self._download_webpage(
|
||||||
video_id = self._match_id(url)
|
'https://passport.weibo.com/visitor/visitor', video_id,
|
||||||
# to get Referer url for genvisitor
|
note='Running first-visit callback to get guest cookies',
|
||||||
webpage, urlh = self._download_webpage_handle(url, video_id)
|
query={
|
||||||
|
'a': 'incarnate',
|
||||||
visitor_url = urlh.url
|
't': visitor_data['data']['tid'],
|
||||||
|
'w': 2,
|
||||||
if 'passport.weibo.com' in visitor_url:
|
'c': '%03d' % visitor_data['data']['confidence'],
|
||||||
# first visit
|
'cb': 'cross_domain',
|
||||||
visitor_data = self._download_json(
|
'from': 'weibo',
|
||||||
'https://passport.weibo.com/visitor/genvisitor', video_id,
|
'_rand': random.random(),
|
||||||
note='Generating first-visit data',
|
|
||||||
transform_source=strip_jsonp,
|
|
||||||
headers={'Referer': visitor_url},
|
|
||||||
data=urlencode_postdata({
|
|
||||||
'cb': 'gen_callback',
|
|
||||||
'fp': json.dumps({
|
|
||||||
'os': '2',
|
|
||||||
'browser': 'Gecko57,0,0,0',
|
|
||||||
'fonts': 'undefined',
|
|
||||||
'screenInfo': '1440*900*24',
|
|
||||||
'plugins': '',
|
|
||||||
}),
|
|
||||||
}))
|
|
||||||
|
|
||||||
tid = visitor_data['data']['tid']
|
|
||||||
cnfd = '%03d' % visitor_data['data']['confidence']
|
|
||||||
|
|
||||||
self._download_webpage(
|
|
||||||
'https://passport.weibo.com/visitor/visitor', video_id,
|
|
||||||
note='Running first-visit callback',
|
|
||||||
query={
|
|
||||||
'a': 'incarnate',
|
|
||||||
't': tid,
|
|
||||||
'w': 2,
|
|
||||||
'c': cnfd,
|
|
||||||
'cb': 'cross_domain',
|
|
||||||
'from': 'weibo',
|
|
||||||
'_rand': random.random(),
|
|
||||||
})
|
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
|
||||||
url, video_id, note='Revisiting webpage')
|
|
||||||
|
|
||||||
title = self._html_extract_title(webpage)
|
|
||||||
|
|
||||||
video_formats = compat_parse_qs(self._search_regex(
|
|
||||||
r'video-sources=\\\"(.+?)\"', webpage, 'video_sources'))
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
supported_resolutions = (480, 720)
|
|
||||||
for res in supported_resolutions:
|
|
||||||
vid_urls = video_formats.get(compat_str(res))
|
|
||||||
if not vid_urls or not isinstance(vid_urls, list):
|
|
||||||
continue
|
|
||||||
|
|
||||||
vid_url = vid_urls[0]
|
|
||||||
formats.append({
|
|
||||||
'url': vid_url,
|
|
||||||
'height': res,
|
|
||||||
})
|
})
|
||||||
|
|
||||||
uploader = self._og_search_property(
|
def _weibo_download_json(self, url, video_id, *args, fatal=True, note='Downloading JSON metadata', **kwargs):
|
||||||
'nick-name', webpage, 'uploader', default=None)
|
webpage, urlh = self._download_webpage_handle(url, video_id, *args, fatal=fatal, note=note, **kwargs)
|
||||||
|
if urllib.parse.urlparse(urlh.url).netloc == 'passport.weibo.com':
|
||||||
|
self._update_visitor_cookies(video_id)
|
||||||
|
webpage = self._download_webpage(url, video_id, *args, fatal=fatal, note=note, **kwargs)
|
||||||
|
return self._parse_json(webpage, video_id, fatal=fatal)
|
||||||
|
|
||||||
|
def _extract_formats(self, video_info):
|
||||||
|
media_info = traverse_obj(video_info, ('page_info', 'media_info'))
|
||||||
|
formats = traverse_obj(media_info, (
|
||||||
|
'playback_list', lambda _, v: url_or_none(v['play_info']['url']), 'play_info', {
|
||||||
|
'url': 'url',
|
||||||
|
'format': ('quality_desc', {str}),
|
||||||
|
'format_id': ('label', {str}),
|
||||||
|
'ext': ('mime', {mimetype2ext}),
|
||||||
|
'tbr': ('bitrate', {int_or_none}, {lambda x: x or None}),
|
||||||
|
'vcodec': ('video_codecs', {str}),
|
||||||
|
'fps': ('fps', {int_or_none}),
|
||||||
|
'width': ('width', {int_or_none}),
|
||||||
|
'height': ('height', {int_or_none}),
|
||||||
|
'filesize': ('size', {int_or_none}),
|
||||||
|
'acodec': ('audio_codecs', {str}),
|
||||||
|
'asr': ('audio_sample_rate', {int_or_none}),
|
||||||
|
'audio_channels': ('audio_channels', {int_or_none}),
|
||||||
|
}))
|
||||||
|
if not formats: # fallback, should be barely used
|
||||||
|
for url in set(traverse_obj(media_info, (..., {url_or_none}))):
|
||||||
|
if 'label=' in url: # filter out non-video urls
|
||||||
|
format_id, resolution = self._search_regex(
|
||||||
|
r'label=(\w+)&template=(\d+x\d+)', url, 'format info',
|
||||||
|
group=(1, 2), default=(None, None))
|
||||||
|
formats.append({
|
||||||
|
'url': url,
|
||||||
|
'format_id': format_id,
|
||||||
|
**parse_resolution(resolution),
|
||||||
|
**traverse_obj(media_info, (
|
||||||
|
'video_details', lambda _, v: v['label'].startswith(format_id), {
|
||||||
|
'size': ('size', {int_or_none}),
|
||||||
|
'tbr': ('bitrate', {int_or_none}),
|
||||||
|
}
|
||||||
|
), get_all=False),
|
||||||
|
})
|
||||||
|
return formats
|
||||||
|
|
||||||
|
def _parse_video_info(self, video_info, video_id=None):
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'extractor_key': WeiboIE.ie_key(),
|
||||||
'uploader': uploader,
|
'extractor': WeiboIE.IE_NAME,
|
||||||
'formats': formats
|
'formats': self._extract_formats(video_info),
|
||||||
|
'http_headers': {'Referer': 'https://weibo.com/'},
|
||||||
|
'_old_archive_ids': [make_archive_id('WeiboMobile', video_id)],
|
||||||
|
**traverse_obj(video_info, {
|
||||||
|
'id': (('id', 'id_str', 'mid'), {str_or_none}),
|
||||||
|
'display_id': ('mblogid', {str_or_none}),
|
||||||
|
'title': ('page_info', 'media_info', ('video_title', 'kol_title', 'name'), {str}, {lambda x: x or None}),
|
||||||
|
'description': ('text_raw', {str}),
|
||||||
|
'duration': ('page_info', 'media_info', 'duration', {int_or_none}),
|
||||||
|
'timestamp': ('page_info', 'media_info', 'video_publish_time', {int_or_none}),
|
||||||
|
'thumbnail': ('page_info', 'page_pic', {url_or_none}),
|
||||||
|
'uploader': ('user', 'screen_name', {str}),
|
||||||
|
'uploader_id': ('user', ('id', 'id_str'), {str_or_none}),
|
||||||
|
'uploader_url': ('user', 'profile_url', {lambda x: urljoin('https://weibo.com/', x)}),
|
||||||
|
'view_count': ('page_info', 'media_info', 'online_users_number', {int_or_none}),
|
||||||
|
'like_count': ('attitudes_count', {int_or_none}),
|
||||||
|
'repost_count': ('reposts_count', {int_or_none}),
|
||||||
|
}, get_all=False),
|
||||||
|
'tags': traverse_obj(video_info, ('topic_struct', ..., 'topic_title', {str})) or None,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class WeiboMobileIE(InfoExtractor):
|
class WeiboIE(WeiboBaseIE):
|
||||||
_VALID_URL = r'https?://m\.weibo\.cn/status/(?P<id>[0-9]+)(\?.+)?'
|
_VALID_URL = r'https?://(?:m\.weibo\.cn/status|(?:www\.)?weibo\.com/\d+)/(?P<id>[a-zA-Z0-9]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://m.weibo.cn/status/4189191225395228?wm=3333_2001&sourcetype=weixin&featurecode=newtitle&from=singlemessage&isappinstalled=0',
|
'url': 'https://weibo.com/7827771738/N4xlMvjhI',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '4910815147462302',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'display_id': 'N4xlMvjhI',
|
||||||
|
'title': '【睡前消息暑假版第一期:拉泰国一把 对中国有好处】',
|
||||||
|
'description': 'md5:e2637a7673980d68694ea7c43cf12a5f',
|
||||||
|
'duration': 918,
|
||||||
|
'timestamp': 1686312819,
|
||||||
|
'upload_date': '20230609',
|
||||||
|
'thumbnail': r're:https://.*\.jpg',
|
||||||
|
'uploader': '睡前视频基地',
|
||||||
|
'uploader_id': '7827771738',
|
||||||
|
'uploader_url': 'https://weibo.com/u/7827771738',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
'tags': ['泰国大选远进党获胜', '睡前消息', '暑期版'],
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://m.weibo.cn/status/4189191225395228',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4189191225395228',
|
'id': '4189191225395228',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '午睡当然是要甜甜蜜蜜的啦',
|
'display_id': 'FBqgOmDxO',
|
||||||
'uploader': '柴犬柴犬'
|
'title': '柴犬柴犬的秒拍视频',
|
||||||
|
'description': 'md5:80f461ab5cdae6bbdb70efbf5a1db24f',
|
||||||
|
'duration': 53,
|
||||||
|
'timestamp': 1514264429,
|
||||||
|
'upload_date': '20171226',
|
||||||
|
'thumbnail': r're:https://.*\.jpg',
|
||||||
|
'uploader': '柴犬柴犬',
|
||||||
|
'uploader_id': '5926682210',
|
||||||
|
'uploader_url': 'https://weibo.com/u/5926682210',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'repost_count': int,
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://weibo.com/0/4224132150961381',
|
||||||
|
'note': 'no playback_list example',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
# to get Referer url for genvisitor
|
|
||||||
webpage = self._download_webpage(url, video_id, note='visit the page')
|
|
||||||
|
|
||||||
weibo_info = self._parse_json(self._search_regex(
|
return self._parse_video_info(self._weibo_download_json(
|
||||||
r'var\s+\$render_data\s*=\s*\[({.*})\]\[0\]\s*\|\|\s*{};',
|
f'https://weibo.com/ajax/statuses/show?id={video_id}', video_id))
|
||||||
webpage, 'js_code', flags=re.DOTALL),
|
|
||||||
video_id, transform_source=js_to_json)
|
|
||||||
|
|
||||||
status_data = weibo_info.get('status', {})
|
|
||||||
page_info = status_data.get('page_info')
|
|
||||||
title = status_data['status_title']
|
|
||||||
uploader = status_data.get('user', {}).get('screen_name')
|
|
||||||
|
|
||||||
return {
|
class WeiboVideoIE(WeiboBaseIE):
|
||||||
'id': video_id,
|
_VALID_URL = r'https?://(?:www\.)?weibo\.com/tv/show/(?P<id>\d+:\d+)'
|
||||||
'title': title,
|
_TESTS = [{
|
||||||
'uploader': uploader,
|
'url': 'https://weibo.com/tv/show/1034:4797699866951785?from=old_pc_videoshow',
|
||||||
'url': page_info['media_info']['stream_url']
|
'info_dict': {
|
||||||
|
'id': '4797700463137878',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'display_id': 'LEZDodaiW',
|
||||||
|
'title': '呃,稍微了解了一下靡烟miya,感觉这东西也太二了',
|
||||||
|
'description': '呃,稍微了解了一下靡烟miya,感觉这东西也太二了 http://t.cn/A6aerGsM ',
|
||||||
|
'duration': 76,
|
||||||
|
'timestamp': 1659344278,
|
||||||
|
'upload_date': '20220801',
|
||||||
|
'thumbnail': r're:https://.*\.jpg',
|
||||||
|
'uploader': '君子爱财陈平安',
|
||||||
|
'uploader_id': '3905382233',
|
||||||
|
'uploader_url': 'https://weibo.com/u/3905382233',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'repost_count': int,
|
||||||
}
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
post_data = f'data={{"Component_Play_Playinfo":{{"oid":"{video_id}"}}}}'.encode()
|
||||||
|
video_info = self._weibo_download_json(
|
||||||
|
f'https://weibo.com/tv/api/component?page=%2Ftv%2Fshow%2F{video_id.replace(":", "%3A")}',
|
||||||
|
video_id, headers={'Referer': url}, data=post_data)['data']['Component_Play_Playinfo']
|
||||||
|
return self.url_result(f'https://weibo.com/0/{video_info["mid"]}', WeiboIE)
|
||||||
|
|
||||||
|
|
||||||
|
class WeiboUserIE(WeiboBaseIE):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?weibo\.com/u/(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://weibo.com/u/2066652961?tabtype=video',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2066652961',
|
||||||
|
'title': '萧影殿下的视频',
|
||||||
|
'description': '萧影殿下的全部视频',
|
||||||
|
'uploader': '萧影殿下',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 195,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _fetch_page(self, uid, cursor=0, page=1):
|
||||||
|
return self._weibo_download_json(
|
||||||
|
'https://weibo.com/ajax/profile/getWaterFallContent',
|
||||||
|
uid, note=f'Downloading videos page {page}',
|
||||||
|
query={'uid': uid, 'cursor': cursor})['data']
|
||||||
|
|
||||||
|
def _entries(self, uid, first_page):
|
||||||
|
cursor = 0
|
||||||
|
for page in itertools.count(1):
|
||||||
|
response = first_page if page == 1 else self._fetch_page(uid, cursor, page)
|
||||||
|
for video_info in traverse_obj(response, ('list', ..., {dict})):
|
||||||
|
yield self._parse_video_info(video_info)
|
||||||
|
cursor = response.get('next_cursor')
|
||||||
|
if (int_or_none(cursor) or -1) < 0:
|
||||||
|
break
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
uid = self._match_id(url)
|
||||||
|
first_page = self._fetch_page(uid)
|
||||||
|
uploader = traverse_obj(first_page, ('list', ..., 'user', 'screen_name', {str}), get_all=False)
|
||||||
|
metainfo = {
|
||||||
|
'title': f'{uploader}的视频',
|
||||||
|
'description': f'{uploader}的全部视频',
|
||||||
|
'uploader': uploader,
|
||||||
|
} if uploader else {}
|
||||||
|
|
||||||
|
return self.playlist_result(self._entries(uid, first_page), uid, **metainfo)
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
try_call,
|
try_call,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
url_basename,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -45,12 +46,14 @@ class ZaikoIE(ZaikoBaseIE):
|
||||||
'uploader_id': '454',
|
'uploader_id': '454',
|
||||||
'uploader': 'ZAIKO ZERO',
|
'uploader': 'ZAIKO ZERO',
|
||||||
'release_timestamp': 1583809200,
|
'release_timestamp': 1583809200,
|
||||||
'thumbnail': r're:https://[a-z0-9]+.cloudfront.net/[a-z0-9_]+/[a-z0-9_]+',
|
'thumbnail': r're:^https://[\w.-]+/\w+/\w+',
|
||||||
|
'thumbnails': 'maxcount:2',
|
||||||
'release_date': '20200310',
|
'release_date': '20200310',
|
||||||
'categories': ['Tech House'],
|
'categories': ['Tech House'],
|
||||||
'live_status': 'was_live',
|
'live_status': 'was_live',
|
||||||
},
|
},
|
||||||
'params': {'skip_download': 'm3u8'},
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
'skip': 'Your account does not have tickets to this event',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -83,6 +86,12 @@ def _real_extract(self, url):
|
||||||
if not formats:
|
if not formats:
|
||||||
self.raise_no_formats(msg, expected=expected)
|
self.raise_no_formats(msg, expected=expected)
|
||||||
|
|
||||||
|
thumbnail_urls = [
|
||||||
|
traverse_obj(player_meta, ('initial_event_info', 'poster_url')),
|
||||||
|
self._og_search_thumbnail(self._download_webpage(
|
||||||
|
f'https://zaiko.io/event/{video_id}', video_id, 'Downloading event page', fatal=False) or ''),
|
||||||
|
]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
@ -96,8 +105,8 @@ def _real_extract(self, url):
|
||||||
}),
|
}),
|
||||||
**traverse_obj(player_meta, ('initial_event_info', {
|
**traverse_obj(player_meta, ('initial_event_info', {
|
||||||
'alt_title': ('title', {str}),
|
'alt_title': ('title', {str}),
|
||||||
'thumbnail': ('poster_url', {url_or_none}),
|
|
||||||
})),
|
})),
|
||||||
|
'thumbnails': [{'url': url, 'id': url_basename(url)} for url in thumbnail_urls if url_or_none(url)]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -127,6 +127,7 @@ def _real_extract(self, url):
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': str_or_none(traverse_obj(data, ('meet', 'topic'))),
|
'title': str_or_none(traverse_obj(data, ('meet', 'topic'))),
|
||||||
|
'duration': int_or_none(data.get('duration')),
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'http_headers': {
|
'http_headers': {
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
import contextlib
|
import contextlib
|
||||||
import functools
|
import functools
|
||||||
|
import socket
|
||||||
import ssl
|
import ssl
|
||||||
import sys
|
import sys
|
||||||
import typing
|
import typing
|
||||||
|
@ -206,3 +207,59 @@ def wrapper(self, *args, **kwargs):
|
||||||
e.handler = self
|
e.handler = self
|
||||||
raise
|
raise
|
||||||
return wrapper
|
return wrapper
|
||||||
|
|
||||||
|
|
||||||
|
def _socket_connect(ip_addr, timeout, source_address):
|
||||||
|
af, socktype, proto, canonname, sa = ip_addr
|
||||||
|
sock = socket.socket(af, socktype, proto)
|
||||||
|
try:
|
||||||
|
if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
|
||||||
|
sock.settimeout(timeout)
|
||||||
|
if source_address:
|
||||||
|
sock.bind(source_address)
|
||||||
|
sock.connect(sa)
|
||||||
|
return sock
|
||||||
|
except socket.error:
|
||||||
|
sock.close()
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
def create_connection(
|
||||||
|
address,
|
||||||
|
timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
|
||||||
|
source_address=None,
|
||||||
|
*,
|
||||||
|
_create_socket_func=_socket_connect
|
||||||
|
):
|
||||||
|
# Work around socket.create_connection() which tries all addresses from getaddrinfo() including IPv6.
|
||||||
|
# This filters the addresses based on the given source_address.
|
||||||
|
# Based on: https://github.com/python/cpython/blob/main/Lib/socket.py#L810
|
||||||
|
host, port = address
|
||||||
|
ip_addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
|
||||||
|
if not ip_addrs:
|
||||||
|
raise socket.error('getaddrinfo returns an empty list')
|
||||||
|
if source_address is not None:
|
||||||
|
af = socket.AF_INET if ':' not in source_address[0] else socket.AF_INET6
|
||||||
|
ip_addrs = [addr for addr in ip_addrs if addr[0] == af]
|
||||||
|
if not ip_addrs:
|
||||||
|
raise OSError(
|
||||||
|
f'No remote IPv{4 if af == socket.AF_INET else 6} addresses available for connect. '
|
||||||
|
f'Can\'t use "{source_address[0]}" as source address')
|
||||||
|
|
||||||
|
err = None
|
||||||
|
for ip_addr in ip_addrs:
|
||||||
|
try:
|
||||||
|
sock = _create_socket_func(ip_addr, timeout, source_address)
|
||||||
|
# Explicitly break __traceback__ reference cycle
|
||||||
|
# https://bugs.python.org/issue36820
|
||||||
|
err = None
|
||||||
|
return sock
|
||||||
|
except socket.error as e:
|
||||||
|
err = e
|
||||||
|
|
||||||
|
try:
|
||||||
|
raise err
|
||||||
|
finally:
|
||||||
|
# Explicitly break __traceback__ reference cycle
|
||||||
|
# https://bugs.python.org/issue36820
|
||||||
|
err = None
|
||||||
|
|
|
@ -23,6 +23,7 @@
|
||||||
from ._helper import (
|
from ._helper import (
|
||||||
InstanceStoreMixin,
|
InstanceStoreMixin,
|
||||||
add_accept_encoding_header,
|
add_accept_encoding_header,
|
||||||
|
create_connection,
|
||||||
get_redirect_method,
|
get_redirect_method,
|
||||||
make_socks_proxy_opts,
|
make_socks_proxy_opts,
|
||||||
select_proxy,
|
select_proxy,
|
||||||
|
@ -54,44 +55,10 @@
|
||||||
def _create_http_connection(http_class, source_address, *args, **kwargs):
|
def _create_http_connection(http_class, source_address, *args, **kwargs):
|
||||||
hc = http_class(*args, **kwargs)
|
hc = http_class(*args, **kwargs)
|
||||||
|
|
||||||
|
if hasattr(hc, '_create_connection'):
|
||||||
|
hc._create_connection = create_connection
|
||||||
|
|
||||||
if source_address is not None:
|
if source_address is not None:
|
||||||
# This is to workaround _create_connection() from socket where it will try all
|
|
||||||
# address data from getaddrinfo() including IPv6. This filters the result from
|
|
||||||
# getaddrinfo() based on the source_address value.
|
|
||||||
# This is based on the cpython socket.create_connection() function.
|
|
||||||
# https://github.com/python/cpython/blob/master/Lib/socket.py#L691
|
|
||||||
def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
|
|
||||||
host, port = address
|
|
||||||
err = None
|
|
||||||
addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
|
|
||||||
af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
|
|
||||||
ip_addrs = [addr for addr in addrs if addr[0] == af]
|
|
||||||
if addrs and not ip_addrs:
|
|
||||||
ip_version = 'v4' if af == socket.AF_INET else 'v6'
|
|
||||||
raise OSError(
|
|
||||||
"No remote IP%s addresses available for connect, can't use '%s' as source address"
|
|
||||||
% (ip_version, source_address[0]))
|
|
||||||
for res in ip_addrs:
|
|
||||||
af, socktype, proto, canonname, sa = res
|
|
||||||
sock = None
|
|
||||||
try:
|
|
||||||
sock = socket.socket(af, socktype, proto)
|
|
||||||
if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
|
|
||||||
sock.settimeout(timeout)
|
|
||||||
sock.bind(source_address)
|
|
||||||
sock.connect(sa)
|
|
||||||
err = None # Explicitly break reference cycle
|
|
||||||
return sock
|
|
||||||
except OSError as _:
|
|
||||||
err = _
|
|
||||||
if sock is not None:
|
|
||||||
sock.close()
|
|
||||||
if err is not None:
|
|
||||||
raise err
|
|
||||||
else:
|
|
||||||
raise OSError('getaddrinfo returns an empty list')
|
|
||||||
if hasattr(hc, '_create_connection'):
|
|
||||||
hc._create_connection = _create_connection
|
|
||||||
hc.source_address = (source_address, 0)
|
hc.source_address = (source_address, 0)
|
||||||
|
|
||||||
return hc
|
return hc
|
||||||
|
@ -220,13 +187,28 @@ def make_socks_conn_class(base_class, socks_proxy):
|
||||||
proxy_args = make_socks_proxy_opts(socks_proxy)
|
proxy_args = make_socks_proxy_opts(socks_proxy)
|
||||||
|
|
||||||
class SocksConnection(base_class):
|
class SocksConnection(base_class):
|
||||||
def connect(self):
|
_create_connection = create_connection
|
||||||
self.sock = sockssocket()
|
|
||||||
self.sock.setproxy(**proxy_args)
|
|
||||||
if type(self.timeout) in (int, float): # noqa: E721
|
|
||||||
self.sock.settimeout(self.timeout)
|
|
||||||
self.sock.connect((self.host, self.port))
|
|
||||||
|
|
||||||
|
def connect(self):
|
||||||
|
def sock_socket_connect(ip_addr, timeout, source_address):
|
||||||
|
af, socktype, proto, canonname, sa = ip_addr
|
||||||
|
sock = sockssocket(af, socktype, proto)
|
||||||
|
try:
|
||||||
|
connect_proxy_args = proxy_args.copy()
|
||||||
|
connect_proxy_args.update({'addr': sa[0], 'port': sa[1]})
|
||||||
|
sock.setproxy(**connect_proxy_args)
|
||||||
|
if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: # noqa: E721
|
||||||
|
sock.settimeout(timeout)
|
||||||
|
if source_address:
|
||||||
|
sock.bind(source_address)
|
||||||
|
sock.connect((self.host, self.port))
|
||||||
|
return sock
|
||||||
|
except socket.error:
|
||||||
|
sock.close()
|
||||||
|
raise
|
||||||
|
self.sock = create_connection(
|
||||||
|
(proxy_args['addr'], proxy_args['port']), timeout=self.timeout,
|
||||||
|
source_address=self.source_address, _create_socket_func=sock_socket_connect)
|
||||||
if isinstance(self, http.client.HTTPSConnection):
|
if isinstance(self, http.client.HTTPSConnection):
|
||||||
self.sock = self._context.wrap_socket(self.sock, server_hostname=self.host)
|
self.sock = self._context.wrap_socket(self.sock, server_hostname=self.host)
|
||||||
|
|
||||||
|
@ -429,7 +411,7 @@ def _send(self, request):
|
||||||
except urllib.error.HTTPError as e:
|
except urllib.error.HTTPError as e:
|
||||||
if isinstance(e.fp, (http.client.HTTPResponse, urllib.response.addinfourl)):
|
if isinstance(e.fp, (http.client.HTTPResponse, urllib.response.addinfourl)):
|
||||||
# Prevent file object from being closed when urllib.error.HTTPError is destroyed.
|
# Prevent file object from being closed when urllib.error.HTTPError is destroyed.
|
||||||
e._closer.file = None
|
e._closer.close_called = True
|
||||||
raise HTTPError(UrllibResponseAdapter(e.fp), redirect_loop='redirect error' in str(e)) from e
|
raise HTTPError(UrllibResponseAdapter(e.fp), redirect_loop='redirect error' in str(e)) from e
|
||||||
raise # unexpected
|
raise # unexpected
|
||||||
except urllib.error.URLError as e:
|
except urllib.error.URLError as e:
|
||||||
|
|
|
@ -115,7 +115,7 @@ def __init__(self, http_error: HTTPError):
|
||||||
hdrs=http_error.response.headers,
|
hdrs=http_error.response.headers,
|
||||||
fp=http_error.response
|
fp=http_error.response
|
||||||
)
|
)
|
||||||
self._closer.file = None # Disable auto close
|
self._closer.close_called = True # Disable auto close
|
||||||
self._http_error = http_error
|
self._http_error = http_error
|
||||||
HTTPError.__init__(self, http_error.response, redirect_loop=http_error.redirect_loop)
|
HTTPError.__init__(self, http_error.response, redirect_loop=http_error.redirect_loop)
|
||||||
|
|
||||||
|
|
|
@ -134,26 +134,31 @@ def _check_response_version(self, expected_version, got_version):
|
||||||
self.close()
|
self.close()
|
||||||
raise InvalidVersionError(expected_version, got_version)
|
raise InvalidVersionError(expected_version, got_version)
|
||||||
|
|
||||||
def _resolve_address(self, destaddr, default, use_remote_dns):
|
def _resolve_address(self, destaddr, default, use_remote_dns, family=None):
|
||||||
try:
|
for f in (family,) if family else (socket.AF_INET, socket.AF_INET6):
|
||||||
return socket.inet_aton(destaddr)
|
try:
|
||||||
except OSError:
|
return f, socket.inet_pton(f, destaddr)
|
||||||
if use_remote_dns and self._proxy.remote_dns:
|
except OSError:
|
||||||
return default
|
continue
|
||||||
else:
|
|
||||||
return socket.inet_aton(socket.gethostbyname(destaddr))
|
if use_remote_dns and self._proxy.remote_dns:
|
||||||
|
return 0, default
|
||||||
|
else:
|
||||||
|
res = socket.getaddrinfo(destaddr, None, family=family or 0)
|
||||||
|
f, _, _, _, ipaddr = res[0]
|
||||||
|
return f, socket.inet_pton(f, ipaddr[0])
|
||||||
|
|
||||||
def _setup_socks4(self, address, is_4a=False):
|
def _setup_socks4(self, address, is_4a=False):
|
||||||
destaddr, port = address
|
destaddr, port = address
|
||||||
|
|
||||||
ipaddr = self._resolve_address(destaddr, SOCKS4_DEFAULT_DSTIP, use_remote_dns=is_4a)
|
_, ipaddr = self._resolve_address(destaddr, SOCKS4_DEFAULT_DSTIP, use_remote_dns=is_4a, family=socket.AF_INET)
|
||||||
|
|
||||||
packet = struct.pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr
|
packet = struct.pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr
|
||||||
|
|
||||||
username = (self._proxy.username or '').encode()
|
username = (self._proxy.username or '').encode()
|
||||||
packet += username + b'\x00'
|
packet += username + b'\x00'
|
||||||
|
|
||||||
if is_4a and self._proxy.remote_dns:
|
if is_4a and self._proxy.remote_dns and ipaddr == SOCKS4_DEFAULT_DSTIP:
|
||||||
packet += destaddr.encode() + b'\x00'
|
packet += destaddr.encode() + b'\x00'
|
||||||
|
|
||||||
self.sendall(packet)
|
self.sendall(packet)
|
||||||
|
@ -210,7 +215,7 @@ def _socks5_auth(self):
|
||||||
def _setup_socks5(self, address):
|
def _setup_socks5(self, address):
|
||||||
destaddr, port = address
|
destaddr, port = address
|
||||||
|
|
||||||
ipaddr = self._resolve_address(destaddr, None, use_remote_dns=True)
|
family, ipaddr = self._resolve_address(destaddr, None, use_remote_dns=True)
|
||||||
|
|
||||||
self._socks5_auth()
|
self._socks5_auth()
|
||||||
|
|
||||||
|
@ -220,8 +225,10 @@ def _setup_socks5(self, address):
|
||||||
destaddr = destaddr.encode()
|
destaddr = destaddr.encode()
|
||||||
packet += struct.pack('!B', Socks5AddressType.ATYP_DOMAINNAME)
|
packet += struct.pack('!B', Socks5AddressType.ATYP_DOMAINNAME)
|
||||||
packet += self._len_and_data(destaddr)
|
packet += self._len_and_data(destaddr)
|
||||||
else:
|
elif family == socket.AF_INET:
|
||||||
packet += struct.pack('!B', Socks5AddressType.ATYP_IPV4) + ipaddr
|
packet += struct.pack('!B', Socks5AddressType.ATYP_IPV4) + ipaddr
|
||||||
|
elif family == socket.AF_INET6:
|
||||||
|
packet += struct.pack('!B', Socks5AddressType.ATYP_IPV6) + ipaddr
|
||||||
packet += struct.pack('!H', port)
|
packet += struct.pack('!H', port)
|
||||||
|
|
||||||
self.sendall(packet)
|
self.sendall(packet)
|
||||||
|
|
|
@ -669,6 +669,7 @@ def replace_insane(char):
|
||||||
|
|
||||||
def sanitize_path(s, force=False):
|
def sanitize_path(s, force=False):
|
||||||
"""Sanitizes and normalizes path on Windows"""
|
"""Sanitizes and normalizes path on Windows"""
|
||||||
|
# XXX: this handles drive relative paths (c:sth) incorrectly
|
||||||
if sys.platform == 'win32':
|
if sys.platform == 'win32':
|
||||||
force = False
|
force = False
|
||||||
drive_or_unc, _ = os.path.splitdrive(s)
|
drive_or_unc, _ = os.path.splitdrive(s)
|
||||||
|
@ -687,7 +688,10 @@ def sanitize_path(s, force=False):
|
||||||
sanitized_path.insert(0, drive_or_unc + os.path.sep)
|
sanitized_path.insert(0, drive_or_unc + os.path.sep)
|
||||||
elif force and s and s[0] == os.path.sep:
|
elif force and s and s[0] == os.path.sep:
|
||||||
sanitized_path.insert(0, os.path.sep)
|
sanitized_path.insert(0, os.path.sep)
|
||||||
return os.path.join(*sanitized_path)
|
# TODO: Fix behavioral differences <3.12
|
||||||
|
# The workaround using `normpath` only superficially passes tests
|
||||||
|
# Ref: https://github.com/python/cpython/pull/100351
|
||||||
|
return os.path.normpath(os.path.join(*sanitized_path))
|
||||||
|
|
||||||
|
|
||||||
def sanitize_url(url, *, scheme='http'):
|
def sanitize_url(url, *, scheme='http'):
|
||||||
|
@ -1256,7 +1260,7 @@ def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
|
||||||
if precision == 'auto':
|
if precision == 'auto':
|
||||||
auto_precision = True
|
auto_precision = True
|
||||||
precision = 'microsecond'
|
precision = 'microsecond'
|
||||||
today = datetime_round(datetime.datetime.utcnow(), precision)
|
today = datetime_round(datetime.datetime.now(datetime.timezone.utc), precision)
|
||||||
if date_str in ('now', 'today'):
|
if date_str in ('now', 'today'):
|
||||||
return today
|
return today
|
||||||
if date_str == 'yesterday':
|
if date_str == 'yesterday':
|
||||||
|
@ -1319,8 +1323,8 @@ def datetime_round(dt, precision='day'):
|
||||||
'second': 1,
|
'second': 1,
|
||||||
}
|
}
|
||||||
roundto = lambda x, n: ((x + n / 2) // n) * n
|
roundto = lambda x, n: ((x + n / 2) // n) * n
|
||||||
timestamp = calendar.timegm(dt.timetuple())
|
timestamp = roundto(calendar.timegm(dt.timetuple()), unit_seconds[precision])
|
||||||
return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
|
return datetime.datetime.fromtimestamp(timestamp, datetime.timezone.utc)
|
||||||
|
|
||||||
|
|
||||||
def hyphenate_date(date_str):
|
def hyphenate_date(date_str):
|
||||||
|
@ -2847,6 +2851,7 @@ def mimetype2ext(mt, default=NO_DEFAULT):
|
||||||
'quicktime': 'mov',
|
'quicktime': 'mov',
|
||||||
'webm': 'webm',
|
'webm': 'webm',
|
||||||
'vp9': 'vp9',
|
'vp9': 'vp9',
|
||||||
|
'video/ogg': 'ogv',
|
||||||
'x-flv': 'flv',
|
'x-flv': 'flv',
|
||||||
'x-m4v': 'm4v',
|
'x-m4v': 'm4v',
|
||||||
'x-matroska': 'mkv',
|
'x-matroska': 'mkv',
|
||||||
|
|
Loading…
Reference in a new issue