Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2024-10-01 15:12:57 +00:00)

Commit d8d31be98e: Merge remote-tracking branch 'upstream/master'

.github/workflows/core.yml (vendored, 5 lines changed)
@@ -13,13 +13,16 @@ jobs:
      matrix:
        os: [ubuntu-latest]
        # CPython 3.11 is in quick-test
        python-version: ['3.8', '3.9', '3.10', pypy-3.7, pypy-3.8]
        python-version: ['3.8', '3.9', '3.10', '3.12-dev', pypy-3.7, pypy-3.8, pypy-3.10]
        run-tests-ext: [sh]
        include:
        # at least one of each CPython/PyPy tests must be in windows
        - os: windows-latest
          python-version: '3.7'
          run-tests-ext: bat
        - os: windows-latest
          python-version: '3.12-dev'
          run-tests-ext: bat
        - os: windows-latest
          python-version: pypy-3.9
          run-tests-ext: bat

@@ -76,7 +76,7 @@

# NEW FEATURES

* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@42f2d4**](https://github.com/ytdl-org/youtube-dl/commit/07af47960f3bb262ead02490ce65c8c45c01741e) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))
* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@66ab08**](https://github.com/ytdl-org/youtube-dl/commit/66ab0814c4baa2dc79c2dd5287bc0ad61a37c5b9) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))

* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API

@@ -1854,7 +1854,7 @@ #### rokfinchannel
* `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`

#### twitter
* `legacy_api`: Force usage of the legacy Twitter API instead of the GraphQL API for tweet extraction. Has no effect if login cookies are passed
* `api`: Select one of `graphql` (default), `legacy` or `syndication` as the API for tweet extraction. Has no effect if logged in

#### stacommu, wrestleuniverse
* `device_id`: UUID value assigned by the website and used to enforce device limits for paid livestream content. Can be found in browser local storage

@@ -68,6 +68,25 @@
    {
        "action": "change",
        "when": "b03fa7834579a01cc5fba48c0e73488a16683d48",
        "short": "[ie/twitter] Revert 92315c03774cfabb3a921884326beb4b981f786b"
        "short": "[ie/twitter] Revert 92315c03774cfabb3a921884326beb4b981f786b",
        "authors": ["pukkandan"]
    },
    {
        "action": "change",
        "when": "fcd6a76adc49d5cd8783985c7ce35384b72e545f",
        "short": "[test] Add tests for socks proxies (#7908)",
        "authors": ["coletdjnz"]
    },
    {
        "action": "change",
        "when": "4bf912282a34b58b6b35d8f7e6be535770c89c76",
        "short": "[rh:urllib] Remove dot segments during URL normalization (#7662)",
        "authors": ["coletdjnz"]
    },
    {
        "action": "change",
        "when": "59e92b1f1833440bb2190f847eb735cf0f90bc85",
        "short": "[rh:urllib] Simplify gzip decoding (#7611)",
        "authors": ["Grub4K"]
    }
]
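Note: each override entry above keys on the `when` commit hash and replaces fields of the matching changelog commit. A minimal sketch of how such a `change` override could be applied; the dataclass and field names here are illustrative, not the exact devscripts implementation:

    from dataclasses import dataclass, replace


    @dataclass
    class Commit:
        hash: str
        short: str
        authors: list


    def apply_change_overrides(commits, overrides):
        # commits: dict mapping full hash -> Commit
        # overrides: the parsed changelog_override.json
        for override in overrides:
            if override.get('action') != 'change':
                continue
            when = override.get('when')
            if when in commits:
                # Replace the displayed message and author list for that commit
                commits[when] = replace(
                    commits[when],
                    short=override.get('short', commits[when].short),
                    authors=override.get('authors', commits[when].authors))
        return commits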

@@ -31,35 +31,27 @@ class CommitGroup(enum.Enum):
    EXTRACTOR = 'Extractor'
    DOWNLOADER = 'Downloader'
    POSTPROCESSOR = 'Postprocessor'
    NETWORKING = 'Networking'
    MISC = 'Misc.'

    @classmethod
    @property
    def ignorable_prefixes(cls):
        return ('core', 'downloader', 'extractor', 'misc', 'postprocessor', 'upstream')

    @classmethod
    @lru_cache
    def commit_lookup(cls):
    def subgroup_lookup(cls):
        return {
            name: group
            for group, names in {
                cls.PRIORITY: {'priority'},
                cls.CORE: {
                    'aes',
                    'cache',
                    'compat_utils',
                    'compat',
                    'cookies',
                    'core',
                    'dependencies',
                    'formats',
                    'jsinterp',
                    'networking',
                    'outtmpl',
                    'plugins',
                    'update',
                    'upstream',
                    'utils',
                },
                cls.MISC: {

@@ -67,23 +59,40 @@ def commit_lookup(cls):
                    'cleanup',
                    'devscripts',
                    'docs',
                    'misc',
                    'test',
                },
                cls.EXTRACTOR: {'extractor', 'ie'},
                cls.DOWNLOADER: {'downloader', 'fd'},
                cls.POSTPROCESSOR: {'postprocessor', 'pp'},
                cls.NETWORKING: {
                    'rh',
                },
            }.items()
            for name in names
        }

    @classmethod
    def get(cls, value):
        result = cls.commit_lookup().get(value)
        if result:
            logger.debug(f'Mapped {value!r} => {result.name}')
    @lru_cache
    def group_lookup(cls):
        result = {
            'fd': cls.DOWNLOADER,
            'ie': cls.EXTRACTOR,
            'pp': cls.POSTPROCESSOR,
            'upstream': cls.CORE,
        }
        result.update({item.name.lower(): item for item in iter(cls)})
        return result

    @classmethod
    def get(cls, value: str) -> tuple[CommitGroup | None, str | None]:
        group, _, subgroup = (group.strip().lower() for group in value.partition('/'))

        result = cls.group_lookup().get(group)
        if not result:
            if subgroup:
                return None, value
            subgroup = group
            result = cls.subgroup_lookup().get(subgroup)

        return result, subgroup or None


@dataclass
class Commit:
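Note: after this refactor, `CommitGroup.get` takes the whole commit prefix and returns a `(group, subgroup)` pair. A hypothetical illustration of the resolution, inferred from the lookup tables above (given the `CommitGroup` definition, these assertions follow from the new code):

    # 'ie' is a group alias -> (EXTRACTOR, None)
    assert CommitGroup.get('ie') == (CommitGroup.EXTRACTOR, None)
    # 'ie/twitter' -> group from the alias, remainder kept as the subgroup
    assert CommitGroup.get('ie/twitter') == (CommitGroup.EXTRACTOR, 'twitter')
    # 'cookies' is not a group name, but is in subgroup_lookup -> (CORE, 'cookies')
    assert CommitGroup.get('cookies') == (CommitGroup.CORE, 'cookies')
    # unknown prefix containing a slash -> no group, original value returned
    assert CommitGroup.get('unknown/thing') == (None, 'unknown/thing')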

@@ -198,19 +207,23 @@ def _prepare_cleanup_misc_items(self, items):
        for commit_infos in cleanup_misc_items.values():
            sorted_items.append(CommitInfo(
                'cleanup', ('Miscellaneous',), ', '.join(
                    self._format_message_link(None, info.commit.hash).strip()
                    self._format_message_link(None, info.commit.hash)
                    for info in sorted(commit_infos, key=lambda item: item.commit.hash or '')),
                [], Commit(None, '', commit_infos[0].commit.authors), []))

        return sorted_items

    def format_single_change(self, info):
        message = self._format_message_link(info.message, info.commit.hash)
    def format_single_change(self, info: CommitInfo):
        message, sep, rest = info.message.partition('\n')
        if '[' not in message:
            # If the message doesn't already contain markdown links, try to add a link to the commit
            message = self._format_message_link(message, info.commit.hash)

        if info.issues:
            message = message.replace('\n', f' ({self._format_issues(info.issues)})\n', 1)
            message = f'{message} ({self._format_issues(info.issues)})'

        if info.commit.authors:
            message = message.replace('\n', f' by {self._format_authors(info.commit.authors)}\n', 1)
            message = f'{message} by {self._format_authors(info.commit.authors)}'

        if info.fixes:
            fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes)

@@ -219,16 +232,14 @@ def format_single_change(self, info):
            if authors != info.commit.authors:
                fix_message = f'{fix_message} by {self._format_authors(authors)}'

            message = message.replace('\n', f' (With fixes in {fix_message})\n', 1)
            message = f'{message} (With fixes in {fix_message})'

        return message[:-1]
        return message if not sep else f'{message}{sep}{rest}'

    def _format_message_link(self, message, hash):
        assert message or hash, 'Improperly defined commit message or override'
        message = message if message else hash[:HASH_LENGTH]
        if not hash:
            return f'{message}\n'
        return f'[{message}\n'.replace('\n', f']({self.repo_url}/commit/{hash})\n', 1)
        return f'[{message}]({self.repo_url}/commit/{hash})' if hash else message

    def _format_issues(self, issues):
        return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues)
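Note: the rewritten `_format_message_link` drops the old trailing-newline bookkeeping and returns a plain markdown link. A standalone sketch of the new behavior; the `HASH_LENGTH` value is assumed here, the real constant lives in the devscripts module:

    HASH_LENGTH = 7  # assumption for illustration

    def format_message_link(message, hash, repo_url='https://github.com/yt-dlp/yt-dlp'):
        # Standalone version of the simplified helper above
        assert message or hash, 'Improperly defined commit message or override'
        message = message if message else hash[:HASH_LENGTH]
        return f'[{message}]({repo_url}/commit/{hash})' if hash else message

    print(format_message_link('Fix foo', 'abcdef1234567'))
    # [Fix foo](https://github.com/yt-dlp/yt-dlp/commit/abcdef1234567)
    print(format_message_link(None, 'abcdef1234567'))
    # [abcdef1](https://github.com/yt-dlp/yt-dlp/commit/abcdef1234567)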

@@ -318,7 +329,7 @@ def _get_commits_and_fixes(self, default_author):
        for commitish, revert_commit in reverts.items():
            reverted = commits.pop(commitish, None)
            if reverted:
                logger.debug(f'{commit} fully reverted {reverted}')
                logger.debug(f'{commitish} fully reverted {reverted}')
            else:
                commits[revert_commit.hash] = revert_commit

@@ -337,7 +348,7 @@ def apply_overrides(self, overrides):
        for override in overrides:
            when = override.get('when')
            if when and when not in self and when != self._start:
                logger.debug(f'Ignored {when!r}, not in commits {self._start!r}')
                logger.debug(f'Ignored {when!r} override')
                continue

            override_hash = override.get('hash') or when

@@ -365,7 +376,7 @@ def groups(self):
        for commit in self:
            upstream_re = self.UPSTREAM_MERGE_RE.search(commit.short)
            if upstream_re:
                commit.short = f'[core/upstream] Merged with youtube-dl {upstream_re.group(1)}'
                commit.short = f'[upstream] Merged with youtube-dl {upstream_re.group(1)}'

            match = self.MESSAGE_RE.fullmatch(commit.short)
            if not match:

@@ -410,25 +421,20 @@ def details_from_prefix(prefix):
        if not prefix:
            return CommitGroup.CORE, None, ()

        prefix, _, details = prefix.partition('/')
        prefix = prefix.strip()
        details = details.strip()
        prefix, *sub_details = prefix.split(':')

        group = CommitGroup.get(prefix.lower())
        if group is CommitGroup.PRIORITY:
            prefix, _, details = details.partition('/')
        group, details = CommitGroup.get(prefix)
        if group is CommitGroup.PRIORITY and details:
            details = details.partition('/')[2].strip()

        if not details and prefix and prefix not in CommitGroup.ignorable_prefixes:
            logger.debug(f'Replaced details with {prefix!r}')
            details = prefix or None
        if details and '/' in details:
            logger.error(f'Prefix is overnested, using first part: {prefix}')
            details = details.partition('/')[0].strip()

        if details == 'common':
            details = None

        if details:
            details, *sub_details = details.split(':')
        else:
            sub_details = []
        elif group is CommitGroup.NETWORKING and details == 'rh':
            details = 'Request Handler'

        return group, details, sub_details

@@ -10,14 +10,14 @@
import argparse
import contextlib
import sys
from datetime import datetime
from datetime import datetime, timezone

from devscripts.utils import read_version, run_process, write_file


def get_new_version(version, revision):
    if not version:
        version = datetime.utcnow().strftime('%Y.%m.%d')
        version = datetime.now(timezone.utc).strftime('%Y.%m.%d')

    if revision:
        assert revision.isdigit(), 'Revision must be a number'
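Note: this is one of several hunks in the merge (see also YoutubeDL.py and the AWS extractor below) replacing naive `datetime.utcnow()` with timezone-aware `datetime.now(timezone.utc)`; `utcnow()` is deprecated as of Python 3.12. The two differ in awareness, not in wall-clock value:

    from datetime import datetime, timezone

    naive = datetime.utcnow()            # tzinfo is None; deprecated since Python 3.12
    aware = datetime.now(timezone.utc)   # tzinfo is timezone.utc

    print(naive.tzinfo)  # None
    print(aware.tzinfo)  # UTC
    # Formatting is unaffected, so the produced version string stays the same:
    print(aware.strftime('%Y.%m.%d'))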

@@ -281,17 +281,13 @@ def test_socks4_auth(self, handler, ctx):
                rh, proxies={'all': f'socks4://user:@{server_address}'})
            assert response['version'] == 4

    @pytest.mark.parametrize('handler,ctx', [
        pytest.param('Urllib', 'http', marks=pytest.mark.xfail(
            reason='socks4a implementation currently broken when destination is not a domain name'))
    ], indirect=True)
    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    def test_socks4a_ipv4_target(self, handler, ctx):
        with ctx.socks_server(Socks4ProxyHandler) as server_address:
            with handler(proxies={'all': f'socks4a://{server_address}'}) as rh:
                response = ctx.socks_info_request(rh, target_domain='127.0.0.1')
                assert response['version'] == 4
                assert response['ipv4_address'] == '127.0.0.1'
                assert response['domain_address'] is None
                assert (response['ipv4_address'] == '127.0.0.1') != (response['domain_address'] == '127.0.0.1')

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    def test_socks4a_domain_target(self, handler, ctx):

@@ -302,10 +298,7 @@ def test_socks4a_domain_target(self, handler, ctx):
            assert response['ipv4_address'] is None
            assert response['domain_address'] == 'localhost'

    @pytest.mark.parametrize('handler,ctx', [
        pytest.param('Urllib', 'http', marks=pytest.mark.xfail(
            reason='source_address is not yet supported for socks4 proxies'))
    ], indirect=True)
    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    def test_ipv4_client_source_address(self, handler, ctx):
        with ctx.socks_server(Socks4ProxyHandler) as server_address:
            source_address = f'127.0.0.{random.randint(5, 255)}'

@@ -327,10 +320,7 @@ def test_socks4_errors(self, handler, ctx, reply_code):
            with pytest.raises(ProxyError):
                ctx.socks_info_request(rh)

    @pytest.mark.parametrize('handler,ctx', [
        pytest.param('Urllib', 'http', marks=pytest.mark.xfail(
            reason='IPv6 socks4 proxies are not yet supported'))
    ], indirect=True)
    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    def test_ipv6_socks4_proxy(self, handler, ctx):
        with ctx.socks_server(Socks4ProxyHandler, bind_ip='::1') as server_address:
            with handler(proxies={'all': f'socks4://{server_address}'}) as rh:

@@ -342,7 +332,7 @@ def test_ipv6_socks4_proxy(self, handler, ctx):
    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    def test_timeout(self, handler, ctx):
        with ctx.socks_server(Socks4ProxyHandler, sleep=2) as server_address:
            with handler(proxies={'all': f'socks4://{server_address}'}, timeout=1) as rh:
            with handler(proxies={'all': f'socks4://{server_address}'}, timeout=0.5) as rh:
                with pytest.raises(TransportError):
                    ctx.socks_info_request(rh)

@@ -383,7 +373,7 @@ def test_socks5_domain_target(self, handler, ctx):
        with ctx.socks_server(Socks5ProxyHandler) as server_address:
            with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
                response = ctx.socks_info_request(rh, target_domain='localhost')
                assert response['ipv4_address'] == '127.0.0.1'
                assert (response['ipv4_address'] == '127.0.0.1') != (response['ipv6_address'] == '::1')
                assert response['version'] == 5

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)

@@ -404,22 +394,15 @@ def test_socks5h_ip_target(self, handler, ctx):
            assert response['domain_address'] is None
            assert response['version'] == 5

    @pytest.mark.parametrize('handler,ctx', [
        pytest.param('Urllib', 'http', marks=pytest.mark.xfail(
            reason='IPv6 destination addresses are not yet supported'))
    ], indirect=True)
    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    def test_socks5_ipv6_destination(self, handler, ctx):
        with ctx.socks_server(Socks5ProxyHandler) as server_address:
            with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
                response = ctx.socks_info_request(rh, target_domain='[::1]')
                assert response['ipv6_address'] == '::1'
                assert response['port'] == 80
                assert response['version'] == 5

    @pytest.mark.parametrize('handler,ctx', [
        pytest.param('Urllib', 'http', marks=pytest.mark.xfail(
            reason='IPv6 socks5 proxies are not yet supported'))
    ], indirect=True)
    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    def test_ipv6_socks5_proxy(self, handler, ctx):
        with ctx.socks_server(Socks5ProxyHandler, bind_ip='::1') as server_address:
            with handler(proxies={'all': f'socks5://{server_address}'}) as rh:

@@ -430,10 +413,7 @@ def test_ipv6_socks5_proxy(self, handler, ctx):

    # XXX: is there any feasible way of testing IPv6 source addresses?
    # Same would go for non-proxy source_address test...
    @pytest.mark.parametrize('handler,ctx', [
        pytest.param('Urllib', 'http', marks=pytest.mark.xfail(
            reason='source_address is not yet supported for socks5 proxies'))
    ], indirect=True)
    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    def test_ipv4_client_source_address(self, handler, ctx):
        with ctx.socks_server(Socks5ProxyHandler) as server_address:
            source_address = f'127.0.0.{random.randint(5, 255)}'
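Note: two recurring patterns in this test diff are worth calling out. First, each removed decorator used `pytest.param(..., marks=pytest.mark.xfail(reason=...))` to mark a parametrized case as an expected failure; deleting the mark makes the case a hard requirement again. Second, the new assertions of the form `(a == x) != (b == x)` check that exactly one of two fields matched, since a proxy may report the target as either an IP literal or a resolved address. A minimal standalone illustration:

    import pytest


    @pytest.mark.parametrize('value', [
        1,
        # The marked case still runs, but a failure is reported as 'xfail'
        # instead of failing the suite
        pytest.param(2, marks=pytest.mark.xfail(reason='known broken')),
    ])
    def test_is_one(value):
        assert value == 1


    def test_exactly_one_field_matches():
        response = {'ipv4_address': '127.0.0.1', 'domain_address': None}
        target = '127.0.0.1'
        # Inequality of two booleans is exclusive-or:
        # exactly one side may equal the target
        assert (response['ipv4_address'] == target) != (response['domain_address'] == target)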

@@ -2591,7 +2591,7 @@ def _fill_common_fields(self, info_dict, final=True):
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            with contextlib.suppress(ValueError, OverflowError, OSError):
                upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc)
                info_dict[date_key] = upload_date.strftime('%Y%m%d')

        live_keys = ('is_live', 'was_live')
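Note: the same modernization as in update-version.py, here for `utcfromtimestamp`. The surrounding `contextlib.suppress` matters because `fromtimestamp` can raise on out-of-range values, notably negative timestamps on Windows. The pattern in isolation:

    import contextlib
    import datetime

    def timestamp_to_date(ts):
        # Returns 'YYYYMMDD', or None when the platform cannot represent the timestamp
        with contextlib.suppress(ValueError, OverflowError, OSError):
            return datetime.datetime.fromtimestamp(ts, datetime.timezone.utc).strftime('%Y%m%d')
        return None

    print(timestamp_to_date(1657347907))  # 20220709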

@@ -15,7 +15,7 @@ def get_package_info(module):
        name=getattr(module, '_yt_dlp__identifier', module.__name__),
        version=str(next(filter(None, (
            getattr(module, attr, None)
            for attr in ('__version__', 'version_string', 'version')
            for attr in ('_yt_dlp__version', '__version__', 'version_string', 'version')
        )), None)))

@@ -43,6 +43,8 @@

try:
    import sqlite3
    # We need to get the underlying `sqlite` version, see https://github.com/yt-dlp/yt-dlp/issues/8152
    sqlite3._yt_dlp__version = sqlite3.sqlite_version
except ImportError:
    # although sqlite3 is part of the standard library, it is possible to compile python without
    # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544
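Note: these two hunks work together. The import site stamps the module with a private `_yt_dlp__version` attribute, here the underlying SQLite library version rather than the `sqlite3` DB-API module version, and `get_package_info` now checks that attribute first. The probing idiom is "first truthy attribute wins":

    import sqlite3

    # Stamp the module, as the diff above does
    sqlite3._yt_dlp__version = sqlite3.sqlite_version

    def probe_version(module):
        # First truthy candidate wins; None if nothing matches
        return next(filter(None, (
            getattr(module, attr, None)
            for attr in ('_yt_dlp__version', '__version__', 'version_string', 'version')
        )), None)

    # Reports e.g. '3.42.0' (the SQLite library), not the DB-API module version
    print(probe_version(sqlite3))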

@@ -122,7 +122,6 @@
from .archiveorg import (
    ArchiveOrgIE,
    YoutubeWebArchiveIE,
    VLiveWebArchiveIE,
)
from .arcpublishing import ArcPublishingIE
from .arkena import ArkenaIE

@@ -165,6 +164,7 @@
    AWAANLiveIE,
    AWAANSeasonIE,
)
from .axs import AxsIE
from .azmedien import AZMedienIE
from .baidu import BaiduVideoIE
from .banbye import (

@@ -223,7 +223,11 @@
    BiliBiliPlayerIE,
    BilibiliSpaceVideoIE,
    BilibiliSpaceAudioIE,
    BilibiliSpacePlaylistIE,
    BilibiliCollectionListIE,
    BilibiliSeriesListIE,
    BilibiliFavoritesListIE,
    BilibiliWatchlaterIE,
    BilibiliPlaylistIE,
    BiliIntlIE,
    BiliIntlSeriesIE,
    BiliLiveIE,

@@ -292,9 +296,11 @@
from .camsoda import CamsodaIE
from .camtasia import CamtasiaEmbedIE
from .camwithher import CamWithHerIE
from .canal1 import Canal1IE
from .canalalpha import CanalAlphaIE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .caracoltv import CaracolTvPlayIE
from .carambatv import (
    CarambaTVIE,
    CarambaTVPageIE,

@@ -561,6 +567,7 @@
    EpiconIE,
    EpiconSeriesIE,
)
from .eplus import EplusIbIE
from .epoch import EpochIE
from .eporner import EpornerIE
from .eroprofile import (

@@ -1501,6 +1508,7 @@
from .popcorntimes import PopcorntimesIE
from .popcorntv import PopcornTVIE
from .porn91 import Porn91IE
from .pornbox import PornboxIE
from .porncom import PornComIE
from .pornflip import PornFlipIE
from .pornhd import PornHdIE

@@ -1519,7 +1527,7 @@
    PuhuTVIE,
    PuhuTVSerieIE,
)
from .pr0gramm import Pr0grammStaticIE, Pr0grammIE
from .pr0gramm import Pr0grammIE
from .prankcast import PrankCastIE
from .premiershiprugby import PremiershipRugbyIE
from .presstv import PressTVIE

@@ -1555,7 +1563,14 @@
from .radiode import RadioDeIE
from .radiojavan import RadioJavanIE
from .radiobremen import RadioBremenIE
from .radiofrance import FranceCultureIE, RadioFranceIE
from .radiofrance import (
    FranceCultureIE,
    RadioFranceIE,
    RadioFranceLiveIE,
    RadioFrancePodcastIE,
    RadioFranceProfileIE,
    RadioFranceProgramScheduleIE,
)
from .radiozet import RadioZetPodcastIE
from .radiokapital import (
    RadioKapitalIE,

@@ -1586,6 +1601,7 @@
from .rbgtum import (
    RbgTumIE,
    RbgTumCourseIE,
    RbgTumNewCourseIE,
)
from .rcs import (
    RCSIE,

@@ -1710,7 +1726,10 @@
    RuvIE,
    RuvSpilaIE
)
from .s4c import S4CIE
from .s4c import (
    S4CIE,
    S4CSeriesIE
)
from .safari import (
    SafariIE,
    SafariApiIE,

@@ -1791,7 +1810,10 @@
from .slutload import SlutloadIE
from .smotrim import SmotrimIE
from .snotr import SnotrIE
from .sohu import SohuIE
from .sohu import (
    SohuIE,
    SohuVIE,
)
from .sonyliv import (
    SonyLIVIE,
    SonyLIVSeriesIE,

@@ -2354,7 +2376,8 @@
)
from .weibo import (
    WeiboIE,
    WeiboMobileIE
    WeiboVideoIE,
    WeiboUserIE,
)
from .weiqitv import WeiqiTVIE
from .weverse import (

@@ -12,7 +12,7 @@
import urllib.request
import urllib.response
import uuid

from ..utils.networking import clean_proxies
from .common import InfoExtractor
from ..aes import aes_ecb_decrypt
from ..utils import (

@@ -35,7 +35,10 @@ def add_opener(ydl, handler):  # FIXME: Create proper API in .networking
    rh = ydl._request_director.handlers['Urllib']
    if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
        return
    opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=ydl.proxies)
    headers = ydl.params['http_headers'].copy()
    proxies = ydl.proxies.copy()
    clean_proxies(proxies, headers)
    opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=proxies)
    assert isinstance(opener, urllib.request.OpenerDirector)
    opener.add_handler(handler)
    rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')

@@ -22,8 +22,11 @@ def _call_api(self, asin, data=None, note=None):

        resp = self._download_json(
            f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}',
            asin, note=note, headers={'Content-Type': 'application/json'},
            data=json.dumps(data).encode() if data else None,
            asin, note=note, headers={
                'Content-Type': 'application/json',
                'currentpageurl': '/',
                'currentplatform': 'dWeb'
            }, data=json.dumps(data).encode() if data else None,
            query=None if data else {
                'deviceType': 'A1WMMUXPCUJL4N',
                'contentId': asin,

@@ -46,7 +49,7 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE):
            'ext': 'mp4',
            'title': 'May I Kiss You?',
            'language': 'Hindi',
            'thumbnail': r're:^https?://.*\.jpg$',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
            'description': 'md5:a549bfc747973e04feb707833474e59d',
            'release_timestamp': 1644710400,
            'release_date': '20220213',

@@ -68,7 +71,7 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE):
            'ext': 'mp4',
            'title': 'Jahaan',
            'language': 'Hindi',
            'thumbnail': r're:^https?://.*\.jpg',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'description': 'md5:05eb765a77bf703f322f120ec6867339',
            'release_timestamp': 1647475200,
            'release_date': '20220317',

@@ -3,7 +3,6 @@
import urllib.parse

from .common import InfoExtractor
from .naver import NaverBaseIE
from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
from ..compat import compat_urllib_parse_unquote
from ..networking import HEADRequest

@@ -947,237 +946,3 @@ def _real_extract(self, url):
        if not info.get('title'):
            info['title'] = video_id
        return info


class VLiveWebArchiveIE(InfoExtractor):
    IE_NAME = 'web.archive:vlive'
    IE_DESC = 'web.archive.org saved vlive videos'
    _VALID_URL = r'''(?x)
        (?:https?://)?web\.archive\.org/
        (?:web/)?(?:(?P<date>[0-9]{14})?[0-9A-Za-z_*]*/)?  # /web and the version index is optional
        (?:https?(?::|%3[Aa])//)?(?:
            (?:(?:www|m)\.)?vlive\.tv(?::(?:80|443))?/(?:video|embed)/(?P<id>[0-9]+)  # VLive URL
        )
    '''
    _TESTS = [{
        'url': 'https://web.archive.org/web/20221221144331/http://www.vlive.tv/video/1326',
        'md5': 'cc7314812855ce56de70a06a27314983',
        'info_dict': {
            'id': '1326',
            'ext': 'mp4',
            'title': "Girl's Day's Broadcast",
            'creator': "Girl's Day",
            'view_count': int,
            'uploader_id': 'muploader_a',
            'uploader_url': None,
            'uploader': None,
            'upload_date': '20150817',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
            'timestamp': 1439816449,
            'like_count': int,
            'channel': 'Girl\'s Day',
            'channel_id': 'FDF27',
            'comment_count': int,
            'release_timestamp': 1439818140,
            'release_date': '20150817',
            'duration': 1014,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://web.archive.org/web/20221221182103/http://www.vlive.tv/video/16937',
        'info_dict': {
            'id': '16937',
            'ext': 'mp4',
            'title': '첸백시 걍방',
            'creator': 'EXO',
            'view_count': int,
            'subtitles': 'mincount:12',
            'uploader_id': 'muploader_j',
            'uploader_url': 'http://vlive.tv',
            'uploader': None,
            'upload_date': '20161112',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
            'timestamp': 1478923074,
            'like_count': int,
            'channel': 'EXO',
            'channel_id': 'F94BD',
            'comment_count': int,
            'release_timestamp': 1478924280,
            'release_date': '20161112',
            'duration': 906,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870',
        'info_dict': {
            'id': '101870',
            'ext': 'mp4',
            'title': '[ⓓ xV] “레벨이들 매력에 반해? 안 반해?” 움직이는 HD 포토 (레드벨벳:Red Velvet)',
            'creator': 'Dispatch',
            'view_count': int,
            'subtitles': 'mincount:6',
            'uploader_id': 'V__FRA08071',
            'uploader_url': 'http://vlive.tv',
            'uploader': None,
            'upload_date': '20181130',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
            'timestamp': 1543601327,
            'like_count': int,
            'channel': 'Dispatch',
            'channel_id': 'C796F3',
            'comment_count': int,
            'release_timestamp': 1543601040,
            'release_date': '20181130',
            'duration': 279,
        },
        'params': {
            'skip_download': True,
        },
    }]

    # The wayback machine has special timestamp and "mode" values:
    # timestamp:
    #   1 = the first capture
    #   2 = the last capture
    # mode:
    #   id_ = Identity - perform no alterations of the original resource, return it as it was archived.
    _WAYBACK_BASE_URL = 'https://web.archive.org/web/2id_/'

    def _download_archived_page(self, url, video_id, *, timestamp='2', **kwargs):
        for retry in self.RetryManager():
            try:
                return self._download_webpage(f'https://web.archive.org/web/{timestamp}id_/{url}', video_id, **kwargs)
            except ExtractorError as e:
                if isinstance(e.cause, HTTPError) and e.cause.status == 404:
                    raise ExtractorError('Page was not archived', expected=True)
                retry.error = e
                continue

    def _download_archived_json(self, url, video_id, **kwargs):
        page = self._download_archived_page(url, video_id, **kwargs)
        if not page:
            raise ExtractorError('Page was not archived', expected=True)
        else:
            return self._parse_json(page, video_id)

    def _extract_formats_from_m3u8(self, m3u8_url, params, video_id):
        m3u8_doc = self._download_archived_page(m3u8_url, video_id, note='Downloading m3u8', query=params, fatal=False)
        if not m3u8_doc:
            return

        # M3U8 document should be changed to archive domain
        m3u8_doc = m3u8_doc.splitlines()
        url_base = m3u8_url.rsplit('/', 1)[0]
        first_segment = None
        for i, line in enumerate(m3u8_doc):
            if not line.startswith('#'):
                m3u8_doc[i] = f'{self._WAYBACK_BASE_URL}{url_base}/{line}?{urllib.parse.urlencode(params)}'
                first_segment = first_segment or m3u8_doc[i]

        # Segments may not have been archived. See https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870
        urlh = self._request_webpage(HEADRequest(first_segment), video_id, errnote=False,
                                     fatal=False, note='Check first segment availability')
        if urlh:
            formats, subtitles = self._parse_m3u8_formats_and_subtitles('\n'.join(m3u8_doc), ext='mp4', video_id=video_id)
            if subtitles:
                self._report_ignoring_subs('m3u8')
            return formats

    # Closely follows the logic of the ArchiveTeam grab script
    # See: https://github.com/ArchiveTeam/vlive-grab/blob/master/vlive.lua
    def _real_extract(self, url):
        video_id, url_date = self._match_valid_url(url).group('id', 'date')

        webpage = self._download_archived_page(f'https://www.vlive.tv/video/{video_id}', video_id, timestamp=url_date)

        player_info = self._search_json(r'__PRELOADED_STATE__\s*=', webpage, 'player info', video_id)
        user_country = traverse_obj(player_info, ('common', 'userCountry'))

        main_script_url = self._search_regex(r'<script\s+src="([^"]+/js/main\.[^"]+\.js)"', webpage, 'main script url')
        main_script = self._download_archived_page(main_script_url, video_id, note='Downloading main script')
        app_id = self._search_regex(r'appId\s*=\s*"([^"]+)"', main_script, 'app id')

        inkey = self._download_archived_json(
            f'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/{video_id}/inkey', video_id, note='Fetching inkey', query={
                'appId': app_id,
                'platformType': 'PC',
                'gcc': user_country,
                'locale': 'en_US',
            }, fatal=False)

        vod_id = traverse_obj(player_info, ('postDetail', 'post', 'officialVideo', 'vodId'))

        vod_data = self._download_archived_json(
            f'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{vod_id}', video_id, note='Fetching vod data', query={
                'key': inkey.get('inkey'),
                'pid': 'rmcPlayer_16692457559726800',  # partially unix time and partially random. Fixed value used by archiveteam project
                'sid': '2024',
                'ver': '2.0',
                'devt': 'html5_pc',
                'doct': 'json',
                'ptc': 'https',
                'sptc': 'https',
                'cpt': 'vtt',
                'ctls': '%7B%22visible%22%3A%7B%22fullscreen%22%3Atrue%2C%22logo%22%3Afalse%2C%22playbackRate%22%3Afalse%2C%22scrap%22%3Afalse%2C%22playCount%22%3Atrue%2C%22commentCount%22%3Atrue%2C%22title%22%3Atrue%2C%22writer%22%3Atrue%2C%22expand%22%3Afalse%2C%22subtitles%22%3Atrue%2C%22thumbnails%22%3Atrue%2C%22quality%22%3Atrue%2C%22setting%22%3Atrue%2C%22script%22%3Afalse%2C%22logoDimmed%22%3Atrue%2C%22badge%22%3Atrue%2C%22seekingTime%22%3Atrue%2C%22muted%22%3Atrue%2C%22muteButton%22%3Afalse%2C%22viewerNotice%22%3Afalse%2C%22linkCount%22%3Afalse%2C%22createTime%22%3Afalse%2C%22thumbnail%22%3Atrue%7D%2C%22clicked%22%3A%7B%22expand%22%3Afalse%2C%22subtitles%22%3Afalse%7D%7D',
                'pv': '4.26.9',
                'dr': '1920x1080',
                'cpl': 'en_US',
                'lc': 'en_US',
                'adi': '%5B%7B%22type%22%3A%22pre%22%2C%22exposure%22%3Afalse%2C%22replayExposure%22%3Afalse%7D%5D',
                'adu': '%2F',
                'videoId': vod_id,
                'cc': user_country,
            })

        formats = []

        streams = traverse_obj(vod_data, ('streams', ...))
        if len(streams) > 1:
            self.report_warning('Multiple streams found. Only the first stream will be downloaded.')
        stream = streams[0]

        max_stream = max(
            stream.get('videos') or [],
            key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
        if max_stream is not None:
            params = {arg.get('name'): arg.get('value') for arg in stream.get('keys', []) if arg.get('type') == 'param'}
            formats = self._extract_formats_from_m3u8(max_stream.get('source'), params, video_id) or []

        # For parts of the project MP4 files were archived
        max_video = max(
            traverse_obj(vod_data, ('videos', 'list', ...)),
            key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
        if max_video is not None:
            video_url = self._WAYBACK_BASE_URL + max_video.get('source')
            urlh = self._request_webpage(HEADRequest(video_url), video_id, errnote=False,
                                         fatal=False, note='Check video availability')
            if urlh:
                formats.append({'url': video_url})

        return {
            'id': video_id,
            'formats': formats,
            **traverse_obj(player_info, ('postDetail', 'post', {
                'title': ('officialVideo', 'title', {str}),
                'creator': ('author', 'nickname', {str}),
                'channel': ('channel', 'channelName', {str}),
                'channel_id': ('channel', 'channelCode', {str}),
                'duration': ('officialVideo', 'playTime', {int_or_none}),
                'view_count': ('officialVideo', 'playCount', {int_or_none}),
                'like_count': ('officialVideo', 'likeCount', {int_or_none}),
                'comment_count': ('officialVideo', 'commentCount', {int_or_none}),
                'timestamp': ('officialVideo', 'createdAt', {lambda x: int_or_none(x, scale=1000)}),
                'release_timestamp': ('officialVideo', 'willStartAt', {lambda x: int_or_none(x, scale=1000)}),
            })),
            **traverse_obj(vod_data, ('meta', {
                'uploader_id': ('user', 'id', {str}),
                'uploader': ('user', 'name', {str}),
                'uploader_url': ('user', 'url', {url_or_none}),
                'thumbnail': ('cover', 'source', {url_or_none}),
            }), expected_type=lambda x: x or None),
            **NaverBaseIE.process_subtitles(vod_data, lambda x: [self._WAYBACK_BASE_URL + x]),
        }
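Note: the removed `_extract_formats_from_m3u8` above demonstrates a reusable trick: to play an HLS stream out of the Wayback Machine, every non-comment line of the playlist (the segment URIs) must be rewritten to go through the `id_` (identity) endpoint. A standalone sketch of that rewrite, with a made-up playlist:

    import urllib.parse

    WAYBACK_BASE_URL = 'https://web.archive.org/web/2id_/'

    def rewrite_m3u8(m3u8_doc, m3u8_url, params):
        # Prefix every segment URI with the wayback identity endpoint;
        # lines starting with '#' are playlist tags and stay untouched
        url_base = m3u8_url.rsplit('/', 1)[0]
        return '\n'.join(
            line if line.startswith('#')
            else f'{WAYBACK_BASE_URL}{url_base}/{line}?{urllib.parse.urlencode(params)}'
            for line in m3u8_doc.splitlines())

    doc = '#EXTM3U\n#EXTINF:6.0,\nseg_000.ts\n#EXT-X-ENDLIST'
    print(rewrite_m3u8(doc, 'http://example.com/hls/playlist.m3u8', {'key': 'k'}))
    # seg_000.ts -> https://web.archive.org/web/2id_/http://example.com/hls/seg_000.ts?key=k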

@@ -12,7 +12,7 @@ class AWSIE(InfoExtractor):  # XXX: Conventionally, base classes should end with

    def _aws_execute_api(self, aws_dict, video_id, query=None):
        query = query or {}
        amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
        amz_date = datetime.datetime.now(datetime.timezone.utc).strftime('%Y%m%dT%H%M%SZ')
        date = amz_date[:8]
        headers = {
            'Accept': 'application/json',

yt_dlp/extractor/axs.py (new file, 87 lines)

@@ -0,0 +1,87 @@
from .common import InfoExtractor
from ..utils import (
    float_or_none,
    js_to_json,
    parse_iso8601,
    traverse_obj,
    url_or_none,
)


class AxsIE(InfoExtractor):
    IE_NAME = 'axs.tv'
    _VALID_URL = r'https?://(?:www\.)?axs\.tv/(?:channel/(?:[^/?#]+/)+)?video/(?P<id>[^/?#]+)'

    _TESTS = [{
        'url': 'https://www.axs.tv/video/5f4dc776b70e4f1c194f22ef/',
        'md5': '8d97736ae8e50c64df528e5e676778cf',
        'info_dict': {
            'id': '5f4dc776b70e4f1c194f22ef',
            'title': 'Small Town',
            'ext': 'mp4',
            'description': 'md5:e314d28bfaa227a4d7ec965fae19997f',
            'upload_date': '20230602',
            'timestamp': 1685729564,
            'duration': 1284.216,
            'series': 'Rock & Roll Road Trip with Sammy Hagar',
            'season': 2,
            'episode': '3',
            'thumbnail': 'https://images.dotstudiopro.com/5f4e9d330a0c3b295a7e8394',
        },
    }, {
        'url': 'https://www.axs.tv/channel/rock-star-interview/video/daryl-hall',
        'md5': '300ae795cd8f9984652c0949734ffbdc',
        'info_dict': {
            'id': '5f488148b70e4f392572977c',
            'display_id': 'daryl-hall',
            'title': 'Daryl Hall',
            'ext': 'mp4',
            'description': 'md5:e54ecaa0f4b5683fc9259e9e4b196628',
            'upload_date': '20230214',
            'timestamp': 1676403615,
            'duration': 2570.668,
            'series': 'The Big Interview with Dan Rather',
            'season': 3,
            'episode': '5',
            'thumbnail': 'https://images.dotstudiopro.com/5f4d1901f340b50d937cec32',
        },
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        webpage_json_data = self._search_json(
            r'mountObj\s*=', webpage, 'video ID data', display_id,
            transform_source=js_to_json)
        video_id = webpage_json_data['video_id']
        company_id = webpage_json_data['company_id']

        meta = self._download_json(
            f'https://api.myspotlight.tv/dotplayer/video/{company_id}/{video_id}',
            video_id, query={'device_type': 'desktop_web'})['video']

        formats = self._extract_m3u8_formats(
            meta['video_m3u8'], video_id, 'mp4', m3u8_id='hls')

        subtitles = {}
        for cc in traverse_obj(meta, ('closeCaption', lambda _, v: url_or_none(v['srtPath']))):
            subtitles.setdefault(cc.get('srtShortLang') or 'en', []).append(
                {'ext': cc.get('srtExt'), 'url': cc['srtPath']})

        return {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            **traverse_obj(meta, {
                'title': ('title', {str}),
                'description': ('description', {str}),
                'series': ('seriestitle', {str}),
                'season': ('season', {int}),
                'episode': ('episode', {str}),
                'duration': ('duration', {float_or_none}),
                'timestamp': ('updated_at', {parse_iso8601}),
                'thumbnail': ('thumb', {url_or_none}),
            }),
            'subtitles': subtitles,
        }
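Note: the new extractor leans on `yt_dlp.utils.traverse_obj`, used heavily throughout this merge: tuple paths walk nested dicts/lists, a one-element set filters by type or applies a callable as a transform, and a dict template builds a new mapping in one pass, omitting keys that resolve to None. A small usage sketch against made-up metadata:

    from yt_dlp.utils import float_or_none, traverse_obj

    meta = {'title': 'Small Town', 'duration': '1284.216', 'thumb': None}

    fields = traverse_obj(meta, {
        'title': ('title', {str}),                  # {str} keeps the value only if it is a str
        'duration': ('duration', {float_or_none}),  # callable: '1284.216' -> 1284.216
        'thumbnail': ('thumb', {str}),              # None fails the filter, key is omitted
    })
    print(fields)  # {'title': 'Small Town', 'duration': 1284.216}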

@@ -31,7 +31,7 @@ def _extract_playlist(self, playlist_id):


class BanByeIE(BanByeBaseIE):
    _VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?watch/(?P<id>\w+)'
    _VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?watch/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://banbye.com/watch/v_ytfmvkVYLE8T',
        'md5': '2f4ea15c5ca259a73d909b2cfd558eb5',

@@ -59,7 +59,27 @@ class BanByeIE(BanByeBaseIE):
            'title': 'Krzysztof Karoń',
            'id': 'p_Ld82N6gBw_OJ',
        },
        'playlist_count': 9,
        'playlist_mincount': 9,
    }, {
        'url': 'https://banbye.com/watch/v_kb6_o1Kyq-CD',
        'info_dict': {
            'id': 'v_kb6_o1Kyq-CD',
            'ext': 'mp4',
            'title': 'Co tak naprawdę dzieje się we Francji?! Czy Warszawa a potem cała Polska będzie drugim Paryżem?!🤔🇵🇱',
            'description': 'md5:82be4c0e13eae8ea1ca8b9f2e07226a8',
            'uploader': 'Marcin Rola - MOIM ZDANIEM!🇵🇱',
            'channel_id': 'ch_QgWnHvDG2fo5',
            'channel_url': 'https://banbye.com/channel/ch_QgWnHvDG2fo5',
            'duration': 597,
            'timestamp': 1688642656,
            'upload_date': '20230706',
            'thumbnail': 'https://cdn.banbye.com/video/v_kb6_o1Kyq-CD/96.webp',
            'tags': ['Paryż', 'Francja', 'Polska', 'Imigranci', 'Morawiecki', 'Tusk'],
            'like_count': int,
            'dislike_count': int,
            'view_count': int,
            'comment_count': int,
        },
    }]

    def _real_extract(self, url):
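Note: the `_VALID_URL` tweak exists because newer BanBye IDs like `v_kb6_o1Kyq-CD` contain hyphens, which `\w` does not match (it covers `[a-zA-Z0-9_]` plus other unicode word characters). A two-line check:

    import re

    video_id = 'v_kb6_o1Kyq-CD'
    print(re.fullmatch(r'\w+', video_id))      # None: '-' is not a word character
    print(re.fullmatch(r'[\w-]+', video_id))   # matches the full ID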

@@ -15,11 +15,13 @@
    float_or_none,
    get_element_by_class,
    int_or_none,
    join_nonempty,
    js_to_json,
    parse_duration,
    parse_iso8601,
    parse_qs,
    strip_or_none,
    traverse_obj,
    try_get,
    unescapeHTML,
    unified_timestamp,

@@ -41,7 +43,6 @@ class BBCCoUkIE(InfoExtractor):
        iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
        music/(?:clips|audiovideo/popular)[/#]|
        radio/player/|
        sounds/play/|
        events/[^/]+/play/[^/]+/
    )
    (?P<id>%s)(?!/(?:episodes|broadcasts|clips))

@@ -218,20 +219,6 @@ class BBCCoUkIE(InfoExtractor):
            # rtmp download
            'skip_download': True,
        },
    }, {
        'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb',
        'note': 'Audio',
        'info_dict': {
            'id': 'm0007jz9',
            'ext': 'mp4',
            'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra',
            'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.",
            'duration': 9840,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }, {
        'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
        'only_matching': True,

@@ -844,6 +831,20 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
            'upload_date': '20190604',
            'categories': ['Psychology'],
        },
    }, {
        # BBC Sounds
        'url': 'https://www.bbc.co.uk/sounds/play/m001q78b',
        'info_dict': {
            'id': 'm001q789',
            'ext': 'mp4',
            'title': 'The Night Tracks Mix - Music for the darkling hour',
            'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0c00hym.jpg',
            'chapters': 'count:8',
            'description': 'md5:815fb51cbdaa270040aab8145b3f1d67',
            'uploader': 'Radio 3',
            'duration': 1800,
            'uploader_id': 'bbc_radio_three',
        },
    }, {  # onion routes
        'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
        'only_matching': True,

@@ -1128,6 +1129,13 @@ def _real_extract(self, url):
            'uploader_id': network.get('id'),
            'formats': formats,
            'subtitles': subtitles,
            'chapters': traverse_obj(preload_state, (
                'tracklist', 'tracks', lambda _, v: float_or_none(v['offset']['start']), {
                    'title': ('titles', {lambda x: join_nonempty(
                        'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
                    'start_time': ('offset', 'start', {float_or_none}),
                    'end_time': ('offset', 'end', {float_or_none}),
                })) or None,
        }

        bbc3_config = self._parse_json(
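Note: the chapter titles here are assembled with `join_nonempty(..., from_dict=x)`, which looks up each named key in the dict and joins only the truthy values. A sketch of the behavior this relies on:

    from yt_dlp.utils import join_nonempty

    titles = {'primary': 'Night Tracks', 'secondary': None, 'tertiary': 'Mix'}
    print(join_nonempty('primary', 'secondary', 'tertiary', delim=' - ', from_dict=titles))
    # Night Tracks - Mix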

@@ -1,6 +1,7 @@
from .common import InfoExtractor
from ..utils import (
    int_or_none,
    traverse_obj,
    unescapeHTML,
)

@@ -8,7 +9,8 @@
class BildIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+)(?:,auto=true)?\.bild\.html'
    IE_DESC = 'Bild.de'
    _TEST = {
    _TESTS = [{
        'note': 'static MP4 only',
        'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html',
        'md5': 'dd495cbd99f2413502a1713a1156ac8a',
        'info_dict': {

@@ -19,7 +21,19 @@ class BildIE(InfoExtractor):
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 196,
        }
    }
    }, {
        'note': 'static MP4 and HLS',
        'url': 'https://www.bild.de/video/clip/news-ausland/deftiger-abgang-vom-10m-turm-bademeister-sorgt-fuer-skandal-85158620.bild.html',
        'md5': 'fb0ed4f09c495d4ba7ce2eee0bb90de1',
        'info_dict': {
            'id': '85158620',
            'ext': 'mp4',
            'title': 'Der Sprungturm-Skandal',
            'description': 'md5:709b543c24dc31bbbffee73bccda34ad',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 69,
        }
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

@@ -27,11 +41,23 @@ def _real_extract(self, url):
        video_data = self._download_json(
            url.split('.bild.html')[0] + ',view=json.bild.html', video_id)

        formats = []
        for src in traverse_obj(video_data, ('clipList', 0, 'srces', lambda _, v: v['src'])):
            src_type = src.get('type')
            if src_type == 'application/x-mpegURL':
                formats.extend(
                    self._extract_m3u8_formats(
                        src['src'], video_id, 'mp4', m3u8_id='hls', fatal=False))
            elif src_type == 'video/mp4':
                formats.append({'url': src['src'], 'format_id': 'http-mp4'})
            else:
                self.report_warning(f'Skipping unsupported format type: "{src_type}"')

        return {
            'id': video_id,
            'title': unescapeHTML(video_data['title']).strip(),
            'description': unescapeHTML(video_data.get('description')),
            'url': video_data['clipList'][0]['srces'][0]['src'],
            'formats': formats,
            'thumbnail': video_data.get('poster'),
            'duration': int_or_none(video_data.get('durationSec')),
        }
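Note: the rewritten `_real_extract` replaces a single hard-coded `url` with a formats list built by dispatching on each source's MIME type: HLS playlists are expanded into per-quality formats, progressive MP4s become one format each, and anything else is skipped with a warning. The shape of that dispatch, reduced to plain Python with made-up data:

    def collect_formats(sources):
        # sources: list of {'type': <mime>, 'src': <url>} dicts, as in the Bild JSON
        formats = []
        for src in sources:
            if src['type'] == 'application/x-mpegURL':
                # Stand-in for _extract_m3u8_formats, which expands the playlist
                formats.append({'url': src['src'], 'protocol': 'm3u8'})
            elif src['type'] == 'video/mp4':
                formats.append({'url': src['src'], 'format_id': 'http-mp4'})
            # unsupported types are skipped (the extractor warns instead)
        return formats

    print(collect_formats([
        {'type': 'video/mp4', 'src': 'https://example.com/v.mp4'},
        {'type': 'application/dash+xml', 'src': 'https://example.com/v.mpd'},
    ]))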

@@ -3,6 +3,7 @@
import hashlib
import itertools
import math
import re
import time
import urllib.parse

@@ -14,6 +15,7 @@
    GeoRestrictedError,
    InAdvancePagedList,
    OnDemandPagedList,
    bool_or_none,
    filter_dict,
    float_or_none,
    format_field,

@@ -34,27 +36,31 @@
    unsmuggle_url,
    url_or_none,
    urlencode_postdata,
    variadic,
)


class BilibiliBaseIE(InfoExtractor):
    _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')

    def extract_formats(self, play_info):
        format_names = {
            r['quality']: traverse_obj(r, 'new_description', 'display_desc')
            for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
        }

        audios = traverse_obj(play_info, ('dash', 'audio', ...))
        audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
        flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
        if flac_audio:
            audios.append(flac_audio)
        formats = [{
            'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
            'acodec': audio.get('codecs'),
            'acodec': traverse_obj(audio, ('codecs', {str.lower})),
            'vcodec': 'none',
            'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
            'filesize': int_or_none(audio.get('size'))
            'filesize': int_or_none(audio.get('size')),
            'format_id': str_or_none(audio.get('id')),
        } for audio in audios]

        formats.extend({

@@ -65,9 +71,13 @@ def extract_formats(self, play_info):
            'height': int_or_none(video.get('height')),
            'vcodec': video.get('codecs'),
            'acodec': 'none' if audios else None,
            'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
            'tbr': float_or_none(video.get('bandwidth'), scale=1000),
            'filesize': int_or_none(video.get('size')),
            'quality': int_or_none(video.get('id')),
            'format_id': traverse_obj(
                video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
                ('id', {str_or_none}), get_all=False),
            'format': format_names.get(video.get('id')),
        } for video in traverse_obj(play_info, ('dash', 'video', ...)))
@ -149,7 +159,7 @@ def _get_episodes_from_season(self, ss_id, url):
|
|||
|
||||
|
||||
class BiliBiliIE(BilibiliBaseIE):
|
||||
_VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/video/BV13x41117TL',
|
||||
|
@ -245,7 +255,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
|||
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
|
||||
'duration': 313.557,
|
||||
'upload_date': '20220709',
|
||||
'uploader': '小夫Tech',
|
||||
'uploader': '小夫太渴',
|
||||
'timestamp': 1657347907,
|
||||
'uploader_id': '1326814124',
|
||||
'comment_count': int,
|
||||
|
@ -502,7 +512,7 @@ def _real_extract(self, url):
|
|||
|
||||
|
||||
class BiliBiliBangumiMediaIE(BilibiliBaseIE):
|
||||
_VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/bangumi/media/md24097891',
|
||||
'info_dict': {
|
||||
|
@ -521,7 +531,7 @@ def _real_extract(self, url):
|
|||
|
||||
|
||||
class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
|
||||
_VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/ss(?P<id>\d+)'
|
||||
_VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/bangumi/play/ss26801',
|
||||
'info_dict': {
|
||||
|
@ -672,13 +682,35 @@ def get_entries(page_data):
|
|||
return self.playlist_result(paged_list, playlist_id)
|
||||
|
||||
|
||||
class BilibiliSpacePlaylistIE(BilibiliSpaceBaseIE):
|
||||
_VALID_URL = r'https?://space.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail\?sid=(?P<sid>\d+)'
|
||||
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
|
||||
def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
|
||||
for bvid in traverse_obj(page_data, (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})):
|
||||
yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)
|
||||
|
||||
def _get_uploader(self, uid, playlist_id):
|
||||
webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
|
||||
return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)
|
||||
|
||||
def _extract_playlist(self, fetch_page, get_metadata, get_entries):
|
||||
metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
|
||||
metadata.pop('page_count', None)
|
||||
metadata.pop('page_size', None)
|
||||
return metadata, page_list
|
||||
|
||||
|
||||
class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
|
||||
_VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
|
||||
'info_dict': {
|
||||
'id': '2142762_57445',
|
||||
'title': '《底特律 变人》'
|
||||
'title': '【完结】《底特律 变人》全结局流程解说',
|
||||
'description': '',
|
||||
'uploader': '老戴在此',
|
||||
'uploader_id': '2142762',
|
||||
'timestamp': int,
|
||||
'upload_date': str,
|
||||
'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
|
||||
},
|
||||
'playlist_mincount': 31,
|
||||
}]
|
||||
|
@ -699,22 +731,251 @@ def get_metadata(page_data):
|
|||
return {
|
||||
'page_count': math.ceil(entry_count / page_size),
|
||||
'page_size': page_size,
|
||||
'title': traverse_obj(page_data, ('meta', 'name'))
|
||||
'uploader': self._get_uploader(mid, playlist_id),
|
||||
**traverse_obj(page_data, {
|
||||
'title': ('meta', 'name', {str}),
|
||||
'description': ('meta', 'description', {str}),
|
||||
'uploader_id': ('meta', 'mid', {str_or_none}),
|
||||
'timestamp': ('meta', 'ptime', {int_or_none}),
|
||||
'thumbnail': ('meta', 'cover', {url_or_none}),
|
||||
})
|
||||
}
|
||||
|
||||
def get_entries(page_data):
|
||||
for entry in page_data.get('archives', []):
|
||||
yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}',
|
||||
BiliBiliIE, entry['bvid'])
|
||||
return self._get_entries(page_data, 'archives')
|
||||
|
||||
metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
|
||||
return self.playlist_result(paged_list, playlist_id, metadata['title'])
|
||||
return self.playlist_result(paged_list, playlist_id, **metadata)
|
||||
|
||||
|
||||
class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
|
||||
_VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
|
||||
'info_dict': {
|
||||
'id': '1958703906_547718',
|
||||
'title': '直播回放',
|
||||
'description': '直播回放',
|
||||
'uploader': '靡烟miya',
|
||||
'uploader_id': '1958703906',
|
||||
'timestamp': 1637985853,
|
||||
'upload_date': '20211127',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
},
|
||||
'playlist_mincount': 513,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mid, sid = self._match_valid_url(url).group('mid', 'sid')
|
||||
playlist_id = f'{mid}_{sid}'
|
||||
playlist_meta = traverse_obj(self._download_json(
|
||||
f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False
|
||||
), {
|
||||
'title': ('data', 'meta', 'name', {str}),
|
||||
'description': ('data', 'meta', 'description', {str}),
|
||||
'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
|
||||
'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
|
||||
'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
|
||||
})
|
||||
|
||||
def fetch_page(page_idx):
|
||||
return self._download_json(
|
||||
'https://api.bilibili.com/x/series/archives',
|
||||
playlist_id, note=f'Downloading page {page_idx}',
|
||||
query={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30})['data']
|
||||
|
||||
def get_metadata(page_data):
|
||||
page_size = page_data['page']['size']
|
||||
entry_count = page_data['page']['total']
|
||||
return {
|
||||
'page_count': math.ceil(entry_count / page_size),
|
||||
'page_size': page_size,
|
||||
'uploader': self._get_uploader(mid, playlist_id),
|
||||
**playlist_meta
|
||||
}
|
||||
|
||||
def get_entries(page_data):
|
||||
return self._get_entries(page_data, 'archives')
|
||||
|
||||
metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
|
||||
return self.playlist_result(paged_list, playlist_id, **metadata)
|
||||
|
||||
|
||||
class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
|
||||
_VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
|
||||
'info_dict': {
|
||||
'id': '1103407912',
|
||||
'title': '【V2】(旧)',
|
||||
'description': '',
|
||||
'uploader': '晓月春日',
|
||||
'uploader_id': '84912',
|
||||
'timestamp': 1604905176,
|
||||
'upload_date': '20201109',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
'playlist_mincount': 22,
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
fid = self._match_id(url)
|
||||
|
||||
list_info = self._download_json(
|
||||
f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
|
||||
fid, note='Downloading favlist metadata')
|
||||
if list_info['code'] == -403:
|
||||
self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')
|
||||
|
||||
entries = self._get_entries(self._download_json(
|
||||
f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
|
||||
fid, note='Download favlist entries'), 'data')
|
||||
|
||||
return self.playlist_result(entries, fid, **traverse_obj(list_info, ('data', 'info', {
|
||||
'title': ('title', {str}),
|
||||
'description': ('intro', {str}),
|
||||
'uploader': ('upper', 'name', {str}),
|
||||
'uploader_id': ('upper', 'mid', {str_or_none}),
|
||||
'timestamp': ('ctime', {int_or_none}),
|
||||
'modified_timestamp': ('mtime', {int_or_none}),
|
||||
'thumbnail': ('cover', {url_or_none}),
|
||||
'view_count': ('cnt_info', 'play', {int_or_none}),
|
||||
'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
|
||||
})))
|
||||
|
||||
|
||||
class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/watchlater/#/list',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _real_extract(self, url):
        list_id = getattr(self._get_cookies(url).get('DedeUserID'), 'value', 'watchlater')
        watchlater_info = self._download_json(
            'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
        if watchlater_info['code'] == -101:
            self.raise_login_required(msg='You need to login to access your watchlater list')
        entries = self._get_entries(watchlater_info, ('data', 'list'))
        return self.playlist_result(entries, id=list_id, title='稍后再看')


class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
        'info_dict': {
            'id': '5_547718',
            'title': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
        },
        'playlist_mincount': 513,
    }, {
        'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
        'info_dict': {
            'id': '5_547718',
        },
        'playlist_mincount': 513,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
            'title': '【V2】(旧)',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
        },
        'playlist_mincount': 22,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }, {
        'url': 'https://www.bilibili.com/medialist/play/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _extract_medialist(self, query, list_id):
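        # Note: pagination below is cursor-based rather than page-number-based;
        # each response's last item id is fed back as `oid` for the next
        # request, and iteration stops once `has_more` turns false.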
        for page_num in itertools.count(1):
            page_data = self._download_json(
                'https://api.bilibili.com/x/v2/medialist/resource/list',
                list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}'
            )['data']
            yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
            query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
            if not page_data.get('has_more', False):
                break

    def _real_extract(self, url):
        list_id = self._match_id(url)
        webpage = self._download_webpage(url, list_id)
        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
        if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
            error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
            error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
            if error_code == -400 and list_id == 'watchlater':
                self.raise_login_required('You need to login to access your watchlater playlist')
            elif error_code == -403:
                self.raise_login_required('This is a private playlist. You need to login as its owner')
            elif error_code == 11010:
                raise ExtractorError('Playlist is no longer available', expected=True)
            raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')

        query = {
            'ps': 20,
            'with_current': False,
            **traverse_obj(initial_state, {
                'type': ('playlist', 'type', {int_or_none}),
                'biz_id': ('playlist', 'id', {int_or_none}),
                'tid': ('tid', {int_or_none}),
                'sort_field': ('sortFiled', {int_or_none}),
                'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
            })
        }
        metadata = {
            'id': f'{query["type"]}_{query["biz_id"]}',
            **traverse_obj(initial_state, ('mediaListInfo', {
                'title': ('title', {str}),
                'uploader': ('upper', 'name', {str}),
                'uploader_id': ('upper', 'mid', {str_or_none}),
                'timestamp': ('ctime', {int_or_none}),
                'thumbnail': ('cover', {url_or_none}),
            })),
        }
        return self.playlist_result(self._extract_medialist(query, list_id), **metadata)


class BilibiliCategoryIE(InfoExtractor):
    IE_NAME = 'Bilibili category extractor'
    _MAX_RESULTS = 1000000
    _VALID_URL = r'https?://www\.bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
    _TESTS = [{
        'url': 'https://www.bilibili.com/v/kichiku/mad',
        'info_dict': {

@@ -1399,7 +1660,7 @@ def _real_extract(self, url):

class BiliLiveIE(InfoExtractor):
    _VALID_URL = r'https?://live.bilibili.com/(?:blanc/)?(?P<id>\d+)'
    _VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://live.bilibili.com/196',
@@ -1,56 +1,170 @@
import functools
import re

from .common import InfoExtractor
from ..utils import (
    clean_html,
    extract_attributes,
    get_element_text_and_html_by_tag,
    get_elements_by_class,
    join_nonempty,
    js_to_json,
    determine_ext,
    mimetype2ext,
    unified_strdate,
    url_or_none,
    urljoin,
    variadic,
)
from ..utils.traversal import traverse_obj


def html_get_element(tag=None, cls=None):
    assert tag or cls, 'One of tag or class is required'

    if cls:
        func = functools.partial(get_elements_by_class, cls, tag=tag)
    else:
        func = functools.partial(get_element_text_and_html_by_tag, tag)

    def html_get_element_wrapper(html):
        return variadic(func(html))[0]

    return html_get_element_wrapper


class BpbIE(InfoExtractor):
    IE_DESC = 'Bundeszentrale für politische Bildung'
    _VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P<id>[0-9]+)/'
    _VALID_URL = r'https?://(?:www\.|m\.)?bpb\.de/(?:[^/?#]+/)*(?P<id>\d+)(?:[/?#]|$)'

    _TEST = {
    _TESTS = [{
        'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
        'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f',
        'info_dict': {
            'id': '297',
            'ext': 'mp4',
            'creator': 'Kooperative Berlin',
            'description': 'md5:f4f75885ba009d3e2b156247a8941ce6',
            'release_date': '20160115',
            'series': 'Interview auf dem Geschichtsforum 1989 | 2009',
            'tags': ['Friedliche Revolution', 'Erinnerungskultur', 'Vergangenheitspolitik', 'DDR 1949 - 1990', 'Freiheitsrecht', 'BStU', 'Deutschland'],
            'thumbnail': 'https://www.bpb.de/cache/images/7/297_teaser_16x9_1240.jpg?8839D',
            'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
            'description': 'Joachim Gauck, erster Beauftragter für die Stasi-Unterlagen, spricht auf dem Geschichtsforum über die friedliche Revolution 1989 und eine "gewisse Traurigkeit" im Umgang mit der DDR-Vergangenheit.'
            'uploader': 'Bundeszentrale für politische Bildung',
        },
    }, {
        'url': 'https://www.bpb.de/mediathek/video/522184/krieg-flucht-und-falschmeldungen-wirstattdesinformation-2/',
        'info_dict': {
            'id': '522184',
            'ext': 'mp4',
            'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
            'description': 'md5:f83c795ff8f825a69456a9e51fc15903',
            'release_date': '20230621',
            'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
            'thumbnail': 'https://www.bpb.de/cache/images/4/522184_teaser_16x9_1240.png?EABFB',
            'title': 'md5:9b01ccdbf58dbf9e5c9f6e771a803b1c',
            'uploader': 'Bundeszentrale für politische Bildung',
        },
    }, {
        'url': 'https://www.bpb.de/lernen/bewegtbild-und-politische-bildung/webvideo/518789/krieg-flucht-und-falschmeldungen-wirstattdesinformation-1/',
        'info_dict': {
            'id': '518789',
            'ext': 'mp4',
            'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
            'description': 'md5:85228aed433e84ff0ff9bc582abd4ea8',
            'release_date': '20230302',
            'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
            'thumbnail': 'https://www.bpb.de/cache/images/9/518789_teaser_16x9_1240.jpeg?56D0D',
            'title': 'md5:3e956f264bb501f6383f10495a401da4',
            'uploader': 'Bundeszentrale für politische Bildung',
        },
    }, {
        'url': 'https://www.bpb.de/mediathek/podcasts/apuz-podcast/539727/apuz-20-china/',
        'only_matching': True,
    }, {
        'url': 'https://www.bpb.de/mediathek/audio/315813/folge-1-eine-einfuehrung/',
        'info_dict': {
            'id': '315813',
            'ext': 'mp3',
            'creator': 'Axel Schröder',
            'description': 'md5:eda9d1af34e5912efef5baf54fba4427',
            'release_date': '20200921',
            'series': 'Auf Endlagersuche. Der deutsche Weg zu einem sicheren Atommülllager',
            'tags': ['Atomenergie', 'Endlager', 'hoch-radioaktiver Abfall', 'Endlagersuche', 'Atommüll', 'Atomendlager', 'Gorleben', 'Deutschland'],
            'thumbnail': 'https://www.bpb.de/cache/images/3/315813_teaser_16x9_1240.png?92A94',
            'title': 'Folge 1: Eine Einführung',
            'uploader': 'Bundeszentrale für politische Bildung',
        },
    }, {
        'url': 'https://www.bpb.de/517806/die-weltanschauung-der-neuen-rechten/',
        'info_dict': {
            'id': '517806',
            'ext': 'mp3',
            'creator': 'Bundeszentrale für politische Bildung',
            'description': 'md5:594689600e919912aade0b2871cc3fed',
            'release_date': '20230127',
            'series': 'Vorträge des Fachtags "Modernisierer. Grenzgänger. Anstifter. Sechs Jahrzehnte \'Neue Rechte\'"',
            'tags': ['Rechtsextremismus', 'Konservatismus', 'Konservativismus', 'neue Rechte', 'Rechtspopulismus', 'Schnellroda', 'Deutschland'],
            'thumbnail': 'https://www.bpb.de/cache/images/6/517806_teaser_16x9_1240.png?7A7A0',
            'title': 'Die Weltanschauung der "Neuen Rechten"',
            'uploader': 'Bundeszentrale für politische Bildung',
        },
    }, {
        'url': 'https://www.bpb.de/mediathek/reihen/zahlen-und-fakten-soziale-situation-filme/520153/zahlen-und-fakten-die-soziale-situation-in-deutschland-migration/',
        'only_matching': True,
    }]

    _TITLE_RE = re.compile('(?P<title>[^<]*)<[^>]+>(?P<series>[^<]*)')

    def _parse_vue_attributes(self, name, string, video_id):
        attributes = extract_attributes(self._search_regex(rf'(<{name}(?:"[^"]*?"|[^>])*>)', string, name))

        for key, value in attributes.items():
            if key.startswith(':'):
                attributes[key] = self._parse_json(value, video_id, transform_source=js_to_json, fatal=False)

        return attributes

    @staticmethod
    def _process_source(source):
        url = url_or_none(source['src'])
        if not url:
            return None

        source_type = source.get('type', '')
        extension = mimetype2ext(source_type)
        is_video = source_type.startswith('video')
        note = url.rpartition('.')[0].rpartition('_')[2] if is_video else None

        return {
            'url': url,
            'ext': extension,
            'vcodec': None if is_video else 'none',
            'quality': 10 if note == 'high' else 0,
            'format_note': note,
            'format_id': join_nonempty(extension, note),
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<h2 class="white">(.*?)</h2>', webpage, 'title')
        video_info_dicts = re.findall(
            r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage)

        formats = []
        for video_info in video_info_dicts:
            video_info = self._parse_json(
                video_info, video_id, transform_source=js_to_json, fatal=False)
            if not video_info:
                continue
            video_url = video_info.get('src')
            if not video_url:
                continue
            quality = 'high' if '_high' in video_url else 'low'
            formats.append({
                'url': video_url,
                'quality': 10 if quality == 'high' else 0,
                'format_note': quality,
                'format_id': '%s-%s' % (quality, determine_ext(video_url)),
            })
        title_result = traverse_obj(webpage, ({html_get_element(cls='opening-header__title')}, {self._TITLE_RE.match}))
        json_lds = list(self._yield_json_ld(webpage, video_id, fatal=False))

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': self._og_search_description(webpage),
            'title': traverse_obj(title_result, ('title', {str.strip})) or None,
            # This metadata could be interpreted otherwise, but it fits "series" the most
            'series': traverse_obj(title_result, ('series', {str.strip})) or None,
            'description': join_nonempty(*traverse_obj(webpage, [(
                {html_get_element(cls='opening-intro')},
                [{html_get_element(tag='bpb-accordion-item')}, {html_get_element(cls='text-content')}],
            ), {clean_html}]), delim='\n\n') or None,
            'creator': self._html_search_meta('author', webpage),
            'uploader': self._html_search_meta('publisher', webpage),
            'release_date': unified_strdate(self._html_search_meta('date', webpage)),
            'tags': traverse_obj(json_lds, (..., 'keywords', {lambda x: x.split(',')}, ...)),
            **traverse_obj(self._parse_vue_attributes('bpb-player', webpage, video_id), {
                'formats': (':sources', ..., {self._process_source}),
                'thumbnail': ('poster', {lambda x: urljoin(url, x)}),
            }),
        }

39
yt_dlp/extractor/canal1.py
Normal file
@@ -0,0 +1,39 @@
from .common import InfoExtractor


class Canal1IE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.|noticias\.)?canal1\.com\.co/(?:[^?#&])+/(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'https://canal1.com.co/noticias/napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco/',
        'info_dict': {
            'id': '63b39f6b354977084b85ab54',
            'display_id': 'napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco',
            'title': 'Ñapa I Una cadena de producción de arroz que se quedó en veremos y abandonada en el departamento del Chocó',
            'description': 'md5:bc49c6d64d20610ea1e7daf079a0d013',
            'thumbnail': r're:^https?://[^?#]+63b39f6b354977084b85ab54',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://noticias.canal1.com.co/noticias/tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter/',
        'info_dict': {
            'id': '63b39e93f5fd223aa32250fb',
            'display_id': 'tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter',
            'title': 'Tres I El triste récord que impuso Elon Musk, el dueño de Tesla y de Twitter',
            'description': 'md5:d9f691f131a21ce6767ca6c05d17d791',
            'thumbnail': r're:^https?://[^?#]+63b39e93f5fd223aa32250fb',
            'ext': 'mp4',
        },
    }, {
        # Geo-restricted to Colombia
        'url': 'https://canal1.com.co/programas/guerreros-canal-1/video-inedito-guerreros-despedida-kewin-zarate/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        return self.url_result(
            self._search_regex(r'"embedUrl"\s*:\s*"([^"]+)', webpage, 'embed url'),
            display_id=display_id, url_transparent=True)
136
yt_dlp/extractor/caracoltv.py
Normal file
@@ -0,0 +1,136 @@
import base64
import json
import uuid

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    js_to_json,
    traverse_obj,
    urljoin,
)


class CaracolTvPlayIE(InfoExtractor):
    _VALID_URL = r'https?://play\.caracoltv\.com/videoDetails/(?P<id>[^/?#]+)'
    _NETRC_MACHINE = 'caracoltv-play'

    _TESTS = [{
        'url': 'https://play.caracoltv.com/videoDetails/OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
        'info_dict': {
            'id': 'OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
            'title': 'La teoría del promedio',
            'description': 'md5:1cdd6d2c13f19ef0d9649ab81a023ac3',
        },
        'playlist_count': 6,
    }, {
        'url': 'https://play.caracoltv.com/videoDetails/OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==/ella?season=0',
        'info_dict': {
            'id': 'OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==',
            'title': 'Ella',
            'description': 'md5:a639b1feb5ddcc0cff92a489b4e544b8',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://play.caracoltv.com/videoDetails/OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==/la-vuelta-al-mundo-en-80-risas-2022?season=0',
        'info_dict': {
            'id': 'OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==',
            'title': 'La vuelta al mundo en 80 risas 2022',
            'description': 'md5:e97aac36106e5c37ebf947b3350106a4',
        },
        'playlist_count': 17,
    }, {
        'url': 'https://play.caracoltv.com/videoDetails/MzoxX3BwbjRmNjB1',
        'only_matching': True,
    }]

    _USER_TOKEN = None

    def _extract_app_token(self, webpage):
        config_js_path = self._search_regex(
            r'<script[^>]+src\s*=\s*"([^"]+coreConfig.js[^"]+)', webpage, 'config js url', fatal=False)

        mediation_config = {} if not config_js_path else self._search_json(
            r'mediation\s*:', self._download_webpage(
                urljoin('https://play.caracoltv.com/', config_js_path), None, fatal=False, note='Extracting JS config'),
            'mediation_config', None, transform_source=js_to_json, fatal=False)

        key = traverse_obj(
            mediation_config, ('live', 'key')) or '795cd9c089a1fc48094524a5eba85a3fca1331817c802f601735907c8bbb4f50'
        secret = traverse_obj(
            mediation_config, ('live', 'secret')) or '64dec00a6989ba83d087621465b5e5d38bdac22033b0613b659c442c78976fa0'

        return base64.b64encode(f'{key}:{secret}'.encode()).decode()

    def _perform_login(self, email, password):
        webpage = self._download_webpage('https://play.caracoltv.com/', None, fatal=False)
        app_token = self._extract_app_token(webpage)

        bearer_token = self._download_json(
            'https://eu-gateway.inmobly.com/applications/oauth', None, data=b'', note='Retrieving bearer token',
            headers={'Authorization': f'Basic {app_token}'})['token']

        self._USER_TOKEN = self._download_json(
            'https://eu-gateway.inmobly.com/user/login', None, note='Performing login', headers={
                'Content-Type': 'application/json',
                'Authorization': f'Bearer {bearer_token}',
            }, data=json.dumps({
                'device_data': {
                    'device_id': str(uuid.uuid4()),
                    'device_token': '',
                    'device_type': 'web'
                },
                'login_data': {
                    'enabled': True,
                    'email': email,
                    'password': password,
                }
            }).encode())['user_token']

    def _extract_video(self, video_data, series_id=None, season_id=None, season_number=None):
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_data['stream_url'], series_id, 'mp4')

        return {
            'id': video_data['id'],
            'title': video_data.get('name'),
            'description': video_data.get('description'),
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': traverse_obj(
                video_data, ('extra_thumbs', ..., {'url': 'thumb_url', 'height': 'height', 'width': 'width'})),
            'series_id': series_id,
            'season_id': season_id,
            'season_number': int_or_none(season_number),
            'episode_number': int_or_none(video_data.get('item_order')),
            'is_live': video_data.get('entry_type') == 3,
        }

    def _extract_series_seasons(self, seasons, series_id):
        for season in seasons:
            api_response = self._download_json(
                'https://eu-gateway.inmobly.com/feed', series_id, query={'season_id': season['id']},
                headers={'Authorization': f'Bearer {self._USER_TOKEN}'})

            season_number = season.get('order')
            for episode in api_response['items']:
                yield self._extract_video(episode, series_id, season['id'], season_number)

    def _real_extract(self, url):
        series_id = self._match_id(url)

        if self._USER_TOKEN is None:
            self._perform_login('guest@inmobly.com', 'Test@gus1')

        api_response = self._download_json(
            'https://eu-gateway.inmobly.com/feed', series_id, query={'include_ids': series_id},
            headers={'Authorization': f'Bearer {self._USER_TOKEN}'})['items'][0]

        if not api_response.get('seasons'):
            return self._extract_video(api_response)

        return self.playlist_result(
            self._extract_series_seasons(api_response['seasons'], series_id),
            series_id, **traverse_obj(api_response, {
                'title': 'name',
                'description': 'description',
            }))
@@ -339,12 +339,12 @@ def _new_claims_token(self, email, password):
        data = json.dumps({'jwt': sig}).encode()
        headers = {'content-type': 'application/json', 'ott-device-type': 'web'}
        resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/token',
                                   None, data=data, headers=headers)
                                   None, data=data, headers=headers, expected_status=426)
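        # Note: this endpoint appears to answer with HTTP 426 (Upgrade Required)
        # even for usable responses, so 426 is whitelisted and the JSON body is
        # parsed regardless of the status code.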
        cbc_access_token = resp['accessToken']

        headers = {'content-type': 'application/json', 'ott-device-type': 'web', 'ott-access-token': cbc_access_token}
        resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/profile',
                                   None, headers=headers)
                                   None, headers=headers, expected_status=426)
        return resp['claimsToken']

    def _get_claims_token_expiry(self):
@@ -90,10 +90,17 @@ class CCCPlaylistIE(InfoExtractor):
            'id': '30c3',
        },
        'playlist_count': 135,
    }, {
        'url': 'https://media.ccc.de/c/DS2023',
        'info_dict': {
            'title': 'Datenspuren 2023',
            'id': 'DS2023',
        },
        'playlist_count': 37
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url).lower()
        playlist_id = self._match_id(url)

        conf = self._download_json(
            'https://media.ccc.de/public/conferences/' + playlist_id,
@@ -1,31 +1,72 @@
import time
import hashlib
import re
import urllib
import uuid

from .common import InfoExtractor
from .openload import PhantomJSwrapper
from ..utils import (
    ExtractorError,
    UserNotLive,
    determine_ext,
    int_or_none,
    js_to_json,
    parse_resolution,
    str_or_none,
    traverse_obj,
    unescapeHTML,
    unified_strdate,
    url_or_none,
    urlencode_postdata,
    urljoin,
)


class DouyuTVIE(InfoExtractor):
    IE_DESC = '斗鱼'
class DouyuBaseIE(InfoExtractor):
    def _download_cryptojs_md5(self, video_id):
        for url in [
            'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
            'https://cdn.bootcdn.net/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
        ]:
            js_code = self._download_webpage(
                url, video_id, note='Downloading signing dependency', fatal=False)
            if js_code:
                self.cache.store('douyu', 'crypto-js-md5', js_code)
                return js_code
        raise ExtractorError('Unable to download JS dependency (crypto-js/md5)')

    def _get_cryptojs_md5(self, video_id):
        return self.cache.load('douyu', 'crypto-js-md5') or self._download_cryptojs_md5(video_id)

    def _calc_sign(self, sign_func, video_id, a):
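        # Note on the signing flow: the site's obfuscated `ub98484234` function
        # is concatenated with a crypto-js MD5 build and executed in PhantomJS;
        # it prints a query string whose parameters sign the stream request.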
        b = uuid.uuid4().hex
        c = round(time.time())
        js_script = f'{self._get_cryptojs_md5(video_id)};{sign_func};console.log(ub98484234("{a}","{b}","{c}"))'
        phantom = PhantomJSwrapper(self)
        result = phantom.execute(js_script, video_id,
                                 note='Executing JS signing script').strip()
        return {i: v[0] for i, v in urllib.parse.parse_qs(result).items()}

    def _search_js_sign_func(self, webpage, fatal=True):
        # The greedy look-behind ensures last possible script tag is matched
        return self._search_regex(
            r'(?:<script.*)?<script[^>]*>(.*?ub98484234.*?)</script>', webpage, 'JS sign func', fatal=fatal)


class DouyuTVIE(DouyuBaseIE):
    IE_DESC = '斗鱼直播'
    _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(topic/\w+\?rid=|(?:[^/]+/))*(?P<id>[A-Za-z0-9]+)'
    _TESTS = [{
        'url': 'http://www.douyutv.com/iseven',
        'url': 'https://www.douyu.com/pigff',
        'info_dict': {
            'id': '17732',
            'display_id': 'iseven',
            'ext': 'flv',
            'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': r're:.*m7show@163\.com.*',
            'thumbnail': r're:^https?://.*\.png',
            'uploader': '7师傅',
            'id': '24422',
            'display_id': 'pigff',
            'ext': 'mp4',
            'title': 're:^【PIGFF】.* [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': r'≥15级牌子看鱼吧置顶帖进粉丝vx群',
            'thumbnail': str,
            'uploader': 'pigff',
            'is_live': True,
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': True,

@@ -85,15 +126,43 @@ class DouyuTVIE(InfoExtractor):
        'only_matching': True,
    }]

    def _get_sign_func(self, room_id, video_id):
        return self._download_json(
            f'https://www.douyu.com/swf_api/homeH5Enc?rids={room_id}', video_id,
            note='Getting signing script')['data'][f'room{room_id}']

    def _extract_stream_formats(self, stream_formats):
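        # Note: rate 0 appears to be the untranscoded original; `rate % -10000`
        # maps every other rate id far below it so the original sorts first
        # while the remaining rate ids still order among themselves.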
        formats = []
        for stream_info in traverse_obj(stream_formats, (..., 'data')):
            stream_url = urljoin(
                traverse_obj(stream_info, 'rtmp_url'), traverse_obj(stream_info, 'rtmp_live'))
            if stream_url:
                rate_id = traverse_obj(stream_info, ('rate', {int_or_none}))
                rate_info = traverse_obj(stream_info, ('multirates', lambda _, v: v['rate'] == rate_id), get_all=False)
                ext = determine_ext(stream_url)
                formats.append({
                    'url': stream_url,
                    'format_id': str_or_none(rate_id),
                    'ext': 'mp4' if ext == 'm3u8' else ext,
                    'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
                    'quality': rate_id % -10000 if rate_id is not None else None,
                    **traverse_obj(rate_info, {
                        'format': ('name', {str_or_none}),
                        'tbr': ('bit', {int_or_none}),
                    }),
                })
        return formats

    def _real_extract(self, url):
        video_id = self._match_id(url)

        if video_id.isdigit():
            room_id = video_id
        else:
            page = self._download_webpage(url, video_id)
            room_id = self._html_search_regex(
                r'"room_id\\?"\s*:\s*(\d+),', page, 'room id')
        webpage = self._download_webpage(url, video_id)
        room_id = self._search_regex(r'\$ROOM\.room_id\s*=\s*(\d+)', webpage, 'room id')

        if self._search_regex(r'"videoLoop"\s*:\s*(\d+)', webpage, 'loop', default='') == '1':
            raise UserNotLive('The channel is auto-playing VODs', video_id=video_id)
        if self._search_regex(r'\$ROOM\.show_status\s*=\s*(\d+)', webpage, 'status', default='') == '2':
            raise UserNotLive(video_id=video_id)

        # Grab metadata from API
        params = {

@@ -102,110 +171,136 @@ def _real_extract(self, url):
            'time': int(time.time()),
        }
        params['auth'] = hashlib.md5(
            f'room/{video_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest()
        room = self._download_json(
            f'room/{room_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest()
        room = traverse_obj(self._download_json(
            f'http://www.douyutv.com/api/v1/room/{room_id}', video_id,
            note='Downloading room info', query=params)['data']
            note='Downloading room info', query=params, fatal=False), 'data')

        # 1 = live, 2 = offline
        if room.get('show_status') == '2':
            raise ExtractorError('Live stream is offline', expected=True)
        if traverse_obj(room, 'show_status') == '2':
            raise UserNotLive(video_id=video_id)

        video_url = urljoin('https://hls3-akm.douyucdn.cn/', self._search_regex(r'(live/.*)', room['hls_url'], 'URL'))
        formats, subs = self._extract_m3u8_formats_and_subtitles(video_url, room_id)
        js_sign_func = self._search_js_sign_func(webpage, fatal=False) or self._get_sign_func(room_id, video_id)
        form_data = {
            'rate': 0,
            **self._calc_sign(js_sign_func, video_id, room_id),
        }
        stream_formats = [self._download_json(
            f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
            video_id, note="Downloading livestream format",
            data=urlencode_postdata(form_data))]

        title = unescapeHTML(room['room_name'])
        description = room.get('show_details')
        thumbnail = room.get('room_src')
        uploader = room.get('nickname')
        for rate_id in traverse_obj(stream_formats[0], ('data', 'multirates', ..., 'rate')):
            if rate_id != traverse_obj(stream_formats[0], ('data', 'rate')):
                form_data['rate'] = rate_id
                stream_formats.append(self._download_json(
                    f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
                    video_id, note=f'Downloading livestream format {rate_id}',
                    data=urlencode_postdata(form_data)))

        return {
            'id': room_id,
            'display_id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'formats': self._extract_stream_formats(stream_formats),
            'is_live': True,
            'subtitles': subs,
            'formats': formats,
            **traverse_obj(room, {
                'display_id': ('url', {str}, {lambda i: i[1:]}),
                'title': ('room_name', {unescapeHTML}),
                'description': ('show_details', {str}),
                'uploader': ('nickname', {str}),
                'thumbnail': ('room_src', {url_or_none}),
            })
        }


class DouyuShowIE(InfoExtractor):
class DouyuShowIE(DouyuBaseIE):
    _VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)'

    _TESTS = [{
        'url': 'https://v.douyu.com/show/rjNBdvnVXNzvE2yw',
        'md5': '0c2cfd068ee2afe657801269b2d86214',
        'url': 'https://v.douyu.com/show/mPyq7oVNe5Yv1gLY',
        'info_dict': {
            'id': 'rjNBdvnVXNzvE2yw',
            'id': 'mPyq7oVNe5Yv1gLY',
            'ext': 'mp4',
            'title': '陈一发儿:砒霜 我有个室友系列!04-01 22点场',
            'duration': 7150.08,
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': '陈一发儿',
            'uploader_id': 'XrZwYelr5wbK',
            'uploader_url': 'https://v.douyu.com/author/XrZwYelr5wbK',
            'upload_date': '20170402',
            'title': '四川人小时候的味道“蒜苗回锅肉”,传统菜不能丢,要常做来吃',
            'duration': 633,
            'thumbnail': str,
            'uploader': '美食作家王刚V',
            'uploader_id': 'OVAO4NVx1m7Q',
            'timestamp': 1661850002,
            'upload_date': '20220830',
            'view_count': int,
            'tags': ['美食', '美食综合'],
        },
    }, {
        'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw',
        'only_matching': True,
    }]

    _FORMATS = {
        'super': '原画',
        'high': '超清',
        'normal': '高清',
    }

    _QUALITIES = {
        'super': -1,
        'high': -2,
        'normal': -3,
    }

    _RESOLUTIONS = {
        'super': '1920x1080',
        'high': '1280x720',
        'normal': '852x480',
    }

    def _real_extract(self, url):
        url = url.replace('vmobile.', 'v.')
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        room_info = self._parse_json(self._search_regex(
            r'var\s+\$ROOM\s*=\s*({.+});', webpage, 'room info'), video_id)
        video_info = self._search_json(
            r'<script>\s*window\.\$DATA\s*=', webpage,
            'video info', video_id, transform_source=js_to_json)

        video_info = None
        js_sign_func = self._search_js_sign_func(webpage)
        form_data = {
            'vid': video_id,
            **self._calc_sign(js_sign_func, video_id, video_info['ROOM']['point_id']),
        }
        url_info = self._download_json(
            'https://v.douyu.com/api/stream/getStreamUrl', video_id,
            data=urlencode_postdata(form_data), note="Downloading video formats")

        for trial in range(5):
            # Sometimes Douyu rejects our request. Let's try it more times
            try:
                video_info = self._download_json(
                    'https://vmobile.douyu.com/video/getInfo', video_id,
                    query={'vid': video_id},
                    headers={
                        'Referer': url,
                        'x-requested-with': 'XMLHttpRequest',
                    })
                break
            except ExtractorError:
                self._sleep(1, video_id)

        if not video_info:
            raise ExtractorError('Can\'t fetch video info')

        formats = self._extract_m3u8_formats(
            video_info['data']['video_url'], video_id,
            entry_protocol='m3u8_native', ext='mp4')

        upload_date = unified_strdate(self._html_search_regex(
            r'<em>上传时间:</em><span>([^<]+)</span>', webpage,
            'upload date', fatal=False))

        uploader = uploader_id = uploader_url = None
        mobj = re.search(
            r'(?m)<a[^>]+href="/author/([0-9a-zA-Z]+)".+?<strong[^>]+title="([^"]+)"',
            webpage)
        if mobj:
            uploader_id, uploader = mobj.groups()
            uploader_url = urljoin(url, '/author/' + uploader_id)
        formats = []
        for name, url in traverse_obj(url_info, ('data', 'thumb_video', {dict.items}, ...)):
            video_url = traverse_obj(url, ('url', {url_or_none}))
            if video_url:
                ext = determine_ext(video_url)
                formats.append({
                    'format': self._FORMATS.get(name),
                    'format_id': name,
                    'url': video_url,
                    'quality': self._QUALITIES.get(name),
                    'ext': 'mp4' if ext == 'm3u8' else ext,
                    'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
                    **parse_resolution(self._RESOLUTIONS.get(name))
                })
            else:
                self.to_screen(
                    f'"{self._FORMATS.get(name, name)}" format may require logging in. {self._login_hint()}')

        return {
            'id': video_id,
            'title': room_info['name'],
            'formats': formats,
            'duration': room_info.get('duration'),
            'thumbnail': room_info.get('pic'),
            'upload_date': upload_date,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'uploader_url': uploader_url,
            **traverse_obj(video_info, ('DATA', {
                'title': ('content', 'title', {str}),
                'uploader': ('content', 'author', {str}),
                'uploader_id': ('content', 'up_id', {str_or_none}),
                'duration': ('content', 'video_duration', {int_or_none}),
                'thumbnail': ('content', 'video_pic', {url_or_none}),
                'timestamp': ('content', 'create_time', {int_or_none}),
                'view_count': ('content', 'view_num', {int_or_none}),
                'tags': ('videoTag', ..., 'tagName', {str}),
            }))
        }

96
yt_dlp/extractor/eplus.py
Normal file
@@ -0,0 +1,96 @@
from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    try_call,
    unified_timestamp,
)


class EplusIbIE(InfoExtractor):
    IE_NAME = 'eplus:inbound'
    IE_DESC = 'e+ (イープラス) overseas'
    _VALID_URL = r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)'
    _TESTS = [{
        'url': 'https://live.eplus.jp/ex/player?ib=YEFxb3Vyc2Dombnjg7blkrLlrablnJLjgrnjgq%2Fjg7zjg6vjgqLjgqTjg4njg6vlkIzlpb3kvJpgTGllbGxhIQ%3D%3D',
        'info_dict': {
            'id': '354502-0001-002',
            'title': 'LoveLive!Series Presents COUNTDOWN LoveLive! 2021→2022~LIVE with a smile!~【Streaming+(配信)】',
            'live_status': 'was_live',
            'release_date': '20211231',
            'release_timestamp': 1640952000,
            'description': str,
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
        'expected_warnings': [
            'Could not find the playlist URL. This event may not be accessible',
            'No video formats found!',
            'Requested format is not available',
        ],
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        data_json = self._search_json(r'<script>\s*var app\s*=', webpage, 'data json', video_id)

        delivery_status = data_json.get('delivery_status')
        archive_mode = data_json.get('archive_mode')
        release_timestamp = try_call(lambda: unified_timestamp(data_json['event_datetime']) - 32400)
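        # `event_datetime` is local Japan time; subtracting 32400 s (9 h)
        # converts the parsed JST value to UTC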
        release_timestamp_str = data_json.get('event_datetime_text')  # JST

        self.write_debug(f'delivery_status = {delivery_status}, archive_mode = {archive_mode}')

        if delivery_status == 'PREPARING':
            live_status = 'is_upcoming'
        elif delivery_status == 'STARTED':
            live_status = 'is_live'
        elif delivery_status == 'STOPPED':
            if archive_mode != 'ON':
                raise ExtractorError(
                    'This event has ended and there is no archive for this event', expected=True)
            live_status = 'post_live'
        elif delivery_status == 'WAIT_CONFIRM_ARCHIVED':
            live_status = 'post_live'
        elif delivery_status == 'CONFIRMED_ARCHIVE':
            live_status = 'was_live'
        else:
            self.report_warning(f'Unknown delivery_status {delivery_status}, treat it as a live')
            live_status = 'is_live'

        formats = []

        m3u8_playlist_urls = self._search_json(
            r'var listChannels\s*=', webpage, 'hls URLs', video_id, contains_pattern=r'\[.+\]', default=[])
        if not m3u8_playlist_urls:
            if live_status == 'is_upcoming':
                self.raise_no_formats(
                    f'Could not find the playlist URL. This live event will begin at {release_timestamp_str} JST', expected=True)
            else:
                self.raise_no_formats(
                    'Could not find the playlist URL. This event may not be accessible', expected=True)
        elif live_status == 'is_upcoming':
            self.raise_no_formats(f'This live event will begin at {release_timestamp_str} JST', expected=True)
        elif live_status == 'post_live':
            self.raise_no_formats('This event has ended, and the archive will be available shortly', expected=True)
        else:
            for m3u8_playlist_url in m3u8_playlist_urls:
                formats.extend(self._extract_m3u8_formats(m3u8_playlist_url, video_id))
            # FIXME: HTTP request headers need to be updated to continue download
            warning = 'Due to technical limitations, the download will be interrupted after one hour'
            if live_status == 'is_live':
                self.report_warning(warning)
            elif live_status == 'was_live':
                self.report_warning(f'{warning}. You can restart to continue the download')

        return {
            'id': data_json['app_id'],
            'title': data_json.get('app_name'),
            'formats': formats,
            'live_status': live_status,
            'description': data_json.get('content'),
            'release_timestamp': release_timestamp,
        }
@@ -11,8 +11,8 @@ class ExpressenIE(InfoExtractor):
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?(?:expressen|di)\.se/
                        (?:(?:tvspelare/video|videoplayer/embed)/)?
                        tv/(?:[^/]+/)*
                        (?:(?:tvspelare/video|video-?player/embed)/)?
                        (?:tv|nyheter)/(?:[^/?#]+/)*
                        (?P<id>[^/?#&]+)
                    '''
    _EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1']

@@ -42,6 +42,12 @@ class ExpressenIE(InfoExtractor):
    }, {
        'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
        'only_matching': True,
    }, {
        'url': 'https://www.expressen.se/video-player/embed/tv/nyheter/ekero-fodda-olof-gustafsson-forvaltar-knarkbaronen-pablo-escobars-namn',
        'only_matching': True,
    }, {
        'url': 'https://www.expressen.se/nyheter/efter-egna-telefonbluffen-escobar-stammer-klarna/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
@@ -74,6 +74,22 @@ class FacebookIE(InfoExtractor):
    _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'

    _TESTS = [{
        'url': 'https://www.facebook.com/radiokicksfm/videos/3676516585958356/',
        'info_dict': {
            'id': '3676516585958356',
            'ext': 'mp4',
            'title': 'dr Adam Przygoda',
            'description': 'md5:34675bda53336b1d16400265c2bb9b3b',
            'uploader': 'RADIO KICKS FM',
            'upload_date': '20230818',
            'timestamp': 1692346159,
            'thumbnail': r're:^https?://.*',
            'uploader_id': '100063551323670',
            'duration': 3132.184,
            'view_count': int,
            'concurrent_view_count': 0,
        },
    }, {
        'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
        'md5': '6a40d33c0eccbb1af76cf0485a052659',
        'info_dict': {

@@ -97,7 +113,7 @@ class FacebookIE(InfoExtractor):
            'upload_date': '20140506',
            'timestamp': 1399398998,
            'thumbnail': r're:^https?://.*',
            'uploader_id': 'pfbid04scW44U4P9iTyLZAGy8y8W3pR3i2VugvHCimiRudUAVbN3MPp9eXBaYFcgVworZwl',
            'uploader_id': 'pfbid028wxorhX2ErLFJ578N6P3crHD3PHmXTCqCvfBpsnbSLmbokwSY75p5hWBjHGkG4zxl',
            'duration': 131.03,
            'concurrent_view_count': int,
        },

@@ -179,7 +195,7 @@ class FacebookIE(InfoExtractor):
            'timestamp': 1486648217,
            'upload_date': '20170209',
            'uploader': 'Yaroslav Korpan',
            'uploader_id': 'pfbid029y8j22EwH3ikeqgH3SEP9G3CAi9kmWKgXJJG9s5geV7mo3J2bvURqHCdgucRgAyhl',
            'uploader_id': 'pfbid06AScABAWcW91qpiuGrLt99Ef9tvwHoXP6t8KeFYEqkSfreMtfa9nTveh8b2ZEVSWl',
            'concurrent_view_count': int,
            'thumbnail': r're:^https?://.*',
            'view_count': int,

@@ -274,7 +290,7 @@ class FacebookIE(InfoExtractor):
            'title': 'Josef',
            'thumbnail': r're:^https?://.*',
            'concurrent_view_count': int,
            'uploader_id': 'pfbid02gXHbDwxumkaKJQaTGUf3znYfYzTuidGEWawiramNx4YamSj2afwYSRkpcjtHtMRJl',
            'uploader_id': 'pfbid0cibUN6tV7DYgdbJdsUFN46wc4jKpVSPAvJQhFofGqBGmVn3V3JtAs2tfUwziw2hUl',
            'timestamp': 1549275572,
            'duration': 3.413,
            'uploader': 'Josef Novak',

@@ -401,9 +417,9 @@ def _extract_from_url(self, url, video_id):

        def extract_metadata(webpage):
            post_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall(
                r'handleWithCustomApplyEach\(\s*ScheduledApplyEach\s*,\s*(\{.+?\})\s*\);', webpage)]
                r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage)]
            post = traverse_obj(post_data, (
                ..., 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
                ..., 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
            media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: (
                k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
            title = get_first(media, ('title', 'text'))

@@ -489,18 +505,17 @@ def process_formats(info):
            # with non-browser User-Agent.
            for f in info['formats']:
                f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
            info['_format_sort_fields'] = ('res', 'quality')

        def extract_relay_data(_filter):
            return self._parse_json(self._search_regex(
                r'handleWithCustomApplyEach\([^,]+,\s*({.*?%s.*?})\);' % _filter,
                r'data-sjs>({.*?%s.*?})</script>' % _filter,
                webpage, 'replay data', default='{}'), video_id, fatal=False) or {}

        def extract_relay_prefetched_data(_filter):
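            # Note: the relay payload now nests another require/__bbox level
            # inside data-sjs script tags; the traverse_obj path below walks
            # both the flat and the nested shape and returns the first
            # RelayPrefetchedStreamCache hit.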
            replay_data = extract_relay_data(_filter)
            for require in (replay_data.get('require') or []):
                if require[0] == 'RelayPrefetchedStreamCache':
                    return try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
            return traverse_obj(extract_relay_data(_filter), (
                'require', (None, (..., ..., ..., '__bbox', 'require')),
                lambda _, v: 'RelayPrefetchedStreamCache' in v, ..., ...,
                '__bbox', 'result', 'data', {dict}), get_all=False) or {}

        if not video_data:
            server_js_data = self._parse_json(self._search_regex([

@@ -511,7 +526,7 @@ def extract_relay_prefetched_data(_filter):

        if not video_data:
            data = extract_relay_prefetched_data(
                r'"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+"')
                r'"(?:dash_manifest|playable_url(?:_quality_hd)?)')
            if data:
                entries = []

@@ -526,7 +541,8 @@ def parse_graphql_video(video):
            formats = []
            q = qualities(['sd', 'hd'])
            for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
                                   ('playable_url_dash', '')):
                                   ('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
                                   ('browser_native_sd_url', 'sd')):
                playable_url = video.get(key)
                if not playable_url:
                    continue

@@ -535,7 +551,8 @@ def parse_graphql_video(video):
                else:
                    formats.append({
                        'format_id': format_id,
                        'quality': q(format_id),
                        # sd, hd formats w/o resolution info should be deprioritized below DASH
                        'quality': q(format_id) - 3,
                        'url': playable_url,
                    })
                extract_dash_manifest(video, formats)

@@ -702,9 +719,11 @@ def parse_attachment(attachment, key='media'):
                for src_type in ('src', 'src_no_ratelimit'):
                    src = f[0].get('%s_%s' % (quality, src_type))
                    if src:
                        preference = -10 if format_id == 'progressive' else -1
                        # sd, hd formats w/o resolution info should be deprioritized below DASH
                        # TODO: investigate if progressive or src formats still exist
                        preference = -10 if format_id == 'progressive' else -3
                        if quality == 'hd':
                            preference += 5
                            preference += 1
                        formats.append({
                            'format_id': '%s_%s_%s' % (format_id, quality, src_type),
                            'url': src,
@@ -60,6 +60,7 @@ class Funker530IE(InfoExtractor):
    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        info = {}
        rumble_url = list(RumbleEmbedIE._extract_embed_urls(url, webpage))
        if rumble_url:
            info = {'url': rumble_url[0], 'ie_key': RumbleEmbedIE.ie_key()}
@@ -2370,7 +2370,7 @@ def _extract_kvs(self, url, webpage, video_id):
            'id': flashvars['video_id'],
            'display_id': display_id,
            'title': title,
            'thumbnail': thumbnail,
            'thumbnail': urljoin(url, thumbnail),
            'formats': formats,
        }

@@ -66,7 +66,7 @@ def _entries(self, file_id):
        query_params = {
            'contentId': file_id,
            'token': self._TOKEN,
            'websiteToken': 12345,
            'websiteToken': '7fd94ds12fds4',  # From https://gofile.io/dist/js/alljs.js
        }
        password = self.get_param('videopassword')
        if password:
@@ -383,9 +383,9 @@ def __get_current_timestamp():
        months = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
        days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

        time_now = datetime.datetime.utcnow()
        time_now = datetime.datetime.now(datetime.timezone.utc)
        format_string = "{} {} {} %H:%M:%S UTC %Y".format(days[time_now.weekday()], months[time_now.month], time_now.day)
        time_string = datetime.datetime.utcnow().strftime(format_string)
        time_string = time_now.strftime(format_string)
        return time_string
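        # Note: datetime.utcnow() is deprecated since Python 3.12; the
        # timezone-aware now(datetime.timezone.utc) yields the same UTC values
        # and lets both uses above share a single snapshot of the current time.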

    def __str__(self):
@@ -1,9 +1,9 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    int_or_none,
    parse_age_limit,
    parse_iso8601,
    time_seconds,
    update_url_query,
)

@@ -11,15 +11,14 @@
class IndavideoEmbedIE(InfoExtractor):
    _VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)'
    # Some example URLs covered by generic extractor:
    # http://indavideo.hu/video/Vicces_cica_1
    # http://index.indavideo.hu/video/2015_0728_beregszasz
    # http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
    # http://erotika.indavideo.hu/video/Amator_tini_punci
    # http://film.indavideo.hu/video/f_hrom_nagymamm_volt
    # http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
    _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//embed\.indavideo\.hu/player/video/[\da-f]+)']
    # https://indavideo.hu/video/Vicces_cica_1
    # https://index.indavideo.hu/video/Hod_Nemetorszagban
    # https://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
    # https://film.indavideo.hu/video/f_farkaslesen
    # https://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
    _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)//embed\.indavideo\.hu/player/video/[\da-f]+)']
    _TESTS = [{
        'url': 'http://indavideo.hu/player/video/1bdc3c6d80/',
        'url': 'https://indavideo.hu/player/video/1bdc3c6d80/',
        'md5': 'c8a507a1c7410685f83a06eaeeaafeab',
        'info_dict': {
            'id': '1837039',

@@ -36,21 +35,33 @@ class IndavideoEmbedIE(InfoExtractor):
            'tags': ['tánc', 'cica', 'cuki', 'cukiajanlo', 'newsroom'],
        },
    }, {
        'url': 'http://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1',
        'only_matching': True,
    }, {
        'url': 'http://assets.indavideo.hu/swf/player.swf?v=fe25e500&vID=1bdc3c6d80&autostart=1&hide=1&i=1',
        'url': 'https://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1',
        'only_matching': True,
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://indavideo.hu/video/Vicces_cica_1',
        'info_dict': {
            'id': '1335611',
            'ext': 'mp4',
            'title': 'Vicces cica',
            'description': 'Játszik a tablettel. :D',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Jet_Pack',
            'uploader_id': '491217',
            'timestamp': 1390821212,
            'upload_date': '20140127',
            'duration': 7,
            'age_limit': 0,
            'tags': ['cica', 'Jet_Pack'],
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        video = self._download_json(
            'https://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id,
            video_id)['data']

        title = video['title']
            f'https://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/{video_id}/',
            video_id, query={'_': time_seconds()})['data']

        video_urls = []

@@ -60,33 +71,21 @@ def _real_extract(self, url):
        elif isinstance(video_files, dict):
            video_urls.extend(video_files.values())

        video_file = video.get('video_file')
        if video:
            video_urls.append(video_file)
        video_urls = list(set(video_urls))

        video_prefix = video_urls[0].rsplit('/', 1)[0]

        for flv_file in video.get('flv_files', []):
            flv_url = '%s/%s' % (video_prefix, flv_file)
            if flv_url not in video_urls:
                video_urls.append(flv_url)

        filesh = video.get('filesh')
        filesh = video.get('filesh') or {}

        formats = []
        for video_url in video_urls:
            height = int_or_none(self._search_regex(
                r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None))
            if filesh:
                if not height:
                    continue
                token = filesh.get(compat_str(height))
                if token is None:
                    continue
                video_url = update_url_query(video_url, {'token': token})
            if not height and len(filesh) == 1:
                height = int_or_none(list(filesh.keys())[0])
            token = filesh.get(str(height))
            if token is None:
                continue
            formats.append({
                'url': video_url,
                'url': update_url_query(video_url, {'token': token}),
                'height': height,
            })

@@ -103,7 +102,7 @@ def _real_extract(self, url):

        return {
            'id': video.get('id') or video_id,
            'title': title,
            'title': video.get('title'),
            'description': video.get('description'),
            'thumbnails': thumbnails,
            'uploader': video.get('user_name'),
@@ -57,8 +57,8 @@ class LecturioIE(LecturioBaseIE):
    _VALID_URL = r'''(?x)
                    https://
                        (?:
                            app\.lecturio\.com/([^/]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))|
                            (?:www\.)?lecturio\.de/[^/]+/(?P<nt_de>[^/?#&]+)\.vortrag
                            app\.lecturio\.com/([^/?#]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))|
                            (?:www\.)?lecturio\.de/(?:[^/?#]+/)+(?P<nt_de>[^/?#&]+)\.vortrag
                        )
                    '''
    _TESTS = [{

@@ -73,6 +73,9 @@ class LecturioIE(LecturioBaseIE):
    }, {
        'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag',
        'only_matching': True,
    }, {
        'url': 'https://www.lecturio.de/jura/oeffentliches-recht-at-1-staatsexamen/oeffentliches-recht-staatsexamen.vortrag',
        'only_matching': True,
    }, {
        'url': 'https://app.lecturio.com/#/lecture/c/6434/39634',
        'only_matching': True,
@@ -17,11 +17,12 @@ class MassengeschmackTVIE(InfoExtractor):

    _TEST = {
        'url': 'https://massengeschmack.tv/play/fktv202',
        'md5': 'a9e054db9c2b5a08f0a0527cc201e8d3',
        'md5': '9996f314994a49fefe5f39aa1b07ae21',
        'info_dict': {
            'id': 'fktv202',
            'ext': 'mp4',
            'title': 'Fernsehkritik-TV - Folge 202',
            'title': 'Fernsehkritik-TV #202',
            'thumbnail': 'https://cache.massengeschmack.tv/img/mag/fktv202.jpg'
        },
    }

@@ -29,9 +30,6 @@ def _real_extract(self, url):
        episode = self._match_id(url)

        webpage = self._download_webpage(url, episode)
        title = clean_html(self._html_search_regex(
            '<h3>([^<]+)</h3>', webpage, 'title'))
        thumbnail = self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)
        sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json)

        formats = []

@@ -67,7 +65,8 @@ def _real_extract(self, url):

        return {
            'id': episode,
            'title': title,
            'title': clean_html(self._html_search_regex(
                r'<span[^>]+\bid=["\']clip-title["\'][^>]*>([^<]+)', webpage, 'title', fatal=False)),
            'formats': formats,
            'thumbnail': thumbnail,
            'thumbnail': self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False),
        }
@@ -1,5 +1,8 @@
from ..utils import (
unified_strdate
ExtractorError,
traverse_obj,
unified_strdate,
url_or_none,
)
from .common import InfoExtractor
from ..compat import (

@@ -15,7 +18,7 @@ class MediaKlikkIE(InfoExtractor):
(?P<id>[^/#?_]+)'''

_TESTS = [{
# mediaklikk. date in html.
# (old) mediaklikk. date in html.
'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/',
'info_dict': {
'id': '4754129',

@@ -23,9 +26,21 @@ class MediaKlikkIE(InfoExtractor):
'ext': 'mp4',
'upload_date': '20210901',
'thumbnail': 'http://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg'
},
'skip': 'Webpage redirects to 404 page',
}, {
# mediaklikk. date in html.
'url': 'https://mediaklikk.hu/video/hazajaro-fabova-hegyseg-kishont-koronaja/',
'info_dict': {
'id': '6696133',
'title': 'Hazajáró, Fabova-hegység - Kishont koronája',
'display_id': 'hazajaro-fabova-hegyseg-kishont-koronaja',
'ext': 'mp4',
'upload_date': '20230903',
'thumbnail': 'https://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg'
}
}, {
# m4sport
# (old) m4sport
'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/',
'info_dict': {
'id': '4754999',

@@ -33,6 +48,18 @@ class MediaKlikkIE(InfoExtractor):
'ext': 'mp4',
'upload_date': '20210830',
'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/08/vlcsnap-2021-08-30-18h21m20s10-1024x576.jpg'
},
'skip': 'Webpage redirects to 404 page',
}, {
# m4sport
'url': 'https://m4sport.hu/sportkozvetitesek/video/2023/09/08/atletika-gyemant-liga-brusszel/',
'info_dict': {
'id': '6711136',
'title': 'Atlétika – Gyémánt Liga, Brüsszel',
'display_id': 'atletika-gyemant-liga-brusszel',
'ext': 'mp4',
'upload_date': '20230908',
'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-08-22h43m18s691.jpg'
}
}, {
# m4sport with *video/ url and no date

@@ -40,20 +67,33 @@ class MediaKlikkIE(InfoExtractor):
'info_dict': {
'id': '4492099',
'title': 'Real Madrid - Chelsea 1-1',
'display_id': 'real-madrid-chelsea-1-1',
'ext': 'mp4',
'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png'
'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png'
}
}, {
# hirado
# (old) hirado
'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/',
'info_dict': {
'id': '4760120',
'title': 'Feltételeket szabott a főváros',
'ext': 'mp4',
'thumbnail': 'http://hirado.hu/wp-content/uploads/sites/4/2021/09/vlcsnap-2021-09-01-20h20m37s165.jpg'
},
'skip': 'Webpage redirects to video list page',
}, {
# hirado
'url': 'https://hirado.hu/belfold/video/2023/09/11/marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal',
'info_dict': {
'id': '6716068',
'title': 'Marad az éves elszámolás a napelemekre beruházó családoknál',
'display_id': 'marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal',
'ext': 'mp4',
'upload_date': '20230911',
'thumbnail': 'https://hirado.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-11-09h16m09s882.jpg'
}
}, {
# petofilive
# (old) petofilive
'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/',
'info_dict': {
'id': '4571948',

@@ -61,6 +101,18 @@ class MediaKlikkIE(InfoExtractor):
'ext': 'mp4',
'upload_date': '20210607',
'thumbnail': 'http://petofilive.hu/wp-content/uploads/sites/4/2021/06/vlcsnap-2021-06-07-22h14m23s915-1024x576.jpg'
},
'skip': 'Webpage redirects to empty page',
}, {
# petofilive
'url': 'https://petofilive.hu/video/2023/09/09/futball-fesztival-a-margitszigeten/',
'info_dict': {
'id': '6713233',
'title': 'Futball Fesztivál a Margitszigeten',
'display_id': 'futball-fesztival-a-margitszigeten',
'ext': 'mp4',
'upload_date': '20230909',
'thumbnail': 'https://petofilive.hu/wp-content/uploads/sites/4/2023/09/Clipboard11-2.jpg'
}
}]

@@ -84,8 +136,12 @@ def _real_extract(self, url):
player_data['video'] = player_data.pop('token')
player_page = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', video_id, query=player_data)
playlist_url = self._proto_relative_url(compat_urllib_parse_unquote(
self._html_search_regex(r'\"file\":\s*\"(\\?/\\?/.*playlist\.m3u8)\"', player_page, 'playlist_url')).replace('\\/', '/'))
player_json = self._search_json(
r'\bpl\.setup\s*\(', player_page, 'player json', video_id, end_pattern=r'\);')
playlist_url = traverse_obj(
player_json, ('playlist', lambda _, v: v['type'] == 'hls', 'file', {url_or_none}), get_all=False)
if not playlist_url:
raise ExtractorError('Unable to extract playlist url')

formats = self._extract_wowza_formats(
playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])
@@ -14,7 +14,7 @@ class MediaStreamBaseIE(InfoExtractor):
_BASE_URL_RE = r'https?://mdstrm\.com/(?:embed|live-stream)'

def _extract_mediastream_urls(self, webpage):
yield from traverse_obj(list(self._yield_json_ld(webpage, None)), (
yield from traverse_obj(list(self._yield_json_ld(webpage, None, fatal=False)), (
lambda _, v: v['@type'] == 'VideoObject', ('embedUrl', 'contentUrl'),
{lambda x: x if re.match(rf'{self._BASE_URL_RE}/\w+', x) else None}))

@@ -106,8 +106,12 @@ def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)

if 'Debido a tu ubicación no puedes ver el contenido' in webpage:
self.raise_geo_restricted()
for message in [
'Debido a tu ubicación no puedes ver el contenido',
'You are not allowed to watch this video: Geo Fencing Restriction'
]:
if message in webpage:
self.raise_geo_restricted()

player_config = self._search_json(r'window\.MDSTRM\.OPTIONS\s*=', webpage, 'metadata', video_id)
@@ -20,7 +20,7 @@ class MixcloudBaseIE(InfoExtractor):
def _call_api(self, object_type, object_fields, display_id, username, slug=None):
lookup_key = object_type + 'Lookup'
return self._download_json(
'https://www.mixcloud.com/graphql', display_id, query={
'https://app.mixcloud.com/graphql', display_id, query={
'query': '''{
%s(lookup: {username: "%s"%s}) {
%s

@@ -46,7 +46,15 @@ class MixcloudIE(MixcloudBaseIE):
'view_count': int,
'timestamp': 1321359578,
'upload_date': '20111115',
'uploader_url': 'https://www.mixcloud.com/dholbach/',
'artist': 'Submorphics & Chino , Telekinesis, Porter Robinson, Enei, Breakage ft Jess Mills',
'duration': 3723,
'tags': [],
'comment_count': int,
'repost_count': int,
'like_count': int,
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
'info_dict': {

@@ -60,7 +68,14 @@ class MixcloudIE(MixcloudBaseIE):
'view_count': int,
'timestamp': 1422987057,
'upload_date': '20150203',
'uploader_url': 'https://www.mixcloud.com/gillespeterson/',
'duration': 2992,
'tags': [],
'comment_count': int,
'repost_count': int,
'like_count': int,
},
'params': {'skip_download': '404 playback error on site'},
}, {
'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
'only_matching': True,

@@ -259,9 +274,9 @@ def _real_extract(self, url):
cloudcast_url = cloudcast.get('url')
if not cloudcast_url:
continue
slug = try_get(cloudcast, lambda x: x['slug'], compat_str)
item_slug = try_get(cloudcast, lambda x: x['slug'], compat_str)
owner_username = try_get(cloudcast, lambda x: x['owner']['username'], compat_str)
video_id = '%s_%s' % (owner_username, slug) if slug and owner_username else None
video_id = f'{owner_username}_{item_slug}' if item_slug and owner_username else None
entries.append(self.url_result(
cloudcast_url, MixcloudIE.ie_key(), video_id))

@@ -284,7 +299,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'info_dict': {
'id': 'dholbach_uploads',
'title': 'Daniel Holbach (uploads)',
'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
},
'playlist_mincount': 36,
}, {

@@ -292,7 +307,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'info_dict': {
'id': 'dholbach_uploads',
'title': 'Daniel Holbach (uploads)',
'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
},
'playlist_mincount': 36,
}, {

@@ -300,7 +315,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'info_dict': {
'id': 'dholbach_favorites',
'title': 'Daniel Holbach (favorites)',
'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
},
# 'params': {
# 'playlist_items': '1-100',

@@ -323,9 +338,9 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'info_dict': {
'id': 'FirstEar_stream',
'title': 'First Ear (stream)',
'description': 'Curators of good music\r\n\r\nfirstearmusic.com',
'description': 'we maraud for ears',
},
'playlist_mincount': 271,
'playlist_mincount': 269,
}]

_TITLE_KEY = 'displayName'
@@ -151,7 +151,7 @@ def _real_extract(self, url):
'd': 'days',
}
kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
upload_date = (datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(**kwargs)).strftime('%Y%m%d')

comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage))
uploader_id = self._html_search_regex(
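
Note on the hunk above: the switch from `datetime.datetime.utcnow()` to `datetime.datetime.now(datetime.timezone.utc)` keeps the same arithmetic but uses a timezone-aware "now". A minimal sketch of the whole relative-date computation; the parsing of `delta` is assumed from surrounding context not shown in the hunk:

import datetime

_AGO_UNITS = {
    'h': 'hours',
    'd': 'days',
}

def approx_upload_date(uploaded_ago='3d'):
    # e.g. '3d' -> 3 days before the current UTC time, formatted YYYYMMDD
    delta = int(uploaded_ago[:-1])
    kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
    now = datetime.datetime.now(datetime.timezone.utc)
    return (now - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
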
@@ -33,7 +33,7 @@ def _real_extract(self, url):

class N1InfoIIE(InfoExtractor):
IE_NAME = 'N1Info:article'
_VALID_URL = r'https?://(?:(?:(?:ba|rs|hr)\.)?n1info\.(?:com|si)|nova\.rs)/(?:[^/]+/){1,2}(?P<id>[^/]+)'
_VALID_URL = r'https?://(?:(?:\w+\.)?n1info\.\w+|nova\.rs)/(?:[^/?#]+/){1,2}(?P<id>[^/?#]+)'
_TESTS = [{
# Youtube embedded
'url': 'https://rs.n1info.com/sport-klub/tenis/kako-je-djokovic-propustio-istorijsku-priliku-video/',

@@ -94,6 +94,16 @@ class N1InfoIIE(InfoExtractor):
'upload_date': '20211102',
'timestamp': 1635861677,
},
}, {
'url': 'https://n1info.rs/vesti/cuta-biti-u-kosovskoj-mitrovici-znaci-da-te-docekaju-eksplozivnim-napravama/',
'info_dict': {
'id': '1332368',
'ext': 'mp4',
'title': 'Ćuta: Biti u Kosovskoj Mitrovici znači da te dočekaju eksplozivnim napravama',
'upload_date': '20230620',
'timestamp': 1687290536,
'thumbnail': 'https://cdn.brid.tv/live/partners/26827/snapshot/1332368_th_6492013a8356f_1687290170.jpg'
},
}, {
'url': 'https://hr.n1info.com/vijesti/pravobraniteljica-o-ubojstvu-u-zagrebu-radi-se-o-doista-nezapamcenoj-situaciji/',
'only_matching': True,

@@ -105,19 +115,35 @@ def _real_extract(self, url):
title = self._html_search_regex(r'<h1[^>]+>(.+?)</h1>', webpage, 'title')
timestamp = unified_timestamp(self._html_search_meta('article:published_time', webpage))

videos = re.findall(r'(?m)(<video[^>]+>)', webpage)
plugin_data = self._html_search_meta('BridPlugin', webpage)
entries = []
for video in videos:
video_data = extract_attributes(video)
entries.append({
'_type': 'url_transparent',
'url': video_data.get('data-url'),
'id': video_data.get('id'),
'title': title,
'thumbnail': video_data.get('data-thumbnail'),
'timestamp': timestamp,
'ie_key': 'N1InfoAsset'})
if plugin_data:
site_id = self._html_search_regex(r'site:(\d+)', webpage, 'site id')
for video_data in re.findall(r'\$bp\("Brid_\d+", (.+)\);', webpage):
video_id = self._parse_json(video_data, title)['video']
entries.append({
'id': video_id,
'title': title,
'timestamp': timestamp,
'thumbnail': self._html_search_meta('thumbnailURL', webpage),
'formats': self._extract_m3u8_formats(
f'https://cdn-uc.brid.tv/live/partners/{site_id}/streaming/{video_id}/{video_id}.m3u8',
video_id, fatal=False),
})
else:
# Old player still present in older articles
videos = re.findall(r'(?m)(<video[^>]+>)', webpage)
for video in videos:
video_data = extract_attributes(video)
entries.append({
'_type': 'url_transparent',
'url': video_data.get('data-url'),
'id': video_data.get('id'),
'title': title,
'thumbnail': video_data.get('data-thumbnail'),
'timestamp': timestamp,
'ie_key': 'N1InfoAsset',
})

embedded_videos = re.findall(r'(<iframe[^>]+>)', webpage)
for embedded_video in embedded_videos:
@@ -21,7 +21,7 @@
class NaverBaseIE(InfoExtractor):
_CAPTION_EXT_RE = r'\.(?:ttml|vtt)'

@staticmethod # NB: Used in VLiveWebArchiveIE, WeverseIE
@staticmethod # NB: Used in WeverseIE
def process_subtitles(vod_data, process_url):
ret = {'subtitles': {}, 'automatic_captions': {}}
for caption in traverse_obj(vod_data, ('captions', 'list', ...)):
@@ -265,6 +265,26 @@ class NitterIE(InfoExtractor):
'repost_count': int,
'comment_count': int,
}
}, { # no OpenGraph title
'url': f'https://{current_instance}/LocalBateman/status/1678455464038735895#m',
'info_dict': {
'id': '1678455464038735895',
'ext': 'mp4',
'title': 'Your Typical Local Man - Local man, what did Romanians ever do to you?',
'description': 'Local man, what did Romanians ever do to you?',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Your Typical Local Man',
'uploader_id': 'LocalBateman',
'uploader_url': f'https://{current_instance}/LocalBateman',
'upload_date': '20230710',
'timestamp': 1689009900,
'view_count': int,
'like_count': int,
'repost_count': int,
'comment_count': int,
},
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
'params': {'skip_download': 'm3u8'},
}
]

@@ -292,7 +312,7 @@ def _real_extract(self, url):
'ext': ext
}]

title = description = self._og_search_description(full_webpage) or self._html_search_regex(
title = description = self._og_search_description(full_webpage, default=None) or self._html_search_regex(
r'<div class="tweet-content[^>]+>([^<]+)</div>', webpage, 'title', fatal=False)

uploader_id = self._html_search_regex(
@@ -6,7 +6,6 @@
determine_ext,
int_or_none,
js_to_json,
qualities,
traverse_obj,
unified_strdate,
url_or_none,

@@ -49,77 +48,52 @@ def _real_extract(self, url):
duration = None
formats = []

player = self._parse_json(
self._search_regex(
(r'(?:(?:replacePlaceholders|processAdTagModifier).*?:\s*)?(?:replacePlaceholders|processAdTagModifier)\s*\(\s*(?P<json>{.*?})\s*\)(?:\s*\))?\s*,',
r'Player\.init\s*\([^,]+,(?P<cndn>\s*\w+\s*\?)?\s*(?P<json>{(?(cndn).+?|.+)})\s*(?(cndn):|,\s*{.+?}\s*\)\s*;)'),
webpage, 'player', default='{}', group='json'), video_id, fatal=False)
if player:
for format_id, format_list in player['tracks'].items():
if not isinstance(format_list, list):
format_list = [format_list]
for format_dict in format_list:
if not isinstance(format_dict, dict):
continue
if (not self.get_param('allow_unplayable_formats')
and traverse_obj(format_dict, ('drm', 'keySystem'))):
has_drm = True
continue
format_url = url_or_none(format_dict.get('src'))
format_type = format_dict.get('type')
ext = determine_ext(format_url)
if (format_type == 'application/x-mpegURL'
or format_id == 'HLS' or ext == 'm3u8'):
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls',
fatal=False))
elif (format_type == 'application/dash+xml'
or format_id == 'DASH' or ext == 'mpd'):
formats.extend(self._extract_mpd_formats(
format_url, video_id, mpd_id='dash', fatal=False))
else:
formats.append({
'url': format_url,
})
duration = int_or_none(player.get('duration'))
else:
# Old path, not actual as of 08.04.2020
bitrates = self._parse_json(
self._search_regex(
r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
video_id, transform_source=js_to_json)

QUALITIES = ('lq', 'mq', 'hq', 'hd')
quality_key = qualities(QUALITIES)

for format_id, format_list in bitrates.items():
if not isinstance(format_list, list):
format_list = [format_list]
for format_url in format_list:
format_url = url_or_none(format_url)
if not format_url:
continue
if format_id == 'hls':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, ext='mp4',
entry_protocol='m3u8_native', m3u8_id='hls',
fatal=False))
continue
f = {
def process_format_list(format_list, format_id=""):
nonlocal formats, has_drm
if not isinstance(format_list, list):
format_list = [format_list]
for format_dict in format_list:
if not isinstance(format_dict, dict):
continue
if (not self.get_param('allow_unplayable_formats')
and traverse_obj(format_dict, ('drm', 'keySystem'))):
has_drm = True
continue
format_url = url_or_none(format_dict.get('src'))
format_type = format_dict.get('type')
ext = determine_ext(format_url)
if (format_type == 'application/x-mpegURL'
or format_id == 'HLS' or ext == 'm3u8'):
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls',
fatal=False))
elif (format_type == 'application/dash+xml'
or format_id == 'DASH' or ext == 'mpd'):
formats.extend(self._extract_mpd_formats(
format_url, video_id, mpd_id='dash', fatal=False))
else:
formats.append({
'url': format_url,
}
f_id = format_id
for quality in QUALITIES:
if '%s.mp4' % quality in format_url:
f_id += '-%s' % quality
f.update({
'quality': quality_key(quality),
'format_note': quality.upper(),
})
break
f['format_id'] = f_id
formats.append(f)
})

player = self._search_json(
r'player:', webpage, 'player', video_id, fatal=False, end_pattern=r';\s*</script>')
if player:
for src in traverse_obj(player, ('lib', 'source', 'sources', ...)):
process_format_list(src)
duration = traverse_obj(player, ('sourceInfo', 'duration', {int_or_none}))
if not formats and not has_drm:
# older code path, in use before August 2023
player = self._parse_json(
self._search_regex(
(r'(?:(?:replacePlaceholders|processAdTagModifier).*?:\s*)?(?:replacePlaceholders|processAdTagModifier)\s*\(\s*(?P<json>{.*?})\s*\)(?:\s*\))?\s*,',
r'Player\.init\s*\([^,]+,(?P<cndn>\s*\w+\s*\?)?\s*(?P<json>{(?(cndn).+?|.+)})\s*(?(cndn):|,\s*{.+?}\s*\)\s*;)'),
webpage, 'player', group='json'), video_id)
if player:
for format_id, format_list in player['tracks'].items():
process_format_list(format_list, format_id)
duration = int_or_none(player.get('duration'))

if not formats and has_drm:
self.report_drm(video_id)
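
For context on `quality_key = qualities(QUALITIES)` in the nova hunk above: yt-dlp's `qualities` helper returns a ranking function over a preference tuple. A plain-Python sketch of that behaviour (not the library code itself):

def qualities(quality_ids):
    def q(qid):
        try:
            return quality_ids.index(qid)  # later entries rank higher
        except ValueError:
            return -1  # unknown qualities sort below all known ones
    return q

quality_key = qualities(('lq', 'mq', 'hq', 'hd'))
assert quality_key('hd') > quality_key('lq')
assert quality_key('uhd') == -1
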
@@ -1,7 +1,7 @@
import calendar
import json
import functools
from datetime import datetime
from datetime import datetime, timezone
from random import random

from .common import InfoExtractor

@@ -243,7 +243,7 @@ def _mark_watched(self, base_url, video_id, delivery_info):
invocation_id = delivery_info.get('InvocationId')
stream_id = traverse_obj(delivery_info, ('Delivery', 'Streams', ..., 'PublicID'), get_all=False, expected_type=str)
if invocation_id and stream_id and duration:
timestamp_str = f'/Date({calendar.timegm(datetime.utcnow().timetuple())}000)/'
timestamp_str = f'/Date({calendar.timegm(datetime.now(timezone.utc).timetuple())}000)/'
data = {
'streamRequests': [
{
113
yt_dlp/extractor/pornbox.py
Normal file

@@ -0,0 +1,113 @@
from .common import InfoExtractor
from ..compat import functools
from ..utils import (
int_or_none,
parse_duration,
parse_iso8601,
qualities,
str_or_none,
traverse_obj,
url_or_none,
)

class PornboxIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?pornbox\.com/application/watch-page/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://pornbox.com/application/watch-page/212108',
'md5': '3ff6b6e206f263be4c5e987a3162ac6e',
'info_dict': {
'id': '212108',
'ext': 'mp4',
'title': 'md5:ececc5c6e6c9dd35d290c45fed05fd49',
'uploader': 'Lily Strong',
'timestamp': 1665871200,
'upload_date': '20221015',
'age_limit': 18,
'availability': 'needs_auth',
'duration': 1505,
'cast': ['Lily Strong', 'John Strong'],
'tags': 'count:11',
'description': 'md5:589c7f33e183aa8aa939537300efb859',
'thumbnail': r're:^https?://cdn-image\.gtflixtv\.com.*\.jpg.*$'
}
}, {
'url': 'https://pornbox.com/application/watch-page/216045',
'info_dict': {
'id': '216045',
'title': 'md5:3e48528e73a9a2b12f7a2772ed0b26a2',
'description': 'md5:3e631dcaac029f15ed434e402d1b06c7',
'uploader': 'VK Studio',
'timestamp': 1618264800,
'upload_date': '20210412',
'age_limit': 18,
'availability': 'premium_only',
'duration': 2710,
'cast': 'count:3',
'tags': 'count:29',
'thumbnail': r're:^https?://cdn-image\.gtflixtv\.com.*\.jpg.*$',
'subtitles': 'count:6'
},
'params': {
'skip_download': True,
'ignore_no_formats_error': True
},
'expected_warnings': [
'You are either not logged in or do not have access to this scene',
'No video formats found', 'Requested format is not available']
}]

def _real_extract(self, url):
video_id = self._match_id(url)

public_data = self._download_json(f'https://pornbox.com/contents/{video_id}', video_id)

subtitles = {country_code: [{
'url': f'https://pornbox.com/contents/{video_id}/subtitles/{country_code}',
'ext': 'srt'
}] for country_code in traverse_obj(public_data, ('subtitles', ..., {str}))}

is_free_scene = traverse_obj(
public_data, ('price', 'is_available_for_free', {bool}), default=False)

metadata = {
'id': video_id,
**traverse_obj(public_data, {
'title': ('scene_name', {str.strip}),
'description': ('small_description', {str.strip}),
'uploader': 'studio',
'duration': ('runtime', {parse_duration}),
'cast': (('models', 'male_models'), ..., 'model_name'),
'thumbnail': ('player_poster', {url_or_none}),
'tags': ('niches', ..., 'niche'),
}),
'age_limit': 18,
'timestamp': parse_iso8601(traverse_obj(
public_data, ('studios', 'release_date'), 'publish_date')),
'availability': self._availability(needs_auth=True, needs_premium=not is_free_scene),
'subtitles': subtitles,
}

if not public_data.get('is_purchased') or not is_free_scene:
self.raise_login_required(
'You are either not logged in or do not have access to this scene', metadata_available=True)
return metadata

media_id = traverse_obj(public_data, (
'medias', lambda _, v: v['title'] == 'Full video', 'media_id', {int}), get_all=False)
if not media_id:
self.raise_no_formats('Could not find stream id', video_id=video_id)

stream_data = self._download_json(
f'https://pornbox.com/media/{media_id}/stream', video_id=video_id, note='Getting manifest urls')

get_quality = qualities(['web', 'vga', 'hd', '1080p', '4k', '8k'])
metadata['formats'] = traverse_obj(stream_data, ('qualities', lambda _, v: v['src'], {
'url': 'src',
'vbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
'format_id': ('quality', {str_or_none}),
'quality': ('quality', {get_quality}),
'width': ('size', {lambda x: int(x[:-1])}),
}))

return metadata
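
The `traverse_obj` call that builds `metadata['formats']` above maps each quality entry through a dict template in one pass. A rough plain-Python equivalent for a single hypothetical response; the response shape is assumed from the template, not from any pornbox documentation:

stream_data = {'qualities': [
    {'src': 'https://cdn.example/vga.mp4', 'bitrate': 1500000, 'quality': 'vga', 'size': '640w'},
    {'src': None, 'bitrate': 8000000, 'quality': '4k', 'size': '3840w'},  # dropped: falsy 'src'
]}
order = ['web', 'vga', 'hd', '1080p', '4k', '8k']
formats = [{
    'url': q['src'],
    'vbr': q['bitrate'] // 1000,  # scale=1000, as in the extractor
    'format_id': q['quality'],
    'quality': order.index(q['quality']) if q['quality'] in order else -1,
    'width': int(q['size'][:-1]),  # '640w' -> 640
} for q in stream_data['qualities'] if q.get('src')]
assert formats[0]['width'] == 640
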
@@ -1,97 +1,155 @@
import re
import json
from datetime import date
from urllib.parse import unquote

from .common import InfoExtractor
from ..utils import merge_dicts
from ..compat import functools
from ..utils import ExtractorError, make_archive_id, urljoin
from ..utils.traversal import traverse_obj

class Pr0grammStaticIE(InfoExtractor):
# Possible urls:
# https://pr0gramm.com/static/5466437
_VALID_URL = r'https?://pr0gramm\.com/static/(?P<id>[0-9]+)'
_TEST = {
'url': 'https://pr0gramm.com/static/5466437',
'md5': '52fa540d70d3edc286846f8ca85938aa',
'info_dict': {
'id': '5466437',
'ext': 'mp4',
'title': 'pr0gramm-5466437 by g11st',
'uploader': 'g11st',
'upload_date': '20221221',
}
}

def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)

# Fetch media sources
entries = self._parse_html5_media_entries(url, webpage, video_id)
media_info = entries[0]

# Fetch author
uploader = self._html_search_regex(r'by\W+([\w-]+)\W+', webpage, 'uploader')

# Fetch approx upload timestamp from filename
# Have None-defaults in case the extraction fails
uploadDay = None
uploadMon = None
uploadYear = None
uploadTimestr = None
# (//img.pr0gramm.com/2022/12/21/62ae8aa5e2da0ebf.mp4)
m = re.search(r'//img\.pr0gramm\.com/(?P<year>[\d]+)/(?P<mon>[\d]+)/(?P<day>[\d]+)/\w+\.\w{,4}', webpage)

if (m):
# Up to a day of accuracy should suffice...
uploadDay = m.groupdict().get('day')
uploadMon = m.groupdict().get('mon')
uploadYear = m.groupdict().get('year')
uploadTimestr = uploadYear + uploadMon + uploadDay

return merge_dicts({
'id': video_id,
'title': 'pr0gramm-%s%s' % (video_id, (' by ' + uploader) if uploader else ''),
'uploader': uploader,
'upload_date': uploadTimestr
}, media_info)

# This extractor is for the primary url (used for sharing, and appears in the
# location bar) Since this page loads the DOM via JS, yt-dl can't find any
# video information here. So let's redirect to a compatibility version of
# the site, which does contain the <video>-element by itself, without requiring
# js to be ran.
class Pr0grammIE(InfoExtractor):
# Possible urls:
# https://pr0gramm.com/new/546637
# https://pr0gramm.com/new/video/546637
# https://pr0gramm.com/top/546637
# https://pr0gramm.com/top/video/546637
# https://pr0gramm.com/user/g11st/uploads/5466437
# https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290
# https://pr0gramm.com/user/froschler/reinziehen-1elf/5232030
# https://pr0gramm.com/user/froschler/1elf/5232030
# https://pr0gramm.com/new/5495710:comment62621020 <- this is not the id!
# https://pr0gramm.com/top/fruher war alles damals/5498175

_VALID_URL = r'https?:\/\/pr0gramm\.com\/(?!static/\d+).+?\/(?P<id>[\d]+)(:|$)'
_TEST = {
_VALID_URL = r'https?://pr0gramm\.com\/(?:[^/?#]+/)+(?P<id>[\d]+)(?:[/?#:]|$)'
_TESTS = [{
# Tags require account
'url': 'https://pr0gramm.com/new/video/5466437',
'info_dict': {
'id': '5466437',
'ext': 'mp4',
'title': 'pr0gramm-5466437 by g11st',
'tags': ['Neon Genesis Evangelion', 'Touhou Project', 'Fly me to the Moon', 'Marisad', 'Marisa Kirisame', 'video', 'sound', 'Marisa', 'Anime'],
'uploader': 'g11st',
'uploader_id': 394718,
'upload_timestamp': 1671590240,
'upload_date': '20221221',
}
}
'like_count': int,
'dislike_count': int,
'age_limit': 0,
'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
},
}, {
# Tags require account
'url': 'https://pr0gramm.com/new/3052805:comment28391322',
'info_dict': {
'id': '3052805',
'ext': 'mp4',
'title': 'pr0gramm-3052805 by Hansking1',
'tags': 'count:15',
'uploader': 'Hansking1',
'uploader_id': 385563,
'upload_timestamp': 1552930408,
'upload_date': '20190318',
'like_count': int,
'dislike_count': int,
'age_limit': 0,
'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
},
}, {
# Requires verified account
'url': 'https://pr0gramm.com/new/Gianna%20Michaels/5848332',
'info_dict': {
'id': '5848332',
'ext': 'mp4',
'title': 'pr0gramm-5848332 by erd0pfel',
'tags': 'count:18',
'uploader': 'erd0pfel',
'uploader_id': 349094,
'upload_timestamp': 1694489652,
'upload_date': '20230912',
'like_count': int,
'dislike_count': int,
'age_limit': 18,
'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
},
}, {
'url': 'https://pr0gramm.com/static/5466437',
'only_matching': True,
}, {
'url': 'https://pr0gramm.com/new/rowan%20atkinson%20herr%20bohne/3052805',
'only_matching': True,
}, {
'url': 'https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290',
'only_matching': True,
}]

def _generic_title():
return "oof"
BASE_URL = 'https://pr0gramm.com'

@functools.cached_property
def _is_logged_in(self):
return 'pp' in self._get_cookies(self.BASE_URL)

@functools.cached_property
def _maximum_flags(self):
# We need to guess the flags for the content otherwise the api will raise an error
# We can guess the maximum allowed flags for the account from the cookies
# Bitflags are (msbf): nsfp, nsfl, nsfw, sfw
flags = 0b0001
if self._is_logged_in:
flags |= 0b1000
cookies = self._get_cookies(self.BASE_URL)
if 'me' not in cookies:
self._download_webpage(self.BASE_URL, None, 'Refreshing verification information')
if traverse_obj(cookies, ('me', {lambda x: x.value}, {unquote}, {json.loads}, 'verified')):
flags |= 0b0110

return flags

def _call_api(self, endpoint, video_id, query={}, note='Downloading API json'):
data = self._download_json(
f'https://pr0gramm.com/api/items/{endpoint}',
video_id, note, query=query, expected_status=403)

error = traverse_obj(data, ('error', {str}))
if error in ('nsfwRequired', 'nsflRequired', 'nsfpRequired', 'verificationRequired'):
if not self._is_logged_in:
self.raise_login_required()
raise ExtractorError(f'Unverified account cannot access NSFW/NSFL ({error})', expected=True)
elif error:
message = traverse_obj(data, ('msg', {str})) or error
raise ExtractorError(f'API returned error: {message}', expected=True)

return data

def _real_extract(self, url):
video_id = self._match_id(url)
video_info = traverse_obj(
self._call_api('get', video_id, {'id': video_id, 'flags': self._maximum_flags}),
('items', 0, {dict}))

return self.url_result(
'https://pr0gramm.com/static/' + video_id,
video_id=video_id,
ie=Pr0grammStaticIE.ie_key())
source = urljoin('https://img.pr0gramm.com', video_info.get('image'))
if not source or not source.endswith('mp4'):
self.raise_no_formats('Could not extract a video', expected=bool(source), video_id=video_id)

tags = None
if self._is_logged_in:
metadata = self._call_api('info', video_id, {'itemId': video_id})
tags = traverse_obj(metadata, ('tags', ..., 'tag', {str}))
# Sorted by "confidence", higher confidence = earlier in list
confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float})))
if confidences:
tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)]

return {
'id': video_id,
'title': f'pr0gramm-{video_id} by {video_info.get("user")}',
'formats': [{
'url': source,
'ext': 'mp4',
**traverse_obj(video_info, {
'width': ('width', {int}),
'height': ('height', {int}),
}),
}],
'tags': tags,
'age_limit': 18 if traverse_obj(video_info, ('flags', {0b110.__and__})) else 0,
'_old_archive_ids': [make_archive_id('Pr0grammStatic', video_id)],
**traverse_obj(video_info, {
'uploader': ('user', {str}),
'uploader_id': ('userId', {int}),
'like_count': ('up', {int}),
'dislike_count': ('down', {int}),
'upload_timestamp': ('created', {int}),
'upload_date': ('created', {int}, {date.fromtimestamp}, {lambda x: x.strftime('%Y%m%d')}),
'thumbnail': ('thumb', {lambda x: urljoin('https://thumb.pr0gramm.com', x)})
}),
}
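
The `_maximum_flags` property added above composes a content-flag bitmask from the account state. A standalone sketch of the bit arithmetic, with bit names taken from the comment in the diff and the cookie handling omitted:

SFW, NSFW, NSFL, NSFP = 0b0001, 0b0010, 0b0100, 0b1000

def maximum_flags(logged_in, verified):
    flags = SFW  # anonymous sessions may only request SFW content
    if logged_in:
        flags |= NSFP
    if verified:
        flags |= NSFW | NSFL  # 0b0110, as in the extractor
    return flags

assert maximum_flags(logged_in=False, verified=False) == 0b0001
assert maximum_flags(logged_in=True, verified=True) == 0b1111
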
@@ -1,7 +1,18 @@
import itertools
import re
import urllib.parse

from .common import InfoExtractor
from ..utils import parse_duration, unified_strdate
from ..utils import (
int_or_none,
join_nonempty,
js_to_json,
parse_duration,
strftime_or_none,
traverse_obj,
unified_strdate,
urljoin,
)

class RadioFranceIE(InfoExtractor):

@@ -56,8 +67,32 @@ def _real_extract(self, url):
}

class FranceCultureIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?radiofrance\.fr/(?:franceculture|fip|francemusique|mouv|franceinter)/podcasts/(?:[^?#]+/)?(?P<display_id>[^?#]+)-(?P<id>\d+)($|[?#])'
class RadioFranceBaseIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:www\.)?radiofrance\.fr'

_STATIONS_RE = '|'.join(map(re.escape, (
'franceculture',
'franceinfo',
'franceinter',
'francemusique',
'fip',
'mouv',
)))

def _extract_data_from_webpage(self, webpage, display_id, key):
return traverse_obj(self._search_json(
r'\bconst\s+data\s*=', webpage, key, display_id,
contains_pattern=r'(\[\{.*?\}\]);', transform_source=js_to_json),
(..., 'data', key, {dict}), get_all=False) or {}

class FranceCultureIE(RadioFranceBaseIE):
_VALID_URL = rf'''(?x)
{RadioFranceBaseIE._VALID_URL_BASE}
/(?:{RadioFranceBaseIE._STATIONS_RE})
/podcasts/(?:[^?#]+/)?(?P<display_id>[^?#]+)-(?P<id>\d{{6,}})(?:$|[?#])
'''

_TESTS = [
{
'url': 'https://www.radiofrance.fr/franceculture/podcasts/science-en-questions/la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau-8440487',

@@ -67,14 +102,30 @@ class FranceCultureIE(InfoExtractor):
'ext': 'mp3',
'title': 'La physique d’Einstein aiderait-elle à comprendre le cerveau ?',
'description': 'Existerait-il un pont conceptuel entre la physique de l’espace-temps et les neurosciences ?',
'thumbnail': 'https://cdn.radiofrance.fr/s3/cruiser-production/2022/05/d184e7a3-4827-4494-bf94-04ed7b120db4/1200x630_gettyimages-200171095-001.jpg',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'upload_date': '20220514',
'duration': 2750,
},
},
{
'url': 'https://www.radiofrance.fr/franceinter/podcasts/le-7-9-30/le-7-9-30-du-vendredi-10-mars-2023-2107675',
'info_dict': {
'id': '2107675',
'display_id': 'le-7-9-30-du-vendredi-10-mars-2023',
'title': 'Inflation alimentaire : comment en sortir ? - Régis Debray et Claude Grange - Cybèle Idelot',
'description': 'md5:36ee74351ede77a314fdebb94026b916',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'upload_date': '20230310',
'duration': 8977,
'ext': 'mp3',
},
},
{
'url': 'https://www.radiofrance.fr/franceinter/podcasts/la-rafle-du-vel-d-hiv-une-affaire-d-etat/les-racines-du-crime-episode-1-3715507',
'only_matching': True,
}, {
'url': 'https://www.radiofrance.fr/franceinfo/podcasts/le-billet-sciences/sante-bientot-un-vaccin-contre-l-asthme-allergique-3057200',
'only_matching': True,
}
]

@@ -89,7 +140,6 @@ def _real_extract(self, url):
'id': video_id,
'display_id': display_id,
'url': video_data['contentUrl'],
'ext': video_data.get('encodingFormat'),
'vcodec': 'none' if video_data.get('encodingFormat') == 'mp3' else None,
'duration': parse_duration(video_data.get('duration')),
'title': self._html_search_regex(r'(?s)<h1[^>]*itemprop="[^"]*name[^"]*"[^>]*>(.+?)</h1>',

@@ -102,3 +152,322 @@ def _real_extract(self, url):
'upload_date': unified_strdate(self._search_regex(
r'"datePublished"\s*:\s*"([^"]+)', webpage, 'timestamp', fatal=False))
}

class RadioFranceLiveIE(RadioFranceBaseIE):
_VALID_URL = rf'''(?x)
https?://(?:www\.)?radiofrance\.fr
/(?P<id>{RadioFranceBaseIE._STATIONS_RE})
/?(?P<substation_id>radio-[\w-]+)?(?:[#?]|$)
'''

_TESTS = [{
'url': 'https://www.radiofrance.fr/franceinter/',
'info_dict': {
'id': 'franceinter',
'title': str,
'live_status': 'is_live',
'ext': 'aac',
},
'params': {
'skip_download': 'Livestream',
},
}, {
'url': 'https://www.radiofrance.fr/franceculture',
'info_dict': {
'id': 'franceculture',
'title': str,
'live_status': 'is_live',
'ext': 'aac',
},
'params': {
'skip_download': 'Livestream',
},
}, {
'url': 'https://www.radiofrance.fr/mouv/radio-musique-kids-family',
'info_dict': {
'id': 'mouv-radio-musique-kids-family',
'title': str,
'live_status': 'is_live',
'ext': 'aac',
},
'params': {
'skip_download': 'Livestream',
},
}, {
'url': 'https://www.radiofrance.fr/mouv/radio-rnb-soul',
'info_dict': {
'id': 'mouv-radio-rnb-soul',
'title': str,
'live_status': 'is_live',
'ext': 'aac',
},
'params': {
'skip_download': 'Livestream',
},
}, {
'url': 'https://www.radiofrance.fr/mouv/radio-musique-mix',
'info_dict': {
'id': 'mouv-radio-musique-mix',
'title': str,
'live_status': 'is_live',
'ext': 'aac',
},
'params': {
'skip_download': 'Livestream',
},
}, {
'url': 'https://www.radiofrance.fr/fip/radio-rock',
'info_dict': {
'id': 'fip-radio-rock',
'title': str,
'live_status': 'is_live',
'ext': 'aac',
},
'params': {
'skip_download': 'Livestream',
},
}, {
'url': 'https://www.radiofrance.fr/mouv',
'only_matching': True,
}]

def _real_extract(self, url):
station_id, substation_id = self._match_valid_url(url).group('id', 'substation_id')

if substation_id:
webpage = self._download_webpage(url, station_id)
api_response = self._extract_data_from_webpage(webpage, station_id, 'webRadioData')
else:
api_response = self._download_json(
f'https://www.radiofrance.fr/{station_id}/api/live', station_id)

formats, subtitles = [], {}
for media_source in traverse_obj(api_response, (('now', None), 'media', 'sources', lambda _, v: v['url'])):
if media_source.get('format') == 'hls':
fmts, subs = self._extract_m3u8_formats_and_subtitles(media_source['url'], station_id, fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
else:
formats.append({
'url': media_source['url'],
'abr': media_source.get('bitrate'),
})

return {
'id': join_nonempty(station_id, substation_id),
'title': traverse_obj(api_response, ('visual', 'legend')) or join_nonempty(
('now', 'firstLine', 'title'), ('now', 'secondLine', 'title'), from_dict=api_response, delim=' - '),
'formats': formats,
'subtitles': subtitles,
'is_live': True,
}

class RadioFrancePlaylistBase(RadioFranceBaseIE):
"""Subclasses must set _METADATA_KEY"""

def _call_api(self, content_id, cursor, page_num):
raise NotImplementedError('This method must be implemented by subclasses')

def _generate_playlist_entries(self, content_id, content_response):
for page_num in itertools.count(2):
for entry in content_response['items']:
yield self.url_result(
f'https://www.radiofrance.fr/{entry["path"]}', url_transparent=True, **traverse_obj(entry, {
'title': 'title',
'description': 'standFirst',
'timestamp': ('publishedDate', {int_or_none}),
'thumbnail': ('visual', 'src'),
}))

next_cursor = traverse_obj(content_response, (('pagination', None), 'next'), get_all=False)
if not next_cursor:
break

content_response = self._call_api(content_id, next_cursor, page_num)

def _real_extract(self, url):
display_id = self._match_id(url)

metadata = self._download_json(
'https://www.radiofrance.fr/api/v2.1/path', display_id,
query={'value': urllib.parse.urlparse(url).path})['content']

content_id = metadata['id']

return self.playlist_result(
self._generate_playlist_entries(content_id, metadata[self._METADATA_KEY]), content_id,
display_id=display_id, **{**traverse_obj(metadata, {
'title': 'title',
'description': 'standFirst',
'thumbnail': ('visual', 'src'),
}), **traverse_obj(metadata, {
'title': 'name',
'description': 'role',
})})

class RadioFrancePodcastIE(RadioFrancePlaylistBase):
_VALID_URL = rf'''(?x)
{RadioFranceBaseIE._VALID_URL_BASE}
/(?:{RadioFranceBaseIE._STATIONS_RE})
/podcasts/(?P<id>[\w-]+)/?(?:[?#]|$)
'''

_TESTS = [{
'url': 'https://www.radiofrance.fr/franceinfo/podcasts/le-billet-vert',
'info_dict': {
'id': 'eaf6ef81-a980-4f1c-a7d1-8a75ecd54b17',
'display_id': 'le-billet-vert',
'title': 'Le billet sciences',
'description': 'md5:eb1007b34b0c0a680daaa71525bbd4c1',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
},
'playlist_mincount': 11,
}, {
'url': 'https://www.radiofrance.fr/franceinter/podcasts/jean-marie-le-pen-l-obsession-nationale',
'info_dict': {
'id': '566fd524-3074-4fbc-ac69-8696f2152a54',
'display_id': 'jean-marie-le-pen-l-obsession-nationale',
'title': 'Jean-Marie Le Pen, l\'obsession nationale',
'description': 'md5:a07c0cfb894f6d07a62d0ad12c4b7d73',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
},
'playlist_count': 7,
}, {
'url': 'https://www.radiofrance.fr/franceculture/podcasts/serie-thomas-grjebine',
'info_dict': {
'id': '63c1ddc9-9f15-457a-98b2-411bac63f48d',
'display_id': 'serie-thomas-grjebine',
'title': 'Thomas Grjebine',
},
'playlist_count': 1,
}, {
'url': 'https://www.radiofrance.fr/fip/podcasts/certains-l-aiment-fip',
'info_dict': {
'id': '143dff38-e956-4a5d-8576-1c0b7242b99e',
'display_id': 'certains-l-aiment-fip',
'title': 'Certains l’aiment Fip',
'description': 'md5:ff974672ba00d4fd5be80fb001c5b27e',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
},
'playlist_mincount': 321,
}, {
'url': 'https://www.radiofrance.fr/franceinter/podcasts/le-7-9',
'only_matching': True,
}, {
'url': 'https://www.radiofrance.fr/mouv/podcasts/dirty-mix',
'only_matching': True,
}]

_METADATA_KEY = 'expressions'

def _call_api(self, podcast_id, cursor, page_num):
return self._download_json(
f'https://www.radiofrance.fr/api/v2.1/concepts/{podcast_id}/expressions', podcast_id,
note=f'Downloading page {page_num}', query={'pageCursor': cursor})

class RadioFranceProfileIE(RadioFrancePlaylistBase):
_VALID_URL = rf'{RadioFranceBaseIE._VALID_URL_BASE}/personnes/(?P<id>[\w-]+)'

_TESTS = [{
'url': 'https://www.radiofrance.fr/personnes/thomas-pesquet?p=3',
'info_dict': {
'id': '86c62790-e481-11e2-9f7b-782bcb6744eb',
'display_id': 'thomas-pesquet',
'title': 'Thomas Pesquet',
'description': 'Astronaute à l\'agence spatiale européenne',
},
'playlist_mincount': 212,
}, {
'url': 'https://www.radiofrance.fr/personnes/eugenie-bastie',
'info_dict': {
'id': '9593050b-0183-4972-a0b5-d8f699079e02',
'display_id': 'eugenie-bastie',
'title': 'Eugénie Bastié',
'description': 'Journaliste et essayiste',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
},
'playlist_mincount': 39,
}, {
'url': 'https://www.radiofrance.fr/personnes/lea-salame',
'only_matching': True,
}]

_METADATA_KEY = 'documents'

def _call_api(self, profile_id, cursor, page_num):
resp = self._download_json(
f'https://www.radiofrance.fr/api/v2.1/taxonomy/{profile_id}/documents', profile_id,
note=f'Downloading page {page_num}', query={
'relation': 'personality',
'cursor': cursor,
})

resp['next'] = traverse_obj(resp, ('pagination', 'next'))
return resp

class RadioFranceProgramScheduleIE(RadioFranceBaseIE):
_VALID_URL = rf'''(?x)
{RadioFranceBaseIE._VALID_URL_BASE}
/(?P<station>{RadioFranceBaseIE._STATIONS_RE})
/grille-programmes(?:\?date=(?P<date>[\d-]+))?
'''

_TESTS = [{
'url': 'https://www.radiofrance.fr/franceinter/grille-programmes?date=17-02-2023',
'info_dict': {
'id': 'franceinter-program-20230217',
'upload_date': '20230217',
},
'playlist_count': 25,
}, {
'url': 'https://www.radiofrance.fr/franceculture/grille-programmes?date=01-02-2023',
'info_dict': {
'id': 'franceculture-program-20230201',
'upload_date': '20230201',
},
'playlist_count': 25,
}, {
'url': 'https://www.radiofrance.fr/mouv/grille-programmes?date=19-03-2023',
'info_dict': {
'id': 'mouv-program-20230319',
'upload_date': '20230319',
},
'playlist_count': 3,
}, {
'url': 'https://www.radiofrance.fr/francemusique/grille-programmes?date=18-03-2023',
'info_dict': {
'id': 'francemusique-program-20230318',
'upload_date': '20230318',
},
'playlist_count': 15,
}, {
'url': 'https://www.radiofrance.fr/franceculture/grille-programmes',
'only_matching': True,
}]

def _generate_playlist_entries(self, webpage_url, api_response):
for entry in traverse_obj(api_response, ('steps', lambda _, v: v['expression']['path'])):
yield self.url_result(
urljoin(webpage_url, f'/{entry["expression"]["path"]}'), ie=FranceCultureIE,
url_transparent=True, **traverse_obj(entry, {
'title': ('expression', 'title'),
'thumbnail': ('expression', 'visual', 'src'),
'timestamp': ('startTime', {int_or_none}),
'series_id': ('concept', 'id'),
'series': ('concept', 'title'),
}))

def _real_extract(self, url):
station, date = self._match_valid_url(url).group('station', 'date')
webpage = self._download_webpage(url, station)
grid_data = self._extract_data_from_webpage(webpage, station, 'grid')
upload_date = strftime_or_none(grid_data.get('date'), '%Y%m%d')

return self.playlist_result(
self._generate_playlist_entries(url, grid_data),
join_nonempty(station, 'program', upload_date), upload_date=upload_date)
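
`RadioFrancePlaylistBase._generate_playlist_entries` above is cursor pagination: yield the current page's items, then follow the `next` cursor until it is absent. A minimal generic sketch of the same loop, where `call_api` stands in for the subclass `_call_api`:

import itertools

def paginate(first_page, call_api):
    page = first_page
    for page_num in itertools.count(2):
        yield from page['items']
        # The cursor may live under 'pagination' or at the top level, as above
        next_cursor = (page.get('pagination') or {}).get('next') or page.get('next')
        if not next_cursor:
            break
        page = call_api(next_cursor, page_num)

# Example with a fake two-page API:
pages = {None: {'items': [1, 2], 'next': 'c1'}, 'c1': {'items': [3]}}
assert list(paginate(pages[None], lambda cursor, n: pages[cursor])) == [1, 2, 3]
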
@@ -1,10 +1,11 @@
import re

from .common import InfoExtractor
from ..utils import parse_qs, remove_start, traverse_obj, ExtractorError

class RbgTumIE(InfoExtractor):
_VALID_URL = r'https://live\.rbg\.tum\.de/w/(?P<id>.+)'
_VALID_URL = r'https://(?:live\.rbg\.tum\.de|tum\.live)/w/(?P<id>[^?#]+)'
_TESTS = [{
# Combined view
'url': 'https://live.rbg.tum.de/w/cpp/22128',

@@ -35,16 +36,18 @@ class RbgTumIE(InfoExtractor):
'title': 'Fachschaftsvollversammlung',
'series': 'Fachschaftsvollversammlung Informatik',
}
}, {
'url': 'https://tum.live/w/linalginfo/27102',
'only_matching': True,
}, ]

def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)

m3u8 = self._html_search_regex(r'(https://.+?\.m3u8)', webpage, 'm3u8')
lecture_title = self._html_search_regex(r'(?si)<h1.*?>(.*)</h1>', webpage, 'title')
lecture_series_title = self._html_search_regex(
r'(?s)<title\b[^>]*>\s*(?:TUM-Live\s\|\s?)?([^:]+):?.*?</title>', webpage, 'series')
m3u8 = self._html_search_regex(r'"(https://[^"]+\.m3u8[^"]*)', webpage, 'm3u8')
lecture_title = self._html_search_regex(r'<h1[^>]*>([^<]+)</h1>', webpage, 'title', fatal=False)
lecture_series_title = remove_start(self._html_extract_title(webpage), 'TUM-Live | ')

formats = self._extract_m3u8_formats(m3u8, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')

@@ -57,9 +60,9 @@ def _real_extract(self, url):

class RbgTumCourseIE(InfoExtractor):
_VALID_URL = r'https://live\.rbg\.tum\.de/course/(?P<id>.+)'
_VALID_URL = r'https://(?P<hostname>(?:live\.rbg\.tum\.de|tum\.live))/old/course/(?P<id>(?P<year>\d+)/(?P<term>\w+)/(?P<slug>[^/?#]+))'
_TESTS = [{
'url': 'https://live.rbg.tum.de/course/2022/S/fpv',
'url': 'https://live.rbg.tum.de/old/course/2022/S/fpv',
'info_dict': {
'title': 'Funktionale Programmierung und Verifikation (IN0003)',
'id': '2022/S/fpv',

@@ -69,7 +72,7 @@ class RbgTumCourseIE(InfoExtractor):
},
'playlist_count': 13,
}, {
'url': 'https://live.rbg.tum.de/course/2022/W/set',
'url': 'https://live.rbg.tum.de/old/course/2022/W/set',
'info_dict': {
'title': 'SET FSMPIC',
'id': '2022/W/set',

@@ -78,16 +81,62 @@ class RbgTumCourseIE(InfoExtractor):
'noplaylist': False,
},
'playlist_count': 6,
}, {
'url': 'https://tum.live/old/course/2023/S/linalginfo',
'only_matching': True,
}, ]

def _real_extract(self, url):
course_id = self._match_id(url)
webpage = self._download_webpage(url, course_id)
course_id, hostname, year, term, slug = self._match_valid_url(url).group('id', 'hostname', 'year', 'term', 'slug')
meta = self._download_json(
f'https://{hostname}/api/courses/{slug}/', course_id, fatal=False,
query={'year': year, 'term': term}) or {}
lecture_series_title = meta.get('Name')
lectures = [self.url_result(f'https://{hostname}/w/{slug}/{stream_id}', RbgTumIE)
for stream_id in traverse_obj(meta, ('Streams', ..., 'ID'))]

lecture_series_title = self._html_search_regex(r'(?si)<h1.*?>(.*)</h1>', webpage, 'title')
if not lectures:
webpage = self._download_webpage(url, course_id)
lecture_series_title = remove_start(self._html_extract_title(webpage), 'TUM-Live | ')
lectures = [self.url_result(f'https://{hostname}{lecture_path}', RbgTumIE)
for lecture_path in re.findall(r'href="(/w/[^/"]+/[^/"]+)"', webpage)]

lecture_urls = []
for lecture_url in re.findall(r'(?i)href="/w/(.+)(?<!/cam)(?<!/pres)(?<!/chat)"', webpage):
lecture_urls.append(self.url_result('https://live.rbg.tum.de/w/' + lecture_url, ie=RbgTumIE.ie_key()))
return self.playlist_result(lectures, course_id, lecture_series_title)

return self.playlist_result(lecture_urls, course_id, lecture_series_title)

class RbgTumNewCourseIE(InfoExtractor):
_VALID_URL = r'https://(?P<hostname>(?:live\.rbg\.tum\.de|tum\.live))/\?'
_TESTS = [{
'url': 'https://live.rbg.tum.de/?year=2022&term=S&slug=fpv&view=3',
'info_dict': {
'title': 'Funktionale Programmierung und Verifikation (IN0003)',
'id': '2022/S/fpv',
},
'params': {
'noplaylist': False,
},
'playlist_count': 13,
}, {
'url': 'https://live.rbg.tum.de/?year=2022&term=W&slug=set&view=3',
'info_dict': {
'title': 'SET FSMPIC',
'id': '2022/W/set',
},
'params': {
'noplaylist': False,
},
'playlist_count': 6,
}, {
'url': 'https://tum.live/?year=2023&term=S&slug=linalginfo&view=3',
'only_matching': True,
}]

def _real_extract(self, url):
query = parse_qs(url)
errors = [key for key in ('year', 'term', 'slug') if not query.get(key)]
if errors:
raise ExtractorError(f'Input URL is missing query parameters: {", ".join(errors)}')
year, term, slug = query['year'][0], query['term'][0], query['slug'][0]
hostname = self._match_valid_url(url).group('hostname')

return self.url_result(f'https://{hostname}/old/course/{year}/{term}/{slug}', RbgTumCourseIE)
@@ -319,16 +319,20 @@ def add_thumbnail(src):
                'format_id': 'fallback',
                'format_note': 'DASH video, mp4_dash',
            }]
        formats.extend(self._extract_m3u8_formats(
            hls_playlist_url, display_id, 'mp4', m3u8_id='hls', fatal=False))
        formats.extend(self._extract_mpd_formats(
            dash_playlist_url, display_id, mpd_id='dash', fatal=False))
        hls_fmts, subtitles = self._extract_m3u8_formats_and_subtitles(
            hls_playlist_url, display_id, 'mp4', m3u8_id='hls', fatal=False)
        formats.extend(hls_fmts)
        dash_fmts, dash_subs = self._extract_mpd_formats_and_subtitles(
            dash_playlist_url, display_id, mpd_id='dash', fatal=False)
        formats.extend(dash_fmts)
        self._merge_subtitles(dash_subs, target=subtitles)

        return {
            **info,
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            'subtitles': subtitles,
            'duration': int_or_none(reddit_video.get('duration')),
        }

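For reference, `_merge_subtitles` above folds the DASH subtitle dict into the HLS one; a minimal stand-alone sketch of that merge (simplified, toy URLs):

subtitles = {'en': [{'url': 'https://v.example/hls_sub.vtt'}]}
dash_subs = {'en': [{'url': 'https://v.example/dash_sub.vtt'}], 'de': [{'url': 'https://v.example/de_sub.vtt'}]}
for lang, tracks in dash_subs.items():
    subtitles.setdefault(lang, []).extend(tracks)
# subtitles now lists both tracks under 'en' plus the new 'de' entry
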
@@ -1,6 +1,7 @@
from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    parse_duration,
    traverse_obj,
    unified_timestamp,
@@ -25,7 +26,7 @@ class RTVSLOIE(InfoExtractor):
        'url': 'https://www.rtvslo.si/rtv365/arhiv/174842550?s=tv',
        'info_dict': {
            'id': '174842550',
            'ext': 'flv',
            'ext': 'mp4',
            'release_timestamp': 1643140032,
            'upload_date': '20220125',
            'series': 'Dnevnik',
@@ -69,7 +70,21 @@ class RTVSLOIE(InfoExtractor):
            'tbr': 128000,
            'release_date': '20220201',
        },
    }, {
        'url': 'https://365.rtvslo.si/arhiv/razred-zase/148350750',
        'info_dict': {
            'id': '148350750',
            'ext': 'mp4',
            'title': 'Prvi šolski dan, mozaična oddaja za mlade',
            'series': 'Razred zase',
            'series_id': '148185730',
            'duration': 1481,
            'upload_date': '20121019',
            'timestamp': 1350672122,
            'release_date': '20121019',
            'release_timestamp': 1350672122,
            'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/148185730/razred_zase_2014_logo_4d_wide2.jpg',
        },
    }, {
        'url': 'https://4d.rtvslo.si/arhiv/dnevnik/174842550',
        'only_matching': True
@@ -98,13 +113,14 @@ def _real_extract(self, url):
        media = self._download_json(self._API_BASE.format('getMedia', v_id), v_id, query={'jwt': jwt})['response']

        formats = []
        skip_protocols = ['smil', 'f4m', 'dash']
        adaptive_url = traverse_obj(media, ('addaptiveMedia', 'hls_sec'), expected_type=url_or_none)
        if adaptive_url:
            formats = self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=['smil'])
            formats = self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=skip_protocols)

        adaptive_url = traverse_obj(media, ('addaptiveMedia_sl', 'hls_sec'), expected_type=url_or_none)
        if adaptive_url:
            for f in self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=['smil']):
            for f in self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=skip_protocols):
                formats.append({
                    **f,
                    'format_id': 'sign-' + f['format_id'],
@@ -114,19 +130,19 @@ def _real_extract(self, url):
                    else f.get('language'))
                })

        formats.extend(
            {
                'url': f['streams'][strm],
                'ext': traverse_obj(f, 'mediaType', expected_type=str.lower),
                'width': f.get('width'),
                'height': f.get('height'),
                'tbr': f.get('bitrate'),
                'filesize': f.get('filesize'),
            }
            for strm in ('http', 'https')
            for f in media.get('mediaFiles') or []
            if traverse_obj(f, ('streams', strm))
        )
        for mediafile in traverse_obj(media, ('mediaFiles', lambda _, v: url_or_none(v['streams']['https']))):
            formats.append(traverse_obj(mediafile, {
                'url': ('streams', 'https'),
                'ext': ('mediaType', {str.lower}),
                'width': ('width', {int_or_none}),
                'height': ('height', {int_or_none}),
                'tbr': ('bitrate', {int_or_none}),
                'filesize': ('filesize', {int_or_none}),
            }))

        for mediafile in traverse_obj(media, ('mediaFiles', lambda _, v: url_or_none(v['streams']['hls_sec']))):
            formats.extend(self._extract_wowza_formats(
                mediafile['streams']['hls_sec'], v_id, skip_protocols=skip_protocols))

        if any('intermission.mp4' in x['url'] for x in formats):
            self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)

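The dict-template form of `traverse_obj` used for `mediaFiles` above maps output keys to paths and applies `{callable}` steps along the way; a toy example (sample data, not RTVSLO's real schema):

from yt_dlp.utils import int_or_none, traverse_obj

mediafile = {'streams': {'https': 'https://cdn.example/video.mp4'}, 'mediaType': 'MP4', 'width': '1920'}
print(traverse_obj(mediafile, {
    'url': ('streams', 'https'),
    'ext': ('mediaType', {str.lower}),   # {callable} applies the function to the value
    'width': ('width', {int_or_none}),   # None results are dropped from the dict
}))
# {'url': 'https://cdn.example/video.mp4', 'ext': 'mp4', 'width': 1920}
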
@@ -1,6 +1,6 @@
import re

from ..utils import parse_duration
from ..utils import parse_duration, unescapeHTML
from .common import InfoExtractor


@@ -16,7 +16,8 @@ class Rule34VideoIE(InfoExtractor):
            'title': 'Shot It-(mmd hmv)',
            'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065157/preview.jpg',
            'duration': 347.0,
            'age_limit': 18
            'age_limit': 18,
            'tags': 'count:14'
        }
    },
    {
@@ -28,7 +29,8 @@ class Rule34VideoIE(InfoExtractor):
            'title': 'Lara in Trouble Ep. 7 [WildeerStudio]',
            'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065296/preview.jpg',
            'duration': 938.0,
            'age_limit': 18
            'age_limit': 18,
            'tags': 'count:50'
        }
    },
]
@@ -57,5 +59,7 @@ def _real_extract(self, url):
            'title': title,
            'thumbnail': thumbnail,
            'duration': parse_duration(duration),
            'age_limit': 18
            'age_limit': 18,
            'tags': list(map(unescapeHTML, re.findall(
                r'<a class="tag_item"[^>]+\bhref="https://rule34video\.com/tags/\d+/"[^>]*>(?P<tag>[^>]*)</a>', webpage))),
        }

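`unescapeHTML` is mapped over the scraped tag names above because the markup HTML-encodes them; for example (made-up tag values):

from yt_dlp.utils import unescapeHTML

print(list(map(unescapeHTML, ['rock &amp; roll', 'mmd'])))  # ['rock & roll', 'mmd']
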
@@ -33,7 +33,7 @@ class RumbleEmbedIE(InfoExtractor):
            'upload_date': '20191020',
            'channel_url': 'https://rumble.com/c/WMAR',
            'channel': 'WMAR',
            'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.OvCc-small-WMAR-2-News-Latest-Headline.jpg',
            'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.qR4e-small-WMAR-2-News-Latest-Headline.jpg',
            'duration': 234,
            'uploader': 'WMAR',
            'live_status': 'not_live',
@@ -84,7 +84,7 @@ class RumbleEmbedIE(InfoExtractor):
        'info_dict': {
            'id': 'v1essrt',
            'ext': 'mp4',
            'title': 'startswith:lofi hip hop radio - beats to relax/study',
            'title': 'startswith:lofi hip hop radio 📚 - beats to relax/study to',
            'timestamp': 1661519399,
            'upload_date': '20220826',
            'channel_url': 'https://rumble.com/c/LofiGirl',
@@ -99,7 +99,7 @@ class RumbleEmbedIE(InfoExtractor):
        'url': 'https://rumble.com/embed/v1amumr',
        'info_dict': {
            'id': 'v1amumr',
            'ext': 'webm',
            'ext': 'mp4',
            'fps': 60,
            'title': 'Turning Point USA 2022 Student Action Summit DAY 1 - Rumble Exclusive Live',
            'timestamp': 1658518457,
@@ -129,7 +129,7 @@ class RumbleEmbedIE(InfoExtractor):
            'duration': 92,
            'title': '911 Audio From The Man Who Wanted To Kill Supreme Court Justice Kavanaugh',
            'channel_url': 'https://rumble.com/c/RichSementa',
            'thumbnail': 'https://sp.rmbl.ws/s8/1/P/j/f/A/PjfAe.OvCc-small-911-Audio-From-The-Man-Who-.jpg',
            'thumbnail': 'https://sp.rmbl.ws/s8/1/P/j/f/A/PjfAe.qR4e-small-911-Audio-From-The-Man-Who-.jpg',
            'timestamp': 1654892716,
            'uploader': 'Mr Producer Media',
            'upload_date': '20220610',
@@ -144,7 +144,7 @@ def _extract_embed_urls(cls, url, webpage):
        if embeds:
            return embeds
        return [f'https://rumble.com/embed/{mobj.group("id")}' for mobj in re.finditer(
            r'<script>[^<]*\bRumble\(\s*"play"\s*,\s*{\s*[\'"]?video[\'"]?\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)]
            r'<script>[^<]*\bRumble\(\s*"play"\s*,\s*{[^}]*[\'"]?video[\'"]?\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)]

    def _real_extract(self, url):
        video_id = self._match_id(url)
@@ -236,7 +236,9 @@ def _real_extract(self, url):

class RumbleIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?rumble\.com/(?P<id>v(?!ideos)[\w.-]+)[^/]*$'
    _EMBED_REGEX = [r'<a class=video-item--a href=(?P<url>/v[\w.-]+\.html)>']
    _EMBED_REGEX = [
        r'<a class=video-item--a href=(?P<url>/v[\w.-]+\.html)>',
        r'<a[^>]+class="videostream__link link"[^>]+href=(?P<url>/v[\w.-]+\.html)[^>]*>']
    _TESTS = [{
        'add_ie': ['RumbleEmbed'],
        'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html',
@@ -254,6 +256,7 @@ class RumbleIE(InfoExtractor):
            'thumbnail': r're:https://.+\.jpg',
            'duration': 103,
            'like_count': int,
            'dislike_count': int,
            'view_count': int,
            'live_status': 'not_live',
        }
@@ -278,6 +281,9 @@ class RumbleIE(InfoExtractor):
            'channel_url': 'https://rumble.com/c/Redacted',
            'live_status': 'not_live',
            'thumbnail': 'https://sp.rmbl.ws/s8/1/d/x/2/O/dx2Oi.qR4e-small-The-U.S.-CANNOT-hide-this-i.jpg',
            'like_count': int,
            'dislike_count': int,
            'view_count': int,
        },
    }, {
        'url': 'https://rumble.com/v2e7fju-the-covid-twitter-files-drop-protecting-fauci-while-censoring-the-truth-wma.html',
@@ -296,12 +302,15 @@ class RumbleIE(InfoExtractor):
            'channel_url': 'https://rumble.com/c/KimIversen',
            'channel': 'Kim Iversen',
            'thumbnail': 'https://sp.rmbl.ws/s8/1/6/b/w/O/6bwOi.qR4e-small-The-Covid-Twitter-Files-Dro.jpg',
            'like_count': int,
            'dislike_count': int,
            'view_count': int,
        },
    }]

    _WEBPAGE_TESTS = [{
        'url': 'https://rumble.com/videos?page=2',
        'playlist_count': 25,
        'playlist_mincount': 24,
        'info_dict': {
            'id': 'videos?page=2',
            'title': 'All videos',
@@ -309,17 +318,16 @@ class RumbleIE(InfoExtractor):
            'age_limit': 0,
        },
    }, {
        'url': 'https://rumble.com/live-videos',
        'playlist_mincount': 19,
        'url': 'https://rumble.com/browse/live',
        'playlist_mincount': 25,
        'info_dict': {
            'id': 'live-videos',
            'title': 'Live Videos',
            'description': 'Live videos on Rumble.com',
            'id': 'live',
            'title': 'Browse',
            'age_limit': 0,
        },
    }, {
        'url': 'https://rumble.com/search/video?q=rumble&sort=views',
        'playlist_count': 24,
        'playlist_mincount': 24,
        'info_dict': {
            'id': 'video?q=rumble&sort=views',
            'title': 'Search results for: rumble',
@@ -334,19 +342,20 @@ def _real_extract(self, url):
        if not url_info:
            raise UnsupportedError(url)

        release_ts_str = self._search_regex(
            r'(?:Livestream begins|Streamed on):\s+<time datetime="([^"]+)',
            webpage, 'release date', fatal=False, default=None)
        view_count_str = self._search_regex(r'<span class="media-heading-info">([\d,]+) Views',
                                            webpage, 'view count', fatal=False, default=None)

        return self.url_result(
            url_info['url'], ie_key=url_info['ie_key'], url_transparent=True,
            view_count=parse_count(view_count_str),
            release_timestamp=parse_iso8601(release_ts_str),
            like_count=parse_count(get_element_by_class('rumbles-count', webpage)),
            description=clean_html(get_element_by_class('media-description', webpage)),
        )
        return {
            '_type': 'url_transparent',
            'ie_key': url_info['ie_key'],
            'url': url_info['url'],
            'release_timestamp': parse_iso8601(self._search_regex(
                r'(?:Livestream begins|Streamed on):\s+<time datetime="([^"]+)', webpage, 'release date', default=None)),
            'view_count': int_or_none(self._search_regex(
                r'"userInteractionCount"\s*:\s*(\d+)', webpage, 'view count', default=None)),
            'like_count': parse_count(self._search_regex(
                r'<span data-js="rumbles_up_votes">\s*([\d,.KM]+)', webpage, 'like count', default=None)),
            'dislike_count': parse_count(self._search_regex(
                r'<span data-js="rumbles_down_votes">\s*([\d,.KM]+)', webpage, 'dislike count', default=None)),
            'description': clean_html(get_element_by_class('media-description', webpage))
        }


class RumbleChannelIE(InfoExtractor):

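`parse_count` above normalizes the human-readable counters scraped from the page; a couple of sample inputs:

from yt_dlp.utils import parse_count

print(parse_count('1.2K'))   # 1200
print(parse_count('3,456'))  # 3456
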
@@ -1,5 +1,5 @@
from .common import InfoExtractor
from ..utils import traverse_obj
from ..utils import traverse_obj, url_or_none


class S4CIE(InfoExtractor):
@@ -11,7 +11,8 @@ class S4CIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'Y Swn',
            'description': 'md5:f7681a30e4955b250b3224aa9fe70cf0',
            'duration': 5340
            'duration': 5340,
            'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Y_Swn_2023S4C_099_ii.jpg'
        },
    }, {
        'url': 'https://www.s4c.cymru/clic/programme/856636948',
@@ -21,6 +22,7 @@ class S4CIE(InfoExtractor):
            'title': 'Am Dro',
            'duration': 2880,
            'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe',
            'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Am_Dro_2022-23S4C_P6_4005.jpg'
        },
    }]

@@ -30,7 +32,7 @@ def _real_extract(self, url):
            f'https://www.s4c.cymru/df/full_prog_details?lang=e&programme_id={video_id}',
            video_id, fatal=False)

        filename = self._download_json(
        player_config = self._download_json(
            'https://player-api.s4c-cdn.co.uk/player-configuration/prod', video_id, query={
                'programme_id': video_id,
                'signed': '0',
@@ -38,7 +40,13 @@ def _real_extract(self, url):
                'mode': 'od',
                'appId': 'clic',
                'streamName': '',
            }, note='Downloading player config JSON')['filename']
            }, note='Downloading player config JSON')
        subtitles = {}
        for sub in traverse_obj(player_config, ('subtitles', lambda _, v: url_or_none(v['0']))):
            subtitles.setdefault(sub.get('3', 'en'), []).append({
                'url': sub['0'],
                'name': sub.get('1'),
            })
        m3u8_url = self._download_json(
            'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={
                'mode': 'od',
@@ -46,17 +54,52 @@ def _real_extract(self, url):
                'region': 'WW',
                'extra': 'false',
                'thirdParty': 'false',
                'filename': filename,
                'filename': player_config['filename'],
            }, note='Downloading streaming urls JSON')['hls']
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')

        return {
            'id': video_id,
            'formats': formats,
            'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls'),
            'subtitles': subtitles,
            'thumbnail': url_or_none(player_config.get('poster')),
            **traverse_obj(details, ('full_prog_details', 0, {
                'title': (('programme_title', 'series_title'), {str}),
                'description': ('full_billing', {str.strip}),
                'duration': ('duration', {lambda x: int(x) * 60}),
            }), get_all=False),
        }


class S4CSeriesIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/series/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.s4c.cymru/clic/series/864982911',
        'playlist_mincount': 6,
        'info_dict': {
            'id': '864982911',
            'title': 'Iaith ar Daith',
            'description': 'md5:e878ebf660dce89bd2ef521d7ce06397'
        },
    }, {
        'url': 'https://www.s4c.cymru/clic/series/866852587',
        'playlist_mincount': 8,
        'info_dict': {
            'id': '866852587',
            'title': 'FFIT Cymru',
            'description': 'md5:abcb3c129cb68dbb6cd304fd33b07e96'
        },
    }]

    def _real_extract(self, url):
        series_id = self._match_id(url)
        series_details = self._download_json(
            'https://www.s4c.cymru/df/series_details', series_id, query={
                'lang': 'e',
                'series_id': series_id,
                'show_prog_in_series': 'Y'
            }, note='Downloading series details JSON')

        return self.playlist_result(
            [self.url_result(f'https://www.s4c.cymru/clic/programme/{episode_id}', S4CIE, episode_id)
             for episode_id in traverse_obj(series_details, ('other_progs_in_series', ..., 'id'))],
            series_id, traverse_obj(series_details, ('full_prog_details', 0, 'series_title', {str})))

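The player-config subtitle entries above are dicts with numeric string keys ('0' = url, '1' = name, '3' = language); a minimal sketch of the grouping logic with hypothetical data:

player_config = {'subtitles': [{'0': 'https://example.com/sub.vtt', '1': 'Cymraeg', '3': 'cy'}]}  # hypothetical
subtitles = {}
for sub in player_config['subtitles']:
    subtitles.setdefault(sub.get('3', 'en'), []).append({'url': sub['0'], 'name': sub.get('1')})
print(subtitles)  # {'cy': [{'url': 'https://example.com/sub.vtt', 'name': 'Cymraeg'}]}
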
@@ -1,3 +1,4 @@
import base64
import re

from .common import InfoExtractor
@@ -8,7 +9,12 @@
from ..utils import (
    ExtractorError,
    int_or_none,
    float_or_none,
    url_or_none,
    unified_timestamp,
    try_get,
    urljoin,
    traverse_obj,
)


@@ -31,13 +37,20 @@ class SohuIE(InfoExtractor):
            'id': '409385080',
            'ext': 'mp4',
            'title': '《2015湖南卫视羊年元宵晚会》唐嫣《花好月圆》',
        }
        },
        'skip': 'no longer available',
    }, {
        'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
        'info_dict': {
            'id': '78693464',
            'ext': 'mp4',
            'title': '【爱范品】第31期:MWC见不到的奇葩手机',
            'uploader': '爱范儿视频',
            'duration': 213,
            'timestamp': 1425519600,
            'upload_date': '20150305',
            'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M10/83/FA/MTAuMTAuODguODA=/6_14cbccdde5eg104SysCutcloud_78693464_7_0b.jpg',
            'tags': ['爱范儿', '爱范品', 'MWC', '手机'],
        }
    }, {
        'note': 'Multipart video',
@@ -45,6 +58,12 @@ class SohuIE(InfoExtractor):
        'info_dict': {
            'id': '78910339',
            'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
            'uploader': '小苍cany',
            'duration': 744.0,
            'timestamp': 1426269360,
            'upload_date': '20150313',
            'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M11/89/57/MTAuMTAuODguODA=/6_14cea022a1dg102SysCutcloud_78910339_8_0b.jpg',
            'tags': ['小苍MM', '英雄联盟', '实战秘籍'],
        },
        'playlist': [{
            'info_dict': {
@@ -75,6 +94,11 @@ class SohuIE(InfoExtractor):
            'id': '78932792',
            'ext': 'mp4',
            'title': 'youtube-dl testing video',
            'duration': 360,
            'timestamp': 1426348620,
            'upload_date': '20150314',
            'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M02/8A/00/MTAuMTAuODguNzk=/6_14cee1be192g102SysCutcloud_78932792_7_7b.jpg',
            'tags': [],
        },
        'params': {
            'skip_download': True
@@ -100,7 +124,7 @@ def _fetch_data(vid_id, mytv=False):

        webpage = self._download_webpage(url, video_id)

        title = re.sub(r' - 搜狐视频$', '', self._og_search_title(webpage))
        title = re.sub(r'( - 高清正版在线观看)? - 搜狐视频$', '', self._og_search_title(webpage))

        vid = self._html_search_regex(
            r'var vid ?= ?["\'](\d+)["\']',
@@ -132,7 +156,9 @@ def _fetch_data(vid_id, mytv=False):
            allot = format_data['allot']

            data = format_data['data']
            clips_url = data['clipsURL']
            clip_url = traverse_obj(data, (('clipsURL', 'mp4PlayUrl'), i, {url_or_none}), get_all=False)
            if not clip_url:
                raise ExtractorError(f'Unable to extract url for clip {i}')
            su = data['su']

            video_url = 'newflv.sohu.ccgslb.net'
@@ -142,9 +168,9 @@ def _fetch_data(vid_id, mytv=False):
            while 'newflv.sohu.ccgslb.net' in video_url:
                params = {
                    'prot': 9,
                    'file': clips_url[i],
                    'file': clip_url,
                    'new': su[i],
                    'prod': 'flash',
                    'prod': 'h5n',
                    'rb': 1,
                }

@@ -193,6 +219,75 @@ def _fetch_data(vid_id, mytv=False):
            'entries': playlist,
            'id': video_id,
            'title': title,
            'duration': traverse_obj(vid_data, ('data', 'totalDuration', {float_or_none})),
        }

        return info
        if mytv:
            publish_time = unified_timestamp(self._search_regex(
                r'publishTime:\s*["\'](\d+-\d+-\d+ \d+:\d+)["\']', webpage, 'publish time', fatal=False))
        else:
            publish_time = traverse_obj(vid_data, ('tv_application_time', {unified_timestamp}))

        return {
            'timestamp': publish_time - 8 * 3600 if publish_time else None,
            **traverse_obj(vid_data, {
                'alt_title': ('data', 'subName', {str}),
                'uploader': ('wm_data', 'wm_username', {str}),
                'thumbnail': ('data', 'coverImg', {url_or_none}),
                'tags': ('data', 'tag', {str.split}),
            }),
            **info,
        }


class SohuVIE(InfoExtractor):
    _VALID_URL = r'https?://tv\.sohu\.com/v/(?P<id>[\w=-]+)\.html(?:$|[#?])'

    _TESTS = [{
        'note': 'Multipart video',
        'url': 'https://tv.sohu.com/v/MjAyMzA2MTQvbjYwMTMxNTE5Mi5zaHRtbA==.html',
        'info_dict': {
            'id': '601315192',
            'title': '《淬火丹心》第1集',
            'alt_title': '“点天灯”发生事故',
            'duration': 2701.692,
            'timestamp': 1686758040,
            'upload_date': '20230614',
            'thumbnail': 'http://photocdn.tv.sohu.com/img/20230614/vrsa_hor_1686738763256_454010551.jpg',
        },
        'playlist_mincount': 9,
        'skip': 'Only available in China',
    }, {
        'url': 'https://tv.sohu.com/v/dXMvMjMyNzk5ODg5Lzc4NjkzNDY0LnNodG1s.html',
        'info_dict': {
            'id': '78693464',
            'ext': 'mp4',
            'title': '【爱范品】第31期:MWC见不到的奇葩手机',
            'uploader': '爱范儿视频',
            'duration': 213,
            'timestamp': 1425519600,
            'upload_date': '20150305',
            'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M10/83/FA/MTAuMTAuODguODA=/6_14cbccdde5eg104SysCutcloud_78693464_7_0b.jpg',
            'tags': ['爱范儿', '爱范品', 'MWC', '手机'],
        }
    }, {
        'note': 'Multipart video',
        'url': 'https://tv.sohu.com/v/dXMvMjQyNTYyMTYzLzc4OTEwMzM5LnNodG1s.html?src=pl',
        'info_dict': {
            'id': '78910339',
            'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
            'uploader': '小苍cany',
            'duration': 744.0,
            'timestamp': 1426269360,
            'upload_date': '20150313',
            'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M11/89/57/MTAuMTAuODguODA=/6_14cea022a1dg102SysCutcloud_78910339_8_0b.jpg',
            'tags': ['小苍MM', '英雄联盟', '实战秘籍'],
        },
        'playlist_mincount': 3,
    }]

    def _real_extract(self, url):
        encoded_id = self._match_id(url)
        path = base64.urlsafe_b64decode(encoded_id).decode()
        subdomain = 'tv' if re.match(r'\d+/n\d+\.shtml', path) else 'my.tv'
        return self.url_result(urljoin(f'http://{subdomain}.sohu.com/', path), SohuIE)

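The SohuVIE id above is just a urlsafe-base64-encoded site path; for instance, the second test URL decodes to the corresponding my.tv.sohu.com path:

import base64

path = base64.urlsafe_b64decode('dXMvMjMyNzk5ODg5Lzc4NjkzNDY0LnNodG1s').decode()
print(path)  # us/232799889/78693464.shtml -> http://my.tv.sohu.com/us/232799889/78693464.shtml
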
@@ -15,7 +15,6 @@
    UserNotLive,
    determine_ext,
    format_field,
    get_element_by_id,
    get_first,
    int_or_none,
    join_nonempty,
@@ -50,8 +49,9 @@ def _create_url(user_id, video_id):
        return f'https://www.tiktok.com/@{user_id or "_"}/video/{video_id}'

    def _get_sigi_state(self, webpage, display_id):
        return self._parse_json(get_element_by_id(
            'SIGI_STATE|sigi-persisted-data', webpage, escape_value=False), display_id)
        return self._search_json(
            r'<script[^>]+\bid="(?:SIGI_STATE|sigi-persisted-data)"[^>]*>', webpage,
            'sigi state', display_id, end_pattern=r'</script>')

    def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True,
                       note='Downloading API JSON', errnote='Unable to download API page'):

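A rough stand-alone equivalent of the `_search_json` call above (simplified; the real helper also handles nested braces and error reporting):

import json
import re

html = '<script id="SIGI_STATE" type="application/json">{"a": 1}</script>'  # toy page
mobj = re.search(
    r'<script[^>]+\bid="(?:SIGI_STATE|sigi-persisted-data)"[^>]*>(.+?)</script>', html, re.DOTALL)
print(json.loads(mobj.group(1)))  # {'a': 1}
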
@@ -1,10 +1,14 @@
import urllib.parse

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    extract_attributes,
    int_or_none,
    parse_duration,
    traverse_obj,
    try_get,
    url_or_none,
)


@@ -12,6 +16,36 @@ class TV5MondePlusIE(InfoExtractor):
    IE_DESC = 'TV5MONDE+'
    _VALID_URL = r'https?://(?:www\.)?(?:tv5mondeplus|revoir\.tv5monde)\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
    _TESTS = [{
        # movie
        'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/les-novices',
        'md5': 'c86f60bf8b75436455b1b205f9745955',
        'info_dict': {
            'id': 'ZX0ipMyFQq_6D4BA7b',
            'display_id': 'les-novices',
            'ext': 'mp4',
            'title': 'Les novices',
            'description': 'md5:2e7c33ba3ad48dabfcc2a956b88bde2b',
            'upload_date': '20230821',
            'thumbnail': 'https://revoir.tv5monde.com/uploads/media/video_thumbnail/0738/60/01e952b7ccf36b7c6007ec9131588954ab651de9.jpeg',
            'duration': 5177,
            'episode': 'Les novices',
        },
    }, {
        # series episode
        'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/opj-les-dents-de-la-terre-2',
        'info_dict': {
            'id': 'wJ0eeEPozr_6D4BA7b',
            'display_id': 'opj-les-dents-de-la-terre-2',
            'ext': 'mp4',
            'title': "OPJ - Les dents de la Terre (2)",
            'description': 'md5:288f87fd68d993f814e66e60e5302d9d',
            'upload_date': '20230823',
            'series': 'OPJ',
            'episode': 'Les dents de la Terre (2)',
            'duration': 2877,
            'thumbnail': 'https://dl-revoir.tv5monde.com/images/1a/5753448.jpg'
        },
    }, {
        # movie
        'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/ceux-qui-travaillent',
        'md5': '32fa0cde16a4480d1251502a66856d5f',
@@ -23,6 +57,7 @@ class TV5MondePlusIE(InfoExtractor):
            'description': 'md5:570e8bb688036ace873b2d50d24c026d',
            'upload_date': '20210819',
        },
        'skip': 'no longer available',
    }, {
        # series episode
        'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/vestiaires-caro-actrice',
@@ -39,6 +74,7 @@ class TV5MondePlusIE(InfoExtractor):
        'params': {
            'skip_download': True,
        },
        'skip': 'no longer available',
    }, {
        'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/neuf-jours-en-hiver-neuf-jours-en-hiver',
        'only_matching': True,
@@ -63,20 +99,45 @@ def _real_extract(self, url):
        video_files = self._parse_json(
            vpl_data['data-broadcast'], display_id)
        formats = []
        for video_file in video_files:
            v_url = video_file.get('url')
            if not v_url:
                continue
            video_format = video_file.get('format') or determine_ext(v_url)
            if video_format == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    v_url, display_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            else:
                formats.append({
                    'url': v_url,
                    'format_id': video_format,
                })
        video_id = None

        def process_video_files(v):
            nonlocal video_id
            for video_file in v:
                v_url = video_file.get('url')
                if not v_url:
                    continue
                if video_file.get('type') == 'application/deferred':
                    d_param = urllib.parse.quote(v_url)
                    token = video_file.get('token')
                    if not token:
                        continue
                    deferred_json = self._download_json(
                        f'https://api.tv5monde.com/player/asset/{d_param}/resolve?condenseKS=true', display_id,
                        note='Downloading deferred info', headers={'Authorization': f'Bearer {token}'}, fatal=False)
                    v_url = traverse_obj(deferred_json, (0, 'url', {url_or_none}))
                    if not v_url:
                        continue
                    # data-guid from the webpage isn't stable, use the material id from the json urls
                    video_id = self._search_regex(
                        r'materials/([\da-zA-Z]{10}_[\da-fA-F]{7})/', v_url, 'video id', default=None)
                    process_video_files(deferred_json)

                video_format = video_file.get('format') or determine_ext(v_url)
                if video_format == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        v_url, display_id, 'mp4', 'm3u8_native',
                        m3u8_id='hls', fatal=False))
                elif video_format == 'mpd':
                    formats.extend(self._extract_mpd_formats(
                        v_url, display_id, fatal=False))
                else:
                    formats.append({
                        'url': v_url,
                        'format_id': video_format,
                    })

        process_video_files(video_files)

        metadata = self._parse_json(
            vpl_data['data-metadata'], display_id)
@@ -100,10 +161,11 @@ def _real_extract(self, url):
        if upload_date:
            upload_date = upload_date.replace('_', '')

        video_id = self._search_regex(
            (r'data-guid=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
             r'id_contenu["\']\s:\s*(\d+)'), webpage, 'video id',
            default=display_id)
        if not video_id:
            video_id = self._search_regex(
                (r'data-guid=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
                 r'id_contenu["\']\s:\s*(\d+)'), webpage, 'video id',
                default=display_id)

        return {
            'id': video_id,

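Note how the deferred asset URL above is percent-encoded with `urllib.parse.quote` before being embedded into the resolve endpoint path (hypothetical asset URL for illustration):

import urllib.parse

v_url = 'https://example.com/asset/123?lang=fr'  # hypothetical deferred asset URL
d_param = urllib.parse.quote(v_url)
print(d_param)  # https%3A//example.com/asset/123%3Flang%3Dfr
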
@@ -22,7 +22,7 @@


class TwitCastingIE(InfoExtractor):
    _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/(?:movie|twplayer)/(?P<id>\d+)'
    _VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<uploader_id>[^/?#]+)/(?:movie|twplayer)/(?P<id>\d+)'
    _M3U8_HEADERS = {
        'Origin': 'https://twitcasting.tv',
        'Referer': 'https://twitcasting.tv/',
@@ -231,7 +231,7 @@ def find_dmu(x):


class TwitCastingLiveIE(InfoExtractor):
    _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<id>[^/]+)/?(?:[#?]|$)'
    _VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/?(?:[#?]|$)'
    _TESTS = [{
        'url': 'https://twitcasting.tv/ivetesangalo',
        'only_matching': True,
@@ -265,8 +265,15 @@ def _real_extract(self, url):


class TwitCastingUserIE(InfoExtractor):
    _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<id>[^/]+)/show/?(?:[#?]|$)'
    _VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/(?:show|archive)/?(?:[#?]|$)'
    _TESTS = [{
        'url': 'https://twitcasting.tv/natsuiromatsuri/archive/',
        'info_dict': {
            'id': 'natsuiromatsuri',
            'title': 'natsuiromatsuri - Live History',
        },
        'playlist_mincount': 235,
    }, {
        'url': 'https://twitcasting.tv/noriyukicas/show',
        'only_matching': True,
    }]

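A quick sanity check that the widened `_VALID_URL` above matches both tab spellings:

import re

_VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/(?:show|archive)/?(?:[#?]|$)'
for test_url in ('https://twitcasting.tv/natsuiromatsuri/archive/', 'https://twitcasting.tv/noriyukicas/show'):
    assert re.match(_VALID_URL, test_url)
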
@@ -1,9 +1,10 @@
import functools
import json
import random
import re

from .common import InfoExtractor
from .periscope import PeriscopeBaseIE, PeriscopeIE
from ..compat import functools  # isort: split
from ..compat import (
    compat_parse_qs,
    compat_urllib_parse_unquote,
@@ -147,10 +148,14 @@ def _search_dimensions_in_video_url(a_format, video_url):
    def is_logged_in(self):
        return bool(self._get_cookies(self._API_BASE).get('auth_token'))

    @functools.cached_property
    def _selected_api(self):
        return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]

    def _fetch_guest_token(self, display_id):
        guest_token = traverse_obj(self._download_json(
            f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token', data=b'',
            headers=self._set_base_headers(legacy=display_id and self._configuration_arg('legacy_api'))),
            headers=self._set_base_headers(legacy=display_id and self._selected_api == 'legacy')),
            ('guest_token', {str}))
        if not guest_token:
            raise ExtractorError('Could not retrieve guest token')
@@ -295,7 +300,7 @@ def input_dict(subtask_id, text):
            self.report_login()

    def _call_api(self, path, video_id, query={}, graphql=False):
        headers = self._set_base_headers(legacy=not graphql and self._configuration_arg('legacy_api'))
        headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
        headers.update({
            'x-twitter-auth-type': 'OAuth2Session',
            'x-twitter-client-language': 'en',
@@ -707,6 +712,7 @@ class TwitterIE(TwitterBaseIE):
            'tags': [],
            'age_limit': 0,
        },
        'skip': 'This Tweet is unavailable',
    }, {
        # not available in Periscope
        'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
@@ -721,6 +727,7 @@ class TwitterIE(TwitterBaseIE):
            'view_count': int,
        },
        'add_ie': ['TwitterBroadcast'],
        'skip': 'Broadcast no longer exists',
    }, {
        # unified card
        'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
@@ -773,9 +780,9 @@ class TwitterIE(TwitterBaseIE):
        'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
        'info_dict': {
            'id': '1577719286659006464',
            'title': 'Ultima📛 | #вʟм - Test',
            'title': 'Ultima📛| New Era - Test',
            'description': 'Test https://t.co/Y3KEZD7Dad',
            'uploader': 'Ultima📛 | #вʟм',
            'uploader': 'Ultima📛| New Era',
            'uploader_id': 'UltimaShadowX',
            'uploader_url': 'https://twitter.com/UltimaShadowX',
            'upload_date': '20221005',
@@ -811,7 +818,7 @@ class TwitterIE(TwitterBaseIE):
            'age_limit': 0,
        },
    }, {
        # Adult content, fails if not logged in (GraphQL)
        # Adult content, fails if not logged in
        'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
        'info_dict': {
            'id': '1575199163847000068',
@@ -831,9 +838,10 @@ class TwitterIE(TwitterBaseIE):
            'age_limit': 18,
            'tags': []
        },
        'params': {'skip_download': 'The media could not be played'},
        'skip': 'Requires authentication',
    }, {
        # Playlist result only with auth
        # Playlist result only with graphql API
        'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
        'playlist_mincount': 2,
        'info_dict': {
@@ -898,7 +906,7 @@ class TwitterIE(TwitterBaseIE):
            'uploader_id': 'MoniqueCamarra',
            'live_status': 'was_live',
            'release_timestamp': 1658417414,
            'description': 'md5:4dc8e972f1d8b3c6580376fabb02a3ad',
            'description': 'md5:acce559345fd49f129c20dbcda3f1201',
            'timestamp': 1658407771,
            'release_date': '20220721',
            'upload_date': '20220721',
@@ -1007,10 +1015,10 @@ class TwitterIE(TwitterBaseIE):
            'view_count': int,
            'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
            'age_limit': 0,
            'uploader': 'Mün The Friend Of YWAP',
            'uploader': 'Mün',
            'repost_count': int,
            'upload_date': '20221206',
            'title': 'Mün The Friend Of YWAP - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
            'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
            'comment_count': int,
            'like_count': int,
            'tags': [],
@@ -1019,7 +1027,7 @@ class TwitterIE(TwitterBaseIE):
            'timestamp': 1670306984.0,
        },
    }, {
        # url to retweet id w/ legacy api
        # retweeted_status (private)
        'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
        'info_dict': {
            'id': '1623274794488659969',
@@ -1039,32 +1047,84 @@ class TwitterIE(TwitterBaseIE):
            'like_count': int,
            'repost_count': int,
        },
        'params': {'extractor_args': {'twitter': {'legacy_api': ['']}}},
        'skip': 'Protected tweet',
    }, {
        # orig tweet w/ graphql
        'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
        # retweeted_status
        'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
        'info_dict': {
            'id': '1623274794488659969',
            'display_id': '1623739803874349067',
            'id': '1694928337846538240',
            'ext': 'mp4',
            'title': '@selfisekai@hackerspace.pl 🐀 - RT @Johnnybull3ts: Me after going viral to over 30million people: Whoopsie-daisy',
            'description': 'md5:9258bdbb54793bdc124fe1cd47e96c6a',
            'uploader': '@selfisekai@hackerspace.pl 🐀',
            'uploader_id': 'liberdalau',
            'uploader_url': 'https://twitter.com/liberdalau',
            'display_id': '1695424220702888009',
            'title': 'md5:e8daa9527bc2b947121395494f786d9d',
            'description': 'md5:004f2d37fd58737724ec75bc7e679938',
            'uploader': 'Benny Johnson',
            'uploader_id': 'bennyjohnson',
            'uploader_url': 'https://twitter.com/bennyjohnson',
            'age_limit': 0,
            'tags': [],
            'duration': 8.033,
            'timestamp': 1675964711.0,
            'upload_date': '20230209',
            'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
            'duration': 45.001,
            'timestamp': 1692962814.0,
            'upload_date': '20230825',
            'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
            'like_count': int,
            'view_count': int,
            'repost_count': int,
            'view_count': int,
            'comment_count': int,
        },
        'skip': 'Protected tweet',
    }, {
        # retweeted_status w/ legacy API
        'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
        'info_dict': {
            'id': '1694928337846538240',
            'ext': 'mp4',
            'display_id': '1695424220702888009',
            'title': 'md5:e8daa9527bc2b947121395494f786d9d',
            'description': 'md5:004f2d37fd58737724ec75bc7e679938',
            'uploader': 'Benny Johnson',
            'uploader_id': 'bennyjohnson',
            'uploader_url': 'https://twitter.com/bennyjohnson',
            'age_limit': 0,
            'tags': [],
            'duration': 45.001,
            'timestamp': 1692962814.0,
            'upload_date': '20230825',
            'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
            'like_count': int,
            'repost_count': int,
        },
        'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
    }, {
        # Broadcast embedded in tweet
        'url': 'https://twitter.com/JessicaDobsonWX/status/1693057346933600402',
        'info_dict': {
            'id': '1yNGaNLjEblJj',
            'ext': 'mp4',
            'title': 'Jessica Dobson - WAVE Weather Now - Saturday 8/19/23 Update',
            'uploader': 'Jessica Dobson',
            'uploader_id': '1DZEoDwDovRQa',
            'thumbnail': r're:^https?://.*\.jpg',
            'view_count': int,
        },
        'add_ie': ['TwitterBroadcast'],
    }, {
        # Animated gif and quote tweet video, with syndication API
        'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
        'playlist_mincount': 2,
        'info_dict': {
            'id': '1696256659889565950',
            'title': 'BAKOON - https://t.co/zom968d0a0',
            'description': 'https://t.co/zom968d0a0',
            'tags': [],
            'uploader': 'BAKOON',
            'uploader_id': 'BAKKOOONN',
            'uploader_url': 'https://twitter.com/BAKKOOONN',
            'age_limit': 18,
            'timestamp': 1693254077.0,
            'upload_date': '20230828',
            'like_count': int,
        },
        'params': {'extractor_args': {'twitter': {'api': ['syndication']}}},
        'expected_warnings': ['Not all metadata'],
    }, {
        # onion route
        'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',

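A minimal sketch of how the `api` extractor-arg above is resolved at runtime (the same mechanism the `params` of the tests exercise; `_selected_api` falls back to 'graphql' when the arg is absent):

from yt_dlp import YoutubeDL

ydl = YoutubeDL({'extractor_args': {'twitter': {'api': ['syndication']}}})
ie = ydl.get_info_extractor('Twitter')
print(ie._configuration_arg('api', ['graphql'], ie_key='Twitter')[0])  # syndication
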
@@ -1103,6 +1163,14 @@ class TwitterIE(TwitterBaseIE):
        'only_matching': True,
    }]

    _MEDIA_ID_RE = re.compile(r'_video/(\d+)/')

    @property
    def _GRAPHQL_ENDPOINT(self):
        if self.is_logged_in:
            return 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
        return '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'

    def _graphql_to_legacy(self, data, twid):
        result = traverse_obj(data, (
            'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
@@ -1130,9 +1198,14 @@ def _graphql_to_legacy(self, data, twid):
            'user': ('core', 'user_results', 'result', 'legacy'),
            'card': ('card', 'legacy'),
            'quoted_status': ('quoted_status_result', 'result', 'legacy'),
            'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
        }, expected_type=dict, default={}))

        # extra transformation is needed since result does not match legacy format
        # extra transformations needed since result does not match legacy format
        if status.get('retweeted_status'):
            status['retweeted_status']['user'] = traverse_obj(status, (
                'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict})) or {}

        binding_values = {
            binding_value.get('key'): binding_value.get('value')
            for binding_value in traverse_obj(status, ('card', 'binding_values', ..., {dict}))
@@ -1208,33 +1281,42 @@ def _build_graphql_query(self, media_id):
        }

    def _extract_status(self, twid):
        if self.is_logged_in:
            return self._graphql_to_legacy(
                self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid), twid)
        if self.is_logged_in or self._selected_api == 'graphql':
            status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)

        try:
            if not self._configuration_arg('legacy_api'):
                return self._graphql_to_legacy(
                    self._call_graphql_api('2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId', twid), twid)
            return traverse_obj(self._call_api(f'statuses/show/{twid}.json', twid, {
        elif self._selected_api == 'legacy':
            status = self._call_api(f'statuses/show/{twid}.json', twid, {
                'cards_platform': 'Web-12',
                'include_cards': 1,
                'include_reply_count': 1,
                'include_user_entities': 0,
                'tweet_mode': 'extended',
            }), 'retweeted_status', None)
            })

        except ExtractorError as e:
            if e.expected:
                raise
        elif self._selected_api == 'syndication':
            self.report_warning(
                f'{e.orig_msg}. Falling back to syndication endpoint; some metadata may be missing', twid)
                'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
            status = self._download_json(
                'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
                headers={'User-Agent': 'Googlebot'}, query={
                    'id': twid,
                    # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
                    'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
                })
            if not status:
                raise ExtractorError('Syndication endpoint returned empty JSON response')
            # Transform the result so its structure matches that of legacy/graphql
            media = []
            for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
                detail['id_str'] = traverse_obj(detail, (
                    'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
                media.append(detail)
            status['extended_entities'] = {'media': media}

        status = self._download_json(
            'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
            headers={'User-Agent': 'Googlebot'}, query={'id': twid})
        status['extended_entities'] = {'media': status.get('mediaDetails')}
        return status
        else:
            raise ExtractorError(f'"{self._selected_api}" is not a valid API selection', expected=True)

        return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}

    def _real_extract(self, url):
        twid, selected_index = self._match_valid_url(url).group('id', 'index')
@@ -1266,10 +1348,7 @@ def _real_extract(self, url):
        }

        def extract_from_video_info(media):
            media_id = traverse_obj(media, 'id_str', 'id', (
                'video_info', 'variants', ..., 'url',
                {functools.partial(re.search, r'_video/(\d+)/')}, 1
            ), get_all=False, expected_type=str_or_none) or twid
            media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
            self.write_debug(f'Extracting from video info: {media_id}')

            formats = []
@@ -1503,6 +1582,8 @@ def _real_extract(self, url):
        broadcast = self._call_api(
            'broadcasts/show.json', broadcast_id,
            {'ids': broadcast_id})['broadcasts'][broadcast_id]
        if not broadcast:
            raise ExtractorError('Broadcast no longer exists', expected=True)
        info = self._parse_broadcast_data(broadcast, broadcast_id)
        media_key = broadcast['media_key']
        source = self._call_api(

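The TODO in the syndication branch above asks for the real token; a hedged Python translation of that JS expression (an assumption, not the shipped code, which currently sends random characters; JS toString(36) precision is only approximated here):

import math

def syndication_token(twid):
    # ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
    value = int(twid) / 1e15 * math.pi
    digits = '0123456789abcdefghijklmnopqrstuvwxyz'
    integer, frac = int(value), value - int(value)
    out = ''
    while integer > 0:
        integer, rem = divmod(integer, 36)
        out = digits[rem] + out
    for _ in range(12):  # approximate the fractional base-36 digits JS would print
        frac *= 36
        out += digits[int(frac)]
        frac -= int(frac)
    return out.replace('0', '')  # the JS regex strips zeroes and the dot
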
@@ -38,6 +38,7 @@ class VideaIE(InfoExtractor):
            'title': 'Az őrült kígyász 285 kígyót enged szabadon',
            'thumbnail': r're:^https?://.*',
            'duration': 21,
            'age_limit': 0,
        },
    }, {
        'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
@@ -48,6 +49,7 @@ class VideaIE(InfoExtractor):
            'title': 'Supercars előzés',
            'thumbnail': r're:^https?://.*',
            'duration': 64,
            'age_limit': 0,
        },
    }, {
        'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ',
@@ -58,6 +60,7 @@ class VideaIE(InfoExtractor):
            'title': 'Az őrült kígyász 285 kígyót enged szabadon',
            'thumbnail': r're:^https?://.*',
            'duration': 21,
            'age_limit': 0,
        },
    }, {
        'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
@@ -124,7 +127,7 @@ def _real_extract(self, url):
            query['_t'] = result[:16]

        b64_info, handle = self._download_webpage_handle(
            'http://videa.hu/videaplayer_get_xml.php', video_id, query=query)
            'http://videa.hu/player/xml', video_id, query=query)
        if b64_info.startswith('<?xml'):
            info = self._parse_xml(b64_info, video_id)
        else:

@@ -173,6 +173,7 @@ class WDRPageIE(WDRIE):  # XXX: Do not subclass from concrete IE
        'skip': 'HTTP Error 404: Not Found',
    },
    {
        # FIXME: Asset JSON is directly embedded in webpage
        'url': 'http://www1.wdr.de/mediathek/video/live/index.html',
        'info_dict': {
            'id': 'mdb-2296252',
@@ -221,6 +222,8 @@ class WDRPageIE(WDRIE):  # XXX: Do not subclass from concrete IE
            'id': 'mdb-869971',
            'ext': 'mp4',
            'title': r're:^COSMO Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'alt_title': 'COSMO Livestream',
            'live_status': 'is_live',
            'upload_date': '20160101',
        },
        'params': {
@@ -248,6 +251,16 @@ class WDRPageIE(WDRIE):  # XXX: Do not subclass from concrete IE
        'url': 'https://kinder.wdr.de/tv/die-sendung-mit-dem-elefanten/av/video-folge---astronaut-100.html',
        'only_matching': True,
    },
    {
        'url': 'https://www1.wdr.de/mediathek/video/sendungen/rockpalast/video-baroness---freak-valley-festival--100.html',
        'info_dict': {
            'id': 'mdb-2741028',
            'ext': 'mp4',
            'title': 'Baroness - Freak Valley Festival 2022',
            'alt_title': 'Rockpalast',
            'upload_date': '20220725',
        },
    }
]

    def _real_extract(self, url):
@@ -259,7 +272,7 @@ def _real_extract(self, url):

        # Article with several videos

        # for wdr.de the data-extension is in a tag with the class "mediaLink"
        # for wdr.de the data-extension-ard is in a tag with the class "mediaLink"
        # for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn"
        # for wdrmaus, in a tag with the class "videoButton" (previously a link
        # to the page in a multiline "videoLink"-tag)
@@ -268,7 +281,7 @@ def _real_extract(self, url):
            (?:
                (["\'])(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b.*?\1[^>]+|
                (["\'])videoLink\b.*?\2[\s]*>\n[^\n]*
            )data-extension=(["\'])(?P<data>(?:(?!\3).)+)\3
            )data-extension(?:-ard)?=(["\'])(?P<data>(?:(?!\3).)+)\3
            ''', webpage):
            media_link_obj = self._parse_json(
                mobj.group('data'), display_id, transform_source=js_to_json,
@@ -295,7 +308,7 @@ def _real_extract(self, url):
                compat_urlparse.urljoin(url, mobj.group('href')),
                ie=WDRPageIE.ie_key())
            for mobj in re.finditer(
                r'<a[^>]+\bhref=(["\'])(?P<href>(?:(?!\1).)+)\1[^>]+\bdata-extension=',
                r'<a[^>]+\bhref=(["\'])(?P<href>(?:(?!\1).)+)\1[^>]+\bdata-extension(?:-ard)?=',
                webpage) if re.match(self._PAGE_REGEX, mobj.group('href'))
        ]

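A quick check that the widened attribute regex above matches both the old and the new ARD attribute name (illustrative snippets, not real WDR markup):

import re

for snippet in ('<a class="mediaLink" data-extension=\'{"a": 1}\'>',
                '<a class="mediaLink" data-extension-ard=\'{"a": 1}\'>'):
    assert re.search(r'data-extension(?:-ard)?=', snippet)
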
@@ -1,134 +1,241 @@
from .common import InfoExtractor

import json
import random
import re
import itertools
import urllib.parse

from ..compat import (
    compat_parse_qs,
    compat_str,
)
from .common import InfoExtractor
from ..utils import (
    js_to_json,
    int_or_none,
    make_archive_id,
    mimetype2ext,
    parse_resolution,
    str_or_none,
    strip_jsonp,
    traverse_obj,
    url_or_none,
    urlencode_postdata,
    urljoin,
)


class WeiboIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?weibo\.com/[0-9]+/(?P<id>[a-zA-Z0-9]+)'
    _TEST = {
        'url': 'https://weibo.com/6275294458/Fp6RGfbff?type=comment',
        'info_dict': {
            'id': 'Fp6RGfbff',
            'ext': 'mp4',
            'title': 'You should have servants to massage you,... 来自Hosico_猫 - 微博',
        }
    }
class WeiboBaseIE(InfoExtractor):
    def _update_visitor_cookies(self, video_id):
        visitor_data = self._download_json(
            'https://passport.weibo.com/visitor/genvisitor', video_id,
            note='Generating first-visit guest request',
            transform_source=strip_jsonp,
            data=urlencode_postdata({
                'cb': 'gen_callback',
                'fp': '{"os":"2","browser":"Gecko57,0,0,0","fonts":"undefined","screenInfo":"1440*900*24","plugins":""}',
            }))

    def _real_extract(self, url):
        video_id = self._match_id(url)
        # to get Referer url for genvisitor
        webpage, urlh = self._download_webpage_handle(url, video_id)

        visitor_url = urlh.url

        if 'passport.weibo.com' in visitor_url:
            # first visit
            visitor_data = self._download_json(
                'https://passport.weibo.com/visitor/genvisitor', video_id,
                note='Generating first-visit data',
                transform_source=strip_jsonp,
                headers={'Referer': visitor_url},
                data=urlencode_postdata({
                    'cb': 'gen_callback',
                    'fp': json.dumps({
                        'os': '2',
                        'browser': 'Gecko57,0,0,0',
                        'fonts': 'undefined',
                        'screenInfo': '1440*900*24',
                        'plugins': '',
                    }),
                }))

            tid = visitor_data['data']['tid']
            cnfd = '%03d' % visitor_data['data']['confidence']

            self._download_webpage(
                'https://passport.weibo.com/visitor/visitor', video_id,
                note='Running first-visit callback',
                query={
                    'a': 'incarnate',
                    't': tid,
                    'w': 2,
                    'c': cnfd,
                    'cb': 'cross_domain',
                    'from': 'weibo',
                    '_rand': random.random(),
                })

            webpage = self._download_webpage(
                url, video_id, note='Revisiting webpage')

        title = self._html_extract_title(webpage)

        video_formats = compat_parse_qs(self._search_regex(
            r'video-sources=\\\"(.+?)\"', webpage, 'video_sources'))

        formats = []
        supported_resolutions = (480, 720)
        for res in supported_resolutions:
            vid_urls = video_formats.get(compat_str(res))
            if not vid_urls or not isinstance(vid_urls, list):
                continue

            vid_url = vid_urls[0]
            formats.append({
                'url': vid_url,
                'height': res,
        self._download_webpage(
            'https://passport.weibo.com/visitor/visitor', video_id,
            note='Running first-visit callback to get guest cookies',
            query={
                'a': 'incarnate',
                't': visitor_data['data']['tid'],
                'w': 2,
                'c': '%03d' % visitor_data['data']['confidence'],
                'cb': 'cross_domain',
                'from': 'weibo',
                '_rand': random.random(),
            })

        uploader = self._og_search_property(
            'nick-name', webpage, 'uploader', default=None)
    def _weibo_download_json(self, url, video_id, *args, fatal=True, note='Downloading JSON metadata', **kwargs):
        webpage, urlh = self._download_webpage_handle(url, video_id, *args, fatal=fatal, note=note, **kwargs)
        if urllib.parse.urlparse(urlh.url).netloc == 'passport.weibo.com':
            self._update_visitor_cookies(video_id)
            webpage = self._download_webpage(url, video_id, *args, fatal=fatal, note=note, **kwargs)
        return self._parse_json(webpage, video_id, fatal=fatal)

    def _extract_formats(self, video_info):
        media_info = traverse_obj(video_info, ('page_info', 'media_info'))
        formats = traverse_obj(media_info, (
            'playback_list', lambda _, v: url_or_none(v['play_info']['url']), 'play_info', {
                'url': 'url',
                'format': ('quality_desc', {str}),
                'format_id': ('label', {str}),
                'ext': ('mime', {mimetype2ext}),
                'tbr': ('bitrate', {int_or_none}, {lambda x: x or None}),
                'vcodec': ('video_codecs', {str}),
                'fps': ('fps', {int_or_none}),
                'width': ('width', {int_or_none}),
                'height': ('height', {int_or_none}),
                'filesize': ('size', {int_or_none}),
                'acodec': ('audio_codecs', {str}),
                'asr': ('audio_sample_rate', {int_or_none}),
                'audio_channels': ('audio_channels', {int_or_none}),
            }))
        if not formats:  # fallback, should be barely used
            for url in set(traverse_obj(media_info, (..., {url_or_none}))):
                if 'label=' in url:  # filter out non-video urls
                    format_id, resolution = self._search_regex(
                        r'label=(\w+)&template=(\d+x\d+)', url, 'format info',
                        group=(1, 2), default=(None, None))
                    formats.append({
                        'url': url,
                        'format_id': format_id,
                        **parse_resolution(resolution),
                        **traverse_obj(media_info, (
                            'video_details', lambda _, v: v['label'].startswith(format_id), {
                                'size': ('size', {int_or_none}),
                                'tbr': ('bitrate', {int_or_none}),
                            }
                        ), get_all=False),
                    })
        return formats

    def _parse_video_info(self, video_info, video_id=None):
        return {
            'id': video_id,
            'title': title,
            'uploader': uploader,
            'formats': formats
            'extractor_key': WeiboIE.ie_key(),
            'extractor': WeiboIE.IE_NAME,
            'formats': self._extract_formats(video_info),
            'http_headers': {'Referer': 'https://weibo.com/'},
            '_old_archive_ids': [make_archive_id('WeiboMobile', video_id)],
            **traverse_obj(video_info, {
                'id': (('id', 'id_str', 'mid'), {str_or_none}),
                'display_id': ('mblogid', {str_or_none}),
                'title': ('page_info', 'media_info', ('video_title', 'kol_title', 'name'), {str}, {lambda x: x or None}),
                'description': ('text_raw', {str}),
                'duration': ('page_info', 'media_info', 'duration', {int_or_none}),
                'timestamp': ('page_info', 'media_info', 'video_publish_time', {int_or_none}),
                'thumbnail': ('page_info', 'page_pic', {url_or_none}),
                'uploader': ('user', 'screen_name', {str}),
                'uploader_id': ('user', ('id', 'id_str'), {str_or_none}),
                'uploader_url': ('user', 'profile_url', {lambda x: urljoin('https://weibo.com/', x)}),
                'view_count': ('page_info', 'media_info', 'online_users_number', {int_or_none}),
                'like_count': ('attitudes_count', {int_or_none}),
                'repost_count': ('reposts_count', {int_or_none}),
            }, get_all=False),
            'tags': traverse_obj(video_info, ('topic_struct', ..., 'topic_title', {str})) or None,
        }

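Illustration of the fallback-tuple pattern in _parse_video_info above: with `get_all=False`, the first of 'id', 'id_str' or 'mid' that yields a value wins (toy data):

from yt_dlp.utils import str_or_none, traverse_obj

info = {'id_str': 4910815147462302, 'mid': '4910815147462302'}
print(traverse_obj(info, {'id': (('id', 'id_str', 'mid'), {str_or_none})}, get_all=False))
# {'id': '4910815147462302'}
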
class WeiboMobileIE(InfoExtractor):
    _VALID_URL = r'https?://m\.weibo\.cn/status/(?P<id>[0-9]+)(\?.+)?'
    _TEST = {
        'url': 'https://m.weibo.cn/status/4189191225395228?wm=3333_2001&sourcetype=weixin&featurecode=newtitle&from=singlemessage&isappinstalled=0',
class WeiboIE(WeiboBaseIE):
    _VALID_URL = r'https?://(?:m\.weibo\.cn/status|(?:www\.)?weibo\.com/\d+)/(?P<id>[a-zA-Z0-9]+)'
    _TESTS = [{
        'url': 'https://weibo.com/7827771738/N4xlMvjhI',
        'info_dict': {
            'id': '4910815147462302',
            'ext': 'mp4',
            'display_id': 'N4xlMvjhI',
            'title': '【睡前消息暑假版第一期:拉泰国一把 对中国有好处】',
            'description': 'md5:e2637a7673980d68694ea7c43cf12a5f',
            'duration': 918,
            'timestamp': 1686312819,
            'upload_date': '20230609',
            'thumbnail': r're:https://.*\.jpg',
            'uploader': '睡前视频基地',
            'uploader_id': '7827771738',
            'uploader_url': 'https://weibo.com/u/7827771738',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'tags': ['泰国大选远进党获胜', '睡前消息', '暑期版'],
        },
    }, {
        'url': 'https://m.weibo.cn/status/4189191225395228',
        'info_dict': {
            'id': '4189191225395228',
            'ext': 'mp4',
            'title': '午睡当然是要甜甜蜜蜜的啦',
            'uploader': '柴犬柴犬',
            'display_id': 'FBqgOmDxO',
            'title': '柴犬柴犬的秒拍视频',
            'description': 'md5:80f461ab5cdae6bbdb70efbf5a1db24f',
            'duration': 53,
            'timestamp': 1514264429,
            'upload_date': '20171226',
            'thumbnail': r're:https://.*\.jpg',
            'uploader': '柴犬柴犬',
            'uploader_id': '5926682210',
            'uploader_url': 'https://weibo.com/u/5926682210',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
        }
    }
    }, {
        'url': 'https://weibo.com/0/4224132150961381',
        'note': 'no playback_list example',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        video_id = self._match_id(url)
        # visit the page to get a Referer URL for the genvisitor request
        webpage = self._download_webpage(url, video_id, note='visit the page')

        weibo_info = self._parse_json(self._search_regex(
            r'var\s+\$render_data\s*=\s*\[({.*})\]\[0\]\s*\|\|\s*{};',
            webpage, 'js_code', flags=re.DOTALL),
            video_id, transform_source=js_to_json)
        return self._parse_video_info(self._weibo_download_json(
            f'https://weibo.com/ajax/statuses/show?id={video_id}', video_id))

        status_data = weibo_info.get('status', {})
        page_info = status_data.get('page_info')
        title = status_data['status_title']
        uploader = status_data.get('user', {}).get('screen_name')

        return {
            'id': video_id,
            'title': title,
            'uploader': uploader,
            'url': page_info['media_info']['stream_url']
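The removed mobile path parsed the page-embedded `$render_data` blob. A small illustration of that regex against a fabricated page snippet (the payload here is already valid JSON, so plain `json.loads` stands in for `_parse_json`/`js_to_json`):

import json
import re

webpage = 'var $render_data = [{"status": {"id": "4189191225395228"}}][0] || {};'
render_data = json.loads(re.search(
    r'var\s+\$render_data\s*=\s*\[({.*})\]\[0\]\s*\|\|\s*{};',
    webpage, flags=re.DOTALL).group(1))
assert render_data['status']['id'] == '4189191225395228'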
class WeiboVideoIE(WeiboBaseIE):
    _VALID_URL = r'https?://(?:www\.)?weibo\.com/tv/show/(?P<id>\d+:\d+)'
    _TESTS = [{
        'url': 'https://weibo.com/tv/show/1034:4797699866951785?from=old_pc_videoshow',
        'info_dict': {
            'id': '4797700463137878',
            'ext': 'mp4',
            'display_id': 'LEZDodaiW',
            'title': '呃,稍微了解了一下靡烟miya,感觉这东西也太二了',
            'description': '呃,稍微了解了一下靡烟miya,感觉这东西也太二了 http://t.cn/A6aerGsM ',
            'duration': 76,
            'timestamp': 1659344278,
            'upload_date': '20220801',
            'thumbnail': r're:https://.*\.jpg',
            'uploader': '君子爱财陈平安',
            'uploader_id': '3905382233',
            'uploader_url': 'https://weibo.com/u/3905382233',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
        }
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        post_data = f'data={{"Component_Play_Playinfo":{{"oid":"{video_id}"}}}}'.encode()
        video_info = self._weibo_download_json(
            f'https://weibo.com/tv/api/component?page=%2Ftv%2Fshow%2F{video_id.replace(":", "%3A")}',
            video_id, headers={'Referer': url}, data=post_data)['data']['Component_Play_Playinfo']
        return self.url_result(f'https://weibo.com/0/{video_info["mid"]}', WeiboIE)
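For reference, the `Component_Play_Playinfo` call above is an ordinary form-encoded POST. A hedged sketch of the request shape with the stdlib (nothing is sent here; the cookies and visitor headers that `_weibo_download_json` adds are omitted, and the oid is made up):

import urllib.request

oid = '1034:4797699866951785'  # hypothetical id from a /tv/show/ URL
url = f'https://weibo.com/tv/api/component?page=%2Ftv%2Fshow%2F{oid.replace(":", "%3A")}'
data = f'data={{"Component_Play_Playinfo":{{"oid":"{oid}"}}}}'.encode()
req = urllib.request.Request(url, data=data, headers={'Referer': f'https://weibo.com/tv/show/{oid}'})
# the JSON response nests the payload under ['data']['Component_Play_Playinfo'],
# whose 'mid' field feeds the url_result() hand-off to WeiboIE above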
class WeiboUserIE(WeiboBaseIE):
    _VALID_URL = r'https?://(?:www\.)?weibo\.com/u/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://weibo.com/u/2066652961?tabtype=video',
        'info_dict': {
            'id': '2066652961',
            'title': '萧影殿下的视频',
            'description': '萧影殿下的全部视频',
            'uploader': '萧影殿下',
        },
        'playlist_mincount': 195,
    }]

    def _fetch_page(self, uid, cursor=0, page=1):
        return self._weibo_download_json(
            'https://weibo.com/ajax/profile/getWaterFallContent',
            uid, note=f'Downloading videos page {page}',
            query={'uid': uid, 'cursor': cursor})['data']

    def _entries(self, uid, first_page):
        cursor = 0
        for page in itertools.count(1):
            response = first_page if page == 1 else self._fetch_page(uid, cursor, page)
            for video_info in traverse_obj(response, ('list', ..., {dict})):
                yield self._parse_video_info(video_info)
            cursor = response.get('next_cursor')
            if (int_or_none(cursor) or -1) < 0:
                break

    def _real_extract(self, url):
        uid = self._match_id(url)
        first_page = self._fetch_page(uid)
        uploader = traverse_obj(first_page, ('list', ..., 'user', 'screen_name', {str}), get_all=False)
        metainfo = {
            'title': f'{uploader}的视频',
            'description': f'{uploader}的全部视频',
            'uploader': uploader,
        } if uploader else {}

        return self.playlist_result(self._entries(uid, first_page), uid, **metainfo)
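The `_entries`/`_fetch_page` pair above is plain cursor pagination. A generic, self-contained sketch of the same loop shape (the `fetch_page` callable and response layout are stand-ins for the Weibo endpoint):

import itertools

def paginate(fetch_page):
    # fetch_page(cursor, page) -> {'list': [...], 'next_cursor': int};
    # a missing or negative next_cursor ends the iteration, as in _entries()
    cursor = 0
    for page in itertools.count(1):
        response = fetch_page(cursor, page)
        yield from response.get('list') or []
        cursor = response.get('next_cursor')
        if not isinstance(cursor, int) or cursor < 0:
            break

pages = {0: {'list': [1, 2], 'next_cursor': 7}, 7: {'list': [3], 'next_cursor': -1}}
assert list(paginate(lambda cursor, page: pages[cursor])) == [1, 2, 3]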
@@ -9,6 +9,7 @@
    traverse_obj,
    try_call,
    unescapeHTML,
    url_basename,
    url_or_none,
)
@@ -45,12 +46,14 @@ class ZaikoIE(ZaikoBaseIE):
            'uploader_id': '454',
            'uploader': 'ZAIKO ZERO',
            'release_timestamp': 1583809200,
            'thumbnail': r're:https://[a-z0-9]+.cloudfront.net/[a-z0-9_]+/[a-z0-9_]+',
            'thumbnail': r're:^https://[\w.-]+/\w+/\w+',
            'thumbnails': 'maxcount:2',
            'release_date': '20200310',
            'categories': ['Tech House'],
            'live_status': 'was_live',
        },
        'params': {'skip_download': 'm3u8'},
        'skip': 'Your account does not have tickets to this event',
    }]

    def _real_extract(self, url):
@@ -83,6 +86,12 @@ def _real_extract(self, url):
        if not formats:
            self.raise_no_formats(msg, expected=expected)

        thumbnail_urls = [
            traverse_obj(player_meta, ('initial_event_info', 'poster_url')),
            self._og_search_thumbnail(self._download_webpage(
                f'https://zaiko.io/event/{video_id}', video_id, 'Downloading event page', fatal=False) or ''),
        ]

        return {
            'id': video_id,
            'formats': formats,

@@ -96,8 +105,8 @@ def _real_extract(self, url):
            }),
            **traverse_obj(player_meta, ('initial_event_info', {
                'alt_title': ('title', {str}),
                'thumbnail': ('poster_url', {url_or_none}),
            })),
            'thumbnails': [{'url': url, 'id': url_basename(url)} for url in thumbnail_urls if url_or_none(url)]
        }
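The new `thumbnails` list above derives an `id` from each URL's last path segment and silently drops failed lookups. A small sketch with made-up URLs:

from yt_dlp.utils import url_basename, url_or_none

thumbnail_urls = [
    'https://cdn.example.com/posters/abc123',  # hypothetical poster_url
    None,                                      # og:image lookup failed (fatal=False)
]
thumbnails = [{'url': url, 'id': url_basename(url)} for url in thumbnail_urls if url_or_none(url)]
assert thumbnails == [{'url': 'https://cdn.example.com/posters/abc123', 'id': 'abc123'}]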
@@ -127,6 +127,7 @@ def _real_extract(self, url):
        return {
            'id': video_id,
            'title': str_or_none(traverse_obj(data, ('meet', 'topic'))),
            'duration': int_or_none(data.get('duration')),
            'subtitles': subtitles,
            'formats': formats,
            'http_headers': {
@@ -2,6 +2,7 @@

import contextlib
import functools
import socket
import ssl
import sys
import typing
@@ -206,3 +207,59 @@ def wrapper(self, *args, **kwargs):
            e.handler = self
            raise
    return wrapper


def _socket_connect(ip_addr, timeout, source_address):
    af, socktype, proto, canonname, sa = ip_addr
    sock = socket.socket(af, socktype, proto)
    try:
        if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
            sock.settimeout(timeout)
        if source_address:
            sock.bind(source_address)
        sock.connect(sa)
        return sock
    except socket.error:
        sock.close()
        raise


def create_connection(
    address,
    timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
    source_address=None,
    *,
    _create_socket_func=_socket_connect
):
    # Work around socket.create_connection() which tries all addresses from getaddrinfo() including IPv6.
    # This filters the addresses based on the given source_address.
    # Based on: https://github.com/python/cpython/blob/main/Lib/socket.py#L810
    host, port = address
    ip_addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
    if not ip_addrs:
        raise socket.error('getaddrinfo returns an empty list')
    if source_address is not None:
        af = socket.AF_INET if ':' not in source_address[0] else socket.AF_INET6
        ip_addrs = [addr for addr in ip_addrs if addr[0] == af]
        if not ip_addrs:
            raise OSError(
                f'No remote IPv{4 if af == socket.AF_INET else 6} addresses available for connect. '
                f'Can\'t use "{source_address[0]}" as source address')

    err = None
    for ip_addr in ip_addrs:
        try:
            sock = _create_socket_func(ip_addr, timeout, source_address)
            # Explicitly break __traceback__ reference cycle
            # https://bugs.python.org/issue36820
            err = None
            return sock
        except socket.error as e:
            err = e

    try:
        raise err
    finally:
        # Explicitly break __traceback__ reference cycle
        # https://bugs.python.org/issue36820
        err = None
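`create_connection()` separates address selection from socket construction through the keyword-only `_create_socket_func` hook. A hedged sketch of a drop-in replacement (the logging wrapper is invented here; only the hook's contract comes from the code above):

import socket

def logging_socket_connect(ip_addr, timeout, source_address):
    # Same contract as _socket_connect(): take one getaddrinfo() tuple,
    # return a connected socket, or raise socket.error to try the next address
    af, socktype, proto, canonname, sa = ip_addr
    print('trying', sa)
    sock = socket.socket(af, socktype, proto)
    try:
        if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
            sock.settimeout(timeout)
        sock.connect(sa)
        return sock
    except socket.error:
        sock.close()
        raise

# sock = create_connection(('example.com', 80), timeout=5,
#                          _create_socket_func=logging_socket_connect)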
@@ -23,6 +23,7 @@
from ._helper import (
    InstanceStoreMixin,
    add_accept_encoding_header,
    create_connection,
    get_redirect_method,
    make_socks_proxy_opts,
    select_proxy,
@@ -54,44 +55,10 @@
def _create_http_connection(http_class, source_address, *args, **kwargs):
    hc = http_class(*args, **kwargs)

    if hasattr(hc, '_create_connection'):
        hc._create_connection = create_connection

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise OSError(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except OSError as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise OSError('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        hc.source_address = (source_address, 0)

    return hc
@@ -220,13 +187,28 @@ def make_socks_conn_class(base_class, socks_proxy):
    proxy_args = make_socks_proxy_opts(socks_proxy)

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(**proxy_args)
            if type(self.timeout) in (int, float):  # noqa: E721
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))
        _create_connection = create_connection

        def connect(self):
            def sock_socket_connect(ip_addr, timeout, source_address):
                af, socktype, proto, canonname, sa = ip_addr
                sock = sockssocket(af, socktype, proto)
                try:
                    connect_proxy_args = proxy_args.copy()
                    connect_proxy_args.update({'addr': sa[0], 'port': sa[1]})
                    sock.setproxy(**connect_proxy_args)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:  # noqa: E721
                        sock.settimeout(timeout)
                    if source_address:
                        sock.bind(source_address)
                    sock.connect((self.host, self.port))
                    return sock
                except socket.error:
                    sock.close()
                    raise
            self.sock = create_connection(
                (proxy_args['addr'], proxy_args['port']), timeout=self.timeout,
                source_address=self.source_address, _create_socket_func=sock_socket_connect)
            if isinstance(self, http.client.HTTPSConnection):
                self.sock = self._context.wrap_socket(self.sock, server_hostname=self.host)
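The final `wrap_socket` call above layers TLS on the already-established proxy tunnel, with SNI naming the target host rather than the proxy. The same two-step shape with the stdlib alone (this connects to a real host if run):

import socket
import ssl

ctx = ssl.create_default_context()
raw = socket.create_connection(('example.com', 443), timeout=10)   # step 1: TCP
tls = ctx.wrap_socket(raw, server_hostname='example.com')          # step 2: TLS with SNI
print(tls.version())
tls.close()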
@@ -429,7 +411,7 @@ def _send(self, request):
        except urllib.error.HTTPError as e:
            if isinstance(e.fp, (http.client.HTTPResponse, urllib.response.addinfourl)):
                # Prevent file object from being closed when urllib.error.HTTPError is destroyed.
                e._closer.file = None
                e._closer.close_called = True
                raise HTTPError(UrllibResponseAdapter(e.fp), redirect_loop='redirect error' in str(e)) from e
            raise  # unexpected
        except urllib.error.URLError as e:
@@ -115,7 +115,7 @@ def __init__(self, http_error: HTTPError):
            hdrs=http_error.response.headers,
            fp=http_error.response
        )
        self._closer.file = None  # Disable auto close
        self._closer.close_called = True  # Disable auto close
        self._http_error = http_error
        HTTPError.__init__(self, http_error.response, redirect_loop=http_error.redirect_loop)
@@ -134,26 +134,31 @@ def _check_response_version(self, expected_version, got_version):
        self.close()
        raise InvalidVersionError(expected_version, got_version)

    def _resolve_address(self, destaddr, default, use_remote_dns):
        try:
            return socket.inet_aton(destaddr)
        except OSError:
            if use_remote_dns and self._proxy.remote_dns:
                return default
            else:
                return socket.inet_aton(socket.gethostbyname(destaddr))
    def _resolve_address(self, destaddr, default, use_remote_dns, family=None):
        for f in (family,) if family else (socket.AF_INET, socket.AF_INET6):
            try:
                return f, socket.inet_pton(f, destaddr)
            except OSError:
                continue

        if use_remote_dns and self._proxy.remote_dns:
            return 0, default
        else:
            res = socket.getaddrinfo(destaddr, None, family=family or 0)
            f, _, _, _, ipaddr = res[0]
            return f, socket.inet_pton(f, ipaddr[0])
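The rewritten `_resolve_address` replaces the IPv4-only `inet_aton` with an `inet_pton` probe over both families. A standalone sketch of that probe:

import socket

def address_family(destaddr):
    # Mirrors the loop in _resolve_address(): try to parse the string as an
    # IPv4 then an IPv6 literal; OSError means "not that family"
    for family in (socket.AF_INET, socket.AF_INET6):
        try:
            return family, socket.inet_pton(family, destaddr)
        except OSError:
            continue
    return None, None  # a hostname, not an IP literal

assert address_family('127.0.0.1')[0] == socket.AF_INET
assert address_family('::1')[0] == socket.AF_INET6
assert address_family('example.com') == (None, None)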
    def _setup_socks4(self, address, is_4a=False):
        destaddr, port = address

        ipaddr = self._resolve_address(destaddr, SOCKS4_DEFAULT_DSTIP, use_remote_dns=is_4a)
        _, ipaddr = self._resolve_address(destaddr, SOCKS4_DEFAULT_DSTIP, use_remote_dns=is_4a, family=socket.AF_INET)

        packet = struct.pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr

        username = (self._proxy.username or '').encode()
        packet += username + b'\x00'

        if is_4a and self._proxy.remote_dns:
        if is_4a and self._proxy.remote_dns and ipaddr == SOCKS4_DEFAULT_DSTIP:
            packet += destaddr.encode() + b'\x00'

        self.sendall(packet)
@@ -210,7 +215,7 @@ def _socks5_auth(self):
    def _setup_socks5(self, address):
        destaddr, port = address

        ipaddr = self._resolve_address(destaddr, None, use_remote_dns=True)
        family, ipaddr = self._resolve_address(destaddr, None, use_remote_dns=True)

        self._socks5_auth()
@@ -220,8 +225,10 @@ def _setup_socks5(self, address):
            destaddr = destaddr.encode()
            packet += struct.pack('!B', Socks5AddressType.ATYP_DOMAINNAME)
            packet += self._len_and_data(destaddr)
        else:
        elif family == socket.AF_INET:
            packet += struct.pack('!B', Socks5AddressType.ATYP_IPV4) + ipaddr
        elif family == socket.AF_INET6:
            packet += struct.pack('!B', Socks5AddressType.ATYP_IPV6) + ipaddr
        packet += struct.pack('!H', port)

        self.sendall(packet)
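The branch above picks the SOCKS5 address type from the family returned by the `inet_pton` probe. A hedged standalone sketch of the same packet fragment, using the literal ATYP values from RFC 1928 in place of the `Socks5AddressType` enum:

import socket
import struct

ATYP_IPV4, ATYP_DOMAINNAME, ATYP_IPV6 = 0x01, 0x03, 0x04  # RFC 1928

def socks5_address(destaddr):
    # IP literals are sent packed with the matching ATYP; anything else is
    # sent as a length-prefixed domain name for the proxy to resolve
    for family in (socket.AF_INET, socket.AF_INET6):
        try:
            ipaddr = socket.inet_pton(family, destaddr)
        except OSError:
            continue
        atyp = ATYP_IPV4 if family == socket.AF_INET else ATYP_IPV6
        return struct.pack('!B', atyp) + ipaddr
    encoded = destaddr.encode()
    return struct.pack('!BB', ATYP_DOMAINNAME, len(encoded)) + encoded

assert socks5_address('127.0.0.1') == b'\x01\x7f\x00\x00\x01'
assert socks5_address('example.com')[:2] == b'\x03\x0b'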
@@ -669,6 +669,7 @@ def replace_insane(char):

def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    # XXX: this handles drive relative paths (c:sth) incorrectly
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)

@@ -687,7 +688,10 @@ def sanitize_path(s, force=False):
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s and s[0] == os.path.sep:
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)
    # TODO: Fix behavioral differences <3.12
    # The workaround using `normpath` only superficially passes tests
    # Ref: https://github.com/python/cpython/pull/100351
    return os.path.normpath(os.path.join(*sanitized_path))


def sanitize_url(url, *, scheme='http'):
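The added `normpath` collapses the `.` and `..` segments that `os.path.join` alone leaves in place (per the TODO above, this is only an approximation of the Python 3.12 behavior):

import os.path

joined = os.path.join('foo', '.', 'bar', '..', 'baz')
print(joined)                    # foo/./bar/../baz (foo\.\bar\..\baz on Windows)
print(os.path.normpath(joined))  # foo/baz (foo\baz on Windows)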
@@ -1256,7 +1260,7 @@ def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    if precision == 'auto':
        auto_precision = True
        precision = 'microsecond'
    today = datetime_round(datetime.datetime.utcnow(), precision)
    today = datetime_round(datetime.datetime.now(datetime.timezone.utc), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
@@ -1319,8 +1323,8 @@ def datetime_round(dt, precision='day'):
        'second': 1,
    }
    roundto = lambda x, n: ((x + n / 2) // n) * n
    timestamp = calendar.timegm(dt.timetuple())
    return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
    timestamp = roundto(calendar.timegm(dt.timetuple()), unit_seconds[precision])
    return datetime.datetime.fromtimestamp(timestamp, datetime.timezone.utc)


def hyphenate_date(date_str):
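The patched `datetime_round` now rounds the epoch timestamp first and then rebuilds a timezone-aware UTC datetime, avoiding the `utcfromtimestamp` deprecated in Python 3.12. A worked example of the same arithmetic at hour precision:

import calendar
import datetime

def round_to_hour(dt):
    # round the epoch timestamp to the nearest hour, then rebuild an aware UTC datetime
    roundto = lambda x, n: ((x + n / 2) // n) * n
    timestamp = roundto(calendar.timegm(dt.timetuple()), 3600)
    return datetime.datetime.fromtimestamp(timestamp, datetime.timezone.utc)

dt = datetime.datetime(2023, 8, 1, 12, 40, tzinfo=datetime.timezone.utc)
assert round_to_hour(dt) == datetime.datetime(2023, 8, 1, 13, tzinfo=datetime.timezone.utc)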
@@ -2847,6 +2851,7 @@ def mimetype2ext(mt, default=NO_DEFAULT):
        'quicktime': 'mov',
        'webm': 'webm',
        'vp9': 'vp9',
        'video/ogg': 'ogv',
        'x-flv': 'flv',
        'x-m4v': 'm4v',
        'x-matroska': 'mkv',