mirror of https://github.com/yt-dlp/yt-dlp.git
Merge branch 'master' into murrtube
This commit is contained in:
commit
7b15ee0652
|
@ -164,7 +164,7 @@ jobs:
|
|||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: build-${{ github.job }}
|
||||
name: build-bin-${{ github.job }}
|
||||
path: |
|
||||
yt-dlp
|
||||
yt-dlp.tar.gz
|
||||
|
@ -227,7 +227,7 @@ jobs:
|
|||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: build-linux_${{ matrix.architecture }}
|
||||
name: build-bin-linux_${{ matrix.architecture }}
|
||||
path: | # run-on-arch-action designates armv7l as armv7
|
||||
repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}
|
||||
compression-level: 0
|
||||
|
@ -271,7 +271,7 @@ jobs:
|
|||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: build-${{ github.job }}
|
||||
name: build-bin-${{ github.job }}
|
||||
path: |
|
||||
dist/yt-dlp_macos
|
||||
dist/yt-dlp_macos.zip
|
||||
|
@ -324,7 +324,7 @@ jobs:
|
|||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: build-${{ github.job }}
|
||||
name: build-bin-${{ github.job }}
|
||||
path: |
|
||||
dist/yt-dlp_macos_legacy
|
||||
compression-level: 0
|
||||
|
@ -373,7 +373,7 @@ jobs:
|
|||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: build-${{ github.job }}
|
||||
name: build-bin-${{ github.job }}
|
||||
path: |
|
||||
dist/yt-dlp.exe
|
||||
dist/yt-dlp_min.exe
|
||||
|
@ -421,7 +421,7 @@ jobs:
|
|||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: build-${{ github.job }}
|
||||
name: build-bin-${{ github.job }}
|
||||
path: |
|
||||
dist/yt-dlp_x86.exe
|
||||
compression-level: 0
|
||||
|
@ -441,7 +441,7 @@ jobs:
|
|||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: artifact
|
||||
pattern: build-*
|
||||
pattern: build-bin-*
|
||||
merge-multiple: true
|
||||
|
||||
- name: Make SHA2-SUMS files
|
||||
|
@ -484,3 +484,4 @@ jobs:
|
|||
_update_spec
|
||||
SHA*SUMS*
|
||||
compression-level: 0
|
||||
overwrite: true
|
||||
|
|
37
README.md
37
README.md
|
@ -167,8 +167,8 @@ For ease of use, a few more compat options are available:
|
|||
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx`
|
||||
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx`
|
||||
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
|
||||
* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress`
|
||||
* `--compat-options 2023`: Same as `--compat-options prefer-legacy-http-handler,manifest-filesize-approx`. Use this to enable all future compat options
|
||||
* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`
|
||||
* `--compat-options 2023`: Currently does nothing. Use this to enable all future compat options
|
||||
|
||||
|
||||
# INSTALLATION
|
||||
|
@ -218,7 +218,7 @@ Example usage:
|
|||
yt-dlp --update-to nightly
|
||||
|
||||
# To install nightly with pip:
|
||||
python -m pip install -U --pre yt-dlp
|
||||
python -m pip install -U --pre yt-dlp[default]
|
||||
```
|
||||
|
||||
<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
|
||||
|
@ -1310,8 +1310,11 @@ The available fields are:
|
|||
- `description` (string): The description of the video
|
||||
- `display_id` (string): An alternative identifier for the video
|
||||
- `uploader` (string): Full name of the video uploader
|
||||
- `uploader_id` (string): Nickname or id of the video uploader
|
||||
- `uploader_url` (string): URL to the video uploader's profile
|
||||
- `license` (string): License name the video is licensed under
|
||||
- `creator` (string): The creator of the video
|
||||
- `creators` (list): The creators of the video
|
||||
- `creator` (string): The creators of the video; comma-separated
|
||||
- `timestamp` (numeric): UNIX timestamp of the moment the video became available
|
||||
- `upload_date` (string): Video upload date in UTC (YYYYMMDD)
|
||||
- `release_timestamp` (numeric): UNIX timestamp of the moment the video was released
|
||||
|
@ -1319,9 +1322,9 @@ The available fields are:
|
|||
- `release_year` (numeric): Year (YYYY) when the video or album was released
|
||||
- `modified_timestamp` (numeric): UNIX timestamp of the moment the video was last modified
|
||||
- `modified_date` (string): The date (YYYYMMDD) when the video was last modified in UTC
|
||||
- `uploader_id` (string): Nickname or id of the video uploader
|
||||
- `channel` (string): Full name of the channel the video is uploaded on
|
||||
- `channel_id` (string): Id of the channel
|
||||
- `channel_url` (string): URL of the channel
|
||||
- `channel_follower_count` (numeric): Number of followers of the channel
|
||||
- `channel_is_verified` (boolean): Whether the channel is verified on the platform
|
||||
- `location` (string): Physical location where the video was filmed
|
||||
|
@ -1361,7 +1364,10 @@ The available fields are:
|
|||
- `webpage_url_basename` (string): The basename of the webpage URL
|
||||
- `webpage_url_domain` (string): The domain of the webpage URL
|
||||
- `original_url` (string): The URL given by the user (or same as `webpage_url` for playlist entries)
|
||||
|
||||
- `categories` (list): List of categories the video belongs to
|
||||
- `tags` (list): List of tags assigned to the video
|
||||
- `cast` (list): List of cast members
|
||||
|
||||
All the fields in [Filtering Formats](#filtering-formats) can also be used
|
||||
|
||||
Available for the video that belongs to some logical chapter or section:
|
||||
|
@ -1373,6 +1379,7 @@ Available for the video that belongs to some logical chapter or section:
|
|||
Available for the video that is an episode of some series or programme:
|
||||
|
||||
- `series` (string): Title of the series or programme the video episode belongs to
|
||||
- `series_id` (string): Id of the series or programme the video episode belongs to
|
||||
- `season` (string): Title of the season the video episode belongs to
|
||||
- `season_number` (numeric): Number of the season the video episode belongs to
|
||||
- `season_id` (string): Id of the season the video episode belongs to
|
||||
|
@ -1385,11 +1392,16 @@ Available for the media that is a track or a part of a music album:
|
|||
- `track` (string): Title of the track
|
||||
- `track_number` (numeric): Number of the track within an album or a disc
|
||||
- `track_id` (string): Id of the track
|
||||
- `artist` (string): Artist(s) of the track
|
||||
- `genre` (string): Genre(s) of the track
|
||||
- `artists` (list): Artist(s) of the track
|
||||
- `artist` (string): Artist(s) of the track; comma-separated
|
||||
- `genres` (list): Genre(s) of the track
|
||||
- `genre` (string): Genre(s) of the track; comma-separated
|
||||
- `composers` (list): Composer(s) of the piece
|
||||
- `composer` (string): Composer(s) of the piece; comma-separated
|
||||
- `album` (string): Title of the album the track belongs to
|
||||
- `album_type` (string): Type of the album
|
||||
- `album_artist` (string): List of all artists appeared on the album
|
||||
- `album_artists` (list): All artists that appeared on the album
|
||||
- `album_artist` (string): All artists that appeared on the album; comma-separated
|
||||
- `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
|
||||
|
||||
Available only when using `--download-sections` and for `chapter:` prefix when using `--split-chapters` for videos with internal chapters:
|
||||
|
@ -1767,10 +1779,11 @@ Metadata fields | From
|
|||
`description`, `synopsis` | `description`
|
||||
`purl`, `comment` | `webpage_url`
|
||||
`track` | `track_number`
|
||||
`artist` | `artist`, `creator`, `uploader` or `uploader_id`
|
||||
`genre` | `genre`
|
||||
`artist` | `artist`, `artists`, `creator`, `creators`, `uploader` or `uploader_id`
|
||||
`composer` | `composer` or `composers`
|
||||
`genre` | `genre` or `genres`
|
||||
`album` | `album`
|
||||
`album_artist` | `album_artist`
|
||||
`album_artist` | `album_artist` or `album_artists`
|
||||
`disc` | `disc_number`
|
||||
`show` | `series`
|
||||
`season_number` | `season_number`
|
||||
|
|
|
@ -19,7 +19,7 @@ def parse_args():
|
|||
parser.add_argument(
|
||||
'input', nargs='?', metavar='TOMLFILE', default='pyproject.toml', help='Input file (default: %(default)s)')
|
||||
parser.add_argument(
|
||||
'-e', '--exclude', metavar='REQUIREMENT', action='append', help='Exclude a required dependency')
|
||||
'-e', '--exclude', metavar='DEPENDENCY', action='append', help='Exclude a dependency')
|
||||
parser.add_argument(
|
||||
'-i', '--include', metavar='GROUP', action='append', help='Include an optional dependency group')
|
||||
parser.add_argument(
|
||||
|
@ -33,21 +33,28 @@ def parse_args():
|
|||
|
||||
def main():
|
||||
args = parse_args()
|
||||
toml_data = parse_toml(read_file(args.input))
|
||||
deps = toml_data['project']['dependencies']
|
||||
targets = deps.copy() if not args.only_optional else []
|
||||
project_table = parse_toml(read_file(args.input))['project']
|
||||
optional_groups = project_table['optional-dependencies']
|
||||
excludes = args.exclude or []
|
||||
|
||||
for exclude in args.exclude or []:
|
||||
for dep in deps:
|
||||
simplified_dep = re.match(r'[\w-]+', dep)[0]
|
||||
if dep in targets and (exclude.lower() == simplified_dep.lower() or exclude == dep):
|
||||
targets.remove(dep)
|
||||
deps = []
|
||||
if not args.only_optional: # `-o` should exclude 'dependencies' and the 'default' group
|
||||
deps.extend(project_table['dependencies'])
|
||||
if 'default' not in excludes: # `--exclude default` should exclude entire 'default' group
|
||||
deps.extend(optional_groups['default'])
|
||||
|
||||
optional_deps = toml_data['project']['optional-dependencies']
|
||||
for include in args.include or []:
|
||||
group = optional_deps.get(include)
|
||||
if group:
|
||||
targets.extend(group)
|
||||
def name(dependency):
    """Return the lowercased package name that starts a requirement string.

    Only the leading run of word characters and hyphens is kept, so version
    specifiers, extras and environment markers are ignored
    (``'Requests>=2.31'`` -> ``'requests'``).

    Surrounding whitespace is ignored. If no name can be recognised (for
    example an empty string), the stripped input itself is returned
    lowercased instead of raising ``TypeError`` on a failed match.
    """
    requirement = dependency.strip()
    match = re.match(r'[\w-]+', requirement)
    # A failed match must not crash the script; an unparsable key simply matches nothing later on
    return (match[0] if match else requirement).lower()
|
||||
|
||||
target_map = {name(dep): dep for dep in deps}
|
||||
|
||||
for include in filter(None, map(optional_groups.get, args.include or [])):
|
||||
target_map.update(zip(map(name, include), include))
|
||||
|
||||
for exclude in map(name, excludes):
|
||||
target_map.pop(exclude, None)
|
||||
|
||||
targets = list(target_map.values())
|
||||
|
||||
if args.print:
|
||||
for target in targets:
|
||||
|
|
|
@ -51,6 +51,7 @@ dependencies = [
|
|||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
default = []
|
||||
secretstorage = [
|
||||
"cffi",
|
||||
"secretstorage",
|
||||
|
|
|
@ -223,6 +223,10 @@ def sanitize_got_info_dict(got_dict):
|
|||
if test_info_dict.get('display_id') == test_info_dict.get('id'):
|
||||
test_info_dict.pop('display_id')
|
||||
|
||||
# Remove deprecated fields
|
||||
for old in YoutubeDL._deprecated_multivalue_fields.keys():
|
||||
test_info_dict.pop(old, None)
|
||||
|
||||
# release_year may be generated from release_date
|
||||
if try_call(lambda: test_info_dict['release_year'] == int(test_info_dict['release_date'][:4])):
|
||||
test_info_dict.pop('release_year')
|
||||
|
|
|
@ -941,7 +941,7 @@ class TestYoutubeDL(unittest.TestCase):
|
|||
def get_videos(filter_=None):
|
||||
ydl = YDL({'match_filter': filter_, 'simulate': True})
|
||||
for v in videos:
|
||||
ydl.process_ie_result(v, download=True)
|
||||
ydl.process_ie_result(v.copy(), download=True)
|
||||
return [v['id'] for v in ydl.downloaded_info_dicts]
|
||||
|
||||
res = get_videos()
|
||||
|
|
|
@ -192,8 +192,8 @@ class TestWebsSocketRequestHandlerConformance:
|
|||
|
||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
||||
@pytest.mark.parametrize('params,extensions', [
|
||||
({'timeout': 0.00001}, {}),
|
||||
({}, {'timeout': 0.00001}),
|
||||
({'timeout': sys.float_info.min}, {}),
|
||||
({}, {'timeout': sys.float_info.min}),
|
||||
])
|
||||
def test_timeout(self, handler, params, extensions):
|
||||
with handler(**params) as rh:
|
||||
|
|
|
@ -580,6 +580,13 @@ class YoutubeDL:
|
|||
'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
|
||||
'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
|
||||
}
|
||||
# Maps each deprecated single-string metadata field to the list-valued field
# that replaces it (old name -> new name). Elsewhere in this file the two are
# kept in sync: when only the old string is present it is split on ', ' to fill
# the new list, and when only the new list is present it is joined with ', '
# to fill the old string.
_deprecated_multivalue_fields = {
    'album_artist': 'album_artists',
    'artist': 'artists',
    'composer': 'composers',
    'creator': 'creators',
    'genre': 'genres',
}
|
||||
_format_selection_exts = {
|
||||
'audio': set(MEDIA_EXTENSIONS.common_audio),
|
||||
'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
|
||||
|
@ -683,7 +690,6 @@ class YoutubeDL:
|
|||
self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
|
||||
self._load_cookies(self.params['http_headers'].get('Cookie')) # compat
|
||||
self.params['http_headers'].pop('Cookie', None)
|
||||
self._request_director = self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
|
||||
|
||||
if auto_init and auto_init != 'no_verbose_header':
|
||||
self.print_debug_header()
|
||||
|
@ -957,6 +963,7 @@ class YoutubeDL:
|
|||
def close(self):
    """Save cookies and release the request director, if one was created.

    ``_request_director`` is a lazily built cached property, so reading it
    unconditionally here would construct a director only to close it again.
    It is therefore closed and dropped only when it already exists in the
    instance ``__dict__``. Calling ``close()`` repeatedly is safe.
    """
    self.save_cookies()
    # Only tear down a director that was actually built
    if '_request_director' in self.__dict__:
        self._request_director.close()
        # Drop the cached value so a later access builds a fresh director
        del self._request_director
|
||||
|
||||
def trouble(self, message=None, tb=None, is_error=True):
|
||||
"""Determine action to take when a download problem appears.
|
||||
|
@ -2640,6 +2647,14 @@ class YoutubeDL:
|
|||
if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
|
||||
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
|
||||
|
||||
for old_key, new_key in self._deprecated_multivalue_fields.items():
|
||||
if new_key in info_dict and old_key in info_dict:
|
||||
self.deprecation_warning(f'Do not return {old_key!r} when {new_key!r} is present')
|
||||
elif old_value := info_dict.get(old_key):
|
||||
info_dict[new_key] = old_value.split(', ')
|
||||
elif new_value := info_dict.get(new_key):
|
||||
info_dict[old_key] = ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in new_value)
|
||||
|
||||
def _raise_pending_errors(self, info):
|
||||
err = info.pop('__pending_error', None)
|
||||
if err:
|
||||
|
@ -3483,7 +3498,8 @@ class YoutubeDL:
|
|||
or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
|
||||
'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
|
||||
FFmpegFixupM3u8PP)
|
||||
ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
|
||||
ffmpeg_fixup(downloader == 'dashsegments'
|
||||
and (info_dict.get('is_live') or info_dict.get('is_dash_periods')),
|
||||
'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
|
||||
|
||||
ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
|
||||
|
@ -4144,6 +4160,10 @@ class YoutubeDL:
|
|||
director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
|
||||
return director
|
||||
|
||||
@functools.cached_property
def _request_director(self):
    """Request director for this instance, built on first access and then cached.

    Constructed from the registered request handlers and handler preferences.
    """
    handlers = _REQUEST_HANDLERS.values()
    return self.build_request_director(handlers, _RH_PREFERENCES)
|
||||
|
||||
def encode(self, s):
|
||||
if isinstance(s, bytes):
|
||||
return s # Already encoded
|
||||
|
|
|
@ -14,7 +14,7 @@ import os
|
|||
import re
|
||||
import traceback
|
||||
|
||||
from .compat import compat_shlex_quote
|
||||
from .compat import compat_os_name, compat_shlex_quote
|
||||
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
|
||||
from .downloader.external import get_external_downloader
|
||||
from .extractor import list_extractor_classes
|
||||
|
@ -984,7 +984,28 @@ def _real_main(argv=None):
|
|||
if pre_process:
|
||||
return ydl._download_retcode
|
||||
|
||||
ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
|
||||
args = sys.argv[1:] if argv is None else argv
|
||||
ydl.warn_if_short_id(args)
|
||||
|
||||
# Show a useful error message and wait for keypress if not launched from shell on Windows
|
||||
if not args and compat_os_name == 'nt' and getattr(sys, 'frozen', False):
|
||||
import ctypes.wintypes
|
||||
import msvcrt
|
||||
|
||||
kernel32 = ctypes.WinDLL('Kernel32')
|
||||
|
||||
buffer = (1 * ctypes.wintypes.DWORD)()
|
||||
attached_processes = kernel32.GetConsoleProcessList(buffer, 1)
|
||||
# If we only have a single process attached, then the executable was double clicked
|
||||
# When using `pyinstaller` with `--onefile`, two processes get attached
|
||||
is_onefile = hasattr(sys, '_MEIPASS') and os.path.basename(sys._MEIPASS).startswith('_MEI')
|
||||
if attached_processes == 1 or is_onefile and attached_processes == 2:
|
||||
print(parser._generate_error_message(
|
||||
'Do not double-click the executable, instead call it from a command line.\n'
|
||||
'Please read the README for further information on how to use yt-dlp: '
|
||||
'https://github.com/yt-dlp/yt-dlp#readme'))
|
||||
msvcrt.getch()
|
||||
_exit(2)
|
||||
parser.error(
|
||||
'You must provide at least one URL.\n'
|
||||
'Type yt-dlp --help to see a list of all options.')
|
||||
|
|
|
@ -320,7 +320,6 @@ from .cbs import (
|
|||
CBSIE,
|
||||
ParamountPressExpressIE,
|
||||
)
|
||||
from .cbsinteractive import CBSInteractiveIE
|
||||
from .cbsnews import (
|
||||
CBSNewsEmbedIE,
|
||||
CBSNewsIE,
|
||||
|
@ -348,10 +347,6 @@ from .cgtn import CGTNIE
|
|||
from .charlierose import CharlieRoseIE
|
||||
from .chaturbate import ChaturbateIE
|
||||
from .chilloutzone import ChilloutzoneIE
|
||||
from .chingari import (
|
||||
ChingariIE,
|
||||
ChingariUserIE,
|
||||
)
|
||||
from .chzzk import (
|
||||
CHZZKLiveIE,
|
||||
CHZZKVideoIE,
|
||||
|
@ -369,7 +364,6 @@ from .ciscolive import (
|
|||
from .ciscowebex import CiscoWebexIE
|
||||
from .cjsw import CJSWIE
|
||||
from .clipchamp import ClipchampIE
|
||||
from .cliphunter import CliphunterIE
|
||||
from .clippit import ClippitIE
|
||||
from .cliprs import ClipRsIE
|
||||
from .closertotruth import CloserToTruthIE
|
||||
|
@ -379,7 +373,6 @@ from .clubic import ClubicIE
|
|||
from .clyp import ClypIE
|
||||
from .cmt import CMTIE
|
||||
from .cnbc import (
|
||||
CNBCIE,
|
||||
CNBCVideoIE,
|
||||
)
|
||||
from .cnn import (
|
||||
|
@ -445,6 +438,7 @@ from .dailymail import DailyMailIE
|
|||
from .dailymotion import (
|
||||
DailymotionIE,
|
||||
DailymotionPlaylistIE,
|
||||
DailymotionSearchIE,
|
||||
DailymotionUserIE,
|
||||
)
|
||||
from .dailywire import (
|
||||
|
@ -476,7 +470,6 @@ from .dlf import (
|
|||
)
|
||||
from .dfb import DFBIE
|
||||
from .dhm import DHMIE
|
||||
from .digg import DiggIE
|
||||
from .douyutv import (
|
||||
DouyuShowIE,
|
||||
DouyuTVIE,
|
||||
|
@ -610,7 +603,6 @@ from .fc2 import (
|
|||
)
|
||||
from .fczenit import FczenitIE
|
||||
from .fifa import FifaIE
|
||||
from .filmmodu import FilmmoduIE
|
||||
from .filmon import (
|
||||
FilmOnIE,
|
||||
FilmOnChannelIE,
|
||||
|
@ -676,7 +668,6 @@ from .gab import (
|
|||
GabIE,
|
||||
)
|
||||
from .gaia import GaiaIE
|
||||
from .gameinformer import GameInformerIE
|
||||
from .gamejolt import (
|
||||
GameJoltIE,
|
||||
GameJoltUserIE,
|
||||
|
@ -705,7 +696,6 @@ from .gettr import (
|
|||
GettrStreamingIE,
|
||||
)
|
||||
from .giantbomb import GiantBombIE
|
||||
from .giga import GigaIE
|
||||
from .glide import GlideIE
|
||||
from .globalplayer import (
|
||||
GlobalPlayerLiveIE,
|
||||
|
@ -896,10 +886,8 @@ from .jtbc import (
|
|||
from .jwplatform import JWPlatformIE
|
||||
from .kakao import KakaoIE
|
||||
from .kaltura import KalturaIE
|
||||
from .kanal2 import Kanal2IE
|
||||
from .kankanews import KankaNewsIE
|
||||
from .karaoketv import KaraoketvIE
|
||||
from .karrierevideos import KarriereVideosIE
|
||||
from .kelbyone import KelbyOneIE
|
||||
from .khanacademy import (
|
||||
KhanAcademyIE,
|
||||
|
@ -915,13 +903,11 @@ from .kinja import KinjaEmbedIE
|
|||
from .kinopoisk import KinoPoiskIE
|
||||
from .kommunetv import KommunetvIE
|
||||
from .kompas import KompasVideoIE
|
||||
from .konserthusetplay import KonserthusetPlayIE
|
||||
from .koo import KooIE
|
||||
from .kth import KTHIE
|
||||
from .krasview import KrasViewIE
|
||||
from .ku6 import Ku6IE
|
||||
from .kukululive import KukuluLiveIE
|
||||
from .kusi import KUSIIE
|
||||
from .kuwo import (
|
||||
KuwoIE,
|
||||
KuwoAlbumIE,
|
||||
|
@ -1003,7 +989,6 @@ from .lnkgo import (
|
|||
LnkGoIE,
|
||||
LnkIE,
|
||||
)
|
||||
from .localnews8 import LocalNews8IE
|
||||
from .lovehomeporn import LoveHomePornIE
|
||||
from .lrt import (
|
||||
LRTVODIE,
|
||||
|
@ -1030,7 +1015,6 @@ from .mailru import (
|
|||
MailRuMusicSearchIE,
|
||||
)
|
||||
from .mainstreaming import MainStreamingIE
|
||||
from .malltv import MallTVIE
|
||||
from .mangomolo import (
|
||||
MangomoloVideoIE,
|
||||
MangomoloLiveIE,
|
||||
|
@ -1074,7 +1058,6 @@ from .meipai import MeipaiIE
|
|||
from .melonvod import MelonVODIE
|
||||
from .metacritic import MetacriticIE
|
||||
from .mgtv import MGTVIE
|
||||
from .miaopai import MiaoPaiIE
|
||||
from .microsoftstream import MicrosoftStreamIE
|
||||
from .microsoftvirtualacademy import (
|
||||
MicrosoftVirtualAcademyIE,
|
||||
|
@ -1092,7 +1075,6 @@ from .minds import (
|
|||
MindsChannelIE,
|
||||
MindsGroupIE,
|
||||
)
|
||||
from .ministrygrid import MinistryGridIE
|
||||
from .minoto import MinotoIE
|
||||
from .mirrativ import (
|
||||
MirrativIE,
|
||||
|
@ -1120,7 +1102,6 @@ from .mlssoccer import MLSSoccerIE
|
|||
from .mocha import MochaVideoIE
|
||||
from .mojvideo import MojvideoIE
|
||||
from .monstercat import MonstercatIE
|
||||
from .morningstar import MorningstarIE
|
||||
from .motherless import (
|
||||
MotherlessIE,
|
||||
MotherlessGroupIE,
|
||||
|
@ -1365,7 +1346,6 @@ from .nuvid import NuvidIE
|
|||
from .nzherald import NZHeraldIE
|
||||
from .nzonscreen import NZOnScreenIE
|
||||
from .nzz import NZZIE
|
||||
from .odatv import OdaTVIE
|
||||
from .odkmedia import OnDemandChinaEpisodeIE
|
||||
from .odnoklassniki import OdnoklassnikiIE
|
||||
from .oftv import (
|
||||
|
@ -1477,7 +1457,6 @@ from .platzi import (
|
|||
PlatziCourseIE,
|
||||
)
|
||||
from .playplustv import PlayPlusTVIE
|
||||
from .playstuff import PlayStuffIE
|
||||
from .playsuisse import PlaySuisseIE
|
||||
from .playtvak import PlaytvakIE
|
||||
from .playwire import PlaywireIE
|
||||
|
@ -1599,7 +1578,6 @@ from .raywenderlich import (
|
|||
RayWenderlichIE,
|
||||
RayWenderlichCourseIE,
|
||||
)
|
||||
from .rbmaradio import RBMARadioIE
|
||||
from .rbgtum import (
|
||||
RbgTumIE,
|
||||
RbgTumCourseIE,
|
||||
|
@ -1631,7 +1609,6 @@ from .redgifs import (
|
|||
RedGifsUserIE,
|
||||
)
|
||||
from .redtube import RedTubeIE
|
||||
from .regiotv import RegioTVIE
|
||||
from .rentv import (
|
||||
RENTVIE,
|
||||
RENTVArticleIE,
|
||||
|
@ -1640,6 +1617,7 @@ from .restudy import RestudyIE
|
|||
from .reuters import ReutersIE
|
||||
from .reverbnation import ReverbNationIE
|
||||
from .rheinmaintv import RheinMainTVIE
|
||||
from .ridehome import RideHomeIE
|
||||
from .rinsefm import (
|
||||
RinseFMIE,
|
||||
RinseFMArtistPlaylistIE,
|
||||
|
@ -1738,7 +1716,6 @@ from .safari import (
|
|||
from .saitosan import SaitosanIE
|
||||
from .samplefocus import SampleFocusIE
|
||||
from .sapo import SapoIE
|
||||
from .savefrom import SaveFromIE
|
||||
from .sbs import SBSIE
|
||||
from .sbscokr import (
|
||||
SBSCoKrIE,
|
||||
|
@ -1758,7 +1735,6 @@ from .scte import (
|
|||
SCTECourseIE,
|
||||
)
|
||||
from .scrolller import ScrolllerIE
|
||||
from .seeker import SeekerIE
|
||||
from .sejmpl import SejmIE
|
||||
from .senalcolombia import SenalColombiaLiveIE
|
||||
from .senategov import SenateISVPIE, SenateGovIE
|
||||
|
@ -1901,7 +1877,6 @@ from .storyfire import (
|
|||
)
|
||||
from .streamable import StreamableIE
|
||||
from .streamcz import StreamCZIE
|
||||
from .streamff import StreamFFIE
|
||||
from .streetvoice import StreetVoiceIE
|
||||
from .stretchinternet import StretchInternetIE
|
||||
from .stripchat import StripchatIE
|
||||
|
@ -1930,7 +1905,6 @@ from .tbsjp import (
|
|||
TBSJPProgramIE,
|
||||
TBSJPPlaylistIE,
|
||||
)
|
||||
from .tdslifeway import TDSLifewayIE
|
||||
from .teachable import (
|
||||
TeachableIE,
|
||||
TeachableCourseIE,
|
||||
|
@ -2500,6 +2474,7 @@ from .zee5 import (
|
|||
Zee5SeriesIE,
|
||||
)
|
||||
from .zeenews import ZeeNewsIE
|
||||
from .zenporn import ZenPornIE
|
||||
from .zetland import ZetlandDKArticleIE
|
||||
from .zhihu import ZhihuIE
|
||||
from .zingmp3 import (
|
||||
|
|
|
@ -245,7 +245,6 @@ class ABCIViewIE(InfoExtractor):
|
|||
'episode_id': 'NC2203H039S00',
|
||||
'season_number': 2022,
|
||||
'season': 'Season 2022',
|
||||
'episode_number': None,
|
||||
'episode': 'Locking Up Kids',
|
||||
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/nc/NC2203H039S00_636d8a0944a22_1920.jpg',
|
||||
'timestamp': 1668460497,
|
||||
|
@ -271,8 +270,6 @@ class ABCIViewIE(InfoExtractor):
|
|||
'episode_id': 'RF2004Q043S00',
|
||||
'season_number': 2021,
|
||||
'season': 'Season 2021',
|
||||
'episode_number': None,
|
||||
'episode': None,
|
||||
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/rf/RF2004Q043S00_61a950639dbc0_1920.jpg',
|
||||
'timestamp': 1638710705,
|
||||
|
||||
|
|
|
@ -259,7 +259,7 @@ class AbemaTVIE(AbemaTVBaseIE):
|
|||
'title': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
|
||||
'series': 'ゆるキャン△ SEASON2',
|
||||
'episode': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
|
||||
'series_number': 2,
|
||||
'season_number': 2,
|
||||
'episode_number': 1,
|
||||
'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
|
||||
},
|
||||
|
|
|
@ -3,6 +3,7 @@ from ..utils import (
|
|||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
parse_codecs,
|
||||
parse_qs,
|
||||
|
@ -129,7 +130,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE):
|
|||
'title': '红孩儿之趴趴蛙寻石记 第5话 ',
|
||||
'duration': 760.0,
|
||||
'season': '红孩儿之趴趴蛙寻石记',
|
||||
'season_id': 5023171,
|
||||
'season_id': '5023171',
|
||||
'season_number': 1, # series has only 1 season
|
||||
'episode': 'Episode 5',
|
||||
'episode_number': 5,
|
||||
|
@ -146,7 +147,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE):
|
|||
'title': '叽歪老表(第二季) 第5话 坚不可摧',
|
||||
'season': '叽歪老表(第二季)',
|
||||
'season_number': 2,
|
||||
'season_id': 6065485,
|
||||
'season_id': '6065485',
|
||||
'episode': '坚不可摧',
|
||||
'episode_number': 5,
|
||||
'upload_date': '20220324',
|
||||
|
@ -191,7 +192,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE):
|
|||
'title': json_bangumi_data.get('showTitle'),
|
||||
'thumbnail': json_bangumi_data.get('image'),
|
||||
'season': json_bangumi_data.get('bangumiTitle'),
|
||||
'season_id': season_id,
|
||||
'season_id': str_or_none(season_id),
|
||||
'season_number': season_number,
|
||||
'episode': json_bangumi_data.get('title'),
|
||||
'episode_number': episode_number,
|
||||
|
|
|
@ -22,7 +22,7 @@ class AltCensoredIE(InfoExtractor):
|
|||
'title': "QUELLES SONT LES CONSÉQUENCES DE L'HYPERSEXUALISATION DE LA SOCIÉTÉ ?",
|
||||
'display_id': 'k0srjLSkga8.webm',
|
||||
'release_date': '20180403',
|
||||
'creator': 'Virginie Vota',
|
||||
'creators': ['Virginie Vota'],
|
||||
'release_year': 2018,
|
||||
'upload_date': '20230318',
|
||||
'uploader': 'admin@altcensored.com',
|
||||
|
@ -32,7 +32,7 @@ class AltCensoredIE(InfoExtractor):
|
|||
'duration': 926.09,
|
||||
'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg',
|
||||
'view_count': int,
|
||||
'categories': ['News & Politics'],
|
||||
'categories': ['News & Politics'], # FIXME
|
||||
}
|
||||
}]
|
||||
|
||||
|
@ -62,14 +62,21 @@ class AltCensoredChannelIE(InfoExtractor):
|
|||
'title': 'Virginie Vota',
|
||||
'id': 'UCFPTO55xxHqFqkzRZHu4kcw',
|
||||
},
|
||||
'playlist_count': 91
|
||||
'playlist_count': 85,
|
||||
}, {
|
||||
'url': 'https://altcensored.com/channel/UC9CcJ96HKMWn0LZlcxlpFTw',
|
||||
'info_dict': {
|
||||
'title': 'yukikaze775',
|
||||
'id': 'UC9CcJ96HKMWn0LZlcxlpFTw',
|
||||
},
|
||||
'playlist_count': 4
|
||||
'playlist_count': 4,
|
||||
}, {
|
||||
'url': 'https://altcensored.com/channel/UCfYbb7nga6-icsFWWgS-kWw',
|
||||
'info_dict': {
|
||||
'title': 'Mister Metokur',
|
||||
'id': 'UCfYbb7nga6-icsFWWgS-kWw',
|
||||
},
|
||||
'playlist_count': 121,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -78,7 +85,7 @@ class AltCensoredChannelIE(InfoExtractor):
|
|||
url, channel_id, 'Download channel webpage', 'Unable to get channel webpage')
|
||||
title = self._html_search_meta('altcen_title', webpage, 'title', fatal=False)
|
||||
page_count = int_or_none(self._html_search_regex(
|
||||
r'<a[^>]+href="/channel/\w+/page/(\d+)">(?:\1)</a>',
|
||||
r'<a[^>]+href="/channel/[\w-]+/page/(\d+)">(?:\1)</a>',
|
||||
webpage, 'page count', default='1'))
|
||||
|
||||
def page_func(page_num):
|
||||
|
|
|
@ -31,6 +31,7 @@ from ..utils import (
|
|||
unified_timestamp,
|
||||
url_or_none,
|
||||
urlhandle_detect_ext,
|
||||
variadic,
|
||||
)
|
||||
|
||||
|
||||
|
@ -49,7 +50,7 @@ class ArchiveOrgIE(InfoExtractor):
|
|||
'release_date': '19681210',
|
||||
'timestamp': 1268695290,
|
||||
'upload_date': '20100315',
|
||||
'creator': 'SRI International',
|
||||
'creators': ['SRI International'],
|
||||
'uploader': 'laura@archive.org',
|
||||
'thumbnail': r're:https://archive\.org/download/.*\.jpg',
|
||||
'display_id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.cdr',
|
||||
|
@ -109,7 +110,7 @@ class ArchiveOrgIE(InfoExtractor):
|
|||
'title': 'Turning',
|
||||
'ext': 'flac',
|
||||
'track': 'Turning',
|
||||
'creator': 'Grateful Dead',
|
||||
'creators': ['Grateful Dead'],
|
||||
'display_id': 'gd1977-05-08d01t01.flac',
|
||||
'track_number': 1,
|
||||
'album': '1977-05-08 - Barton Hall - Cornell University',
|
||||
|
@ -129,7 +130,7 @@ class ArchiveOrgIE(InfoExtractor):
|
|||
'location': 'Barton Hall - Cornell University',
|
||||
'duration': 438.68,
|
||||
'track': 'Deal',
|
||||
'creator': 'Grateful Dead',
|
||||
'creators': ['Grateful Dead'],
|
||||
'album': '1977-05-08 - Barton Hall - Cornell University',
|
||||
'release_date': '19770508',
|
||||
'display_id': 'gd1977-05-08d01t07.flac',
|
||||
|
@ -167,7 +168,7 @@ class ArchiveOrgIE(InfoExtractor):
|
|||
'upload_date': '20160610',
|
||||
'description': 'md5:f70956a156645a658a0dc9513d9e78b7',
|
||||
'uploader': 'dimitrios@archive.org',
|
||||
'creator': ['British Broadcasting Corporation', 'Time-Life Films'],
|
||||
'creators': ['British Broadcasting Corporation', 'Time-Life Films'],
|
||||
'timestamp': 1465594947,
|
||||
},
|
||||
'playlist': [
|
||||
|
@ -257,7 +258,7 @@ class ArchiveOrgIE(InfoExtractor):
|
|||
'title': m['title'],
|
||||
'description': clean_html(m.get('description')),
|
||||
'uploader': dict_get(m, ['uploader', 'adder']),
|
||||
'creator': m.get('creator'),
|
||||
'creators': traverse_obj(m, ('creator', {variadic}, {lambda x: x[0] and list(x)})),
|
||||
'license': m.get('licenseurl'),
|
||||
'release_date': unified_strdate(m.get('date')),
|
||||
'timestamp': unified_timestamp(dict_get(m, ['publicdate', 'addeddate'])),
|
||||
|
@ -272,7 +273,7 @@ class ArchiveOrgIE(InfoExtractor):
|
|||
'title': f.get('title') or f['name'],
|
||||
'display_id': f['name'],
|
||||
'description': clean_html(f.get('description')),
|
||||
'creator': f.get('creator'),
|
||||
'creators': traverse_obj(f, ('creator', {variadic}, {lambda x: x[0] and list(x)})),
|
||||
'duration': parse_duration(f.get('length')),
|
||||
'track_number': int_or_none(f.get('track')),
|
||||
'album': f.get('album'),
|
||||
|
@ -300,7 +301,7 @@ class ArchiveOrgIE(InfoExtractor):
|
|||
is_logged_in = bool(self._get_cookies('https://archive.org').get('logged-in-sig'))
|
||||
if extension in KNOWN_EXTENSIONS and (not f.get('private') or is_logged_in):
|
||||
entry['formats'].append({
|
||||
'url': 'https://archive.org/download/' + identifier + '/' + f['name'],
|
||||
'url': 'https://archive.org/download/' + identifier + '/' + urllib.parse.quote(f['name']),
|
||||
'format': f.get('format'),
|
||||
'width': int_or_none(f.get('width')),
|
||||
'height': int_or_none(f.get('height')),
|
||||
|
|
|
@ -24,7 +24,8 @@ class AxsIE(InfoExtractor):
|
|||
'timestamp': 1685729564,
|
||||
'duration': 1284.216,
|
||||
'series': 'Rock & Roll Road Trip with Sammy Hagar',
|
||||
'season': 2,
|
||||
'season': 'Season 2',
|
||||
'season_number': 2,
|
||||
'episode': '3',
|
||||
'thumbnail': 'https://images.dotstudiopro.com/5f4e9d330a0c3b295a7e8394',
|
||||
},
|
||||
|
@ -41,7 +42,8 @@ class AxsIE(InfoExtractor):
|
|||
'timestamp': 1676403615,
|
||||
'duration': 2570.668,
|
||||
'series': 'The Big Interview with Dan Rather',
|
||||
'season': 3,
|
||||
'season': 'Season 3',
|
||||
'season_number': 3,
|
||||
'episode': '5',
|
||||
'thumbnail': 'https://images.dotstudiopro.com/5f4d1901f340b50d937cec32',
|
||||
},
|
||||
|
@ -77,7 +79,7 @@ class AxsIE(InfoExtractor):
|
|||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'series': ('seriestitle', {str}),
|
||||
'season': ('season', {int}),
|
||||
'season_number': ('season', {int}),
|
||||
'episode': ('episode', {str}),
|
||||
'duration': ('duration', {float_or_none}),
|
||||
'timestamp': ('updated_at', {parse_iso8601}),
|
||||
|
|
|
@ -2,6 +2,7 @@ from .common import InfoExtractor
|
|||
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
|
@ -22,7 +23,7 @@ class BeegIE(InfoExtractor):
|
|||
'age_limit': 18,
|
||||
'upload_date': '20220131',
|
||||
'timestamp': 1643656455,
|
||||
'display_id': 2540839,
|
||||
'display_id': '2540839',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://beeg.com/-0599050563103750?t=4-861',
|
||||
|
@ -36,7 +37,7 @@ class BeegIE(InfoExtractor):
|
|||
'age_limit': 18,
|
||||
'description': 'md5:b4fc879a58ae6c604f8f259155b7e3b9',
|
||||
'timestamp': 1643623200,
|
||||
'display_id': 2569965,
|
||||
'display_id': '2569965',
|
||||
'upload_date': '20220131',
|
||||
}
|
||||
}, {
|
||||
|
@ -78,7 +79,7 @@ class BeegIE(InfoExtractor):
|
|||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': first_fact.get('id'),
|
||||
'display_id': str_or_none(first_fact.get('id')),
|
||||
'title': traverse_obj(video, ('file', 'stuff', 'sf_name')),
|
||||
'description': traverse_obj(video, ('file', 'stuff', 'sf_story')),
|
||||
'timestamp': unified_timestamp(first_fact.get('fc_created')),
|
||||
|
|
|
@ -32,7 +32,7 @@ class BellMediaIE(InfoExtractor):
|
|||
'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3',
|
||||
'upload_date': '20180525',
|
||||
'timestamp': 1527288600,
|
||||
'season_id': 73997,
|
||||
'season_id': '73997',
|
||||
'season': '2018',
|
||||
'thumbnail': 'http://images2.9c9media.com/image_asset/2018_5_25_baf30cbd-b28d-4a18-9903-4bb8713b00f5_PNG_956x536.jpg',
|
||||
'tags': [],
|
||||
|
|
|
@ -93,7 +93,6 @@ class BFMTVArticleIE(BFMTVBaseIE):
|
|||
'id': '6318445464112',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le plein de bioéthanol fait de plus en plus mal à la pompe',
|
||||
'description': None,
|
||||
'uploader_id': '876630703001',
|
||||
'upload_date': '20230110',
|
||||
'timestamp': 1673341692,
|
||||
|
|
|
@ -1996,7 +1996,7 @@ class BiliIntlIE(BiliIntlBaseIE):
|
|||
'title': get_element_by_class(
|
||||
'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
|
||||
'description': get_element_by_class(
|
||||
'bstar-meta__desc', webpage) or self._html_search_meta('og:description'),
|
||||
'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
|
||||
}, self._search_json_ld(webpage, video_id, default={}))
|
||||
|
||||
def _get_comments_reply(self, root_id, next_id=0, display_id=None):
|
||||
|
|
|
@ -185,7 +185,6 @@ class BitChuteChannelIE(InfoExtractor):
|
|||
'info_dict': {
|
||||
'id': 'UGlrF9o9b-Q',
|
||||
'ext': 'mp4',
|
||||
'filesize': None,
|
||||
'title': 'This is the first video on #BitChute !',
|
||||
'description': 'md5:a0337e7b1fe39e32336974af8173a034',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
|
|
|
@ -4,10 +4,12 @@ from ..utils import (
|
|||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BleacherReportIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/articles/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://bleacherreport.com/articles/2496438-fsu-stat-projections-is-jalen-ramsey-best-defensive-player-in-college-football',
|
||||
|
@ -16,7 +18,7 @@ class BleacherReportIE(InfoExtractor):
|
|||
'id': '2496438',
|
||||
'ext': 'mp4',
|
||||
'title': 'FSU Stat Projections: Is Jalen Ramsey Best Defensive Player in College Football?',
|
||||
'uploader_id': 3992341,
|
||||
'uploader_id': '3992341',
|
||||
'description': 'CFB, ACC, Florida State',
|
||||
'timestamp': 1434380212,
|
||||
'upload_date': '20150615',
|
||||
|
@ -33,7 +35,7 @@ class BleacherReportIE(InfoExtractor):
|
|||
'timestamp': 1446839961,
|
||||
'uploader': 'Sean Fay',
|
||||
'description': 'md5:b1601e2314c4d8eec23b6eafe086a757',
|
||||
'uploader_id': 6466954,
|
||||
'uploader_id': '6466954',
|
||||
'upload_date': '20151011',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
|
@ -58,7 +60,7 @@ class BleacherReportIE(InfoExtractor):
|
|||
'id': article_id,
|
||||
'title': article_data['title'],
|
||||
'uploader': article_data.get('author', {}).get('name'),
|
||||
'uploader_id': article_data.get('authorId'),
|
||||
'uploader_id': str_or_none(article_data.get('authorId')),
|
||||
'timestamp': parse_iso8601(article_data.get('createdAt')),
|
||||
'thumbnails': thumbnails,
|
||||
'comment_count': int_or_none(article_data.get('commentsCount')),
|
||||
|
@ -82,6 +84,7 @@ class BleacherReportIE(InfoExtractor):
|
|||
|
||||
|
||||
class BleacherReportCMSIE(AMPIE):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})'
|
||||
_TESTS = [{
|
||||
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms',
|
||||
|
|
|
@ -76,6 +76,7 @@ class CBSBaseIE(ThePlatformFeedIE): # XXX: Do not subclass from concrete IE
|
|||
|
||||
|
||||
class CBSIE(CBSBaseIE):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
cbs:|
|
||||
|
|
|
@ -1,98 +0,0 @@
|
|||
from .cbs import CBSIE
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class CBSInteractiveIE(CBSIE):  # XXX: Do not subclass from concrete IE
    """Extractor for CNET and ZDNet video pages.

    The page embeds a JSON options blob; its ``mpxRefId`` is handed to the
    inherited ``_extract_video_info`` together with the per-site MPX account.
    """

    _VALID_URL = r'https?://(?:www\.)?(?P<site>cnet|zdnet)\.com/(?:videos|video(?:/share)?)/(?P<id>[^/?]+)'
    _TESTS = [{
        'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
        'info_dict': {
            'id': 'R49SYt__yAfmlXR85z4f7gNmCBDcN_00',
            'display_id': 'hands-on-with-microsofts-windows-8-1-update',
            'ext': 'mp4',
            'title': 'Hands-on with Microsoft Windows 8.1 Update',
            'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
            'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861',
            'uploader': 'Sarah Mitroff',
            'duration': 70,
            'timestamp': 1396479627,
            'upload_date': '20140402',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/',
        'md5': 'f11d27b2fa18597fbf92444d2a9ed386',
        'info_dict': {
            'id': 'kjOJd_OoVJqbg_ZD8MZCOk8Wekb9QccK',
            'display_id': 'whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187',
            'ext': 'mp4',
            'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)',
            'description': 'md5:d2b9a95a5ffe978ae6fbd4cf944d618f',
            'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40',
            'uploader': 'Ashley Esqueda',
            'duration': 1482,
            'timestamp': 1433289889,
            'upload_date': '20150603',
        },
    }, {
        'url': 'http://www.zdnet.com/video/share/video-keeping-android-smartphones-and-tablets-secure/',
        'info_dict': {
            'id': 'k0r4T_ehht4xW_hAOqiVQPuBDPZ8SRjt',
            'display_id': 'video-keeping-android-smartphones-and-tablets-secure',
            'ext': 'mp4',
            'title': 'Video: Keeping Android smartphones and tablets secure',
            'description': 'Here\'s the best way to keep Android devices secure, and what you do when they\'ve come to the end of their lives.',
            'uploader_id': 'f2d97ea2-8175-11e2-9d12-0018fe8a00b0',
            'uploader': 'Adrian Kingsley-Hughes',
            'duration': 731,
            'timestamp': 1449129925,
            'upload_date': '20151203',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.zdnet.com/video/huawei-matebook-x-video/',
        'only_matching': True,
    }]

    # Account number passed to _extract_video_info, keyed by the `site` URL group
    MPX_ACCOUNTS = {
        'cnet': 2198311517,
        'zdnet': 2387448114,
    }

    def _real_extract(self, url):
        """Return the info dict for a CNET/ZDNet video page URL."""
        site, display_id = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, display_id)

        # The player options are stored as JSON in a single-quoted HTML attribute
        options_json = self._html_search_regex(
            r"data(?:-(?:cnet|zdnet))?-video(?:-(?:uvp(?:js)?|player))?-options='([^']+)'",
            webpage, 'data json')
        options = self._parse_json(options_json, display_id)

        # Either a single 'video' object, or the first item of 'videos'/'playlist'
        vdata = options.get('video')
        if not vdata:
            vdata = (options.get('videos') or options.get('playlist'))[0]

        video_id = vdata['mpxRefId']
        title = vdata['title']

        uploader = uploader_id = None
        author = vdata.get('author')
        if author:
            uploader = '%s %s' % (author['firstName'], author['lastName'])
            uploader_id = author.get('id')

        info = self._extract_video_info(video_id, site, self.MPX_ACCOUNTS[site])
        # Page-level metadata takes precedence over what the MPX lookup returned
        info.update({
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'duration': int_or_none(vdata.get('duration')),
            'uploader': uploader,
            'uploader_id': uploader_id,
        })
        return info
|
|
@ -8,6 +8,7 @@ from ..utils import (
|
|||
|
||||
# class CBSSportsEmbedIE(CBSBaseIE):
|
||||
class CBSSportsEmbedIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
IE_NAME = 'cbssports:embed'
|
||||
_VALID_URL = r'''(?ix)https?://(?:(?:www\.)?cbs|embed\.247)sports\.com/player/embed.+?
|
||||
(?:
|
||||
|
@ -75,6 +76,7 @@ class CBSSportsBaseIE(InfoExtractor):
|
|||
|
||||
|
||||
class CBSSportsIE(CBSSportsBaseIE):
|
||||
_WORKING = False
|
||||
IE_NAME = 'cbssports'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/video/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
|
@ -92,6 +94,7 @@ class CBSSportsIE(CBSSportsBaseIE):
|
|||
|
||||
|
||||
class TwentyFourSevenSportsIE(CBSSportsBaseIE):
|
||||
_WORKING = False
|
||||
IE_NAME = '247sports'
|
||||
_VALID_URL = r'https?://(?:www\.)?247sports\.com/Video/(?:[^/?#&]+-)?(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
|
|
|
@ -88,6 +88,20 @@ class CCTVIE(InfoExtractor):
|
|||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# videoCenterId: "id"
|
||||
'url': 'http://news.cctv.com/2024/02/21/ARTIcU5tKIOIF2myEGCATkLo240221.shtml',
|
||||
'info_dict': {
|
||||
'id': '5c846c0518444308ba32c4159df3b3e0',
|
||||
'ext': 'mp4',
|
||||
'title': '《平“语”近人——习近平喜欢的典故》第三季 第5集:风物长宜放眼量',
|
||||
'uploader': 'yangjuan',
|
||||
'timestamp': 1708554940,
|
||||
'upload_date': '20240221',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# var ids = ["id"]
|
||||
'url': 'http://www.ncpa-classic.com/clt/more/416/index.shtml',
|
||||
|
@ -128,7 +142,7 @@ class CCTVIE(InfoExtractor):
|
|||
|
||||
video_id = self._search_regex(
|
||||
[r'var\s+guid\s*=\s*["\']([\da-fA-F]+)',
|
||||
r'videoCenterId["\']\s*,\s*["\']([\da-fA-F]+)',
|
||||
r'videoCenterId(?:["\']\s*,|:)\s*["\']([\da-fA-F]+)',
|
||||
r'changePlayer\s*\(\s*["\']([\da-fA-F]+)',
|
||||
r'load[Vv]ideo\s*\(\s*["\']([\da-fA-F]+)',
|
||||
r'var\s+initMyAray\s*=\s*["\']([\da-fA-F]+)',
|
||||
|
|
|
@ -51,7 +51,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||
'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
|
||||
'only_matching': True,
|
||||
'info_dict': {
|
||||
'id': 402,
|
||||
'id': '402',
|
||||
'ext': 'mp4',
|
||||
'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'is_live': True,
|
||||
|
|
|
@ -17,6 +17,7 @@ class CGTNIE(InfoExtractor):
|
|||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 1615295940,
|
||||
'upload_date': '20210309',
|
||||
'categories': ['Video'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True
|
||||
|
@ -29,8 +30,8 @@ class CGTNIE(InfoExtractor):
|
|||
'title': 'China, Indonesia vow to further deepen maritime cooperation',
|
||||
'thumbnail': r're:^https?://.*\.png$',
|
||||
'description': 'China and Indonesia vowed to upgrade their cooperation into the maritime sector and also for political security, economy, and cultural and people-to-people exchanges.',
|
||||
'author': 'CGTN',
|
||||
'category': 'China',
|
||||
'creators': ['CGTN'],
|
||||
'categories': ['China'],
|
||||
'timestamp': 1622950200,
|
||||
'upload_date': '20210606',
|
||||
},
|
||||
|
@ -45,7 +46,12 @@ class CGTNIE(InfoExtractor):
|
|||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
download_url = self._html_search_regex(r'data-video ="(?P<url>.+m3u8)"', webpage, 'download_url')
|
||||
datetime_str = self._html_search_regex(r'<span class="date">\s*(.+?)\s*</span>', webpage, 'datetime_str', fatal=False)
|
||||
datetime_str = self._html_search_regex(
|
||||
r'<span class="date">\s*(.+?)\s*</span>', webpage, 'datetime_str', fatal=False)
|
||||
category = self._html_search_regex(
|
||||
r'<span class="section">\s*(.+?)\s*</span>', webpage, 'category', fatal=False)
|
||||
author = self._search_regex(
|
||||
r'<div class="news-author-name">\s*(.+?)\s*</div>', webpage, 'author', default=None)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -53,9 +59,7 @@ class CGTNIE(InfoExtractor):
|
|||
'description': self._og_search_description(webpage, default=None),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'formats': self._extract_m3u8_formats(download_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls'),
|
||||
'category': self._html_search_regex(r'<span class="section">\s*(.+?)\s*</span>',
|
||||
webpage, 'category', fatal=False),
|
||||
'author': self._html_search_regex(r'<div class="news-author-name">\s*(.+?)\s*</div>',
|
||||
webpage, 'author', default=None, fatal=False),
|
||||
'categories': [category] if category else None,
|
||||
'creators': [author] if author else None,
|
||||
'timestamp': try_get(unified_timestamp(datetime_str), lambda x: x - 8 * 3600),
|
||||
}
|
||||
|
|
|
@ -1,207 +0,0 @@
|
|||
import itertools
|
||||
import json
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class ChingariBaseIE(InfoExtractor):
    """Shared post-parsing logic for the Chingari extractors."""

    def _get_post(self, id, post_data):
        """Build an info dict for one post.

        @param id           identifier to report as the video id
        @param post_data    post object from the API; must contain
                            'mediaLocation' with a 'base' URL
        @returns            info dict with formats and post metadata
        """
        media_data = post_data['mediaLocation']
        base_url = media_data['base']
        # `or {}` also covers keys that are present but null
        author_data = post_data.get('authorData') or {}
        song_data = post_data.get('song') or {}  # revisit this in future for differentiating b/w 'art' and 'author'

        # Transcoded variants are keyed by a format name whose tail is parsed as the width
        formats = [{
            'format_id': frmt,
            'width': str_to_int(frmt[1:]),
            'url': base_url + frmt_path,
        } for frmt, frmt_path in (media_data.get('transcoded') or {}).items()]

        if media_data.get('path'):
            formats.append({
                'format_id': 'original',
                'format_note': 'Direct video.',
                'url': base_url + '/apipublic' + media_data['path'],
                'quality': 10,
            })

        timestamp = str_to_int(post_data.get('created_at'))
        if timestamp:
            # scale down by 1000 before reporting
            timestamp = int_or_none(timestamp, 1000)

        thumbnail, uploader_url = None, None
        if media_data.get('thumbnail'):
            thumbnail = base_url + media_data.get('thumbnail')
        if author_data.get('username'):
            uploader_url = 'https://chingari.io/' + author_data.get('username')

        # unquote_plus() cannot handle None, so only decode when a caption
        # was actually produced; decode once and reuse for title/description
        caption = clean_html(post_data.get('caption'))
        if caption is not None:
            caption = urllib.parse.unquote_plus(caption)

        return {
            'id': id,
            'extractor_key': ChingariIE.ie_key(),
            'extractor': 'Chingari',
            'title': caption,
            'description': caption,
            'duration': media_data.get('duration'),
            'thumbnail': url_or_none(thumbnail),
            'like_count': post_data.get('likeCount'),
            'view_count': post_data.get('viewsCount'),
            'comment_count': post_data.get('commentCount'),
            'repost_count': post_data.get('shareCount'),
            'timestamp': timestamp,
            'uploader_id': post_data.get('userId') or author_data.get('_id'),
            'uploader': author_data.get('name'),
            'uploader_url': url_or_none(uploader_url),
            'track': song_data.get('title'),
            'artist': song_data.get('author'),
            'formats': formats,
        }
|
||||
|
||||
|
||||
class ChingariIE(ChingariBaseIE):
    """Extractor for a single Chingari post shared via a `share/post?id=` URL."""

    _VALID_URL = r'https?://(?:www\.)?chingari\.io/share/post\?id=(?P<id>[^&/#?]+)'
    _TESTS = [{
        'url': 'https://chingari.io/share/post?id=612f8f4ce1dc57090e8a7beb',
        'info_dict': {
            'id': '612f8f4ce1dc57090e8a7beb',
            'ext': 'mp4',
            'title': 'Happy birthday Srila Prabhupada',
            'description': 'md5:c7080ebfdfeb06016e638c286d6bc3fa',
            'duration': 0,
            'thumbnail': 'https://media.chingari.io/uploads/c41d30e2-06b6-4e3b-9b4b-edbb929cec06-1630506826911/thumbnail/198f993f-ce87-4623-82c6-cd071bd6d4f4-1630506828016.jpg',
            'like_count': int,
            'view_count': int,
            'comment_count': int,
            'repost_count': int,
            'timestamp': 1630506828,
            'upload_date': '20210901',
            'uploader_id': '5f0403982c8bd344f4813f8c',
            'uploader': 'ISKCON,Inc.',
            'uploader_url': 'https://chingari.io/iskcon,inc',
            'track': None,
            'artist': None,
        },
        'params': {'skip_download': True},
    }]

    def _real_extract(self, url):
        """Fetch the post details and convert them with `_get_post`."""
        post_id = self._match_id(url)
        response = self._download_json(
            f'https://api.chingari.io/post/post_details/{post_id}', post_id)
        # The API reports failures in the JSON body via 'code'/'message'
        if response['code'] == 200:
            return self._get_post(post_id, response['data'])
        raise ExtractorError(response['message'], expected=True)
|
||||
|
||||
|
||||
class ChingariUserIE(ChingariBaseIE):
    """Extractor for all posts of a Chingari user, returned as a playlist."""

    _VALID_URL = r'https?://(?:www\.)?chingari\.io/(?!share/post)(?P<id>[^/?]+)'
    _TESTS = [{
        'url': 'https://chingari.io/dada1023',
        'info_dict': {
            'id': 'dada1023',
        },
        'params': {'playlistend': 3},
        'playlist': [{
            'url': 'https://chingari.io/share/post?id=614781f3ade60b3a0bfff42a',
            'info_dict': {
                'id': '614781f3ade60b3a0bfff42a',
                'ext': 'mp4',
                'title': '#chingaribappa ',
                'description': 'md5:d1df21d84088770468fa63afe3b17857',
                'duration': 7,
                'thumbnail': 'https://media.chingari.io/uploads/346d86d4-abb2-474e-a164-ffccf2bbcb72-1632076273717/thumbnail/b0b3aac2-2b86-4dd1-909d-9ed6e57cf77c-1632076275552.jpg',
                'like_count': int,
                'view_count': int,
                'comment_count': int,
                'repost_count': int,
                'timestamp': 1632076275,
                'upload_date': '20210919',
                'uploader_id': '5efc4b12cca35c3d1794c2d3',
                'uploader': 'dada (girish) dhawale',
                'uploader_url': 'https://chingari.io/dada1023',
                'track': None,
                'artist': None,
            },
            'params': {'skip_download': True},
        }, {
            'url': 'https://chingari.io/share/post?id=6146b132bcbf860959e12cba',
            'info_dict': {
                'id': '6146b132bcbf860959e12cba',
                'ext': 'mp4',
                'title': 'Tactor harvesting',
                'description': 'md5:8403f12dce68828b77ecee7eb7e887b7',
                'duration': 59.3,
                'thumbnail': 'https://media.chingari.io/uploads/b353ca70-7a87-400d-93a6-fa561afaec86-1632022814584/thumbnail/c09302e3-2043-41b1-a2fe-77d97e5bd676-1632022834260.jpg',
                'like_count': int,
                'view_count': int,
                'comment_count': int,
                'repost_count': int,
                'timestamp': 1632022834,
                'upload_date': '20210919',
                'uploader_id': '5efc4b12cca35c3d1794c2d3',
                'uploader': 'dada (girish) dhawale',
                'uploader_url': 'https://chingari.io/dada1023',
                'track': None,
                'artist': None,
            },
            'params': {'skip_download': True},
        }, {
            'url': 'https://chingari.io/share/post?id=6145651b74cb030a64c40b82',
            'info_dict': {
                'id': '6145651b74cb030a64c40b82',
                'ext': 'mp4',
                'title': '#odiabhajan ',
                'description': 'md5:687ea36835b9276cf2af90f25e7654cb',
                'duration': 56.67,
                'thumbnail': 'https://media.chingari.io/uploads/6cbf216b-babc-4cce-87fe-ceaac8d706ac-1631937782708/thumbnail/8855754f-6669-48ce-b269-8cc0699ed6da-1631937819522.jpg',
                'like_count': int,
                'view_count': int,
                'comment_count': int,
                'repost_count': int,
                'timestamp': 1631937819,
                'upload_date': '20210918',
                'uploader_id': '5efc4b12cca35c3d1794c2d3',
                'uploader': 'dada (girish) dhawale',
                'uploader_url': 'https://chingari.io/dada1023',
                'track': None,
                'artist': None,
            },
            'params': {'skip_download': True},
        }],
    }, {
        'url': 'https://chingari.io/iskcon%2Cinc',
        'playlist_mincount': 1025,
        'info_dict': {
            'id': 'iskcon%2Cinc',
        },
    }]

    def _entries(self, id):
        """Yield an info dict per post, paging 20 at a time until the API says stop."""
        for page in itertools.count():
            # The offset advances by one page size (20) per request
            query = json.dumps({
                'userId': id,
                'ownerId': id,
                'skip': page * 20,
                'limit': 20,
            }).encode()
            posts = self._download_json(
                'https://api.chingari.io/users/getPosts', id,
                data=query,
                headers={'content-type': 'application/json;charset=UTF-8'},
                note='Downloading page %s' % page)
            for item in posts.get('data', []):
                post_data = item['post']
                yield self._get_post(post_data['_id'], post_data)
            if not posts['hasMoreData']:
                break

    def _real_extract(self, url):
        """Resolve the user name in the URL to its internal id and list its posts."""
        alt_id = self._match_id(url)
        user_json = self._download_json(f'https://api.chingari.io/user/{alt_id}', alt_id)
        if user_json['code'] != 200:
            raise ExtractorError(user_json['message'], expected=True)
        user_id = user_json['data']['_id']
        # The playlist keeps the name from the URL as its id, not the internal one
        return self.playlist_result(self._entries(user_id), playlist_id=alt_id)
|
|
@ -2,7 +2,7 @@ import functools
|
|||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
UserNotLive,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
|
@ -40,7 +40,7 @@ class CHZZKLiveIE(InfoExtractor):
|
|||
note='Downloading channel info', errnote='Unable to download channel info')['content']
|
||||
|
||||
if live_detail.get('status') == 'CLOSE':
|
||||
raise ExtractorError('The channel is not currently live', expected=True)
|
||||
raise UserNotLive(video_id=channel_id)
|
||||
|
||||
live_playback = self._parse_json(live_detail['livePlaybackJson'], channel_id)
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@ from .hbo import HBOBaseIE
|
|||
|
||||
|
||||
class CinemaxIE(HBOBaseIE):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?cinemax\.com/(?P<path>[^/]+/video/[0-9a-z-]+-(?P<id>\d+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cinemax.com/warrior/video/s1-ep-1-recap-20126903',
|
||||
|
|
|
@ -1,76 +0,0 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class CliphunterIE(InfoExtractor):
    """Extractor for cliphunter.com watch pages."""

    IE_NAME = 'cliphunter'

    _VALID_URL = r'''(?x)https?://(?:www\.)?cliphunter\.com/w/
        (?P<id>[0-9]+)/
        (?P<seo>.+?)(?:$|[#\?])
    '''
    _TESTS = [{
        'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
        'md5': 'b7c9bbd4eb3a226ab91093714dcaa480',
        'info_dict': {
            'id': '1012420',
            'ext': 'flv',
            'title': 'Fun Jynx Maze solo',
            'thumbnail': r're:^https?://.*\.jpg$',
            'age_limit': 18,
        },
        'skip': 'Video gone',
    }, {
        'url': 'http://www.cliphunter.com/w/2019449/ShesNew__My_booty_girlfriend_Victoria_Paradices_pussy_filled_with_jizz',
        'md5': '55a723c67bfc6da6b0cfa00d55da8a27',
        'info_dict': {
            'id': '2019449',
            'ext': 'mp4',
            'title': 'ShesNew - My booty girlfriend, Victoria Paradice\'s pussy filled with jizz',
            'thumbnail': r're:^https?://.*\.jpg$',
            'age_limit': 18,
        },
    }]

    def _real_extract(self, url):
        """Scrape title, formats and thumbnail from JavaScript variables on the page."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        video_title = self._search_regex(r'mediaTitle = "([^"]+)"', webpage, 'title')

        # `gexoFiles` is a JS object literal mapping a key to one file description
        gexo_json = self._search_regex(
            r'var\s+gexoFiles\s*=\s*({.+?});', webpage, 'gexo files')
        gexo_files = self._parse_json(gexo_json, video_id)

        formats = []
        for file_key, file_info in gexo_files.items():
            video_url = url_or_none(file_info.get('url'))
            if not video_url:
                continue
            fmt, height = file_info.get('fmt'), file_info.get('h')
            # Prefer a descriptive id; fall back to the raw key when data is missing
            if fmt and height:
                format_id = '%s_%sp' % (fmt, height)
            else:
                format_id = file_key
            formats.append({
                'url': video_url,
                'format_id': format_id,
                'width': int_or_none(file_info.get('w')),
                'height': int_or_none(height),
                'tbr': int_or_none(file_info.get('br')),
            })

        thumbnail = self._search_regex(
            r"var\s+mov_thumb\s*=\s*'([^']+)';", webpage, 'thumbnail', fatal=False)

        return {
            'id': video_id,
            'title': video_title,
            'formats': formats,
            'age_limit': self._rta_search(webpage),
            'thumbnail': thumbnail,
        }
|
|
@ -2,6 +2,7 @@ from .onet import OnetBaseIE
|
|||
|
||||
|
||||
class ClipRsIE(OnetBaseIE):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?clip\.rs/(?P<id>[^/]+)/\d+'
|
||||
_TEST = {
|
||||
'url': 'http://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732',
|
||||
|
|
|
@ -4,6 +4,7 @@ from .common import InfoExtractor
|
|||
|
||||
|
||||
class CloserToTruthIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688',
|
||||
|
|
|
@ -4,27 +4,25 @@ from .common import InfoExtractor
|
|||
|
||||
|
||||
class CloudflareStreamIE(InfoExtractor):
|
||||
_SUBDOMAIN_RE = r'(?:(?:watch|iframe|customer-\w+)\.)?'
|
||||
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
|
||||
_EMBED_RE = r'embed\.%s/embed/[^/]+\.js\?.*?\bvideo=' % _DOMAIN_RE
|
||||
_EMBED_RE = rf'embed\.{_DOMAIN_RE}/embed/[^/]+\.js\?.*?\bvideo='
|
||||
_ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:watch\.)?%s/|
|
||||
%s
|
||||
)
|
||||
(?P<id>%s)
|
||||
''' % (_DOMAIN_RE, _EMBED_RE, _ID_RE)
|
||||
_EMBED_REGEX = [fr'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1']
|
||||
_VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})'
|
||||
_EMBED_REGEX = [
|
||||
rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1',
|
||||
rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
|
||||
'info_dict': {
|
||||
'id': '31c9291ab41fac05471db4e73aa11717',
|
||||
'ext': 'mp4',
|
||||
'title': '31c9291ab41fac05471db4e73aa11717',
|
||||
'thumbnail': 'https://videodelivery.net/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
|
||||
|
@ -35,6 +33,21 @@ class CloudflareStreamIE(InfoExtractor):
|
|||
}, {
|
||||
'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://customer-aw5py76sw8wyqzmh.cloudflarestream.com/2463f6d3e06fa29710a337f5f5389fd8/iframe',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://upride.cc/incident/shoulder-pass-at-light/',
|
||||
'info_dict': {
|
||||
'id': 'eaef9dea5159cf968be84241b5cedfe7',
|
||||
'ext': 'mp4',
|
||||
'title': 'eaef9dea5159cf968be84241b5cedfe7',
|
||||
'thumbnail': 'https://videodelivery.net/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
|
|
@ -1,68 +1,97 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
|
||||
|
||||
class CNBCIE(InfoExtractor):
    """Extractor for video.cnbc.com gallery URLs; delegates to ThePlatform."""

    _VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://video.cnbc.com/gallery/?video=3000503714',
        'info_dict': {
            'id': '3000503714',
            'ext': 'mp4',
            'title': 'Fighting zombies is big business',
            'description': 'md5:0c100d8e1a7947bd2feec9a5550e519e',
            'timestamp': 1459332000,
            'upload_date': '20160330',
            'uploader': 'NBCU-CNBC',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': 'Dead link',
    }

    def _real_extract(self, url):
        """Return a url_transparent result pointing at the ThePlatform media URL."""
        video_id = self._match_id(url)
        platform_url = (
            'http://link.theplatform.com/s/gZWlPC/media/guid/2408950221/%s?mbr=true&manifest=m3u' % video_id)
        return {
            '_type': 'url_transparent',
            'ie_key': 'ThePlatform',
            # The smuggled flag is passed along with the URL to the ThePlatform extractor
            'url': smuggle_url(platform_url, {'force_smil_url': True}),
            'id': video_id,
        }
|
||||
from ..utils import int_or_none, parse_iso8601, str_or_none, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class CNBCVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cnbc\.com(?P<path>/video/(?:[^/]+/)+(?P<id>[^./?#&]+)\.html)'
|
||||
_TEST = {
|
||||
'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
|
||||
_VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/?#]+/)+(?P<id>[^./?#&]+)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cnbc.com/video/2023/12/07/mcdonalds-just-unveiled-cosmcsits-new-spinoff-brand.html',
|
||||
'info_dict': {
|
||||
'id': '7000031301',
|
||||
'ext': 'mp4',
|
||||
'title': "Trump: I don't necessarily agree with raising rates",
|
||||
'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3',
|
||||
'timestamp': 1531958400,
|
||||
'upload_date': '20180719',
|
||||
'uploader': 'NBCU-CNBC',
|
||||
'id': '107344774',
|
||||
'display_id': 'mcdonalds-just-unveiled-cosmcsits-new-spinoff-brand',
|
||||
'modified_timestamp': 1702053483,
|
||||
'timestamp': 1701977810,
|
||||
'channel': 'News Videos',
|
||||
'upload_date': '20231207',
|
||||
'description': 'md5:882c001d85cb43d7579b514307b3e78b',
|
||||
'release_timestamp': 1701977375,
|
||||
'modified_date': '20231208',
|
||||
'release_date': '20231207',
|
||||
'duration': 65,
|
||||
'creators': ['Sean Conlon'],
|
||||
'title': 'Here\'s a first look at McDonald\'s new spinoff brand, CosMc\'s',
|
||||
'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107344192-1701894812493-CosMcsskyHero_2336x1040_hero-desktop.jpg?v=1701894855',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'https://www.cnbc.com/video/2023/12/08/jim-cramer-shares-his-take-on-seattles-tech-scene.html',
|
||||
'info_dict': {
|
||||
'creators': ['Jim Cramer'],
|
||||
'channel': 'Mad Money with Jim Cramer',
|
||||
'description': 'md5:72925be21b952e95eba51178dddf4e3e',
|
||||
'duration': 299.0,
|
||||
'ext': 'mp4',
|
||||
'id': '107345451',
|
||||
'display_id': 'jim-cramer-shares-his-take-on-seattles-tech-scene',
|
||||
'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107345481-1702079431MM-B-120823.jpg?v=1702079430',
|
||||
'timestamp': 1702080139,
|
||||
'title': 'Jim Cramer shares his take on Seattle\'s tech scene',
|
||||
'release_date': '20231208',
|
||||
'upload_date': '20231209',
|
||||
'modified_timestamp': 1702080139,
|
||||
'modified_date': '20231209',
|
||||
'release_timestamp': 1702073551,
|
||||
},
|
||||
'skip': 'Dead link',
|
||||
}
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'https://www.cnbc.com/video/2023/12/08/the-epicenter-of-ai-is-in-seattle-says-jim-cramer.html',
|
||||
'info_dict': {
|
||||
'creators': ['Jim Cramer'],
|
||||
'channel': 'Mad Money with Jim Cramer',
|
||||
'description': 'md5:72925be21b952e95eba51178dddf4e3e',
|
||||
'duration': 113.0,
|
||||
'ext': 'mp4',
|
||||
'id': '107345474',
|
||||
'display_id': 'the-epicenter-of-ai-is-in-seattle-says-jim-cramer',
|
||||
'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107345486-Screenshot_2023-12-08_at_70339_PM.png?v=1702080248',
|
||||
'timestamp': 1702080535,
|
||||
'title': 'The epicenter of AI is in Seattle, says Jim Cramer',
|
||||
'release_timestamp': 1702077347,
|
||||
'modified_timestamp': 1702080535,
|
||||
'release_date': '20231208',
|
||||
'upload_date': '20231209',
|
||||
'modified_date': '20231209',
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
path, display_id = self._match_valid_url(url).groups()
|
||||
video_id = self._download_json(
|
||||
'https://webql-redesign.cnbcfm.com/graphql', display_id, query={
|
||||
'query': '''{
|
||||
page(path: "%s") {
|
||||
vcpsId
|
||||
}
|
||||
}''' % path,
|
||||
})['data']['page']['vcpsId']
|
||||
return self.url_result(
|
||||
'http://video.cnbc.com/gallery/?video=%d' % video_id,
|
||||
CNBCIE.ie_key())
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
data = self._search_json(r'window\.__s_data=', webpage, 'video data', display_id)
|
||||
|
||||
player_data = traverse_obj(data, (
|
||||
'page', 'page', 'layout', ..., 'columns', ..., 'modules',
|
||||
lambda _, v: v['name'] == 'clipPlayer', 'data', {dict}), get_all=False)
|
||||
|
||||
return {
|
||||
'id': display_id,
|
||||
'display_id': display_id,
|
||||
'formats': self._extract_akamai_formats(player_data['playbackURL'], display_id),
|
||||
**self._search_json_ld(webpage, display_id, fatal=False),
|
||||
**traverse_obj(player_data, {
|
||||
'id': ('id', {str_or_none}),
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'creators': ('author', ..., 'name', {str}),
|
||||
'timestamp': ('datePublished', {parse_iso8601}),
|
||||
'release_timestamp': ('uploadDate', {parse_iso8601}),
|
||||
'modified_timestamp': ('dateLastPublished', {parse_iso8601}),
|
||||
'thumbnail': ('thumbnail', {url_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'channel': ('section', 'title', {str}),
|
||||
}),
|
||||
}
|
||||
|
|
|
@ -247,6 +247,8 @@ class InfoExtractor:
|
|||
(For internal use only)
|
||||
* http_chunk_size Chunk size for HTTP downloads
|
||||
* ffmpeg_args Extra arguments for ffmpeg downloader
|
||||
* is_dash_periods Whether the format is a result of merging
|
||||
multiple DASH periods.
|
||||
RTMP formats can also have the additional fields: page_url,
|
||||
app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn,
|
||||
rtmp_protocol, rtmp_real_time
|
||||
|
@ -260,7 +262,7 @@ class InfoExtractor:
|
|||
|
||||
direct: True if a direct video file was given (must only be set by GenericIE)
|
||||
alt_title: A secondary title of the video.
|
||||
display_id An alternative identifier for the video, not necessarily
|
||||
display_id: An alternative identifier for the video, not necessarily
|
||||
unique, but available before title. Typically, id is
|
||||
something like "4234987", title "Dancing naked mole rats",
|
||||
and display_id "dancing-naked-mole-rats"
|
||||
|
@ -278,7 +280,7 @@ class InfoExtractor:
|
|||
description: Full video description.
|
||||
uploader: Full name of the video uploader.
|
||||
license: License name the video is licensed under.
|
||||
creator: The creator of the video.
|
||||
creators: List of creators of the video.
|
||||
timestamp: UNIX timestamp of the moment the video was uploaded
|
||||
upload_date: Video upload date in UTC (YYYYMMDD).
|
||||
If not explicitly set, calculated from timestamp
|
||||
|
@ -422,16 +424,16 @@ class InfoExtractor:
|
|||
track_number: Number of the track within an album or a disc, as an integer.
|
||||
track_id: Id of the track (useful in case of custom indexing, e.g. 6.iii),
|
||||
as a unicode string.
|
||||
artist: Artist(s) of the track.
|
||||
genre: Genre(s) of the track.
|
||||
artists: List of artists of the track.
|
||||
composers: List of composers of the piece.
|
||||
genres: List of genres of the track.
|
||||
album: Title of the album the track belongs to.
|
||||
album_type: Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc).
|
||||
album_artist: List of all artists appeared on the album (e.g.
|
||||
"Ash Borer / Fell Voices" or "Various Artists", useful for splits
|
||||
and compilations).
|
||||
album_artists: List of all artists that appeared on the album.
|
||||
E.g. ["Ash Borer", "Fell Voices"] or ["Various Artists"].
|
||||
Useful for splits and compilations.
|
||||
disc_number: Number of the disc or other physical medium the track belongs to,
|
||||
as an integer.
|
||||
composer: Composer of the piece
|
||||
|
||||
The following fields should only be set for clips that should be cut from the original video:
|
||||
|
||||
|
@ -442,6 +444,18 @@ class InfoExtractor:
|
|||
rows: Number of rows in each storyboard fragment, as an integer
|
||||
columns: Number of columns in each storyboard fragment, as an integer
|
||||
|
||||
The following fields are deprecated and should not be set by new code:
|
||||
composer: Use "composers" instead.
|
||||
Composer(s) of the piece, comma-separated.
|
||||
artist: Use "artists" instead.
|
||||
Artist(s) of the track, comma-separated.
|
||||
genre: Use "genres" instead.
|
||||
Genre(s) of the track, comma-separated.
|
||||
album_artist: Use "album_artists" instead.
|
||||
All artists that appeared on the album, comma-separated.
|
||||
creator: Use "creators" instead.
|
||||
The creator of the video.
|
||||
|
||||
Unless mentioned otherwise, the fields should be Unicode strings.
|
||||
|
||||
Unless mentioned otherwise, None is equivalent to absence of information.
|
||||
|
@ -2530,7 +2544,11 @@ class InfoExtractor:
|
|||
self._report_ignoring_subs('DASH')
|
||||
return fmts
|
||||
|
||||
def _extract_mpd_formats_and_subtitles(
|
||||
def _extract_mpd_formats_and_subtitles(self, *args, **kwargs):
    """
    Extract the periods of an MPD manifest and merge them.

    All arguments are forwarded unchanged to _extract_mpd_periods;
    the result is whatever _merge_mpd_periods returns for those periods.
    """
    return self._merge_mpd_periods(self._extract_mpd_periods(*args, **kwargs))
|
||||
|
||||
def _extract_mpd_periods(
|
||||
self, mpd_url, video_id, mpd_id=None, note=None, errnote=None,
|
||||
fatal=True, data=None, headers={}, query={}):
|
||||
|
||||
|
@ -2543,17 +2561,16 @@ class InfoExtractor:
|
|||
errnote='Failed to download MPD manifest' if errnote is None else errnote,
|
||||
fatal=fatal, data=data, headers=headers, query=query)
|
||||
if res is False:
|
||||
return [], {}
|
||||
return []
|
||||
mpd_doc, urlh = res
|
||||
if mpd_doc is None:
|
||||
return [], {}
|
||||
return []
|
||||
|
||||
# We could have been redirected to a new url when we retrieved our mpd file.
|
||||
mpd_url = urlh.url
|
||||
mpd_base_url = base_url(mpd_url)
|
||||
|
||||
return self._parse_mpd_formats_and_subtitles(
|
||||
mpd_doc, mpd_id, mpd_base_url, mpd_url)
|
||||
return self._parse_mpd_periods(mpd_doc, mpd_id, mpd_base_url, mpd_url)
|
||||
|
||||
def _parse_mpd_formats(self, *args, **kwargs):
|
||||
fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs)
|
||||
|
@ -2561,8 +2578,39 @@ class InfoExtractor:
|
|||
self._report_ignoring_subs('DASH')
|
||||
return fmts
|
||||
|
||||
def _parse_mpd_formats_and_subtitles(
|
||||
self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
|
||||
def _parse_mpd_formats_and_subtitles(self, *args, **kwargs):
    """
    Parse the periods of an MPD document and merge them.

    All arguments are forwarded unchanged to _parse_mpd_periods;
    the result is whatever _merge_mpd_periods returns for those periods.
    """
    return self._merge_mpd_periods(self._parse_mpd_periods(*args, **kwargs))
|
||||
|
||||
def _merge_mpd_periods(self, periods):
|
||||
"""
|
||||
Combine all formats and subtitles from an MPD manifest into a single list,
|
||||
by concatenate streams with similar formats.
|
||||
"""
|
||||
formats, subtitles = {}, {}
|
||||
for period in periods:
|
||||
for f in period['formats']:
|
||||
assert 'is_dash_periods' not in f, 'format already processed'
|
||||
f['is_dash_periods'] = True
|
||||
format_key = tuple(v for k, v in f.items() if k not in (
|
||||
('format_id', 'fragments', 'manifest_stream_number')))
|
||||
if format_key not in formats:
|
||||
formats[format_key] = f
|
||||
elif 'fragments' in f:
|
||||
formats[format_key].setdefault('fragments', []).extend(f['fragments'])
|
||||
|
||||
if subtitles and period['subtitles']:
|
||||
self.report_warning(bug_reports_message(
|
||||
'Found subtitles in multiple periods in the DASH manifest; '
|
||||
'if part of the subtitles are missing,'
|
||||
), only_once=True)
|
||||
|
||||
for sub_lang, sub_info in period['subtitles'].items():
|
||||
subtitles.setdefault(sub_lang, []).extend(sub_info)
|
||||
|
||||
return list(formats.values()), subtitles
|
||||
|
||||
def _parse_mpd_periods(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
|
||||
"""
|
||||
Parse formats from MPD manifest.
|
||||
References:
|
||||
|
@ -2641,9 +2689,13 @@ class InfoExtractor:
|
|||
return ms_info
|
||||
|
||||
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
|
||||
formats, subtitles = [], {}
|
||||
stream_numbers = collections.defaultdict(int)
|
||||
for period in mpd_doc.findall(_add_ns('Period')):
|
||||
for period_idx, period in enumerate(mpd_doc.findall(_add_ns('Period'))):
|
||||
period_entry = {
|
||||
'id': period.get('id', f'period-{period_idx}'),
|
||||
'formats': [],
|
||||
'subtitles': collections.defaultdict(list),
|
||||
}
|
||||
period_duration = parse_duration(period.get('duration')) or mpd_duration
|
||||
period_ms_info = extract_multisegment_info(period, {
|
||||
'start_number': 1,
|
||||
|
@ -2893,11 +2945,10 @@ class InfoExtractor:
|
|||
if content_type in ('video', 'audio', 'image/jpeg'):
|
||||
f['manifest_stream_number'] = stream_numbers[f['url']]
|
||||
stream_numbers[f['url']] += 1
|
||||
formats.append(f)
|
||||
period_entry['formats'].append(f)
|
||||
elif content_type == 'text':
|
||||
subtitles.setdefault(lang or 'und', []).append(f)
|
||||
|
||||
return formats, subtitles
|
||||
period_entry['subtitles'][lang or 'und'].append(f)
|
||||
yield period_entry
|
||||
|
||||
def _extract_ism_formats(self, *args, **kwargs):
|
||||
fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)
|
||||
|
|
|
@ -65,7 +65,7 @@ class CPACIE(InfoExtractor):
|
|||
'title': title,
|
||||
'description': str_or_none(content['details'].get('description_%s_t' % (url_lang, ))),
|
||||
'timestamp': unified_timestamp(content['details'].get('liveDateTime')),
|
||||
'category': [category] if category else None,
|
||||
'categories': [category] if category else None,
|
||||
'thumbnail': urljoin(url, str_or_none(content['details'].get('image_%s_s' % (url_lang, )))),
|
||||
'is_live': is_live(content['details'].get('type')),
|
||||
}
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
import json
|
||||
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
get_element_by_id,
|
||||
js_to_json,
|
||||
traverse_obj,
|
||||
extract_attributes,
|
||||
get_element_html_by_class,
|
||||
get_element_text_and_html_by_tag,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class CraftsyIE(InfoExtractor):
|
||||
|
@ -41,28 +42,34 @@ class CraftsyIE(InfoExtractor):
|
|||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_data = self._parse_json(self._search_regex(
|
||||
r'class_video_player_vars\s*=\s*({.*})\s*;',
|
||||
get_element_by_id('vidstore-classes_class-video-player-js-extra', webpage),
|
||||
'video data'), video_id, transform_source=js_to_json)
|
||||
video_player = get_element_html_by_class('class-video-player', webpage)
|
||||
video_data = traverse_obj(video_player, (
|
||||
{extract_attributes}, 'wire:snapshot', {json.loads}, 'data', {dict})) or {}
|
||||
video_js = traverse_obj(video_player, (
|
||||
{lambda x: get_element_text_and_html_by_tag('video-js', x)}, 1, {extract_attributes})) or {}
|
||||
|
||||
account_id = traverse_obj(video_data, ('video_player', 'bc_account_id'))
|
||||
has_access = video_data.get('userHasAccess')
|
||||
lessons = traverse_obj(video_data, ('lessons', ..., ..., lambda _, v: v['video_id']))
|
||||
|
||||
entries = []
|
||||
class_preview = traverse_obj(video_data, ('video_player', 'class_preview'))
|
||||
if class_preview:
|
||||
v_id = class_preview.get('video_id')
|
||||
entries.append(self.url_result(
|
||||
f'http://players.brightcove.net/{account_id}/default_default/index.html?videoId={v_id}',
|
||||
BrightcoveNewIE, v_id, class_preview.get('title')))
|
||||
preview_id = video_js.get('data-video-id')
|
||||
if preview_id and preview_id not in traverse_obj(lessons, (..., 'video_id')):
|
||||
if not lessons and not has_access:
|
||||
self.report_warning(
|
||||
'Only extracting preview. For the full class, pass cookies '
|
||||
+ f'from an account that has access. {self._login_hint()}')
|
||||
lessons.append({'video_id': preview_id})
|
||||
|
||||
if dict_get(video_data, ('is_free', 'user_has_access')):
|
||||
entries += [
|
||||
self.url_result(
|
||||
if not lessons and not has_access:
|
||||
self.raise_login_required('You do not have access to this class')
|
||||
|
||||
account_id = video_data.get('accountId') or video_js['data-account']
|
||||
|
||||
def entries(lessons):
|
||||
for lesson in lessons:
|
||||
yield self.url_result(
|
||||
f'http://players.brightcove.net/{account_id}/default_default/index.html?videoId={lesson["video_id"]}',
|
||||
BrightcoveNewIE, lesson['video_id'], lesson.get('title'))
|
||||
for lesson in video_data['lessons']]
|
||||
|
||||
return self.playlist_result(
|
||||
entries, video_id, video_data.get('class_title'),
|
||||
entries(lessons), video_id, self._html_search_meta(('og:title', 'twitter:title'), webpage),
|
||||
self._html_search_meta(('og:description', 'description'), webpage, default=None))
|
||||
|
|
|
@ -514,7 +514,7 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
|
|||
'track': 'Egaono Hana',
|
||||
'artist': 'Goose house',
|
||||
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
|
||||
'genre': ['J-Pop'],
|
||||
'genres': ['J-Pop'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
|
@ -527,7 +527,7 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
|
|||
'track': 'Crossing Field',
|
||||
'artist': 'LiSA',
|
||||
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
|
||||
'genre': ['Anime'],
|
||||
'genres': ['Anime'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
|
@ -541,7 +541,7 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
|
|||
'artist': 'LiSA',
|
||||
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
|
||||
'description': 'md5:747444e7e6300907b7a43f0a0503072e',
|
||||
'genre': ['J-Pop'],
|
||||
'genres': ['J-Pop'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
|
@ -594,7 +594,7 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
|
|||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
}),
|
||||
'genre': ('genres', ..., 'displayValue'),
|
||||
'genres': ('genres', ..., 'displayValue'),
|
||||
'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
|
||||
}),
|
||||
}
|
||||
|
@ -611,7 +611,7 @@ class CrunchyrollArtistIE(CrunchyrollBaseIE):
|
|||
'info_dict': {
|
||||
'id': 'MA179CB50D',
|
||||
'title': 'LiSA',
|
||||
'genre': ['J-Pop', 'Anime', 'Rock'],
|
||||
'genres': ['J-Pop', 'Anime', 'Rock'],
|
||||
'description': 'md5:16d87de61a55c3f7d6c454b73285938e',
|
||||
},
|
||||
'playlist_mincount': 83,
|
||||
|
@ -645,6 +645,6 @@ class CrunchyrollArtistIE(CrunchyrollBaseIE):
|
|||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
}),
|
||||
'genre': ('genres', ..., 'displayValue'),
|
||||
'genres': ('genres', ..., 'displayValue'),
|
||||
}),
|
||||
}
|
||||
|
|
|
@ -114,7 +114,7 @@ class CybraryCourseIE(CybraryBaseIE):
|
|||
_TESTS = [{
|
||||
'url': 'https://app.cybrary.it/browse/course/az-500-microsoft-azure-security-technologies',
|
||||
'info_dict': {
|
||||
'id': 898,
|
||||
'id': '898',
|
||||
'title': 'AZ-500: Microsoft Azure Security Technologies',
|
||||
'description': 'md5:69549d379c0fc1dec92926d4e8b6fbd4'
|
||||
},
|
||||
|
@ -122,7 +122,7 @@ class CybraryCourseIE(CybraryBaseIE):
|
|||
}, {
|
||||
'url': 'https://app.cybrary.it/browse/course/cybrary-orientation',
|
||||
'info_dict': {
|
||||
'id': 1245,
|
||||
'id': '1245',
|
||||
'title': 'Cybrary Orientation',
|
||||
'description': 'md5:9e69ff66b32fe78744e0ad4babe2e88e'
|
||||
},
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import functools
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
|
@ -44,36 +45,41 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
|||
self._FAMILY_FILTER = ff == 'on' if ff else age_restricted(18, self.get_param('age_limit'))
|
||||
self._set_dailymotion_cookie('ff', 'on' if self._FAMILY_FILTER else 'off')
|
||||
|
||||
def _get_token(self, xid):
    """
    Return a bearer token for the Dailymotion GraphQL API.

    A token already present in the Dailymotion cookies ('access_token' first,
    then 'client_token') is reused. Otherwise a new one is requested from the
    OAuth endpoint, using the password grant when login info is available and
    the client-credentials grant when it is not, and stored back as a cookie.

    @param xid  Identifier passed to _parse_json when decoding an HTTP 400
                error response.
    @raises ExtractorError  With the server-supplied error description
                            (expected=True) on HTTP 400; any other
                            ExtractorError is re-raised unchanged.
    """
    cookies = self._get_dailymotion_cookies()
    for cookie_name in ('access_token', 'client_token'):
        cached_token = self._get_cookie_value(cookies, cookie_name)
        if cached_token:
            return cached_token

    username, password = self._get_login_info()
    if username:
        grant = {
            'grant_type': 'password',
            'password': password,
            'username': username,
        }
    else:
        grant = {'grant_type': 'client_credentials'}

    payload = {
        'client_id': 'f1a362d288c1b98099c7',
        'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
        **grant,
    }

    try:
        response = self._download_json(
            'https://graphql.api.dailymotion.com/oauth/token',
            None, 'Downloading Access Token',
            data=urlencode_postdata(payload))
        token = response['access_token']
    except ExtractorError as err:
        cause = err.cause
        if not (isinstance(cause, HTTPError) and cause.status == 400):
            raise
        # On HTTP 400 the response body is parsed for an error description
        error_info = self._parse_json(cause.response.read().decode(), xid)
        raise ExtractorError(error_info['error_description'], expected=True)

    # Store the token under the cookie name matching the grant that produced it
    cookie_name = 'access_token' if username else 'client_token'
    self._set_dailymotion_cookie(cookie_name, token)
    return token
|
||||
|
||||
def _call_api(self, object_type, xid, object_fields, note, filter_extra=None):
|
||||
if not self._HEADERS.get('Authorization'):
|
||||
cookies = self._get_dailymotion_cookies()
|
||||
token = self._get_cookie_value(cookies, 'access_token') or self._get_cookie_value(cookies, 'client_token')
|
||||
if not token:
|
||||
data = {
|
||||
'client_id': 'f1a362d288c1b98099c7',
|
||||
'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
|
||||
}
|
||||
username, password = self._get_login_info()
|
||||
if username:
|
||||
data.update({
|
||||
'grant_type': 'password',
|
||||
'password': password,
|
||||
'username': username,
|
||||
})
|
||||
else:
|
||||
data['grant_type'] = 'client_credentials'
|
||||
try:
|
||||
token = self._download_json(
|
||||
'https://graphql.api.dailymotion.com/oauth/token',
|
||||
None, 'Downloading Access Token',
|
||||
data=urlencode_postdata(data))['access_token']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
|
||||
raise ExtractorError(self._parse_json(
|
||||
e.cause.response.read().decode(), xid)['error_description'], expected=True)
|
||||
raise
|
||||
self._set_dailymotion_cookie('access_token' if username else 'client_token', token)
|
||||
self._HEADERS['Authorization'] = 'Bearer ' + token
|
||||
self._HEADERS['Authorization'] = f'Bearer {self._get_token(xid)}'
|
||||
|
||||
resp = self._download_json(
|
||||
'https://graphql.api.dailymotion.com/', xid, note, data=json.dumps({
|
||||
|
@ -393,9 +399,55 @@ class DailymotionPlaylistIE(DailymotionPlaylistBaseIE):
|
|||
yield '//dailymotion.com/playlist/%s' % p
|
||||
|
||||
|
||||
class DailymotionSearchIE(DailymotionPlaylistBaseIE):
    # Turns a Dailymotion search-results URL into a lazily paged playlist of videos.
    IE_NAME = 'dailymotion:search'
    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/search/(?P<id>[^/?#]+)/videos'
    # Page size used both for OnDemandPagedList and as the API "limit" variable
    _PAGE_SIZE = 20
    _TESTS = [{
        'url': 'http://www.dailymotion.com/search/king of turtles/videos',
        'info_dict': {
            'id': 'king of turtles',
            'title': 'king of turtles',
        },
        'playlist_mincount': 90,
    }]
    _SEARCH_QUERY = 'query SEARCH_QUERY( $query: String! $page: Int $limit: Int ) { search { videos( query: $query first: $limit page: $page ) { edges { node { xid } } } } } '

    def _call_search_api(self, term, page, note):
        """
        Run the search GraphQL query for one page and return the 'search' object.

        @param term  Search term sent as the 'query' variable
        @param page  Page number sent as the 'page' variable
        @param note  Progress message passed to _download_json
        @raises ExtractorError  If the response has no 'search' dict; the first
                                API error message is used when present.
        """
        if not self._HEADERS.get('Authorization'):
            self._HEADERS['Authorization'] = f'Bearer {self._get_token(term)}'
        resp = self._download_json(
            'https://graphql.api.dailymotion.com/', None, note, data=json.dumps({
                'operationName': 'SEARCH_QUERY',
                'query': self._SEARCH_QUERY,
                'variables': {
                    # Must equal the page size given to OnDemandPagedList,
                    # otherwise results would be skipped or repeated
                    'limit': self._PAGE_SIZE,
                    'page': page,
                    'query': term,
                }
            }).encode(), headers=self._HEADERS)
        obj = traverse_obj(resp, ('data', 'search', {dict}))
        if not obj:
            raise ExtractorError(
                traverse_obj(resp, ('errors', 0, 'message', {str})) or 'Could not fetch search data')

        return obj

    def _fetch_page(self, term, page):
        """Yield url results for one page; `page` is 0-based here and sent 1-based to the API."""
        page += 1
        response = self._call_search_api(term, page, f'Searching "{term}" page {page}')
        for xid in traverse_obj(response, ('videos', 'edges', ..., 'node', 'xid')):
            yield self.url_result(f'https://www.dailymotion.com/video/{xid}', DailymotionIE, xid)

    def _real_extract(self, url):
        # The URL path component is the (possibly percent/plus-encoded) search term
        term = urllib.parse.unquote_plus(self._match_id(url))
        return self.playlist_result(
            OnDemandPagedList(functools.partial(self._fetch_page, term), self._PAGE_SIZE), term, term)
|
||||
|
||||
|
||||
class DailymotionUserIE(DailymotionPlaylistBaseIE):
|
||||
IE_NAME = 'dailymotion:user'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search)/)(?:(?:old/)?user/)?(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||
'info_dict': {
|
||||
|
|
|
@ -83,7 +83,6 @@ class DamtomoRecordIE(DamtomoBaseIE):
|
|||
'info_dict': {
|
||||
'id': '27376862',
|
||||
'title': 'イカSUMMER [良音]',
|
||||
'description': None,
|
||||
'uploader': 'NANA',
|
||||
'uploader_id': 'MzAyMDExNTY',
|
||||
'upload_date': '20210721',
|
||||
|
|
|
@ -27,7 +27,7 @@ class DaumIE(DaumBaseIE):
|
|||
'duration': 2117,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'uploader_id': 186139,
|
||||
'uploader_id': '186139',
|
||||
'uploader': '콘간지',
|
||||
'timestamp': 1387310323,
|
||||
},
|
||||
|
@ -44,7 +44,7 @@ class DaumIE(DaumBaseIE):
|
|||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'uploader': 'MBC 예능',
|
||||
'uploader_id': 132251,
|
||||
'uploader_id': '132251',
|
||||
'timestamp': 1421604228,
|
||||
},
|
||||
}, {
|
||||
|
@ -63,7 +63,7 @@ class DaumIE(DaumBaseIE):
|
|||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'uploader': '까칠한 墮落始祖 황비홍님의',
|
||||
'uploader_id': 560824,
|
||||
'uploader_id': '560824',
|
||||
'timestamp': 1203770745,
|
||||
},
|
||||
}, {
|
||||
|
@ -77,7 +77,7 @@ class DaumIE(DaumBaseIE):
|
|||
'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
|
||||
'upload_date': '20170129',
|
||||
'uploader': '쇼! 음악중심',
|
||||
'uploader_id': 2653210,
|
||||
'uploader_id': '2653210',
|
||||
'timestamp': 1485684628,
|
||||
},
|
||||
}]
|
||||
|
@ -107,7 +107,7 @@ class DaumClipIE(DaumBaseIE):
|
|||
'duration': 3868,
|
||||
'view_count': int,
|
||||
'uploader': 'GOMeXP',
|
||||
'uploader_id': 6667,
|
||||
'uploader_id': '6667',
|
||||
'timestamp': 1377911092,
|
||||
},
|
||||
}, {
|
||||
|
|
|
@ -1,54 +0,0 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json
|
||||
|
||||
|
||||
class DiggIE(InfoExtractor):
    # Resolves digg.com video pages to the embedded provider (YouTube or
    # JWPlatform), falling back to the Generic extractor otherwise.
    _VALID_URL = r'https?://(?:www\.)?digg\.com/video/(?P<id>[^/?#&]+)'
    _TESTS = [{
        # JWPlatform via provider
        'url': 'http://digg.com/video/sci-fi-short-jonah-daniel-kaluuya-get-out',
        'info_dict': {
            'id': 'LcqvmS0b',
            'ext': 'mp4',
            'title': "'Get Out' Star Daniel Kaluuya Goes On 'Moby Dick'-Like Journey In Sci-Fi Short 'Jonah'",
            'description': 'md5:541bb847648b6ee3d6514bc84b82efda',
            'upload_date': '20180109',
            'timestamp': 1515530551,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # Youtube via provider
        'url': 'http://digg.com/video/dog-boat-seal-play',
        'only_matching': True,
    }, {
        # vimeo as regular embed
        'url': 'http://digg.com/video/dream-girl-short-film',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url result for the page's provider video, or defer to Generic."""
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        # With fatal=False a parse failure yields None rather than raising;
        # treat that like a missing video_info object so the Generic
        # fallback below is still reached instead of crashing on None.get
        info = self._parse_json(
            self._search_regex(
                r'(?s)video_info\s*=\s*({.+?});\n', webpage, 'video info',
                default='{}'), display_id, transform_source=js_to_json,
            fatal=False) or {}

        video_id = info.get('video_id')

        if video_id:
            provider = info.get('provider_name')
            if provider == 'youtube':
                return self.url_result(
                    video_id, ie='Youtube', video_id=video_id)
            elif provider == 'jwplayer':
                return self.url_result(
                    'jwplatform:%s' % video_id, ie='JWPlatform',
                    video_id=video_id)

        # No recognised provider info: let the Generic extractor handle the page
        return self.url_result(url, 'Generic')
|
|
@ -9,6 +9,7 @@ from ..utils import (
|
|||
|
||||
|
||||
class DTubeIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?d\.tube/(?:#!/)?v/(?P<uploader_id>[0-9a-z.-]+)/(?P<id>[0-9a-z]{8})'
|
||||
_TEST = {
|
||||
'url': 'https://d.tube/#!/v/broncnutz/x380jtr1',
|
||||
|
|
|
@ -8,9 +8,9 @@ from ..utils import (
|
|||
|
||||
class DumpertIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl(?:
|
||||
/(?:mediabase|embed|item)/|
|
||||
(?:/toppers|/latest|/?)\?selectedId=
|
||||
(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl/(?:
|
||||
(?:mediabase|embed|item)/|
|
||||
[^#]*[?&]selectedId=
|
||||
)(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dumpert.nl/item/6646981_951bc60f',
|
||||
|
@ -56,6 +56,9 @@ class DumpertIE(InfoExtractor):
|
|||
}, {
|
||||
'url': 'https://www.dumpert.nl/?selectedId=100031688_b317a185',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.dumpert.nl/toppers/dag?selectedId=100086074_f5cef3ac',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
|
|
@ -32,7 +32,7 @@ class DuoplayIE(InfoExtractor):
|
|||
'season_number': 2,
|
||||
'episode': 'Operatsioon "Öö"',
|
||||
'episode_number': 12,
|
||||
'episode_id': 24,
|
||||
'episode_id': '24',
|
||||
},
|
||||
}, {
|
||||
'note': 'Empty title',
|
||||
|
@ -50,7 +50,7 @@ class DuoplayIE(InfoExtractor):
|
|||
'series_id': '17',
|
||||
'season': 'Season 2',
|
||||
'season_number': 2,
|
||||
'episode_id': 14,
|
||||
'episode_id': '14',
|
||||
'release_year': 2010,
|
||||
},
|
||||
}, {
|
||||
|
@ -99,6 +99,6 @@ class DuoplayIE(InfoExtractor):
|
|||
'season_number': ('season_id', {int_or_none}),
|
||||
'episode': 'subtitle',
|
||||
'episode_number': ('episode_nr', {int_or_none}),
|
||||
'episode_id': ('episode_id', {int_or_none}),
|
||||
'episode_id': ('episode_id', {str_or_none}),
|
||||
}, get_all=False) if episode_attr.get('category') != 'movies' else {}),
|
||||
}
|
||||
|
|
|
@ -8,6 +8,8 @@ from ..compat import compat_urlparse
|
|||
|
||||
|
||||
class DWIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_ENABLED = None # XXX: pass through to GenericIE
|
||||
IE_NAME = 'dw'
|
||||
_VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+(?:av|e)-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
|
@ -82,6 +84,8 @@ class DWIE(InfoExtractor):
|
|||
|
||||
|
||||
class DWArticleIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_ENABLED = None # XXX: pass through to GenericIE
|
||||
IE_NAME = 'dw:article'
|
||||
_VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+a-(?P<id>\d+)'
|
||||
_TEST = {
|
||||
|
|
|
@ -42,7 +42,6 @@ class EplusIbIE(InfoExtractor):
|
|||
'live_status': 'was_live',
|
||||
'release_date': '20210719',
|
||||
'release_timestamp': 1626703200,
|
||||
'description': None,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
|
|
|
@ -13,6 +13,7 @@ from ..utils import (
|
|||
|
||||
|
||||
class EuropaIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://ec\.europa\.eu/avservices/(?:video/player|audio/audioDetails)\.cfm\?.*?\bref=(?P<id>[A-Za-z0-9-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://ec.europa.eu/avservices/video/player.cfm?ref=I107758',
|
||||
|
|
|
@ -10,6 +10,7 @@ from ..utils import (
|
|||
|
||||
|
||||
class FancodeVodIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
IE_NAME = 'fancode:vod'
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.)?fancode\.com/video/(?P<id>[0-9]+)\b'
|
||||
|
@ -126,6 +127,7 @@ class FancodeVodIE(InfoExtractor):
|
|||
|
||||
|
||||
class FancodeLiveIE(FancodeVodIE): # XXX: Do not subclass from concrete IE
|
||||
_WORKING = False
|
||||
IE_NAME = 'fancode:live'
|
||||
|
||||
_VALID_URL = r'https?://(www\.)?fancode\.com/match/(?P<id>[0-9]+).+'
|
||||
|
|
|
@ -1,69 +0,0 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class FilmmoduIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?filmmodu\.org/(?P<id>[^/]+-(?:turkce-dublaj-izle|altyazili-izle))'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.filmmodu.org/f9-altyazili-izle',
|
||||
'md5': 'aeefd955c2a508a5bdaa3bcec8eeb0d4',
|
||||
'info_dict': {
|
||||
'id': '10804',
|
||||
'ext': 'mp4',
|
||||
'title': 'F9',
|
||||
'description': 'md5:2713f584a4d65afa2611e2948d0b953c',
|
||||
'subtitles': {
|
||||
'tr': [{
|
||||
'ext': 'vtt',
|
||||
}],
|
||||
},
|
||||
'thumbnail': r're:https://s[0-9]+.filmmodu.org/uploads/movie/cover/10804/xXHZeb1yhJvnSHPzZDqee0zfMb6.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.filmmodu.org/the-godfather-turkce-dublaj-izle',
|
||||
'md5': '109f2fcb9c941330eed133971c035c00',
|
||||
'info_dict': {
|
||||
'id': '3646',
|
||||
'ext': 'mp4',
|
||||
'title': 'Baba',
|
||||
'description': 'md5:d43fd651937cd75cc650883ebd8d8461',
|
||||
'thumbnail': r're:https://s[0-9]+.filmmodu.org/uploads/movie/cover/3646/6xKCYgH16UuwEGAyroLU6p8HLIn.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._og_search_title(webpage, fatal=True)
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
real_video_id = self._search_regex(r'var\s*videoId\s*=\s*\'([0-9]+)\'', webpage, 'video_id')
|
||||
video_type = self._search_regex(r'var\s*videoType\s*=\s*\'([a-z]+)\'', webpage, 'video_type')
|
||||
data = self._download_json('https://www.filmmodu.org/get-source', real_video_id, query={
|
||||
'movie_id': real_video_id,
|
||||
'type': video_type,
|
||||
})
|
||||
formats = [{
|
||||
'url': source['src'],
|
||||
'ext': 'mp4',
|
||||
'format_id': source['label'],
|
||||
'height': int_or_none(source.get('res')),
|
||||
'protocol': 'm3u8_native',
|
||||
} for source in data['sources']]
|
||||
|
||||
subtitles = {}
|
||||
|
||||
if data.get('subtitle'):
|
||||
subtitles['tr'] = [{
|
||||
'url': data['subtitle'],
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': real_video_id,
|
||||
'display_id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
|
@ -1,60 +1,49 @@
|
|||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .dailymotion import DailymotionIE
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
filter_dict,
|
||||
format_field,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
def _make_url_result(self, video_or_full_id, catalog=None):
|
||||
full_id = 'francetv:%s' % video_or_full_id
|
||||
if '@' not in video_or_full_id and catalog:
|
||||
full_id += '@%s' % catalog
|
||||
return self.url_result(
|
||||
full_id, ie=FranceTVIE.ie_key(),
|
||||
video_id=video_or_full_id.split('@')[0])
|
||||
def _make_url_result(self, video_id, url=None):
|
||||
video_id = video_id.split('@')[0] # for compat with old @catalog IDs
|
||||
full_id = f'francetv:{video_id}'
|
||||
if url:
|
||||
full_id = smuggle_url(full_id, {'hostname': urllib.parse.urlparse(url).hostname})
|
||||
return self.url_result(full_id, FranceTVIE, video_id)
|
||||
|
||||
|
||||
class FranceTVIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
https?://
|
||||
sivideo\.webservices\.francetelevisions\.fr/tools/getInfosOeuvre/v2/\?
|
||||
.*?\bidDiffusion=[^&]+|
|
||||
(?:
|
||||
https?://videos\.francetv\.fr/video/|
|
||||
francetv:
|
||||
)
|
||||
(?P<id>[^@]+)(?:@(?P<catalog>.+))?
|
||||
)
|
||||
'''
|
||||
_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1']
|
||||
_VALID_URL = r'francetv:(?P<id>[^@#]+)'
|
||||
_GEO_COUNTRIES = ['FR']
|
||||
_GEO_BYPASS = False
|
||||
|
||||
_TESTS = [{
|
||||
# without catalog
|
||||
'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=162311093&callback=_jsonp_loader_callback_request_0',
|
||||
'md5': 'c2248a8de38c4e65ea8fae7b5df2d84f',
|
||||
'url': 'francetv:ec217ecc-0733-48cf-ac06-af1347b849d1',
|
||||
'info_dict': {
|
||||
'id': '162311093',
|
||||
'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',
|
||||
'ext': 'mp4',
|
||||
'title': '13h15, le dimanche... - Les mystères de Jésus',
|
||||
'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
|
||||
'timestamp': 1502623500,
|
||||
'duration': 2580,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20170813',
|
||||
},
|
||||
}, {
|
||||
# with catalog
|
||||
'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=NI_1004933&catalogue=Zouzous&callback=_jsonp_loader_callback_request_4',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://videos.francetv.fr/video/NI_657393@Regions',
|
||||
'only_matching': True,
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'francetv:162311093',
|
||||
'only_matching': True,
|
||||
|
@ -76,10 +65,7 @@ class FranceTVIE(InfoExtractor):
|
|||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_video(self, video_id, catalogue=None):
|
||||
# Videos are identified by idDiffusion so catalogue part is optional.
|
||||
# However when provided, some extra formats may be returned so we pass
|
||||
# it if available.
|
||||
def _extract_video(self, video_id, hostname=None):
|
||||
is_live = None
|
||||
videos = []
|
||||
title = None
|
||||
|
@ -91,18 +77,20 @@ class FranceTVIE(InfoExtractor):
|
|||
timestamp = None
|
||||
spritesheets = None
|
||||
|
||||
for device_type in ('desktop', 'mobile'):
|
||||
# desktop+chrome returns dash; mobile+safari returns hls
|
||||
for device_type, browser in [('desktop', 'chrome'), ('mobile', 'safari')]:
|
||||
dinfo = self._download_json(
|
||||
'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id,
|
||||
video_id, 'Downloading %s video JSON' % device_type, query={
|
||||
f'https://k7.ftven.fr/videos/{video_id}', video_id,
|
||||
f'Downloading {device_type} {browser} video JSON', query=filter_dict({
|
||||
'device_type': device_type,
|
||||
'browser': 'chrome',
|
||||
}, fatal=False)
|
||||
'browser': browser,
|
||||
'domain': hostname,
|
||||
}), fatal=False)
|
||||
|
||||
if not dinfo:
|
||||
continue
|
||||
|
||||
video = dinfo.get('video')
|
||||
video = traverse_obj(dinfo, ('video', {dict}))
|
||||
if video:
|
||||
videos.append(video)
|
||||
if duration is None:
|
||||
|
@ -112,7 +100,7 @@ class FranceTVIE(InfoExtractor):
|
|||
if spritesheets is None:
|
||||
spritesheets = video.get('spritesheets')
|
||||
|
||||
meta = dinfo.get('meta')
|
||||
meta = traverse_obj(dinfo, ('meta', {dict}))
|
||||
if meta:
|
||||
if title is None:
|
||||
title = meta.get('title')
|
||||
|
@ -126,43 +114,46 @@ class FranceTVIE(InfoExtractor):
|
|||
if timestamp is None:
|
||||
timestamp = parse_iso8601(meta.get('broadcasted_at'))
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for video in videos:
|
||||
formats, subtitles, video_url = [], {}, None
|
||||
for video in traverse_obj(videos, lambda _, v: url_or_none(v['url'])):
|
||||
video_url = video['url']
|
||||
format_id = video.get('format')
|
||||
|
||||
video_url = None
|
||||
if video.get('workflow') == 'token-akamai':
|
||||
token_url = video.get('token')
|
||||
if token_url:
|
||||
token_json = self._download_json(
|
||||
token_url, video_id,
|
||||
'Downloading signed %s manifest URL' % format_id)
|
||||
if token_json:
|
||||
video_url = token_json.get('url')
|
||||
if not video_url:
|
||||
video_url = video.get('url')
|
||||
if token_url := url_or_none(video.get('token')):
|
||||
tokenized_url = traverse_obj(self._download_json(
|
||||
token_url, video_id, f'Downloading signed {format_id} manifest URL',
|
||||
fatal=False, query={
|
||||
'format': 'json',
|
||||
'url': video_url,
|
||||
}), ('url', {url_or_none}))
|
||||
if tokenized_url:
|
||||
video_url = tokenized_url
|
||||
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url, video_id, f4m_id=format_id, fatal=False))
|
||||
video_url, video_id, f4m_id=format_id or ext, fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
format_id = format_id or 'hls'
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
video_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id=format_id,
|
||||
fatal=False)
|
||||
video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
|
||||
for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None):
|
||||
if mobj := re.match(rf'{format_id}-[Aa]udio-\w+-(?P<bitrate>\d+)', f['format_id']):
|
||||
f.update({
|
||||
'tbr': int_or_none(mobj.group('bitrate')),
|
||||
'acodec': 'mp4a',
|
||||
})
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
elif ext == 'mpd':
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
video_url, video_id, mpd_id=format_id, fatal=False)
|
||||
video_url, video_id, mpd_id=format_id or 'dash', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
elif video_url.startswith('rtmp'):
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': 'rtmp-%s' % format_id,
|
||||
'format_id': join_nonempty('rtmp', format_id),
|
||||
'ext': 'flv',
|
||||
})
|
||||
else:
|
||||
|
@ -174,6 +165,13 @@ class FranceTVIE(InfoExtractor):
|
|||
|
||||
# XXX: what is video['captions']?
|
||||
|
||||
if not formats and video_url:
|
||||
urlh = self._request_webpage(
|
||||
HEADRequest(video_url), video_id, 'Checking for geo-restriction',
|
||||
fatal=False, expected_status=403)
|
||||
if urlh and urlh.headers.get('x-errortype') == 'geo':
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
|
||||
|
||||
for f in formats:
|
||||
if f.get('acodec') != 'none' and f.get('language') in ('qtz', 'qad'):
|
||||
f['language_preference'] = -10
|
||||
|
@ -194,7 +192,7 @@ class FranceTVIE(InfoExtractor):
|
|||
# a 10×10 grid of thumbnails corresponding to approximately
|
||||
# 2 seconds of the video; the last spritesheet may be shorter
|
||||
'duration': 200,
|
||||
} for sheet in spritesheets]
|
||||
} for sheet in traverse_obj(spritesheets, (..., {url_or_none}))]
|
||||
})
|
||||
|
||||
return {
|
||||
|
@ -210,21 +208,15 @@ class FranceTVIE(InfoExtractor):
|
|||
'series': title if episode_number else None,
|
||||
'episode_number': int_or_none(episode_number),
|
||||
'season_number': int_or_none(season_number),
|
||||
'_format_sort_fields': ('res', 'tbr', 'proto'), # prioritize m3u8 over dash
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
catalog = mobj.group('catalog')
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
video_id = self._match_id(url)
|
||||
hostname = smuggled_data.get('hostname') or 'www.france.tv'
|
||||
|
||||
if not video_id:
|
||||
qs = parse_qs(url)
|
||||
video_id = qs.get('idDiffusion', [None])[0]
|
||||
catalog = qs.get('catalogue', [None])[0]
|
||||
if not video_id:
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
|
||||
return self._extract_video(video_id, catalog)
|
||||
return self._extract_video(video_id, hostname=hostname)
|
||||
|
||||
|
||||
class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
||||
|
@ -246,6 +238,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
|||
},
|
||||
'add_ie': [FranceTVIE.ie_key()],
|
||||
}, {
|
||||
# geo-restricted
|
||||
'url': 'https://www.france.tv/enfants/six-huit-ans/foot2rue/saison-1/3066387-duel-au-vieux-port.html',
|
||||
'info_dict': {
|
||||
'id': 'a9050959-eedd-4b4a-9b0d-de6eeaa73e44',
|
||||
|
@ -261,6 +254,26 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
|||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 1441,
|
||||
},
|
||||
}, {
|
||||
# geo-restricted livestream (workflow == 'token-akamai')
|
||||
'url': 'https://www.france.tv/france-4/direct.html',
|
||||
'info_dict': {
|
||||
'id': '9a6a7670-dde9-4264-adbc-55b89558594b',
|
||||
'ext': 'mp4',
|
||||
'title': r're:France 4 en direct .+',
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
'skip': 'geo-restricted livestream',
|
||||
}, {
|
||||
# livestream (workflow == 'dai')
|
||||
'url': 'https://www.france.tv/france-2/direct.html',
|
||||
'info_dict': {
|
||||
'id': '006194ea-117d-4bcf-94a9-153d999c59ae',
|
||||
'ext': 'mp4',
|
||||
'title': r're:France 2 en direct .+',
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
'params': {'skip_download': 'livestream'},
|
||||
}, {
|
||||
# france3
|
||||
'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',
|
||||
|
@ -277,10 +290,6 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
|||
# franceo
|
||||
'url': 'https://www.france.tv/france-o/archipels/132249-mon-ancetre-l-esclave.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# france2 live
|
||||
'url': 'https://www.france.tv/france-2/direct.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.france.tv/documentaires/histoire/136517-argentine-les-500-bebes-voles-de-la-dictature.html',
|
||||
'only_matching': True,
|
||||
|
@ -304,17 +313,16 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
|||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
catalogue = None
|
||||
video_id = self._search_regex(
|
||||
r'(?:data-main-video\s*=|videoId["\']?\s*[:=])\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||
webpage, 'video id', default=None, group='id')
|
||||
|
||||
if not video_id:
|
||||
video_id, catalogue = self._html_search_regex(
|
||||
r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
|
||||
webpage, 'video ID').split('@')
|
||||
video_id = self._html_search_regex(
|
||||
r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@"]+@[^"]+)"',
|
||||
webpage, 'video ID')
|
||||
|
||||
return self._make_url_result(video_id, catalogue)
|
||||
return self._make_url_result(video_id, url=url)
|
||||
|
||||
|
||||
class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
||||
|
@ -328,8 +336,9 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
|||
'ext': 'mp4',
|
||||
'title': 'Soir 3',
|
||||
'upload_date': '20190822',
|
||||
'timestamp': 1566510900,
|
||||
'description': 'md5:72d167097237701d6e8452ff03b83c00',
|
||||
'timestamp': 1566510730,
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'duration': 1637,
|
||||
'subtitles': {
|
||||
'fr': 'mincount:2',
|
||||
},
|
||||
|
@ -344,8 +353,8 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
|||
'info_dict': {
|
||||
'id': '7d204c9e-a2d3-11eb-9e4c-000d3a23d482',
|
||||
'ext': 'mp4',
|
||||
'title': 'Covid-19 : une situation catastrophique à New Dehli',
|
||||
'thumbnail': str,
|
||||
'title': 'Covid-19 : une situation catastrophique à New Dehli - Édition du mercredi 21 avril 2021',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'duration': 76,
|
||||
'timestamp': 1619028518,
|
||||
'upload_date': '20210421',
|
||||
|
@ -371,11 +380,17 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
|||
'id': 'x4iiko0',
|
||||
'ext': 'mp4',
|
||||
'title': 'NDDL, référendum, Brexit : Cécile Duflot répond à Patrick Cohen',
|
||||
'description': 'Au lendemain de la victoire du "oui" au référendum sur l\'aéroport de Notre-Dame-des-Landes, l\'ancienne ministre écologiste est l\'invitée de Patrick Cohen. Plus d\'info : https://www.franceinter.fr/emissions/le-7-9/le-7-9-27-juin-2016',
|
||||
'description': 'md5:fdcb582c370756293a65cdfbc6ecd90e',
|
||||
'timestamp': 1467011958,
|
||||
'upload_date': '20160627',
|
||||
'uploader': 'France Inter',
|
||||
'uploader_id': 'x2q2ez',
|
||||
'upload_date': '20160627',
|
||||
'view_count': int,
|
||||
'tags': ['Politique', 'France Inter', '27 juin 2016', 'Linvité de 8h20', 'Cécile Duflot', 'Patrick Cohen'],
|
||||
'age_limit': 0,
|
||||
'duration': 640,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:https://[^/?#]+/v/[^/?#]+/x1080',
|
||||
},
|
||||
'add_ie': ['Dailymotion'],
|
||||
}, {
|
||||
|
@ -405,4 +420,4 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
|||
r'(?:data-id|<figure[^<]+\bid)=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
|
||||
webpage, 'video id')
|
||||
|
||||
return self._make_url_result(video_id)
|
||||
return self._make_url_result(video_id, url=url)
|
||||
|
|
|
@ -301,7 +301,7 @@ class FunimationShowIE(FunimationBaseIE):
|
|||
_TESTS = [{
|
||||
'url': 'https://www.funimation.com/en/shows/sk8-the-infinity',
|
||||
'info_dict': {
|
||||
'id': 1315000,
|
||||
'id': '1315000',
|
||||
'title': 'SK8 the Infinity'
|
||||
},
|
||||
'playlist_count': 13,
|
||||
|
@ -312,7 +312,7 @@ class FunimationShowIE(FunimationBaseIE):
|
|||
# without lang code
|
||||
'url': 'https://www.funimation.com/shows/ouran-high-school-host-club/',
|
||||
'info_dict': {
|
||||
'id': 39643,
|
||||
'id': '39643',
|
||||
'title': 'Ouran High School Host Club'
|
||||
},
|
||||
'playlist_count': 26,
|
||||
|
@ -339,7 +339,7 @@ class FunimationShowIE(FunimationBaseIE):
|
|||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': show_info['id'],
|
||||
'id': str_or_none(show_info['id']),
|
||||
'title': show_info['name'],
|
||||
'entries': orderedSet(
|
||||
self.url_result(
|
||||
|
|
|
@ -19,7 +19,6 @@ class GabTVIE(InfoExtractor):
|
|||
'id': '61217eacea5665de450d0488',
|
||||
'ext': 'mp4',
|
||||
'title': 'WHY WAS AMERICA IN AFGHANISTAN - AMERICA FIRST AGAINST AMERICAN OLIGARCHY',
|
||||
'description': None,
|
||||
'uploader': 'Wurzelroot',
|
||||
'uploader_id': '608fb0a85738fd1974984f7d',
|
||||
'thumbnail': 'https://tv.gab.com/image/61217eacea5665de450d0488',
|
||||
|
|
|
@ -1,46 +0,0 @@
|
|||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
get_element_by_class,
|
||||
get_element_by_id,
|
||||
)
|
||||
|
||||
|
||||
class GameInformerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P<id>[^.?&#]+)'
|
||||
_TESTS = [{
|
||||
# normal Brightcove embed code extracted with BrightcoveNewIE._extract_url
|
||||
'url': 'http://www.gameinformer.com/b/features/archive/2015/09/26/replay-animal-crossing.aspx',
|
||||
'md5': '292f26da1ab4beb4c9099f1304d2b071',
|
||||
'info_dict': {
|
||||
'id': '4515472681001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Replay - Animal Crossing',
|
||||
'description': 'md5:2e211891b215c85d061adc7a4dd2d930',
|
||||
'timestamp': 1443457610,
|
||||
'upload_date': '20150928',
|
||||
'uploader_id': '694940074001',
|
||||
},
|
||||
}, {
|
||||
# Brightcove id inside unique element with field--name-field-brightcove-video-id class
|
||||
'url': 'https://www.gameinformer.com/video-feature/new-gameplay-today/2019/07/09/new-gameplay-today-streets-of-rogue',
|
||||
'info_dict': {
|
||||
'id': '6057111913001',
|
||||
'ext': 'mp4',
|
||||
'title': 'New Gameplay Today – Streets Of Rogue',
|
||||
'timestamp': 1562699001,
|
||||
'upload_date': '20190709',
|
||||
'uploader_id': '694940074001',
|
||||
|
||||
},
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/694940074001/default_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
url, display_id, headers=self.geo_verification_headers())
|
||||
brightcove_id = clean_html(get_element_by_class('field--name-field-brightcove-video-id', webpage) or get_element_by_id('video-source-content', webpage))
|
||||
brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id if brightcove_id else BrightcoveNewIE._extract_url(self, webpage)
|
||||
return self.url_result(brightcove_url, 'BrightcoveNew', brightcove_id)
|
|
@ -267,9 +267,9 @@ class GameJoltIE(GameJoltBaseIE):
|
|||
'id': 'dszyjnwi',
|
||||
'ext': 'webm',
|
||||
'title': 'gif-presentacion-mejorado-dszyjnwi',
|
||||
'n_entries': 1,
|
||||
}
|
||||
}]
|
||||
}],
|
||||
'playlist_count': 1,
|
||||
}, {
|
||||
# Multiple GIFs
|
||||
'url': 'https://gamejolt.com/p/gif-yhsqkumq',
|
||||
|
@ -374,7 +374,6 @@ class GameJoltGameSoundtrackIE(GameJoltBaseIE):
|
|||
'info_dict': {
|
||||
'id': '657899',
|
||||
'title': 'Friday Night Funkin\': Vs Oswald',
|
||||
'n_entries': None,
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
|
@ -384,7 +383,6 @@ class GameJoltGameSoundtrackIE(GameJoltBaseIE):
|
|||
'url': r're:^https://.+vs-oswald-menu-music\.mp3$',
|
||||
'release_timestamp': 1635190816,
|
||||
'release_date': '20211025',
|
||||
'n_entries': 3,
|
||||
}
|
||||
}, {
|
||||
'info_dict': {
|
||||
|
@ -394,7 +392,6 @@ class GameJoltGameSoundtrackIE(GameJoltBaseIE):
|
|||
'url': r're:^https://.+rabbit-s-luck--full-version-\.mp3$',
|
||||
'release_timestamp': 1635190841,
|
||||
'release_date': '20211025',
|
||||
'n_entries': 3,
|
||||
}
|
||||
}, {
|
||||
'info_dict': {
|
||||
|
@ -404,9 +401,9 @@ class GameJoltGameSoundtrackIE(GameJoltBaseIE):
|
|||
'url': r're:^https://.+last-straw\.mp3$',
|
||||
'release_timestamp': 1635881104,
|
||||
'release_date': '20211102',
|
||||
'n_entries': 3,
|
||||
}
|
||||
}]
|
||||
}],
|
||||
'playlist_count': 3,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
|
|
@ -21,7 +21,6 @@ class GaskrankIE(InfoExtractor):
|
|||
'display_id': 'strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden',
|
||||
'uploader_id': 'Bikefun',
|
||||
'upload_date': '20170110',
|
||||
'uploader_url': None,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.gaskrank.tv/tv/racing/isle-of-man-tt-2011-michael-du-15920.htm',
|
||||
|
|
|
@ -2,6 +2,7 @@ from .common import InfoExtractor
|
|||
|
||||
|
||||
class GazetaIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:main/)*(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml',
|
||||
|
|
|
@ -7,6 +7,7 @@ from ..utils import remove_start, smuggle_url, urlencode_postdata
|
|||
|
||||
|
||||
class GDCVaultIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)(?:/(?P<name>[\w-]+))?'
|
||||
_NETRC_MACHINE = 'gdcvault'
|
||||
_TESTS = [
|
||||
|
|
|
@ -1,93 +0,0 @@
|
|||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import parse_duration, parse_iso8601, qualities, str_to_int
|
||||
|
||||
|
||||
class GigaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?giga\.de/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.giga.de/filme/anime-awesome/trailer/anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss/',
|
||||
'md5': '6bc5535e945e724640664632055a584f',
|
||||
'info_dict': {
|
||||
'id': '2622086',
|
||||
'display_id': 'anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss',
|
||||
'ext': 'mp4',
|
||||
'title': 'Anime Awesome: Chihiros Reise ins Zauberland – Das Beste kommt zum Schluss',
|
||||
'description': 'md5:afdf5862241aded4718a30dff6a57baf',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 578,
|
||||
'timestamp': 1414749706,
|
||||
'upload_date': '20141031',
|
||||
'uploader': 'Robin Schweiger',
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.giga.de/games/channel/giga-top-montag/giga-topmontag-die-besten-serien-2014/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.giga.de/extra/netzkultur/videos/giga-games-tom-mats-robin-werden-eigene-wege-gehen-eine-ankuendigung/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.giga.de/tv/jonas-liest-spieletitel-eingedeutscht-episode-2/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
[r'data-video-id="(\d+)"', r'/api/video/jwplayer/#v=(\d+)'],
|
||||
webpage, 'video id')
|
||||
|
||||
playlist = self._download_json(
|
||||
'http://www.giga.de/api/syndication/video/video_id/%s/playlist.json?content=syndication/key/368b5f151da4ae05ced7fa296bdff65a/'
|
||||
% video_id, video_id)[0]
|
||||
|
||||
quality = qualities(['normal', 'hd720'])
|
||||
|
||||
formats = []
|
||||
for format_id in itertools.count(0):
|
||||
fmt = playlist.get(compat_str(format_id))
|
||||
if not fmt:
|
||||
break
|
||||
formats.append({
|
||||
'url': fmt['src'],
|
||||
'format_id': '%s-%s' % (fmt['quality'], fmt['type'].split('/')[-1]),
|
||||
'quality': quality(fmt['quality']),
|
||||
})
|
||||
|
||||
title = self._html_search_meta(
|
||||
'title', webpage, 'title', fatal=True)
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'(?s)(?:data-video-id="{0}"|data-video="[^"]*/api/video/jwplayer/#v={0}[^"]*")[^>]*>.+?<span class="duration">([^<]+)</span>'.format(video_id),
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
timestamp = parse_iso8601(self._search_regex(
|
||||
r'datetime="([^"]+)"', webpage, 'upload date', fatal=False))
|
||||
uploader = self._search_regex(
|
||||
r'class="author">([^<]+)</a>', webpage, 'uploader', fatal=False)
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'<span class="views"><strong>([\d.,]+)</strong>',
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
|
|
@ -6,6 +6,7 @@ from ..utils import (
|
|||
|
||||
|
||||
class GodTubeIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?godtube\.com/watch/\?v=(?P<id>[\da-zA-Z]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
|
|
|
@ -40,6 +40,22 @@ class GoPlayIE(InfoExtractor):
|
|||
'title': 'A Family for the Holidays',
|
||||
},
|
||||
'skip': 'This video is only available for registered users'
|
||||
}, {
|
||||
'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay',
|
||||
'info_dict': {
|
||||
'id': '03eb8f2f-153e-41cb-9805-0d3a29dab656',
|
||||
'ext': 'mp4',
|
||||
'title': 'S11 - Aflevering 1',
|
||||
'episode': 'Episode 1',
|
||||
'series': 'De Mol',
|
||||
'season_number': 11,
|
||||
'episode_number': 1,
|
||||
'season': 'Season 11'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True
|
||||
},
|
||||
'skip': 'This video is only available for registered users'
|
||||
}]
|
||||
|
||||
_id_token = None
|
||||
|
@ -77,16 +93,39 @@ class GoPlayIE(InfoExtractor):
|
|||
|
||||
api = self._download_json(
|
||||
f'https://api.goplay.be/web/v1/videos/long-form/{video_id}',
|
||||
video_id, headers={'Authorization': 'Bearer %s' % self._id_token})
|
||||
video_id, headers={
|
||||
'Authorization': 'Bearer %s' % self._id_token,
|
||||
**self.geo_verification_headers(),
|
||||
})
|
||||
|
||||
formats, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
api['manifestUrls']['hls'], video_id, ext='mp4', m3u8_id='HLS')
|
||||
if 'manifestUrls' in api:
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
api['manifestUrls']['hls'], video_id, ext='mp4', m3u8_id='HLS')
|
||||
|
||||
else:
|
||||
if 'ssai' not in api:
|
||||
raise ExtractorError('expecting Google SSAI stream')
|
||||
|
||||
ssai_content_source_id = api['ssai']['contentSourceID']
|
||||
ssai_video_id = api['ssai']['videoID']
|
||||
|
||||
dai = self._download_json(
|
||||
f'https://dai.google.com/ondemand/dash/content/{ssai_content_source_id}/vid/{ssai_video_id}/streams',
|
||||
video_id, data=b'{"api-key":"null"}',
|
||||
headers={'content-type': 'application/json'})
|
||||
|
||||
periods = self._extract_mpd_periods(dai['stream_manifest'], video_id)
|
||||
|
||||
# skip pre-roll and mid-roll ads
|
||||
periods = [p for p in periods if '-ad-' not in p['id']]
|
||||
|
||||
formats, subtitles = self._merge_mpd_periods(periods)
|
||||
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
|
||||
return info_dict
|
||||
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@ from ..utils import ExtractorError, urlencode_postdata
|
|||
|
||||
|
||||
class HotNewHipHopIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',
|
||||
|
|
|
@ -115,11 +115,11 @@ class HotStarIE(HotStarBaseIE):
|
|||
'upload_date': '20190501',
|
||||
'duration': 1219,
|
||||
'channel': 'StarPlus',
|
||||
'channel_id': 3,
|
||||
'channel_id': '3',
|
||||
'series': 'Ek Bhram - Sarvagun Sampanna',
|
||||
'season': 'Chapter 1',
|
||||
'season_number': 1,
|
||||
'season_id': 6771,
|
||||
'season_id': '6771',
|
||||
'episode': 'Janhvi Targets Suman',
|
||||
'episode_number': 8,
|
||||
}
|
||||
|
@ -135,12 +135,12 @@ class HotStarIE(HotStarBaseIE):
|
|||
'channel': 'StarPlus',
|
||||
'series': 'Anupama',
|
||||
'season_number': 1,
|
||||
'season_id': 7399,
|
||||
'season_id': '7399',
|
||||
'upload_date': '20230307',
|
||||
'episode': 'Anupama, Anuj Share a Moment',
|
||||
'episode_number': 853,
|
||||
'duration': 1272,
|
||||
'channel_id': 3,
|
||||
'channel_id': '3',
|
||||
},
|
||||
'skip': 'HTTP Error 504: Gateway Time-out', # XXX: Investigate 504 errors on some episodes
|
||||
}, {
|
||||
|
@ -155,12 +155,12 @@ class HotStarIE(HotStarBaseIE):
|
|||
'channel': 'Hotstar Specials',
|
||||
'series': 'Kana Kaanum Kaalangal',
|
||||
'season_number': 1,
|
||||
'season_id': 9441,
|
||||
'season_id': '9441',
|
||||
'upload_date': '20220421',
|
||||
'episode': 'Back To School',
|
||||
'episode_number': 1,
|
||||
'duration': 1810,
|
||||
'channel_id': 54,
|
||||
'channel_id': '54',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/in/clips/e3-sairat-kahani-pyaar-ki/1000262286',
|
||||
|
@ -325,11 +325,11 @@ class HotStarIE(HotStarBaseIE):
|
|||
'formats': formats,
|
||||
'subtitles': subs,
|
||||
'channel': video_data.get('channelName'),
|
||||
'channel_id': video_data.get('channelId'),
|
||||
'channel_id': str_or_none(video_data.get('channelId')),
|
||||
'series': video_data.get('showName'),
|
||||
'season': video_data.get('seasonName'),
|
||||
'season_number': int_or_none(video_data.get('seasonNo')),
|
||||
'season_id': video_data.get('seasonId'),
|
||||
'season_id': str_or_none(video_data.get('seasonId')),
|
||||
'episode': video_data.get('title'),
|
||||
'episode_number': int_or_none(video_data.get('episodeNo')),
|
||||
}
|
||||
|
|
|
@ -114,7 +114,6 @@ class HungamaSongIE(InfoExtractor):
|
|||
'title': 'Lucky Ali - Kitni Haseen Zindagi',
|
||||
'track': 'Kitni Haseen Zindagi',
|
||||
'artist': 'Lucky Ali',
|
||||
'album': None,
|
||||
'release_year': 2000,
|
||||
'thumbnail': 'https://stat2.hungama.ind.in/assets/images/default_images/da-200x200.png',
|
||||
},
|
||||
|
|
|
@ -9,7 +9,7 @@ class MonsterSirenHypergryphMusicIE(InfoExtractor):
|
|||
'info_dict': {
|
||||
'id': '514562',
|
||||
'ext': 'wav',
|
||||
'artist': ['塞壬唱片-MSR'],
|
||||
'artists': ['塞壬唱片-MSR'],
|
||||
'album': 'Flame Shadow',
|
||||
'title': 'Flame Shadow',
|
||||
}
|
||||
|
@ -27,6 +27,6 @@ class MonsterSirenHypergryphMusicIE(InfoExtractor):
|
|||
'url': traverse_obj(json_data, ('player', 'songDetail', 'sourceUrl')),
|
||||
'ext': 'wav',
|
||||
'vcodec': 'none',
|
||||
'artist': traverse_obj(json_data, ('player', 'songDetail', 'artists')),
|
||||
'artists': traverse_obj(json_data, ('player', 'songDetail', 'artists', ...)),
|
||||
'album': traverse_obj(json_data, ('musicPlay', 'albumDetail', 'name'))
|
||||
}
|
||||
|
|
|
@ -617,6 +617,7 @@ class InstagramPlaylistBaseIE(InstagramBaseIE):
|
|||
|
||||
|
||||
class InstagramUserIE(InstagramPlaylistBaseIE):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])'
|
||||
IE_DESC = 'Instagram user profile'
|
||||
IE_NAME = 'instagram:user'
|
||||
|
|
|
@ -2,6 +2,8 @@ from .common import InfoExtractor
|
|||
|
||||
|
||||
class JeuxVideoIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_ENABLED = None # XXX: pass through to GenericIE
|
||||
_VALID_URL = r'https?://.*?\.jeuxvideo\.com/.*/(.*?)\.htm'
|
||||
|
||||
_TESTS = [{
|
||||
|
|
|
@ -1,66 +0,0 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
join_nonempty,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class Kanal2IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://kanal2\.postimees\.ee/[^?#]+\?([^#]+&)?id=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'note': 'Test standard url (#5575)',
|
||||
'url': 'https://kanal2.postimees.ee/pluss/video/?id=40792',
|
||||
'md5': '7ea7b16266ec1798743777df241883dd',
|
||||
'info_dict': {
|
||||
'id': '40792',
|
||||
'ext': 'mp4',
|
||||
'title': 'Aedniku aabits / Osa 53 (05.08.2016 20:00)',
|
||||
'thumbnail': r're:https?://.*\.jpg$',
|
||||
'description': 'md5:53cabf3c5d73150d594747f727431248',
|
||||
'upload_date': '20160805',
|
||||
'timestamp': 1470420000,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
playlist = self._download_json(
|
||||
f'https://kanal2.postimees.ee/player/playlist/{video_id}',
|
||||
video_id, query={'type': 'episodes'},
|
||||
headers={'X-Requested-With': 'XMLHttpRequest'})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': join_nonempty(*traverse_obj(playlist, ('info', ('title', 'subtitle'))), delim=' / '),
|
||||
'description': traverse_obj(playlist, ('info', 'description')),
|
||||
'thumbnail': traverse_obj(playlist, ('data', 'image')),
|
||||
'formats': self.get_formats(playlist, video_id),
|
||||
'timestamp': unified_timestamp(self._search_regex(
|
||||
r'\((\d{2}\.\d{2}\.\d{4}\s\d{2}:\d{2})\)$',
|
||||
traverse_obj(playlist, ('info', 'subtitle')), 'timestamp', default='') + ' +0200'),
|
||||
}
|
||||
|
||||
def get_formats(self, playlist, video_id):
|
||||
path = traverse_obj(playlist, ('data', 'path'))
|
||||
if not path:
|
||||
raise ExtractorError('Path value not found in playlist JSON response')
|
||||
session = self._download_json(
|
||||
'https://sts.postimees.ee/session/register',
|
||||
video_id, note='Creating session', errnote='Error creating session',
|
||||
headers={
|
||||
'X-Original-URI': path,
|
||||
'Accept': 'application/json',
|
||||
})
|
||||
if session.get('reason') != 'OK' or not session.get('session'):
|
||||
reason = session.get('reason', 'unknown error')
|
||||
raise ExtractorError(f'Unable to obtain session: {reason}')
|
||||
|
||||
formats = []
|
||||
for stream in traverse_obj(playlist, ('data', 'streams', ..., 'file')):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
update_url_query(stream, {'s': session['session']}), video_id, 'mp4'))
|
||||
|
||||
return formats
|
|
@ -8,6 +8,7 @@ from .common import InfoExtractor
|
|||
|
||||
|
||||
class KankaNewsIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?kankanews\.com/a/\d+\-\d+\-\d+/(?P<id>\d+)\.shtml'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.kankanews.com/a/2022-11-08/00310276054.shtml?appid=1088227',
|
||||
|
|
|
@ -1,96 +0,0 @@
|
|||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
xpath_with_ns,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class KarriereVideosIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?karrierevideos\.at(?:/[^/]+)+/(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.karrierevideos.at/berufsvideos/mittlere-hoehere-schulen/altenpflegerin',
|
||||
'info_dict': {
|
||||
'id': '32c91',
|
||||
'ext': 'flv',
|
||||
'title': 'AltenpflegerIn',
|
||||
'description': 'md5:dbadd1259fde2159a9b28667cb664ae2',
|
||||
'thumbnail': r're:^http://.*\.png',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# broken ampersands
|
||||
'url': 'http://www.karrierevideos.at/orientierung/vaeterkarenz-und-neue-chancen-fuer-muetter-baby-was-nun',
|
||||
'info_dict': {
|
||||
'id': '5sniu',
|
||||
'ext': 'flv',
|
||||
'title': 'Väterkarenz und neue Chancen für Mütter - "Baby - was nun?"',
|
||||
'description': 'md5:97092c6ad1fd7d38e9d6a5fdeb2bcc33',
|
||||
'thumbnail': r're:^http://.*\.png',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = (self._html_search_meta('title', webpage, default=None)
|
||||
or self._search_regex(r'<h1 class="title">([^<]+)</h1>', webpage, 'video title'))
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'/config/video/(.+?)\.xml', webpage, 'video id')
|
||||
# Server returns malformed headers
|
||||
# Force Accept-Encoding: * to prevent gzipped results
|
||||
playlist = self._download_xml(
|
||||
'http://www.karrierevideos.at/player-playlist.xml.php?p=%s' % video_id,
|
||||
video_id, transform_source=fix_xml_ampersands,
|
||||
headers={'Accept-Encoding': '*'})
|
||||
|
||||
NS_MAP = {
|
||||
'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'
|
||||
}
|
||||
|
||||
def ns(path):
|
||||
return xpath_with_ns(path, NS_MAP)
|
||||
|
||||
item = playlist.find('./tracklist/item')
|
||||
video_file = xpath_text(
|
||||
item, ns('./jwplayer:file'), 'video url', fatal=True)
|
||||
streamer = xpath_text(
|
||||
item, ns('./jwplayer:streamer'), 'streamer', fatal=True)
|
||||
|
||||
uploader = xpath_text(
|
||||
item, ns('./jwplayer:author'), 'uploader')
|
||||
duration = float_or_none(
|
||||
xpath_text(item, ns('./jwplayer:duration'), 'duration'))
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div class="leadtext">(.+?)</div>',
|
||||
webpage, 'description')
|
||||
|
||||
thumbnail = self._html_search_meta(
|
||||
'thumbnail', webpage, 'thumbnail')
|
||||
if thumbnail:
|
||||
thumbnail = compat_urlparse.urljoin(url, thumbnail)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': streamer.replace('rtmpt', 'rtmp'),
|
||||
'play_path': 'mp4:%s' % video_file,
|
||||
'ext': 'flv',
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'duration': duration,
|
||||
}
|
|
@ -3,6 +3,7 @@ from ..utils import int_or_none
|
|||
|
||||
|
||||
class KelbyOneIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://members\.kelbyone\.com/course/(?P<id>[^$&?#/]+)'
|
||||
|
||||
_TESTS = [{
|
||||
|
|
|
@ -1,119 +0,0 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class KonserthusetPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:konserthusetplay|rspoplay)\.se/\?.*\bm=(?P<id>[^&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.konserthusetplay.se/?m=CKDDnlCY-dhWAAqiMERd-A',
|
||||
'md5': 'e3fd47bf44e864bd23c08e487abe1967',
|
||||
'info_dict': {
|
||||
'id': 'CKDDnlCY-dhWAAqiMERd-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Orkesterns instrument: Valthornen',
|
||||
'description': 'md5:f10e1f0030202020396a4d712d2fa827',
|
||||
'thumbnail': 're:^https?://.*$',
|
||||
'duration': 398.76,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://rspoplay.se/?m=elWuEH34SMKvaO4wO_cHBw',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
e = self._search_regex(
|
||||
r'https?://csp\.picsearch\.com/rest\?.*\be=(.+?)[&"\']', webpage, 'e')
|
||||
|
||||
rest = self._download_json(
|
||||
'http://csp.picsearch.com/rest?e=%s&containerId=mediaplayer&i=object' % e,
|
||||
video_id, transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])
|
||||
|
||||
media = rest['media']
|
||||
player_config = media['playerconfig']
|
||||
playlist = player_config['playlist']
|
||||
|
||||
source = next(f for f in playlist if f.get('bitrates') or f.get('provider'))
|
||||
|
||||
FORMAT_ID_REGEX = r'_([^_]+)_h264m\.mp4'
|
||||
|
||||
formats = []
|
||||
|
||||
m3u8_url = source.get('url')
|
||||
if m3u8_url and determine_ext(m3u8_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
fallback_url = source.get('fallbackUrl')
|
||||
fallback_format_id = None
|
||||
if fallback_url:
|
||||
fallback_format_id = self._search_regex(
|
||||
FORMAT_ID_REGEX, fallback_url, 'format id', default=None)
|
||||
|
||||
connection_url = (player_config.get('rtmp', {}).get(
|
||||
'netConnectionUrl') or player_config.get(
|
||||
'plugins', {}).get('bwcheck', {}).get('netConnectionUrl'))
|
||||
if connection_url:
|
||||
for f in source['bitrates']:
|
||||
video_url = f.get('url')
|
||||
if not video_url:
|
||||
continue
|
||||
format_id = self._search_regex(
|
||||
FORMAT_ID_REGEX, video_url, 'format id', default=None)
|
||||
f_common = {
|
||||
'vbr': int_or_none(f.get('bitrate')),
|
||||
'width': int_or_none(f.get('width')),
|
||||
'height': int_or_none(f.get('height')),
|
||||
}
|
||||
f = f_common.copy()
|
||||
f.update({
|
||||
'url': connection_url,
|
||||
'play_path': video_url,
|
||||
'format_id': 'rtmp-%s' % format_id if format_id else 'rtmp',
|
||||
'ext': 'flv',
|
||||
})
|
||||
formats.append(f)
|
||||
if format_id and format_id == fallback_format_id:
|
||||
f = f_common.copy()
|
||||
f.update({
|
||||
'url': fallback_url,
|
||||
'format_id': 'http-%s' % format_id if format_id else 'http',
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
if not formats and fallback_url:
|
||||
formats.append({
|
||||
'url': fallback_url,
|
||||
})
|
||||
|
||||
title = player_config.get('title') or media['title']
|
||||
description = player_config.get('mediaInfo', {}).get('description')
|
||||
thumbnail = media.get('image')
|
||||
duration = float_or_none(media.get('duration'), 1000)
|
||||
|
||||
subtitles = {}
|
||||
captions = source.get('captionsAvailableLanguages')
|
||||
if isinstance(captions, dict):
|
||||
for lang, subtitle_url in captions.items():
|
||||
subtitle_url = url_or_none(subtitle_url)
|
||||
if lang != 'none' and subtitle_url:
|
||||
subtitles.setdefault(lang, []).append({'url': subtitle_url})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
|
@ -6,6 +6,7 @@ from ..utils import (
|
|||
|
||||
|
||||
class KooIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?kooapp\.com/koo/[^/]+/(?P<id>[^/&#$?]+)'
|
||||
_TESTS = [{ # Test for video in the comments
|
||||
'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/946c4189-bc2d-4524-b95b-43f641e2adde',
|
||||
|
|
|
@ -8,6 +8,7 @@ from ..utils import (
|
|||
|
||||
|
||||
class KrasViewIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
IE_DESC = 'Красвью'
|
||||
_VALID_URL = r'https?://krasview\.ru/(?:video|embed)/(?P<id>\d+)'
|
||||
|
||||
|
|
|
@ -1,83 +0,0 @@
|
|||
import random
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
timeconvert,
|
||||
update_url_query,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class KUSIIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.kusi.com/story/32849881/turko-files-refused-to-help-it-aint-right',
|
||||
'md5': '4e76ce8e53660ce9697d06c0ba6fc47d',
|
||||
'info_dict': {
|
||||
'id': '12689020',
|
||||
'ext': 'mp4',
|
||||
'title': "Turko Files: Refused to Help, It Ain't Right!",
|
||||
'duration': 223.586,
|
||||
'upload_date': '20160826',
|
||||
'timestamp': 1472233118,
|
||||
'thumbnail': r're:^https?://.*\.jpg$'
|
||||
},
|
||||
}, {
|
||||
'url': 'http://kusi.com/video?clipId=12203019',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
clip_id = mobj.group('clipId')
|
||||
video_id = clip_id or mobj.group('path')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if clip_id is None:
|
||||
video_id = clip_id = self._html_search_regex(
|
||||
r'"clipId"\s*,\s*"(\d+)"', webpage, 'clip id')
|
||||
|
||||
affiliate_id = self._search_regex(
|
||||
r'affiliateId\s*:\s*\'([^\']+)\'', webpage, 'affiliate id')
|
||||
|
||||
# See __Packages/worldnow/model/GalleryModel.as of WNGallery.swf
|
||||
xml_url = update_url_query('http://www.kusi.com/build.asp', {
|
||||
'buildtype': 'buildfeaturexmlrequest',
|
||||
'featureType': 'Clip',
|
||||
'featureid': clip_id,
|
||||
'affiliateno': affiliate_id,
|
||||
'clientgroupid': '1',
|
||||
'rnd': int(round(random.random() * 1000000)),
|
||||
})
|
||||
|
||||
doc = self._download_xml(xml_url, video_id)
|
||||
|
||||
video_title = xpath_text(doc, 'HEADLINE', fatal=True)
|
||||
duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000)
|
||||
description = xpath_text(doc, 'ABSTRACT')
|
||||
thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME')
|
||||
creation_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))
|
||||
|
||||
quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content')
|
||||
formats = []
|
||||
for quality in quality_options:
|
||||
formats.append({
|
||||
'url': urllib.parse.unquote_plus(quality.attrib['url']),
|
||||
'height': int_or_none(quality.attrib.get('height')),
|
||||
'width': int_or_none(quality.attrib.get('width')),
|
||||
'vbr': float_or_none(quality.attrib.get('bitratebits'), scale=1000),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': creation_time,
|
||||
}
|
|
@ -54,6 +54,7 @@ class KuwoBaseIE(InfoExtractor):
|
|||
|
||||
|
||||
class KuwoIE(KuwoBaseIE):
|
||||
_WORKING = False
|
||||
IE_NAME = 'kuwo:song'
|
||||
IE_DESC = '酷我音乐'
|
||||
_VALID_URL = r'https?://(?:www\.)?kuwo\.cn/yinyue/(?P<id>\d+)'
|
||||
|
@ -133,6 +134,7 @@ class KuwoIE(KuwoBaseIE):
|
|||
|
||||
|
||||
class KuwoAlbumIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
IE_NAME = 'kuwo:album'
|
||||
IE_DESC = '酷我音乐 - 专辑'
|
||||
_VALID_URL = r'https?://(?:www\.)?kuwo\.cn/album/(?P<id>\d+?)/'
|
||||
|
@ -169,6 +171,7 @@ class KuwoAlbumIE(InfoExtractor):
|
|||
|
||||
|
||||
class KuwoChartIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
IE_NAME = 'kuwo:chart'
|
||||
IE_DESC = '酷我音乐 - 排行榜'
|
||||
_VALID_URL = r'https?://yinyue\.kuwo\.cn/billboard_(?P<id>[^.]+).htm'
|
||||
|
@ -194,6 +197,7 @@ class KuwoChartIE(InfoExtractor):
|
|||
|
||||
|
||||
class KuwoSingerIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
IE_NAME = 'kuwo:singer'
|
||||
IE_DESC = '酷我音乐 - 歌手'
|
||||
_VALID_URL = r'https?://(?:www\.)?kuwo\.cn/mingxing/(?P<id>[^/]+)'
|
||||
|
@ -251,6 +255,7 @@ class KuwoSingerIE(InfoExtractor):
|
|||
|
||||
|
||||
class KuwoCategoryIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
IE_NAME = 'kuwo:category'
|
||||
IE_DESC = '酷我音乐 - 分类'
|
||||
_VALID_URL = r'https?://yinyue\.kuwo\.cn/yy/cinfo_(?P<id>\d+?).htm'
|
||||
|
@ -290,6 +295,7 @@ class KuwoCategoryIE(InfoExtractor):
|
|||
|
||||
|
||||
class KuwoMvIE(KuwoBaseIE):
|
||||
_WORKING = False
|
||||
IE_NAME = 'kuwo:mv'
|
||||
IE_DESC = '酷我音乐 - MV'
|
||||
_VALID_URL = r'https?://(?:www\.)?kuwo\.cn/mv/(?P<id>\d+?)/'
|
||||
|
|
|
@ -231,7 +231,6 @@ class LBRYIE(LBRYBaseIE):
|
|||
'release_timestamp': int,
|
||||
'release_date': str,
|
||||
'tags': list,
|
||||
'duration': None,
|
||||
'channel': 'RT',
|
||||
'channel_id': 'fdd11cb3ab75f95efb7b3bc2d726aa13ac915b66',
|
||||
'channel_url': 'https://odysee.com/@RT:fdd11cb3ab75f95efb7b3bc2d726aa13ac915b66',
|
||||
|
|
|
@ -10,6 +10,7 @@ from ..utils import (
|
|||
|
||||
|
||||
class Lecture2GoIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://lecture2go\.uni-hamburg\.de/veranstaltungen/-/v/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://lecture2go.uni-hamburg.de/veranstaltungen/-/v/17473',
|
||||
|
|
|
@ -2,6 +2,7 @@ from .common import InfoExtractor
|
|||
|
||||
|
||||
class LentaIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?lenta\.ru/[^/]+/\d+/\d+/\d+/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://lenta.ru/news/2018/03/22/savshenko_go/',
|
||||
|
|
|
@ -22,8 +22,6 @@ class LikeeIE(InfoExtractor):
|
|||
'description': 'md5:9a7ebe816f0e78722ee5ed76f75983b4',
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'uploader': 'Huỳnh Hồng Quân ',
|
||||
'play_count': int,
|
||||
'download_count': int,
|
||||
'artist': 'Huỳnh Hồng Quân ',
|
||||
'timestamp': 1651571320,
|
||||
'upload_date': '20220503',
|
||||
|
@ -44,11 +42,9 @@ class LikeeIE(InfoExtractor):
|
|||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'uploader': 'Vương Phước Nhi',
|
||||
'download_count': int,
|
||||
'timestamp': 1651506835,
|
||||
'upload_date': '20220502',
|
||||
'duration': 60024,
|
||||
'play_count': int,
|
||||
'artist': 'Vương Phước Nhi',
|
||||
'uploader_id': '649222262',
|
||||
'view_count': int,
|
||||
|
@ -65,9 +61,7 @@ class LikeeIE(InfoExtractor):
|
|||
'duration': 9684,
|
||||
'uploader_id': 'fernanda_rivasg',
|
||||
'view_count': int,
|
||||
'play_count': int,
|
||||
'artist': 'La Cami La✨',
|
||||
'download_count': int,
|
||||
'like_count': int,
|
||||
'uploader': 'Fernanda Rivas🎶',
|
||||
'timestamp': 1614034308,
|
||||
|
@ -83,13 +77,11 @@ class LikeeIE(InfoExtractor):
|
|||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'comment_count': int,
|
||||
'duration': 18014,
|
||||
'play_count': int,
|
||||
'view_count': int,
|
||||
'timestamp': 1611694774,
|
||||
'like_count': int,
|
||||
'uploader': 'Fernanda Rivas🎶',
|
||||
'uploader_id': 'fernanda_rivasg',
|
||||
'download_count': int,
|
||||
'artist': 'ʟᴇʀɪᴋ_ᴜɴɪᴄᴏʀɴ♡︎',
|
||||
'upload_date': '20210126',
|
||||
},
|
||||
|
@ -128,8 +120,6 @@ class LikeeIE(InfoExtractor):
|
|||
'description': info.get('share_desc'),
|
||||
'view_count': int_or_none(info.get('video_count')),
|
||||
'like_count': int_or_none(info.get('likeCount')),
|
||||
'play_count': int_or_none(info.get('play_count')),
|
||||
'download_count': int_or_none(info.get('download_count')),
|
||||
'comment_count': int_or_none(info.get('comment_count')),
|
||||
'uploader': str_or_none(info.get('nick_name')),
|
||||
'uploader_id': str_or_none(info.get('likeeId')),
|
||||
|
|
|
@ -1,42 +0,0 @@
|
|||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class LocalNews8IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?localnews8\.com/(?:[^/]+/)*(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.localnews8.com/news/rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings/35183304',
|
||||
'md5': 'be4d48aea61aa2bde7be2ee47691ad20',
|
||||
'info_dict': {
|
||||
'id': '35183304',
|
||||
'display_id': 'rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings',
|
||||
'ext': 'mp4',
|
||||
'title': 'Rexburg business turns carbon fiber scraps into wedding ring',
|
||||
'description': 'The process was first invented by Lamborghini and less than a dozen companies around the world use it.',
|
||||
'duration': 153,
|
||||
'timestamp': 1441844822,
|
||||
'upload_date': '20150910',
|
||||
'uploader_id': 'api',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r'partnerId\s*[:=]\s*(["\'])(?P<id>\d+)\1',
|
||||
webpage, 'partner id', group='id')
|
||||
kaltura_id = self._search_regex(
|
||||
r'videoIdString\s*[:=]\s*(["\'])kaltura:(?P<id>[0-9a-z_]+)\1',
|
||||
webpage, 'videl id', group='id')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'kaltura:%s:%s' % (partner_id, kaltura_id),
|
||||
'ie_key': 'Kaltura',
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
}
|
|
@ -1,8 +1,7 @@
|
|||
from .common import InfoExtractor
|
||||
from .francetv import FranceTVIE
|
||||
from .francetv import FranceTVBaseInfoExtractor
|
||||
|
||||
|
||||
class LumniIE(InfoExtractor):
|
||||
class LumniIE(FranceTVBaseInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?lumni\.fr/video/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.lumni.fr/video/l-homme-et-son-environnement-dans-la-revolution-industrielle',
|
||||
|
@ -21,4 +20,4 @@ class LumniIE(InfoExtractor):
|
|||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._html_search_regex(
|
||||
r'<div[^>]+data-factoryid\s*=\s*["\']([^"\']+)', webpage, 'video id')
|
||||
return self.url_result(f'francetv:{video_id}', FranceTVIE, video_id)
|
||||
return self._make_url_result(video_id, url=url)
|
||||
|
|
|
@ -1,107 +0,0 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
dict_get,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class MallTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www|sk)\.)?mall\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
|
||||
'md5': 'cd69ce29176f6533b65bff69ed9a5f2a',
|
||||
'info_dict': {
|
||||
'id': 't0zzt0',
|
||||
'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
|
||||
'ext': 'mp4',
|
||||
'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
|
||||
'description': 'md5:db7d5744a4bd4043d9d98324aa72ab35',
|
||||
'duration': 216,
|
||||
'timestamp': 1538870400,
|
||||
'upload_date': '20181007',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': 'https://cdn.vpplayer.tech/agmipnzv/encode/vjsnigfq/thumbnails/retina.jpg',
|
||||
'average_rating': 9.060869565217391,
|
||||
'dislike_count': int,
|
||||
'like_count': int,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://sk.mall.tv/gejmhaus/reklamacia-nehreje-vyrobnik-tepla-alebo-spekacka',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.mall.tv/zivoty-slavnych/nadeje-vychodu-i-zapadu-jak-michail-gorbacov-zmenil-politickou-mapu-sveta-a-ziskal-za-to-nobelovu-cenu-miru',
|
||||
'info_dict': {
|
||||
'id': 'yx010y',
|
||||
'ext': 'mp4',
|
||||
'dislike_count': int,
|
||||
'description': 'md5:aee02bee5a8d072c6a8207b91d1905a9',
|
||||
'thumbnail': 'https://cdn.vpplayer.tech/agmipnzv/encode/vjsnjdeu/thumbnails/retina.jpg',
|
||||
'comment_count': int,
|
||||
'display_id': 'md5:0ec2afa94d2e2b7091c019cef2a43a9b',
|
||||
'like_count': int,
|
||||
'duration': 752,
|
||||
'timestamp': 1646956800,
|
||||
'title': 'md5:fe79385daaf16d74c12c1ec4a26687af',
|
||||
'view_count': int,
|
||||
'upload_date': '20220311',
|
||||
'average_rating': 9.685714285714285,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, display_id, headers=self.geo_verification_headers())
|
||||
|
||||
video = self._parse_json(self._search_regex(
|
||||
r'videoObject\s*=\s*JSON\.parse\(JSON\.stringify\(({.+?})\)\);',
|
||||
webpage, 'video object'), display_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'<input\s*id\s*=\s*player-id-name\s*[^>]+value\s*=\s*(\w+)', webpage, 'video id')
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
video['VideoSource'], video_id, 'mp4', 'm3u8_native')
|
||||
|
||||
subtitles = {}
|
||||
for s in (video.get('Subtitles') or {}):
|
||||
s_url = s.get('Url')
|
||||
if not s_url:
|
||||
continue
|
||||
subtitles.setdefault(s.get('Language') or 'cz', []).append({
|
||||
'url': s_url,
|
||||
})
|
||||
|
||||
entity_counts = video.get('EntityCounts') or {}
|
||||
|
||||
def get_count(k):
|
||||
v = entity_counts.get(k + 's') or {}
|
||||
return int_or_none(dict_get(v, ('Count', 'StrCount')))
|
||||
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
|
||||
return merge_dicts({
|
||||
'id': str(video_id),
|
||||
'display_id': display_id,
|
||||
'title': video.get('Title'),
|
||||
'description': clean_html(video.get('Description')),
|
||||
'thumbnail': video.get('ThumbnailUrl'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'duration': int_or_none(video.get('DurationSeconds')) or parse_duration(video.get('Duration')),
|
||||
'view_count': get_count('View'),
|
||||
'like_count': get_count('Like'),
|
||||
'dislike_count': get_count('Dislike'),
|
||||
'average_rating': float_or_none(try_get(video, lambda x: x['EntityRating']['AvarageRate'])),
|
||||
'comment_count': get_count('Comment'),
|
||||
}, info)
|
|
@ -12,6 +12,7 @@ from ..utils import (
|
|||
|
||||
|
||||
class ManyVidsIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# preview video
|
||||
|
|
|
@ -10,6 +10,7 @@ from ..utils import (
|
|||
|
||||
|
||||
class MarkizaIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?videoarchiv\.markiza\.sk/(?:video/(?:[^/]+/)*|embed/)(?P<id>\d+)(?:[_/]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723_oteckovia-109',
|
||||
|
@ -68,6 +69,7 @@ class MarkizaIE(InfoExtractor):
|
|||
|
||||
|
||||
class MarkizaPageIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:(?:[^/]+\.)?markiza|tvnoviny)\.sk/(?:[^/]+/)*(?P<id>\d+)_'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.markiza.sk/soubiz/zahranicny/1923705_oteckovia-maju-svoj-den-ti-slavni-nie-su-o-nic-menej-rozkosni',
|
||||
|
|
|
@ -8,15 +8,15 @@ class MegaphoneIE(InfoExtractor):
|
|||
_VALID_URL = r'https://player\.megaphone\.fm/(?P<id>[A-Z0-9]+)'
|
||||
_EMBED_REGEX = [rf'<iframe[^>]*?\ssrc=["\'](?P<url>{_VALID_URL})']
|
||||
_TEST = {
|
||||
'url': 'https://player.megaphone.fm/GLT9749789991?"',
|
||||
'url': 'https://player.megaphone.fm/GLT9749789991',
|
||||
'md5': '4816a0de523eb3e972dc0dda2c191f96',
|
||||
'info_dict': {
|
||||
'id': 'GLT9749789991',
|
||||
'ext': 'mp3',
|
||||
'title': '#97 What Kind Of Idiot Gets Phished?',
|
||||
'thumbnail': r're:^https://.*\.png.*$',
|
||||
'duration': 1776.26375,
|
||||
'author': 'Reply All',
|
||||
'duration': 1998.36,
|
||||
'creators': ['Reply All'],
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -40,7 +40,7 @@ class MegaphoneIE(InfoExtractor):
|
|||
'id': video_id,
|
||||
'thumbnail': thumbnail,
|
||||
'title': title,
|
||||
'author': author,
|
||||
'creators': [author] if author else None,
|
||||
'duration': episode_data['duration'],
|
||||
'formats': formats,
|
||||
}
|
||||
|
|
|
@ -1,36 +0,0 @@
|
|||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class MiaoPaiIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?miaopai\.com/show/(?P<id>[-A-Za-z0-9~_]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.miaopai.com/show/n~0hO7sfV1nBEw4Y29-Hqg__.htm',
|
||||
'md5': '095ed3f1cd96b821add957bdc29f845b',
|
||||
'info_dict': {
|
||||
'id': 'n~0hO7sfV1nBEw4Y29-Hqg__',
|
||||
'ext': 'mp4',
|
||||
'title': '西游记音乐会的秒拍视频',
|
||||
'thumbnail': 're:^https?://.*/n~0hO7sfV1nBEw4Y29-Hqg___m.jpg',
|
||||
}
|
||||
}
|
||||
|
||||
_USER_AGENT_IPAD = 'Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
url, video_id, headers={'User-Agent': self._USER_AGENT_IPAD})
|
||||
|
||||
title = self._html_extract_title(webpage)
|
||||
thumbnail = self._html_search_regex(
|
||||
r'<div[^>]+class=(?P<q1>[\'"]).*\bvideo_img\b.*(?P=q1)[^>]+data-url=(?P<q2>[\'"])(?P<url>[^\'"]+)(?P=q2)',
|
||||
webpage, 'thumbnail', fatal=False, group='url')
|
||||
videos = self._parse_html5_media_entries(url, webpage, video_id)
|
||||
info = videos[0]
|
||||
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
})
|
||||
return info
|
|
@ -1,55 +0,0 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class MinistryGridIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ministrygrid\.com/([^/?#]*/)*(?P<id>[^/#?]+)/?(?:$|[?#])'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers',
|
||||
'md5': '844be0d2a1340422759c2a9101bab017',
|
||||
'info_dict': {
|
||||
'id': '3453494717001',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Gospel by Numbers',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'upload_date': '20140410',
|
||||
'description': 'Coming soon from T4G 2014!',
|
||||
'uploader_id': '2034960640001',
|
||||
'timestamp': 1397145591,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['TDSLifeway'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
portlets = self._parse_json(self._search_regex(
|
||||
r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list'),
|
||||
video_id)
|
||||
pl_id = self._search_regex(
|
||||
r'getPlid:function\(\){return"(\d+)"}', webpage, 'p_l_id')
|
||||
|
||||
for i, portlet in enumerate(portlets):
|
||||
portlet_url = 'http://www.ministrygrid.com/c/portal/render_portlet?p_l_id=%s&p_p_id=%s' % (pl_id, portlet)
|
||||
portlet_code = self._download_webpage(
|
||||
portlet_url, video_id,
|
||||
note='Looking in portlet %s (%d/%d)' % (portlet, i + 1, len(portlets)),
|
||||
fatal=False)
|
||||
video_iframe_url = self._search_regex(
|
||||
r'<iframe.*?src="([^"]+)"', portlet_code, 'video iframe',
|
||||
default=None)
|
||||
if video_iframe_url:
|
||||
return self.url_result(
|
||||
smuggle_url(video_iframe_url, {'force_videoid': video_id}),
|
||||
video_id=video_id)
|
||||
|
||||
raise ExtractorError('Could not find video iframe in any portlets')
|
|
@ -1,45 +0,0 @@
|
|||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class MorningstarIE(InfoExtractor):
    """Extractor for morningstar.com video-center pages.

    All metadata is scraped from the HTML of the video page: the title from
    the ``titleLink`` heading, the media URL and thumbnail from hidden form
    inputs, and the description from the ``mstarDeck`` block.
    """

    IE_DESC = 'morningstar.com'
    _VALID_URL = r'https?://(?:(?:www|news)\.)morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869',
        'md5': '6c0acface7a787aadc8391e4bbf7b0f5',
        'info_dict': {
            'id': '615869',
            'ext': 'mp4',
            'title': 'Get Ahead of the Curve on 2013 Taxes',
            'description': "Vanguard's Joel Dickson on managing higher tax rates for high-income earners and fund capital-gain distributions in 2013.",
            'thumbnail': r're:^https?://.*m(?:orning)?star\.com/.+thumb\.jpg$',
        },
    }, {
        'url': 'http://news.morningstar.com/cover/videocenter.aspx?id=825556',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Scrape the video page and return its info dict."""
        video_id = self._match_valid_url(url).group('id')
        page = self._download_webpage(url, video_id)

        # Entries are evaluated top to bottom, so the two mandatory lookups
        # (title, video URL) still run before the optional ones.
        return {
            'id': video_id,
            'title': self._html_search_regex(
                r'<h1 id="titleLink">(.*?)</h1>', page, 'title'),
            'url': self._html_search_regex(
                r'<input type="hidden" id="hidVideoUrl" value="([^"]+)"',
                page, 'video URL'),
            'thumbnail': self._html_search_regex(
                r'<input type="hidden" id="hidSnapshot" value="([^"]+)"',
                page, 'thumbnail', fatal=False),
            'description': self._html_search_regex(
                r'<div id="mstarDeck".*?>(.*?)</div>',
                page, 'description', fatal=False),
        }
|
|
@ -5,6 +5,7 @@ from ..compat import (
|
|||
|
||||
|
||||
class MotorsportIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
IE_DESC = 'motorsport.com'
|
||||
_VALID_URL = r'https?://(?:www\.)?motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])'
|
||||
_TEST = {
|
||||
|
|
|
@ -451,6 +451,7 @@ class MTVVideoIE(MTVServicesInfoExtractor):
|
|||
|
||||
|
||||
class MTVDEIE(MTVServicesInfoExtractor):
|
||||
_WORKING = False
|
||||
IE_NAME = 'mtv.de'
|
||||
_VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:musik/videoclips|folgen|news)/(?P<id>[0-9a-z]+)'
|
||||
_TESTS = [{
|
||||
|
|
|
@ -9,6 +9,7 @@ from ..utils import (
|
|||
|
||||
|
||||
class MuenchenTVIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?muenchen\.tv/livestream'
|
||||
IE_DESC = 'münchen.tv'
|
||||
_TEST = {
|
||||
|
|
|
@ -17,11 +17,11 @@ class MusicdexBaseIE(InfoExtractor):
|
|||
'track_number': track_json.get('number'),
|
||||
'url': format_field(track_json, 'url', 'https://www.musicdex.org/%s'),
|
||||
'duration': track_json.get('duration'),
|
||||
'genre': [genre.get('name') for genre in track_json.get('genres') or []],
|
||||
'genres': [genre.get('name') for genre in track_json.get('genres') or []],
|
||||
'like_count': track_json.get('likes_count'),
|
||||
'view_count': track_json.get('plays'),
|
||||
'artist': [artist.get('name') for artist in track_json.get('artists') or []],
|
||||
'album_artist': [artist.get('name') for artist in album_json.get('artists') or []],
|
||||
'artists': [artist.get('name') for artist in track_json.get('artists') or []],
|
||||
'album_artists': [artist.get('name') for artist in album_json.get('artists') or []],
|
||||
'thumbnail': format_field(album_json, 'image', 'https://www.musicdex.org/%s'),
|
||||
'album': album_json.get('name'),
|
||||
'release_year': try_get(album_json, lambda x: date_from_str(unified_strdate(x['release_date'])).year),
|
||||
|
@ -43,11 +43,11 @@ class MusicdexSongIE(MusicdexBaseIE):
|
|||
'track': 'dual existence',
|
||||
'track_number': 1,
|
||||
'duration': 266000,
|
||||
'genre': ['Anime'],
|
||||
'genres': ['Anime'],
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'artist': ['fripSide'],
|
||||
'album_artist': ['fripSide'],
|
||||
'artists': ['fripSide'],
|
||||
'album_artists': ['fripSide'],
|
||||
'thumbnail': 'https://www.musicdex.org/storage/album/9iDIam1DHTVqUG4UclFIEq1WAFGXfPW4y0TtZa91.png',
|
||||
'album': 'To Aru Kagaku no Railgun T OP2 Single - dual existence',
|
||||
'release_year': 2020
|
||||
|
@ -69,9 +69,9 @@ class MusicdexAlbumIE(MusicdexBaseIE):
|
|||
'playlist_mincount': 28,
|
||||
'info_dict': {
|
||||
'id': '56',
|
||||
'genre': ['OST'],
|
||||
'genres': ['OST'],
|
||||
'view_count': int,
|
||||
'artist': ['TENMON & Eiichiro Yanagi / minori'],
|
||||
'artists': ['TENMON & Eiichiro Yanagi / minori'],
|
||||
'title': 'ef - a tale of memories Original Soundtrack 2 ~fortissimo~',
|
||||
'release_year': 2008,
|
||||
'thumbnail': 'https://www.musicdex.org/storage/album/2rSHkyYBYfB7sbvElpEyTMcUn6toY7AohOgJuDlE.jpg',
|
||||
|
@ -88,9 +88,9 @@ class MusicdexAlbumIE(MusicdexBaseIE):
|
|||
'id': id,
|
||||
'title': data_json.get('name'),
|
||||
'description': data_json.get('description'),
|
||||
'genre': [genre.get('name') for genre in data_json.get('genres') or []],
|
||||
'genres': [genre.get('name') for genre in data_json.get('genres') or []],
|
||||
'view_count': data_json.get('plays'),
|
||||
'artist': [artist.get('name') for artist in data_json.get('artists') or []],
|
||||
'artists': [artist.get('name') for artist in data_json.get('artists') or []],
|
||||
'thumbnail': format_field(data_json, 'image', 'https://www.musicdex.org/%s'),
|
||||
'release_year': try_get(data_json, lambda x: date_from_str(unified_strdate(x['release_date'])).year),
|
||||
'entries': entries,
|
||||
|
|
|
@ -5,6 +5,7 @@ from ..utils import parse_duration, remove_end, unified_strdate, urljoin
|
|||
|
||||
|
||||
class NDTVIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?ndtv\.com/(?:[^/]+/)*videos?/?(?:[^/]+/)*[^/?^&]+-(?P<id>\d+)'
|
||||
|
||||
_TESTS = [
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import itertools
|
||||
import json
|
||||
|
||||
from .art19 import Art19IE
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
|
@ -112,7 +113,8 @@ class NebulaBaseIE(InfoExtractor):
|
|||
|
||||
|
||||
class NebulaIE(NebulaBaseIE):
|
||||
_VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[-\w]+)'
|
||||
IE_NAME = 'nebula:video'
|
||||
_VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
|
||||
'info_dict': {
|
||||
|
@ -236,8 +238,8 @@ class NebulaIE(NebulaBaseIE):
|
|||
|
||||
|
||||
class NebulaClassIE(NebulaBaseIE):
|
||||
IE_NAME = 'nebula:class'
|
||||
_VALID_URL = rf'{_BASE_URL_RE}/(?P<id>[-\w]+)/(?P<ep>\d+)'
|
||||
IE_NAME = 'nebula:media'
|
||||
_VALID_URL = rf'{_BASE_URL_RE}/(?!(?:myshows|library|videos)/)(?P<id>[\w-]+)/(?P<ep>[\w-]+)/?(?:$|[?#])'
|
||||
_TESTS = [{
|
||||
'url': 'https://nebula.tv/copyright-for-fun-and-profit/14',
|
||||
'info_dict': {
|
||||
|
@ -253,6 +255,46 @@ class NebulaClassIE(NebulaBaseIE):
|
|||
'title': 'Photos, Sculpture, and Video',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://nebula.tv/extremitiespodcast/pyramiden-the-high-arctic-soviet-ghost-town',
|
||||
'info_dict': {
|
||||
'ext': 'mp3',
|
||||
'id': '018f65f0-0033-4021-8f87-2d132beb19aa',
|
||||
'description': 'md5:05d2b23ab780c955e2511a2b9127acff',
|
||||
'series_id': '335e8159-d663-491a-888f-1732285706ac',
|
||||
'modified_timestamp': 1599091504,
|
||||
'episode_id': '018f65f0-0033-4021-8f87-2d132beb19aa',
|
||||
'series': 'Extremities',
|
||||
'modified_date': '20200903',
|
||||
'upload_date': '20200902',
|
||||
'title': 'Pyramiden: The High-Arctic Soviet Ghost Town',
|
||||
'release_timestamp': 1571237958,
|
||||
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
|
||||
'duration': 1546.05714,
|
||||
'timestamp': 1599085608,
|
||||
'release_date': '20191016',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://nebula.tv/thelayover/the-layover-episode-1',
|
||||
'info_dict': {
|
||||
'ext': 'mp3',
|
||||
'id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
|
||||
'episode_number': 1,
|
||||
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
|
||||
'release_date': '20230304',
|
||||
'modified_date': '20230403',
|
||||
'series': 'The Layover',
|
||||
'episode_id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
|
||||
'modified_timestamp': 1680554566,
|
||||
'duration': 3130.46401,
|
||||
'release_timestamp': 1677943800,
|
||||
'title': 'The Layover — Episode 1',
|
||||
'series_id': '874303a5-4900-4626-a4b6-2aacac34466a',
|
||||
'upload_date': '20230303',
|
||||
'episode': 'Episode 1',
|
||||
'timestamp': 1677883672,
|
||||
'description': 'md5:002cca89258e3bc7c268d5b8c24ba482',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -268,16 +310,38 @@ class NebulaClassIE(NebulaBaseIE):
|
|||
|
||||
metadata = self._call_api(
|
||||
f'https://content.api.nebula.app/content/{slug}/{episode}/?include=lessons',
|
||||
slug, note='Fetching video metadata')
|
||||
return {
|
||||
**self._extract_video_metadata(metadata),
|
||||
**self._extract_formats(metadata['id'], slug),
|
||||
}
|
||||
slug, note='Fetching class/podcast metadata')
|
||||
content_type = metadata.get('type')
|
||||
if content_type == 'lesson':
|
||||
return {
|
||||
**self._extract_video_metadata(metadata),
|
||||
**self._extract_formats(metadata['id'], slug),
|
||||
}
|
||||
elif content_type == 'podcast_episode':
|
||||
episode_url = metadata['episode_url']
|
||||
if not episode_url and metadata.get('premium'):
|
||||
self.raise_login_required()
|
||||
|
||||
if Art19IE.suitable(episode_url):
|
||||
return self.url_result(episode_url, Art19IE)
|
||||
return traverse_obj(metadata, {
|
||||
'id': ('id', {str}),
|
||||
'url': ('episode_url', {url_or_none}),
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'timestamp': ('published_at', {parse_iso8601}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'channel_id': ('channel_id', {str}),
|
||||
'channel': ('channel_title', {str}),
|
||||
'thumbnail': ('assets', 'regular', {url_or_none}),
|
||||
})
|
||||
|
||||
raise ExtractorError(f'Unexpected content type {content_type!r}')
|
||||
|
||||
|
||||
class NebulaSubscriptionsIE(NebulaBaseIE):
|
||||
IE_NAME = 'nebula:subscriptions'
|
||||
_VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)'
|
||||
_VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)/?(?:$|[?#])'
|
||||
_TESTS = [{
|
||||
'url': 'https://nebula.tv/myshows',
|
||||
'playlist_mincount': 1,
|
||||
|
@ -310,7 +374,7 @@ class NebulaSubscriptionsIE(NebulaBaseIE):
|
|||
|
||||
class NebulaChannelIE(NebulaBaseIE):
|
||||
IE_NAME = 'nebula:channel'
|
||||
_VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|library|videos/)(?P<id>[-\w]+)/?(?:$|[?#])'
|
||||
_VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|library|videos)(?P<id>[\w-]+)/?(?:$|[?#])'
|
||||
_TESTS = [{
|
||||
'url': 'https://nebula.tv/tom-scott-presents-money',
|
||||
'info_dict': {
|
||||
|
@ -343,6 +407,14 @@ class NebulaChannelIE(NebulaBaseIE):
|
|||
'description': 'md5:6690248223eed044a9f11cd5a24f9742',
|
||||
},
|
||||
'playlist_count': 23,
|
||||
}, {
|
||||
'url': 'https://nebula.tv/trussissuespodcast',
|
||||
'info_dict': {
|
||||
'id': 'trussissuespodcast',
|
||||
'title': 'The TLDR News Podcast',
|
||||
'description': 'md5:a08c4483bc0b705881d3e0199e721385',
|
||||
},
|
||||
'playlist_mincount': 80,
|
||||
}]
|
||||
|
||||
def _generate_playlist_entries(self, collection_id, collection_slug):
|
||||
|
@ -365,6 +437,17 @@ class NebulaChannelIE(NebulaBaseIE):
|
|||
lesson.get('share_url') or f'https://nebula.tv/{metadata["class_slug"]}/{metadata["slug"]}',
|
||||
{'id': lesson['id']}), NebulaClassIE, url_transparent=True, **metadata)
|
||||
|
||||
def _generate_podcast_entries(self, collection_id, collection_slug):
    """Yield a url result for each podcast episode of a channel.

    Episode listings are fetched page by page, following the ``next`` link
    of every API response until none is returned.  Only entries whose
    ``share_url`` passes ``url_or_none`` are yielded.
    """
    page_url = f'https://content.api.nebula.app/podcast_channels/{collection_id}/podcast_episodes/?ordering=-published_at&premium=true'
    page_num = 0
    while page_url:
        page_num += 1
        page = self._call_api(page_url, collection_slug, note=f'Retrieving podcast page {page_num}')

        yield from (
            self.url_result(item['share_url'], NebulaClassIE)
            for item in traverse_obj(page, ('results', lambda _, v: url_or_none(v['share_url']))))

        # A missing or empty "next" link ends the pagination.
        page_url = page.get('next')
|
||||
|
||||
def _real_extract(self, url):
|
||||
collection_slug = self._match_id(url)
|
||||
channel = self._call_api(
|
||||
|
@ -373,6 +456,8 @@ class NebulaChannelIE(NebulaBaseIE):
|
|||
|
||||
if channel.get('type') == 'class':
|
||||
entries = self._generate_class_entries(channel)
|
||||
elif channel.get('type') == 'podcast_channel':
|
||||
entries = self._generate_podcast_entries(channel['id'], collection_slug)
|
||||
else:
|
||||
entries = self._generate_playlist_entries(channel['id'], collection_slug)
|
||||
|
||||
|
|
|
@ -118,7 +118,6 @@ class NekoHackerIE(InfoExtractor):
|
|||
'artist': 'Neko Hacker',
|
||||
'track': 'md5:1a5fcbc96ca3c3265b1c6f9f79f30fd0',
|
||||
'track_number': 1,
|
||||
'duration': None
|
||||
}
|
||||
},
|
||||
{
|
||||
|
@ -136,7 +135,6 @@ class NekoHackerIE(InfoExtractor):
|
|||
'artist': 'Neko Hacker',
|
||||
'track': 'むじな de なじむ feat. 六科なじむ (CV: 日高里菜 )',
|
||||
'track_number': 2,
|
||||
'duration': None
|
||||
}
|
||||
},
|
||||
{
|
||||
|
@ -154,7 +152,6 @@ class NekoHackerIE(InfoExtractor):
|
|||
'artist': 'Neko Hacker',
|
||||
'track': '進め!むじなカンパニー (instrumental)',
|
||||
'track_number': 3,
|
||||
'duration': None
|
||||
}
|
||||
},
|
||||
{
|
||||
|
@ -172,7 +169,6 @@ class NekoHackerIE(InfoExtractor):
|
|||
'artist': 'Neko Hacker',
|
||||
'track': 'むじな de なじむ (instrumental)',
|
||||
'track_number': 4,
|
||||
'duration': None
|
||||
}
|
||||
}
|
||||
]
|
||||
|
|
|
@ -1,33 +1,38 @@
|
|||
import datetime
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import parse_iso8601, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class NerdCubedFeedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/feed\.json'
|
||||
_VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/?(?:$|[#?])'
|
||||
_TEST = {
|
||||
'url': 'http://www.nerdcubed.co.uk/feed.json',
|
||||
'url': 'http://www.nerdcubed.co.uk/',
|
||||
'info_dict': {
|
||||
'id': 'nerdcubed-feed',
|
||||
'title': 'nerdcubed.co.uk feed',
|
||||
},
|
||||
'playlist_mincount': 1300,
|
||||
'playlist_mincount': 5500,
|
||||
}
|
||||
|
||||
def _extract_video(self, feed_entry):
    """Build a url-transparent YouTube result for one feed entry.

    The entry's ``id`` is used as the YouTube video id; the remaining
    fields of the entry are mapped onto the result as metadata.
    """
    watch_url = f'https://www.youtube.com/watch?v={feed_entry["id"]}'
    metadata = traverse_obj(feed_entry, {
        'id': ('id', {str}),
        'title': ('title', {str}),
        'description': ('description', {str}),
        'timestamp': ('publishedAt', {parse_iso8601}),
        'channel': ('source', 'name', {str}),
        'channel_id': ('source', 'id', {str}),
        'channel_url': ('source', 'url', {str}),
        'thumbnail': ('thumbnail', 'source', {url_or_none}),
    })
    return self.url_result(watch_url, YoutubeIE, **metadata, url_transparent=True)
|
||||
|
||||
def _real_extract(self, url):
|
||||
feed = self._download_json(url, url, 'Downloading NerdCubed JSON feed')
|
||||
video_id = 'nerdcubed-feed'
|
||||
feed = self._download_json('https://www.nerdcubed.co.uk/_/cdn/videos.json', video_id)
|
||||
|
||||
entries = [{
|
||||
'_type': 'url',
|
||||
'title': feed_entry['title'],
|
||||
'uploader': feed_entry['source']['name'] if feed_entry['source'] else None,
|
||||
'upload_date': datetime.datetime.strptime(feed_entry['date'], '%Y-%m-%d').strftime('%Y%m%d'),
|
||||
'url': 'http://www.youtube.com/watch?v=' + feed_entry['youtube_id'],
|
||||
} for feed_entry in feed]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'title': 'nerdcubed.co.uk feed',
|
||||
'id': 'nerdcubed-feed',
|
||||
'entries': entries,
|
||||
}
|
||||
return self.playlist_result(
|
||||
map(self._extract_video, traverse_obj(feed, ('videos', lambda _, v: v['id']))),
|
||||
video_id, 'nerdcubed.co.uk feed')
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue