Merge branch 'master' into murrtube

This commit is contained in:
bashonly 2024-03-08 18:12:31 -06:00 committed by GitHub
commit 7b15ee0652
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
218 changed files with 1813 additions and 2558 deletions

View File

@ -164,7 +164,7 @@ jobs:
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: build-${{ github.job }}
name: build-bin-${{ github.job }}
path: |
yt-dlp
yt-dlp.tar.gz
@ -227,7 +227,7 @@ jobs:
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: build-linux_${{ matrix.architecture }}
name: build-bin-linux_${{ matrix.architecture }}
path: | # run-on-arch-action designates armv7l as armv7
repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}
compression-level: 0
@ -271,7 +271,7 @@ jobs:
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: build-${{ github.job }}
name: build-bin-${{ github.job }}
path: |
dist/yt-dlp_macos
dist/yt-dlp_macos.zip
@ -324,7 +324,7 @@ jobs:
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: build-${{ github.job }}
name: build-bin-${{ github.job }}
path: |
dist/yt-dlp_macos_legacy
compression-level: 0
@ -373,7 +373,7 @@ jobs:
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: build-${{ github.job }}
name: build-bin-${{ github.job }}
path: |
dist/yt-dlp.exe
dist/yt-dlp_min.exe
@ -421,7 +421,7 @@ jobs:
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: build-${{ github.job }}
name: build-bin-${{ github.job }}
path: |
dist/yt-dlp_x86.exe
compression-level: 0
@ -441,7 +441,7 @@ jobs:
- uses: actions/download-artifact@v4
with:
path: artifact
pattern: build-*
pattern: build-bin-*
merge-multiple: true
- name: Make SHA2-SUMS files
@ -484,3 +484,4 @@ jobs:
_update_spec
SHA*SUMS*
compression-level: 0
overwrite: true

View File

@ -167,8 +167,8 @@ For ease of use, a few more compat options are available:
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx`
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx`
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress`
* `--compat-options 2023`: Same as `--compat-options prefer-legacy-http-handler,manifest-filesize-approx`. Use this to enable all future compat options
* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`
* `--compat-options 2023`: Currently does nothing. Use this to enable all future compat options
# INSTALLATION
@ -218,7 +218,7 @@ Example usage:
yt-dlp --update-to nightly
# To install nightly with pip:
python -m pip install -U --pre yt-dlp
python -m pip install -U --pre yt-dlp[default]
```
<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
@ -1310,8 +1310,11 @@ The available fields are:
- `description` (string): The description of the video
- `display_id` (string): An alternative identifier for the video
- `uploader` (string): Full name of the video uploader
- `uploader_id` (string): Nickname or id of the video uploader
- `uploader_url` (string): URL to the video uploader's profile
- `license` (string): License name the video is licensed under
- `creator` (string): The creator of the video
- `creators` (list): The creators of the video
- `creator` (string): The creators of the video; comma-separated
- `timestamp` (numeric): UNIX timestamp of the moment the video became available
- `upload_date` (string): Video upload date in UTC (YYYYMMDD)
- `release_timestamp` (numeric): UNIX timestamp of the moment the video was released
@ -1319,9 +1322,9 @@ The available fields are:
- `release_year` (numeric): Year (YYYY) when the video or album was released
- `modified_timestamp` (numeric): UNIX timestamp of the moment the video was last modified
- `modified_date` (string): The date (YYYYMMDD) when the video was last modified in UTC
- `uploader_id` (string): Nickname or id of the video uploader
- `channel` (string): Full name of the channel the video is uploaded on
- `channel_id` (string): Id of the channel
- `channel_url` (string): URL of the channel
- `channel_follower_count` (numeric): Number of followers of the channel
- `channel_is_verified` (boolean): Whether the channel is verified on the platform
- `location` (string): Physical location where the video was filmed
@ -1361,7 +1364,10 @@ The available fields are:
- `webpage_url_basename` (string): The basename of the webpage URL
- `webpage_url_domain` (string): The domain of the webpage URL
- `original_url` (string): The URL given by the user (or same as `webpage_url` for playlist entries)
- `categories` (list): List of categories the video belongs to
- `tags` (list): List of tags assigned to the video
- `cast` (list): List of cast members
All the fields in [Filtering Formats](#filtering-formats) can also be used
Available for the video that belongs to some logical chapter or section:
@ -1373,6 +1379,7 @@ Available for the video that belongs to some logical chapter or section:
Available for the video that is an episode of some series or programme:
- `series` (string): Title of the series or programme the video episode belongs to
- `series_id` (string): Id of the series or programme the video episode belongs to
- `season` (string): Title of the season the video episode belongs to
- `season_number` (numeric): Number of the season the video episode belongs to
- `season_id` (string): Id of the season the video episode belongs to
@ -1385,11 +1392,16 @@ Available for the media that is a track or a part of a music album:
- `track` (string): Title of the track
- `track_number` (numeric): Number of the track within an album or a disc
- `track_id` (string): Id of the track
- `artist` (string): Artist(s) of the track
- `genre` (string): Genre(s) of the track
- `artists` (list): Artist(s) of the track
- `artist` (string): Artist(s) of the track; comma-separated
- `genres` (list): Genre(s) of the track
- `genre` (string): Genre(s) of the track; comma-separated
- `composers` (list): Composer(s) of the piece
- `composer` (string): Composer(s) of the piece; comma-separated
- `album` (string): Title of the album the track belongs to
- `album_type` (string): Type of the album
- `album_artist` (string): List of all artists that appeared on the album
- `album_artists` (list): All artists that appeared on the album
- `album_artist` (string): All artists that appeared on the album; comma-separated
- `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
Available only when using `--download-sections` and for `chapter:` prefix when using `--split-chapters` for videos with internal chapters:
@ -1767,10 +1779,11 @@ Metadata fields | From
`description`, `synopsis` | `description`
`purl`, `comment` | `webpage_url`
`track` | `track_number`
`artist` | `artist`, `creator`, `uploader` or `uploader_id`
`genre` | `genre`
`artist` | `artist`, `artists`, `creator`, `creators`, `uploader` or `uploader_id`
`composer` | `composer` or `composers`
`genre` | `genre` or `genres`
`album` | `album`
`album_artist` | `album_artist`
`album_artist` | `album_artist` or `album_artists`
`disc` | `disc_number`
`show` | `series`
`season_number` | `season_number`

View File

@ -19,7 +19,7 @@ def parse_args():
parser.add_argument(
'input', nargs='?', metavar='TOMLFILE', default='pyproject.toml', help='Input file (default: %(default)s)')
parser.add_argument(
'-e', '--exclude', metavar='REQUIREMENT', action='append', help='Exclude a required dependency')
'-e', '--exclude', metavar='DEPENDENCY', action='append', help='Exclude a dependency')
parser.add_argument(
'-i', '--include', metavar='GROUP', action='append', help='Include an optional dependency group')
parser.add_argument(
@ -33,21 +33,28 @@ def parse_args():
def main():
args = parse_args()
toml_data = parse_toml(read_file(args.input))
deps = toml_data['project']['dependencies']
targets = deps.copy() if not args.only_optional else []
project_table = parse_toml(read_file(args.input))['project']
optional_groups = project_table['optional-dependencies']
excludes = args.exclude or []
for exclude in args.exclude or []:
for dep in deps:
simplified_dep = re.match(r'[\w-]+', dep)[0]
if dep in targets and (exclude.lower() == simplified_dep.lower() or exclude == dep):
targets.remove(dep)
deps = []
if not args.only_optional: # `-o` should exclude 'dependencies' and the 'default' group
deps.extend(project_table['dependencies'])
if 'default' not in excludes: # `--exclude default` should exclude entire 'default' group
deps.extend(optional_groups['default'])
optional_deps = toml_data['project']['optional-dependencies']
for include in args.include or []:
group = optional_deps.get(include)
if group:
targets.extend(group)
def name(dependency):
return re.match(r'[\w-]+', dependency)[0].lower()
target_map = {name(dep): dep for dep in deps}
for include in filter(None, map(optional_groups.get, args.include or [])):
target_map.update(zip(map(name, include), include))
for exclude in map(name, excludes):
target_map.pop(exclude, None)
targets = list(target_map.values())
if args.print:
for target in targets:

View File

@ -51,6 +51,7 @@ dependencies = [
]
[project.optional-dependencies]
default = []
secretstorage = [
"cffi",
"secretstorage",

View File

@ -223,6 +223,10 @@ def sanitize_got_info_dict(got_dict):
if test_info_dict.get('display_id') == test_info_dict.get('id'):
test_info_dict.pop('display_id')
# Remove deprecated fields
for old in YoutubeDL._deprecated_multivalue_fields.keys():
test_info_dict.pop(old, None)
# release_year may be generated from release_date
if try_call(lambda: test_info_dict['release_year'] == int(test_info_dict['release_date'][:4])):
test_info_dict.pop('release_year')

View File

@ -941,7 +941,7 @@ class TestYoutubeDL(unittest.TestCase):
def get_videos(filter_=None):
ydl = YDL({'match_filter': filter_, 'simulate': True})
for v in videos:
ydl.process_ie_result(v, download=True)
ydl.process_ie_result(v.copy(), download=True)
return [v['id'] for v in ydl.downloaded_info_dicts]
res = get_videos()

View File

@ -192,8 +192,8 @@ class TestWebsSocketRequestHandlerConformance:
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
@pytest.mark.parametrize('params,extensions', [
({'timeout': 0.00001}, {}),
({}, {'timeout': 0.00001}),
({'timeout': sys.float_info.min}, {}),
({}, {'timeout': sys.float_info.min}),
])
def test_timeout(self, handler, params, extensions):
with handler(**params) as rh:

View File

@ -580,6 +580,13 @@ class YoutubeDL:
'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
}
_deprecated_multivalue_fields = {
'album_artist': 'album_artists',
'artist': 'artists',
'composer': 'composers',
'creator': 'creators',
'genre': 'genres',
}
_format_selection_exts = {
'audio': set(MEDIA_EXTENSIONS.common_audio),
'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
@ -683,7 +690,6 @@ class YoutubeDL:
self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
self._load_cookies(self.params['http_headers'].get('Cookie')) # compat
self.params['http_headers'].pop('Cookie', None)
self._request_director = self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
if auto_init and auto_init != 'no_verbose_header':
self.print_debug_header()
@ -957,6 +963,7 @@ class YoutubeDL:
def close(self):
self.save_cookies()
self._request_director.close()
del self._request_director
def trouble(self, message=None, tb=None, is_error=True):
"""Determine action to take when a download problem appears.
@ -2640,6 +2647,14 @@ class YoutubeDL:
if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
for old_key, new_key in self._deprecated_multivalue_fields.items():
if new_key in info_dict and old_key in info_dict:
self.deprecation_warning(f'Do not return {old_key!r} when {new_key!r} is present')
elif old_value := info_dict.get(old_key):
info_dict[new_key] = old_value.split(', ')
elif new_value := info_dict.get(new_key):
info_dict[old_key] = ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in new_value)
def _raise_pending_errors(self, info):
err = info.pop('__pending_error', None)
if err:
@ -3483,7 +3498,8 @@ class YoutubeDL:
or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
FFmpegFixupM3u8PP)
ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
ffmpeg_fixup(downloader == 'dashsegments'
and (info_dict.get('is_live') or info_dict.get('is_dash_periods')),
'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
@ -4144,6 +4160,10 @@ class YoutubeDL:
director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
return director
@functools.cached_property
def _request_director(self):
return self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
def encode(self, s):
if isinstance(s, bytes):
return s # Already encoded

View File

@ -14,7 +14,7 @@ import os
import re
import traceback
from .compat import compat_shlex_quote
from .compat import compat_os_name, compat_shlex_quote
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
from .downloader.external import get_external_downloader
from .extractor import list_extractor_classes
@ -984,7 +984,28 @@ def _real_main(argv=None):
if pre_process:
return ydl._download_retcode
ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
args = sys.argv[1:] if argv is None else argv
ydl.warn_if_short_id(args)
# Show a useful error message and wait for keypress if not launched from shell on Windows
if not args and compat_os_name == 'nt' and getattr(sys, 'frozen', False):
import ctypes.wintypes
import msvcrt
kernel32 = ctypes.WinDLL('Kernel32')
buffer = (1 * ctypes.wintypes.DWORD)()
attached_processes = kernel32.GetConsoleProcessList(buffer, 1)
# If we only have a single process attached, then the executable was double-clicked
# When using `pyinstaller` with `--onefile`, two processes get attached
is_onefile = hasattr(sys, '_MEIPASS') and os.path.basename(sys._MEIPASS).startswith('_MEI')
if attached_processes == 1 or is_onefile and attached_processes == 2:
print(parser._generate_error_message(
'Do not double-click the executable, instead call it from a command line.\n'
'Please read the README for further information on how to use yt-dlp: '
'https://github.com/yt-dlp/yt-dlp#readme'))
msvcrt.getch()
_exit(2)
parser.error(
'You must provide at least one URL.\n'
'Type yt-dlp --help to see a list of all options.')

View File

@ -320,7 +320,6 @@ from .cbs import (
CBSIE,
ParamountPressExpressIE,
)
from .cbsinteractive import CBSInteractiveIE
from .cbsnews import (
CBSNewsEmbedIE,
CBSNewsIE,
@ -348,10 +347,6 @@ from .cgtn import CGTNIE
from .charlierose import CharlieRoseIE
from .chaturbate import ChaturbateIE
from .chilloutzone import ChilloutzoneIE
from .chingari import (
ChingariIE,
ChingariUserIE,
)
from .chzzk import (
CHZZKLiveIE,
CHZZKVideoIE,
@ -369,7 +364,6 @@ from .ciscolive import (
from .ciscowebex import CiscoWebexIE
from .cjsw import CJSWIE
from .clipchamp import ClipchampIE
from .cliphunter import CliphunterIE
from .clippit import ClippitIE
from .cliprs import ClipRsIE
from .closertotruth import CloserToTruthIE
@ -379,7 +373,6 @@ from .clubic import ClubicIE
from .clyp import ClypIE
from .cmt import CMTIE
from .cnbc import (
CNBCIE,
CNBCVideoIE,
)
from .cnn import (
@ -445,6 +438,7 @@ from .dailymail import DailyMailIE
from .dailymotion import (
DailymotionIE,
DailymotionPlaylistIE,
DailymotionSearchIE,
DailymotionUserIE,
)
from .dailywire import (
@ -476,7 +470,6 @@ from .dlf import (
)
from .dfb import DFBIE
from .dhm import DHMIE
from .digg import DiggIE
from .douyutv import (
DouyuShowIE,
DouyuTVIE,
@ -610,7 +603,6 @@ from .fc2 import (
)
from .fczenit import FczenitIE
from .fifa import FifaIE
from .filmmodu import FilmmoduIE
from .filmon import (
FilmOnIE,
FilmOnChannelIE,
@ -676,7 +668,6 @@ from .gab import (
GabIE,
)
from .gaia import GaiaIE
from .gameinformer import GameInformerIE
from .gamejolt import (
GameJoltIE,
GameJoltUserIE,
@ -705,7 +696,6 @@ from .gettr import (
GettrStreamingIE,
)
from .giantbomb import GiantBombIE
from .giga import GigaIE
from .glide import GlideIE
from .globalplayer import (
GlobalPlayerLiveIE,
@ -896,10 +886,8 @@ from .jtbc import (
from .jwplatform import JWPlatformIE
from .kakao import KakaoIE
from .kaltura import KalturaIE
from .kanal2 import Kanal2IE
from .kankanews import KankaNewsIE
from .karaoketv import KaraoketvIE
from .karrierevideos import KarriereVideosIE
from .kelbyone import KelbyOneIE
from .khanacademy import (
KhanAcademyIE,
@ -915,13 +903,11 @@ from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE
from .kommunetv import KommunetvIE
from .kompas import KompasVideoIE
from .konserthusetplay import KonserthusetPlayIE
from .koo import KooIE
from .kth import KTHIE
from .krasview import KrasViewIE
from .ku6 import Ku6IE
from .kukululive import KukuluLiveIE
from .kusi import KUSIIE
from .kuwo import (
KuwoIE,
KuwoAlbumIE,
@ -1003,7 +989,6 @@ from .lnkgo import (
LnkGoIE,
LnkIE,
)
from .localnews8 import LocalNews8IE
from .lovehomeporn import LoveHomePornIE
from .lrt import (
LRTVODIE,
@ -1030,7 +1015,6 @@ from .mailru import (
MailRuMusicSearchIE,
)
from .mainstreaming import MainStreamingIE
from .malltv import MallTVIE
from .mangomolo import (
MangomoloVideoIE,
MangomoloLiveIE,
@ -1074,7 +1058,6 @@ from .meipai import MeipaiIE
from .melonvod import MelonVODIE
from .metacritic import MetacriticIE
from .mgtv import MGTVIE
from .miaopai import MiaoPaiIE
from .microsoftstream import MicrosoftStreamIE
from .microsoftvirtualacademy import (
MicrosoftVirtualAcademyIE,
@ -1092,7 +1075,6 @@ from .minds import (
MindsChannelIE,
MindsGroupIE,
)
from .ministrygrid import MinistryGridIE
from .minoto import MinotoIE
from .mirrativ import (
MirrativIE,
@ -1120,7 +1102,6 @@ from .mlssoccer import MLSSoccerIE
from .mocha import MochaVideoIE
from .mojvideo import MojvideoIE
from .monstercat import MonstercatIE
from .morningstar import MorningstarIE
from .motherless import (
MotherlessIE,
MotherlessGroupIE,
@ -1365,7 +1346,6 @@ from .nuvid import NuvidIE
from .nzherald import NZHeraldIE
from .nzonscreen import NZOnScreenIE
from .nzz import NZZIE
from .odatv import OdaTVIE
from .odkmedia import OnDemandChinaEpisodeIE
from .odnoklassniki import OdnoklassnikiIE
from .oftv import (
@ -1477,7 +1457,6 @@ from .platzi import (
PlatziCourseIE,
)
from .playplustv import PlayPlusTVIE
from .playstuff import PlayStuffIE
from .playsuisse import PlaySuisseIE
from .playtvak import PlaytvakIE
from .playwire import PlaywireIE
@ -1599,7 +1578,6 @@ from .raywenderlich import (
RayWenderlichIE,
RayWenderlichCourseIE,
)
from .rbmaradio import RBMARadioIE
from .rbgtum import (
RbgTumIE,
RbgTumCourseIE,
@ -1631,7 +1609,6 @@ from .redgifs import (
RedGifsUserIE,
)
from .redtube import RedTubeIE
from .regiotv import RegioTVIE
from .rentv import (
RENTVIE,
RENTVArticleIE,
@ -1640,6 +1617,7 @@ from .restudy import RestudyIE
from .reuters import ReutersIE
from .reverbnation import ReverbNationIE
from .rheinmaintv import RheinMainTVIE
from .ridehome import RideHomeIE
from .rinsefm import (
RinseFMIE,
RinseFMArtistPlaylistIE,
@ -1738,7 +1716,6 @@ from .safari import (
from .saitosan import SaitosanIE
from .samplefocus import SampleFocusIE
from .sapo import SapoIE
from .savefrom import SaveFromIE
from .sbs import SBSIE
from .sbscokr import (
SBSCoKrIE,
@ -1758,7 +1735,6 @@ from .scte import (
SCTECourseIE,
)
from .scrolller import ScrolllerIE
from .seeker import SeekerIE
from .sejmpl import SejmIE
from .senalcolombia import SenalColombiaLiveIE
from .senategov import SenateISVPIE, SenateGovIE
@ -1901,7 +1877,6 @@ from .storyfire import (
)
from .streamable import StreamableIE
from .streamcz import StreamCZIE
from .streamff import StreamFFIE
from .streetvoice import StreetVoiceIE
from .stretchinternet import StretchInternetIE
from .stripchat import StripchatIE
@ -1930,7 +1905,6 @@ from .tbsjp import (
TBSJPProgramIE,
TBSJPPlaylistIE,
)
from .tdslifeway import TDSLifewayIE
from .teachable import (
TeachableIE,
TeachableCourseIE,
@ -2500,6 +2474,7 @@ from .zee5 import (
Zee5SeriesIE,
)
from .zeenews import ZeeNewsIE
from .zenporn import ZenPornIE
from .zetland import ZetlandDKArticleIE
from .zhihu import ZhihuIE
from .zingmp3 import (

View File

@ -245,7 +245,6 @@ class ABCIViewIE(InfoExtractor):
'episode_id': 'NC2203H039S00',
'season_number': 2022,
'season': 'Season 2022',
'episode_number': None,
'episode': 'Locking Up Kids',
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/nc/NC2203H039S00_636d8a0944a22_1920.jpg',
'timestamp': 1668460497,
@ -271,8 +270,6 @@ class ABCIViewIE(InfoExtractor):
'episode_id': 'RF2004Q043S00',
'season_number': 2021,
'season': 'Season 2021',
'episode_number': None,
'episode': None,
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/rf/RF2004Q043S00_61a950639dbc0_1920.jpg',
'timestamp': 1638710705,

View File

@ -259,7 +259,7 @@ class AbemaTVIE(AbemaTVBaseIE):
'title': 'ゆるキャン△ SEASON 全話一挙【無料ビデオ72時間】',
'series': 'ゆるキャン△ SEASON',
'episode': 'ゆるキャン△ SEASON 全話一挙【無料ビデオ72時間】',
'series_number': 2,
'season_number': 2,
'episode_number': 1,
'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
},

View File

@ -3,6 +3,7 @@ from ..utils import (
float_or_none,
format_field,
int_or_none,
str_or_none,
traverse_obj,
parse_codecs,
parse_qs,
@ -129,7 +130,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE):
'title': '红孩儿之趴趴蛙寻石记 第5话 ',
'duration': 760.0,
'season': '红孩儿之趴趴蛙寻石记',
'season_id': 5023171,
'season_id': '5023171',
'season_number': 1, # series has only 1 season
'episode': 'Episode 5',
'episode_number': 5,
@ -146,7 +147,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE):
'title': '叽歪老表(第二季) 第5话 坚不可摧',
'season': '叽歪老表(第二季)',
'season_number': 2,
'season_id': 6065485,
'season_id': '6065485',
'episode': '坚不可摧',
'episode_number': 5,
'upload_date': '20220324',
@ -191,7 +192,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE):
'title': json_bangumi_data.get('showTitle'),
'thumbnail': json_bangumi_data.get('image'),
'season': json_bangumi_data.get('bangumiTitle'),
'season_id': season_id,
'season_id': str_or_none(season_id),
'season_number': season_number,
'episode': json_bangumi_data.get('title'),
'episode_number': episode_number,

View File

@ -22,7 +22,7 @@ class AltCensoredIE(InfoExtractor):
'title': "QUELLES SONT LES CONSÉQUENCES DE L'HYPERSEXUALISATION DE LA SOCIÉTÉ ?",
'display_id': 'k0srjLSkga8.webm',
'release_date': '20180403',
'creator': 'Virginie Vota',
'creators': ['Virginie Vota'],
'release_year': 2018,
'upload_date': '20230318',
'uploader': 'admin@altcensored.com',
@ -32,7 +32,7 @@ class AltCensoredIE(InfoExtractor):
'duration': 926.09,
'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg',
'view_count': int,
'categories': ['News & Politics'],
'categories': ['News & Politics'], # FIXME
}
}]
@ -62,14 +62,21 @@ class AltCensoredChannelIE(InfoExtractor):
'title': 'Virginie Vota',
'id': 'UCFPTO55xxHqFqkzRZHu4kcw',
},
'playlist_count': 91
'playlist_count': 85,
}, {
'url': 'https://altcensored.com/channel/UC9CcJ96HKMWn0LZlcxlpFTw',
'info_dict': {
'title': 'yukikaze775',
'id': 'UC9CcJ96HKMWn0LZlcxlpFTw',
},
'playlist_count': 4
'playlist_count': 4,
}, {
'url': 'https://altcensored.com/channel/UCfYbb7nga6-icsFWWgS-kWw',
'info_dict': {
'title': 'Mister Metokur',
'id': 'UCfYbb7nga6-icsFWWgS-kWw',
},
'playlist_count': 121,
}]
def _real_extract(self, url):
@ -78,7 +85,7 @@ class AltCensoredChannelIE(InfoExtractor):
url, channel_id, 'Download channel webpage', 'Unable to get channel webpage')
title = self._html_search_meta('altcen_title', webpage, 'title', fatal=False)
page_count = int_or_none(self._html_search_regex(
r'<a[^>]+href="/channel/\w+/page/(\d+)">(?:\1)</a>',
r'<a[^>]+href="/channel/[\w-]+/page/(\d+)">(?:\1)</a>',
webpage, 'page count', default='1'))
def page_func(page_num):

View File

@ -31,6 +31,7 @@ from ..utils import (
unified_timestamp,
url_or_none,
urlhandle_detect_ext,
variadic,
)
@ -49,7 +50,7 @@ class ArchiveOrgIE(InfoExtractor):
'release_date': '19681210',
'timestamp': 1268695290,
'upload_date': '20100315',
'creator': 'SRI International',
'creators': ['SRI International'],
'uploader': 'laura@archive.org',
'thumbnail': r're:https://archive\.org/download/.*\.jpg',
'display_id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.cdr',
@ -109,7 +110,7 @@ class ArchiveOrgIE(InfoExtractor):
'title': 'Turning',
'ext': 'flac',
'track': 'Turning',
'creator': 'Grateful Dead',
'creators': ['Grateful Dead'],
'display_id': 'gd1977-05-08d01t01.flac',
'track_number': 1,
'album': '1977-05-08 - Barton Hall - Cornell University',
@ -129,7 +130,7 @@ class ArchiveOrgIE(InfoExtractor):
'location': 'Barton Hall - Cornell University',
'duration': 438.68,
'track': 'Deal',
'creator': 'Grateful Dead',
'creators': ['Grateful Dead'],
'album': '1977-05-08 - Barton Hall - Cornell University',
'release_date': '19770508',
'display_id': 'gd1977-05-08d01t07.flac',
@ -167,7 +168,7 @@ class ArchiveOrgIE(InfoExtractor):
'upload_date': '20160610',
'description': 'md5:f70956a156645a658a0dc9513d9e78b7',
'uploader': 'dimitrios@archive.org',
'creator': ['British Broadcasting Corporation', 'Time-Life Films'],
'creators': ['British Broadcasting Corporation', 'Time-Life Films'],
'timestamp': 1465594947,
},
'playlist': [
@ -257,7 +258,7 @@ class ArchiveOrgIE(InfoExtractor):
'title': m['title'],
'description': clean_html(m.get('description')),
'uploader': dict_get(m, ['uploader', 'adder']),
'creator': m.get('creator'),
'creators': traverse_obj(m, ('creator', {variadic}, {lambda x: x[0] and list(x)})),
'license': m.get('licenseurl'),
'release_date': unified_strdate(m.get('date')),
'timestamp': unified_timestamp(dict_get(m, ['publicdate', 'addeddate'])),
@ -272,7 +273,7 @@ class ArchiveOrgIE(InfoExtractor):
'title': f.get('title') or f['name'],
'display_id': f['name'],
'description': clean_html(f.get('description')),
'creator': f.get('creator'),
'creators': traverse_obj(f, ('creator', {variadic}, {lambda x: x[0] and list(x)})),
'duration': parse_duration(f.get('length')),
'track_number': int_or_none(f.get('track')),
'album': f.get('album'),
@ -300,7 +301,7 @@ class ArchiveOrgIE(InfoExtractor):
is_logged_in = bool(self._get_cookies('https://archive.org').get('logged-in-sig'))
if extension in KNOWN_EXTENSIONS and (not f.get('private') or is_logged_in):
entry['formats'].append({
'url': 'https://archive.org/download/' + identifier + '/' + f['name'],
'url': 'https://archive.org/download/' + identifier + '/' + urllib.parse.quote(f['name']),
'format': f.get('format'),
'width': int_or_none(f.get('width')),
'height': int_or_none(f.get('height')),

View File

@ -24,7 +24,8 @@ class AxsIE(InfoExtractor):
'timestamp': 1685729564,
'duration': 1284.216,
'series': 'Rock & Roll Road Trip with Sammy Hagar',
'season': 2,
'season': 'Season 2',
'season_number': 2,
'episode': '3',
'thumbnail': 'https://images.dotstudiopro.com/5f4e9d330a0c3b295a7e8394',
},
@ -41,7 +42,8 @@ class AxsIE(InfoExtractor):
'timestamp': 1676403615,
'duration': 2570.668,
'series': 'The Big Interview with Dan Rather',
'season': 3,
'season': 'Season 3',
'season_number': 3,
'episode': '5',
'thumbnail': 'https://images.dotstudiopro.com/5f4d1901f340b50d937cec32',
},
@ -77,7 +79,7 @@ class AxsIE(InfoExtractor):
'title': ('title', {str}),
'description': ('description', {str}),
'series': ('seriestitle', {str}),
'season': ('season', {int}),
'season_number': ('season', {int}),
'episode': ('episode', {str}),
'duration': ('duration', {float_or_none}),
'timestamp': ('updated_at', {parse_iso8601}),

View File

@ -2,6 +2,7 @@ from .common import InfoExtractor
from ..utils import (
int_or_none,
str_or_none,
traverse_obj,
try_get,
unified_timestamp,
@ -22,7 +23,7 @@ class BeegIE(InfoExtractor):
'age_limit': 18,
'upload_date': '20220131',
'timestamp': 1643656455,
'display_id': 2540839,
'display_id': '2540839',
}
}, {
'url': 'https://beeg.com/-0599050563103750?t=4-861',
@ -36,7 +37,7 @@ class BeegIE(InfoExtractor):
'age_limit': 18,
'description': 'md5:b4fc879a58ae6c604f8f259155b7e3b9',
'timestamp': 1643623200,
'display_id': 2569965,
'display_id': '2569965',
'upload_date': '20220131',
}
}, {
@ -78,7 +79,7 @@ class BeegIE(InfoExtractor):
return {
'id': video_id,
'display_id': first_fact.get('id'),
'display_id': str_or_none(first_fact.get('id')),
'title': traverse_obj(video, ('file', 'stuff', 'sf_name')),
'description': traverse_obj(video, ('file', 'stuff', 'sf_story')),
'timestamp': unified_timestamp(first_fact.get('fc_created')),

View File

@ -32,7 +32,7 @@ class BellMediaIE(InfoExtractor):
'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3',
'upload_date': '20180525',
'timestamp': 1527288600,
'season_id': 73997,
'season_id': '73997',
'season': '2018',
'thumbnail': 'http://images2.9c9media.com/image_asset/2018_5_25_baf30cbd-b28d-4a18-9903-4bb8713b00f5_PNG_956x536.jpg',
'tags': [],

View File

@ -93,7 +93,6 @@ class BFMTVArticleIE(BFMTVBaseIE):
'id': '6318445464112',
'ext': 'mp4',
'title': 'Le plein de bioéthanol fait de plus en plus mal à la pompe',
'description': None,
'uploader_id': '876630703001',
'upload_date': '20230110',
'timestamp': 1673341692,

View File

@ -1996,7 +1996,7 @@ class BiliIntlIE(BiliIntlBaseIE):
'title': get_element_by_class(
'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
'description': get_element_by_class(
'bstar-meta__desc', webpage) or self._html_search_meta('og:description'),
'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
}, self._search_json_ld(webpage, video_id, default={}))
def _get_comments_reply(self, root_id, next_id=0, display_id=None):

View File

@ -185,7 +185,6 @@ class BitChuteChannelIE(InfoExtractor):
'info_dict': {
'id': 'UGlrF9o9b-Q',
'ext': 'mp4',
'filesize': None,
'title': 'This is the first video on #BitChute !',
'description': 'md5:a0337e7b1fe39e32336974af8173a034',
'thumbnail': r're:^https?://.*\.jpg$',

View File

@ -4,10 +4,12 @@ from ..utils import (
ExtractorError,
int_or_none,
parse_iso8601,
str_or_none,
)
class BleacherReportIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/articles/(?P<id>\d+)'
_TESTS = [{
'url': 'http://bleacherreport.com/articles/2496438-fsu-stat-projections-is-jalen-ramsey-best-defensive-player-in-college-football',
@ -16,7 +18,7 @@ class BleacherReportIE(InfoExtractor):
'id': '2496438',
'ext': 'mp4',
'title': 'FSU Stat Projections: Is Jalen Ramsey Best Defensive Player in College Football?',
'uploader_id': 3992341,
'uploader_id': '3992341',
'description': 'CFB, ACC, Florida State',
'timestamp': 1434380212,
'upload_date': '20150615',
@ -33,7 +35,7 @@ class BleacherReportIE(InfoExtractor):
'timestamp': 1446839961,
'uploader': 'Sean Fay',
'description': 'md5:b1601e2314c4d8eec23b6eafe086a757',
'uploader_id': 6466954,
'uploader_id': '6466954',
'upload_date': '20151011',
},
'add_ie': ['Youtube'],
@ -58,7 +60,7 @@ class BleacherReportIE(InfoExtractor):
'id': article_id,
'title': article_data['title'],
'uploader': article_data.get('author', {}).get('name'),
'uploader_id': article_data.get('authorId'),
'uploader_id': str_or_none(article_data.get('authorId')),
'timestamp': parse_iso8601(article_data.get('createdAt')),
'thumbnails': thumbnails,
'comment_count': int_or_none(article_data.get('commentsCount')),
@ -82,6 +84,7 @@ class BleacherReportIE(InfoExtractor):
class BleacherReportCMSIE(AMPIE):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})'
_TESTS = [{
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms',

View File

@ -76,6 +76,7 @@ class CBSBaseIE(ThePlatformFeedIE): # XXX: Do not subclass from concrete IE
class CBSIE(CBSBaseIE):
_WORKING = False
_VALID_URL = r'''(?x)
(?:
cbs:|

View File

@ -1,98 +0,0 @@
from .cbs import CBSIE
from ..utils import int_or_none
class CBSInteractiveIE(CBSIE):  # XXX: Do not subclass from concrete IE
    # Extractor for CNET / ZDNet video pages. The page embeds a JSON blob with
    # the player options; the referenced MPX id is handed to the inherited
    # `_extract_video_info` together with the per-site MPX account number.
    _VALID_URL = r'https?://(?:www\.)?(?P<site>cnet|zdnet)\.com/(?:videos|video(?:/share)?)/(?P<id>[^/?]+)'
    _TESTS = [{
        'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
        'info_dict': {
            'id': 'R49SYt__yAfmlXR85z4f7gNmCBDcN_00',
            'display_id': 'hands-on-with-microsofts-windows-8-1-update',
            'ext': 'mp4',
            'title': 'Hands-on with Microsoft Windows 8.1 Update',
            'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
            'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861',
            'uploader': 'Sarah Mitroff',
            'duration': 70,
            'timestamp': 1396479627,
            'upload_date': '20140402',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/',
        'md5': 'f11d27b2fa18597fbf92444d2a9ed386',
        'info_dict': {
            'id': 'kjOJd_OoVJqbg_ZD8MZCOk8Wekb9QccK',
            'display_id': 'whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187',
            'ext': 'mp4',
            'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)',
            'description': 'md5:d2b9a95a5ffe978ae6fbd4cf944d618f',
            'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40',
            'uploader': 'Ashley Esqueda',
            'duration': 1482,
            'timestamp': 1433289889,
            'upload_date': '20150603',
        },
    }, {
        'url': 'http://www.zdnet.com/video/share/video-keeping-android-smartphones-and-tablets-secure/',
        'info_dict': {
            'id': 'k0r4T_ehht4xW_hAOqiVQPuBDPZ8SRjt',
            'display_id': 'video-keeping-android-smartphones-and-tablets-secure',
            'ext': 'mp4',
            'title': 'Video: Keeping Android smartphones and tablets secure',
            'description': 'Here\'s the best way to keep Android devices secure, and what you do when they\'ve come to the end of their lives.',
            'uploader_id': 'f2d97ea2-8175-11e2-9d12-0018fe8a00b0',
            'uploader': 'Adrian Kingsley-Hughes',
            'duration': 731,
            'timestamp': 1449129925,
            'upload_date': '20151203',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.zdnet.com/video/huawei-matebook-x-video/',
        'only_matching': True,
    }]

    # Site name (as captured by _VALID_URL) -> MPX account number
    MPX_ACCOUNTS = {
        'cnet': 2198311517,
        'zdnet': 2387448114,
    }

    def _real_extract(self, url):
        site, display_id = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, display_id)

        # The player options are stored as JSON inside a single-quoted data attribute
        options = self._parse_json(
            self._html_search_regex(
                r"data(?:-(?:cnet|zdnet))?-video(?:-(?:uvp(?:js)?|player))?-options='([^']+)'",
                webpage, 'data json'),
            display_id)

        # Either a single 'video' object, or the first entry of 'videos' / 'playlist'
        video = options.get('video') or (options.get('videos') or options.get('playlist'))[0]

        video_id = video['mpxRefId']
        title = video['title']

        author = video.get('author')
        uploader = f'{author["firstName"]} {author["lastName"]}' if author else None
        uploader_id = author.get('id') if author else None

        info = self._extract_video_info(video_id, site, self.MPX_ACCOUNTS[site])
        # Page-level metadata takes precedence over whatever the MPX lookup returned
        info.update({
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'duration': int_or_none(video.get('duration')),
            'uploader': uploader,
            'uploader_id': uploader_id,
        })
        return info

View File

@ -8,6 +8,7 @@ from ..utils import (
# class CBSSportsEmbedIE(CBSBaseIE):
class CBSSportsEmbedIE(InfoExtractor):
_WORKING = False
IE_NAME = 'cbssports:embed'
_VALID_URL = r'''(?ix)https?://(?:(?:www\.)?cbs|embed\.247)sports\.com/player/embed.+?
(?:
@ -75,6 +76,7 @@ class CBSSportsBaseIE(InfoExtractor):
class CBSSportsIE(CBSSportsBaseIE):
_WORKING = False
IE_NAME = 'cbssports'
_VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/video/(?P<id>[^/?#&]+)'
_TESTS = [{
@ -92,6 +94,7 @@ class CBSSportsIE(CBSSportsBaseIE):
class TwentyFourSevenSportsIE(CBSSportsBaseIE):
_WORKING = False
IE_NAME = '247sports'
_VALID_URL = r'https?://(?:www\.)?247sports\.com/Video/(?:[^/?#&]+-)?(?P<id>\d+)'
_TESTS = [{

View File

@ -88,6 +88,20 @@ class CCTVIE(InfoExtractor):
'params': {
'skip_download': True,
},
}, {
# videoCenterId: "id"
'url': 'http://news.cctv.com/2024/02/21/ARTIcU5tKIOIF2myEGCATkLo240221.shtml',
'info_dict': {
'id': '5c846c0518444308ba32c4159df3b3e0',
'ext': 'mp4',
'title': '《平“语”近人——习近平喜欢的典故》第三季 第5集风物长宜放眼量',
'uploader': 'yangjuan',
'timestamp': 1708554940,
'upload_date': '20240221',
},
'params': {
'skip_download': True,
},
}, {
# var ids = ["id"]
'url': 'http://www.ncpa-classic.com/clt/more/416/index.shtml',
@ -128,7 +142,7 @@ class CCTVIE(InfoExtractor):
video_id = self._search_regex(
[r'var\s+guid\s*=\s*["\']([\da-fA-F]+)',
r'videoCenterId["\']\s*,\s*["\']([\da-fA-F]+)',
r'videoCenterId(?:["\']\s*,|:)\s*["\']([\da-fA-F]+)',
r'changePlayer\s*\(\s*["\']([\da-fA-F]+)',
r'load[Vv]ideo\s*\(\s*["\']([\da-fA-F]+)',
r'var\s+initMyAray\s*=\s*["\']([\da-fA-F]+)',

View File

@ -51,7 +51,7 @@ class CeskaTelevizeIE(InfoExtractor):
'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
'only_matching': True,
'info_dict': {
'id': 402,
'id': '402',
'ext': 'mp4',
'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'is_live': True,

View File

@ -17,6 +17,7 @@ class CGTNIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1615295940,
'upload_date': '20210309',
'categories': ['Video'],
},
'params': {
'skip_download': True
@ -29,8 +30,8 @@ class CGTNIE(InfoExtractor):
'title': 'China, Indonesia vow to further deepen maritime cooperation',
'thumbnail': r're:^https?://.*\.png$',
'description': 'China and Indonesia vowed to upgrade their cooperation into the maritime sector and also for political security, economy, and cultural and people-to-people exchanges.',
'author': 'CGTN',
'category': 'China',
'creators': ['CGTN'],
'categories': ['China'],
'timestamp': 1622950200,
'upload_date': '20210606',
},
@ -45,7 +46,12 @@ class CGTNIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
download_url = self._html_search_regex(r'data-video ="(?P<url>.+m3u8)"', webpage, 'download_url')
datetime_str = self._html_search_regex(r'<span class="date">\s*(.+?)\s*</span>', webpage, 'datetime_str', fatal=False)
datetime_str = self._html_search_regex(
r'<span class="date">\s*(.+?)\s*</span>', webpage, 'datetime_str', fatal=False)
category = self._html_search_regex(
r'<span class="section">\s*(.+?)\s*</span>', webpage, 'category', fatal=False)
author = self._search_regex(
r'<div class="news-author-name">\s*(.+?)\s*</div>', webpage, 'author', default=None)
return {
'id': video_id,
@ -53,9 +59,7 @@ class CGTNIE(InfoExtractor):
'description': self._og_search_description(webpage, default=None),
'thumbnail': self._og_search_thumbnail(webpage),
'formats': self._extract_m3u8_formats(download_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls'),
'category': self._html_search_regex(r'<span class="section">\s*(.+?)\s*</span>',
webpage, 'category', fatal=False),
'author': self._html_search_regex(r'<div class="news-author-name">\s*(.+?)\s*</div>',
webpage, 'author', default=None, fatal=False),
'categories': [category] if category else None,
'creators': [author] if author else None,
'timestamp': try_get(unified_timestamp(datetime_str), lambda x: x - 8 * 3600),
}

View File

@ -1,207 +0,0 @@
import itertools
import json
import urllib.parse
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
int_or_none,
str_to_int,
url_or_none,
)
class ChingariBaseIE(InfoExtractor):
    # Shared helper for the Chingari extractors: converts one post object from
    # the API into an info dict.

    def _get_post(self, id, post_data):
        """Build the info dict for a single Chingari post.

        @param id           Value stored as the entry's 'id'
        @param post_data    Post object; must contain 'mediaLocation' with a
                            'base' URL prefix. All other keys are optional.
        @returns            Info dict with one format per transcoded rendition,
                            plus an 'original' format when a direct path exists
        """
        media_data = post_data['mediaLocation']
        base_url = media_data['base']
        # `or {}` also covers keys that are present but null in the JSON
        author_data = post_data.get('authorData') or {}
        song_data = post_data.get('song') or {}  # revisit this in future for differentiating b/w 'art' and 'author'

        # Keys of 'transcoded' carry the width after a one-character prefix
        # NOTE(review): presumably something like "p720" - confirm against the API
        formats = [{
            'format_id': frmt,
            'width': str_to_int(frmt[1:]),
            'url': base_url + frmt_path,
        } for frmt, frmt_path in (media_data.get('transcoded') or {}).items()]

        if media_data.get('path'):
            formats.append({
                'format_id': 'original',
                'format_note': 'Direct video.',
                'url': base_url + '/apipublic' + media_data['path'],
                'quality': 10,
            })

        # The test data suggests 'created_at' is in milliseconds; scale it down to seconds
        timestamp = str_to_int(post_data.get('created_at'))
        if timestamp:
            timestamp = int_or_none(timestamp, 1000)

        thumbnail_path = media_data.get('thumbnail')
        thumbnail = base_url + thumbnail_path if thumbnail_path else None
        username = author_data.get('username')
        uploader_url = 'https://chingari.io/' + username if username else None

        # Captions are URL-encoded. Decode once and reuse for title/description.
        # unquote_plus() only accepts str/bytes, so a post without a caption
        # must skip decoding instead of crashing.
        caption = post_data.get('caption')
        if caption is not None:
            caption = urllib.parse.unquote_plus(clean_html(caption))

        return {
            'id': id,
            'extractor_key': ChingariIE.ie_key(),
            'extractor': 'Chingari',
            'title': caption,
            'description': caption,
            'duration': media_data.get('duration'),
            'thumbnail': url_or_none(thumbnail),
            'like_count': post_data.get('likeCount'),
            'view_count': post_data.get('viewsCount'),
            'comment_count': post_data.get('commentCount'),
            'repost_count': post_data.get('shareCount'),
            'timestamp': timestamp,
            'uploader_id': post_data.get('userId') or author_data.get('_id'),
            'uploader': author_data.get('name'),
            'uploader_url': url_or_none(uploader_url),
            'track': song_data.get('title'),
            'artist': song_data.get('author'),
            'formats': formats,
        }
class ChingariIE(ChingariBaseIE):
    # Extractor for a single shared post (chingari.io/share/post?id=...)
    _VALID_URL = r'https?://(?:www\.)?chingari\.io/share/post\?id=(?P<id>[^&/#?]+)'
    _TESTS = [{
        'url': 'https://chingari.io/share/post?id=612f8f4ce1dc57090e8a7beb',
        'info_dict': {
            'id': '612f8f4ce1dc57090e8a7beb',
            'ext': 'mp4',
            'title': 'Happy birthday Srila Prabhupada',
            'description': 'md5:c7080ebfdfeb06016e638c286d6bc3fa',
            'duration': 0,
            'thumbnail': 'https://media.chingari.io/uploads/c41d30e2-06b6-4e3b-9b4b-edbb929cec06-1630506826911/thumbnail/198f993f-ce87-4623-82c6-cd071bd6d4f4-1630506828016.jpg',
            'like_count': int,
            'view_count': int,
            'comment_count': int,
            'repost_count': int,
            'timestamp': 1630506828,
            'upload_date': '20210901',
            'uploader_id': '5f0403982c8bd344f4813f8c',
            'uploader': 'ISKCON,Inc.',
            'uploader_url': 'https://chingari.io/iskcon,inc',
            'track': None,
            'artist': None,
        },
        'params': {'skip_download': True},
    }]

    def _real_extract(self, url):
        post_id = self._match_id(url)
        response = self._download_json(
            f'https://api.chingari.io/post/post_details/{post_id}', post_id)
        # The API reports failures in the JSON body rather than via the HTTP status
        if response['code'] != 200:
            raise ExtractorError(response['message'], expected=True)
        return self._get_post(post_id, response['data'])
class ChingariUserIE(ChingariBaseIE):
    # Extractor for a user profile: yields every post of the user as a playlist
    _VALID_URL = r'https?://(?:www\.)?chingari\.io/(?!share/post)(?P<id>[^/?]+)'
    _TESTS = [{
        'url': 'https://chingari.io/dada1023',
        'info_dict': {
            'id': 'dada1023',
        },
        'params': {'playlistend': 3},
        'playlist': [{
            'url': 'https://chingari.io/share/post?id=614781f3ade60b3a0bfff42a',
            'info_dict': {
                'id': '614781f3ade60b3a0bfff42a',
                'ext': 'mp4',
                'title': '#chingaribappa ',
                'description': 'md5:d1df21d84088770468fa63afe3b17857',
                'duration': 7,
                'thumbnail': 'https://media.chingari.io/uploads/346d86d4-abb2-474e-a164-ffccf2bbcb72-1632076273717/thumbnail/b0b3aac2-2b86-4dd1-909d-9ed6e57cf77c-1632076275552.jpg',
                'like_count': int,
                'view_count': int,
                'comment_count': int,
                'repost_count': int,
                'timestamp': 1632076275,
                'upload_date': '20210919',
                'uploader_id': '5efc4b12cca35c3d1794c2d3',
                'uploader': 'dada (girish) dhawale',
                'uploader_url': 'https://chingari.io/dada1023',
                'track': None,
                'artist': None,
            },
            'params': {'skip_download': True},
        }, {
            'url': 'https://chingari.io/share/post?id=6146b132bcbf860959e12cba',
            'info_dict': {
                'id': '6146b132bcbf860959e12cba',
                'ext': 'mp4',
                'title': 'Tactor harvesting',
                'description': 'md5:8403f12dce68828b77ecee7eb7e887b7',
                'duration': 59.3,
                'thumbnail': 'https://media.chingari.io/uploads/b353ca70-7a87-400d-93a6-fa561afaec86-1632022814584/thumbnail/c09302e3-2043-41b1-a2fe-77d97e5bd676-1632022834260.jpg',
                'like_count': int,
                'view_count': int,
                'comment_count': int,
                'repost_count': int,
                'timestamp': 1632022834,
                'upload_date': '20210919',
                'uploader_id': '5efc4b12cca35c3d1794c2d3',
                'uploader': 'dada (girish) dhawale',
                'uploader_url': 'https://chingari.io/dada1023',
                'track': None,
                'artist': None,
            },
            'params': {'skip_download': True},
        }, {
            'url': 'https://chingari.io/share/post?id=6145651b74cb030a64c40b82',
            'info_dict': {
                'id': '6145651b74cb030a64c40b82',
                'ext': 'mp4',
                'title': '#odiabhajan ',
                'description': 'md5:687ea36835b9276cf2af90f25e7654cb',
                'duration': 56.67,
                'thumbnail': 'https://media.chingari.io/uploads/6cbf216b-babc-4cce-87fe-ceaac8d706ac-1631937782708/thumbnail/8855754f-6669-48ce-b269-8cc0699ed6da-1631937819522.jpg',
                'like_count': int,
                'view_count': int,
                'comment_count': int,
                'repost_count': int,
                'timestamp': 1631937819,
                'upload_date': '20210918',
                'uploader_id': '5efc4b12cca35c3d1794c2d3',
                'uploader': 'dada (girish) dhawale',
                'uploader_url': 'https://chingari.io/dada1023',
                'track': None,
                'artist': None,
            },
            'params': {'skip_download': True},
        }],
    }, {
        'url': 'https://chingari.io/iskcon%2Cinc',
        'playlist_mincount': 1025,
        'info_dict': {
            'id': 'iskcon%2Cinc',
        },
    }]

    def _entries(self, id):
        # Pages are requested 20 posts at a time; the offset of page N is N * 20.
        # Iteration stops once the API reports that no more data is available.
        for page in itertools.count():
            query = {'userId': id, 'ownerId': id, 'skip': page * 20, 'limit': 20}
            posts = self._download_json(
                'https://api.chingari.io/users/getPosts', id,
                data=json.dumps(query).encode(),
                headers={'content-type': 'application/json;charset=UTF-8'},
                note=f'Downloading page {page}')
            for post in posts.get('data', []):
                post_data = post['post']
                yield self._get_post(post_data['_id'], post_data)
            if not posts['hasMoreData']:
                break

    def _real_extract(self, url):
        alt_id = self._match_id(url)
        # Resolve the user name from the URL to the internal user id used by the posts API
        user_json = self._download_json(f'https://api.chingari.io/user/{alt_id}', alt_id)
        if user_json['code'] != 200:
            raise ExtractorError(user_json['message'], expected=True)
        user_id = user_json['data']['_id']
        return self.playlist_result(self._entries(user_id), playlist_id=alt_id)

View File

@ -2,7 +2,7 @@ import functools
from .common import InfoExtractor
from ..utils import (
ExtractorError,
UserNotLive,
float_or_none,
int_or_none,
parse_iso8601,
@ -40,7 +40,7 @@ class CHZZKLiveIE(InfoExtractor):
note='Downloading channel info', errnote='Unable to download channel info')['content']
if live_detail.get('status') == 'CLOSE':
raise ExtractorError('The channel is not currently live', expected=True)
raise UserNotLive(video_id=channel_id)
live_playback = self._parse_json(live_detail['livePlaybackJson'], channel_id)

View File

@ -2,6 +2,7 @@ from .hbo import HBOBaseIE
class CinemaxIE(HBOBaseIE):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?cinemax\.com/(?P<path>[^/]+/video/[0-9a-z-]+-(?P<id>\d+))'
_TESTS = [{
'url': 'https://www.cinemax.com/warrior/video/s1-ep-1-recap-20126903',

View File

@ -1,76 +0,0 @@
from .common import InfoExtractor
from ..utils import (
int_or_none,
url_or_none,
)
class CliphunterIE(InfoExtractor):
    # Extractor for cliphunter.com watch pages; formats are read from the
    # `gexoFiles` JavaScript object embedded in the page.
    IE_NAME = 'cliphunter'
    # Verbose-mode pattern, reproduced verbatim
    _VALID_URL = r'''(?x)https?://(?:www\.)?cliphunter\.com/w/
(?P<id>[0-9]+)/
(?P<seo>.+?)(?:$|[#\?])
'''
    _TESTS = [{
        'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
        'md5': 'b7c9bbd4eb3a226ab91093714dcaa480',
        'info_dict': {
            'id': '1012420',
            'ext': 'flv',
            'title': 'Fun Jynx Maze solo',
            'thumbnail': r're:^https?://.*\.jpg$',
            'age_limit': 18,
        },
        'skip': 'Video gone',
    }, {
        'url': 'http://www.cliphunter.com/w/2019449/ShesNew__My_booty_girlfriend_Victoria_Paradices_pussy_filled_with_jizz',
        'md5': '55a723c67bfc6da6b0cfa00d55da8a27',
        'info_dict': {
            'id': '2019449',
            'ext': 'mp4',
            'title': 'ShesNew - My booty girlfriend, Victoria Paradice\'s pussy filled with jizz',
            'thumbnail': r're:^https?://.*\.jpg$',
            'age_limit': 18,
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        video_title = self._search_regex(
            r'mediaTitle = "([^"]+)"', webpage, 'title')

        gexo_json = self._search_regex(
            r'var\s+gexoFiles\s*=\s*({.+?});', webpage, 'gexo files')
        gexo_files = self._parse_json(gexo_json, video_id)

        formats = []
        for key, entry in gexo_files.items():
            video_url = url_or_none(entry.get('url'))
            if not video_url:
                continue
            fmt, height = entry.get('fmt'), entry.get('h')
            formats.append({
                'url': video_url,
                # Prefer "<fmt>_<height>p"; fall back to the object's own key
                'format_id': f'{fmt}_{height}p' if fmt and height else key,
                'width': int_or_none(entry.get('w')),
                'height': int_or_none(height),
                'tbr': int_or_none(entry.get('br')),
            })

        thumbnail = self._search_regex(
            r"var\s+mov_thumb\s*=\s*'([^']+)';",
            webpage, 'thumbnail', fatal=False)

        return {
            'id': video_id,
            'title': video_title,
            'formats': formats,
            'age_limit': self._rta_search(webpage),
            'thumbnail': thumbnail,
        }

View File

@ -2,6 +2,7 @@ from .onet import OnetBaseIE
class ClipRsIE(OnetBaseIE):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?clip\.rs/(?P<id>[^/]+)/\d+'
_TEST = {
'url': 'http://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732',

View File

@ -4,6 +4,7 @@ from .common import InfoExtractor
class CloserToTruthIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688',

View File

@ -4,27 +4,25 @@ from .common import InfoExtractor
class CloudflareStreamIE(InfoExtractor):
_SUBDOMAIN_RE = r'(?:(?:watch|iframe|customer-\w+)\.)?'
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
_EMBED_RE = r'embed\.%s/embed/[^/]+\.js\?.*?\bvideo=' % _DOMAIN_RE
_EMBED_RE = rf'embed\.{_DOMAIN_RE}/embed/[^/]+\.js\?.*?\bvideo='
_ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
_VALID_URL = r'''(?x)
https?://
(?:
(?:watch\.)?%s/|
%s
)
(?P<id>%s)
''' % (_DOMAIN_RE, _EMBED_RE, _ID_RE)
_EMBED_REGEX = [fr'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1']
_VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})'
_EMBED_REGEX = [
rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1',
rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
]
_TESTS = [{
'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
'info_dict': {
'id': '31c9291ab41fac05471db4e73aa11717',
'ext': 'mp4',
'title': '31c9291ab41fac05471db4e73aa11717',
'thumbnail': 'https://videodelivery.net/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg',
},
'params': {
'skip_download': True,
'skip_download': 'm3u8',
},
}, {
'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
@ -35,6 +33,21 @@ class CloudflareStreamIE(InfoExtractor):
}, {
'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e',
'only_matching': True,
}, {
'url': 'https://customer-aw5py76sw8wyqzmh.cloudflarestream.com/2463f6d3e06fa29710a337f5f5389fd8/iframe',
'only_matching': True,
}]
_WEBPAGE_TESTS = [{
'url': 'https://upride.cc/incident/shoulder-pass-at-light/',
'info_dict': {
'id': 'eaef9dea5159cf968be84241b5cedfe7',
'ext': 'mp4',
'title': 'eaef9dea5159cf968be84241b5cedfe7',
'thumbnail': 'https://videodelivery.net/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg',
},
'params': {
'skip_download': 'm3u8',
},
}]
def _real_extract(self, url):

View File

@ -1,68 +1,97 @@
from .common import InfoExtractor
from ..utils import smuggle_url
class CNBCIE(InfoExtractor):
    # Extractor for video.cnbc.com gallery links; delegates the actual
    # extraction to the ThePlatform extractor via a url_transparent result.
    _VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://video.cnbc.com/gallery/?video=3000503714',
        'info_dict': {
            'id': '3000503714',
            'ext': 'mp4',
            'title': 'Fighting zombies is big business',
            'description': 'md5:0c100d8e1a7947bd2feec9a5550e519e',
            'timestamp': 1459332000,
            'upload_date': '20160330',
            'uploader': 'NBCU-CNBC',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': 'Dead link',
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        theplatform_url = (
            'http://link.theplatform.com/s/gZWlPC/media/guid/2408950221/'
            f'{video_id}?mbr=true&manifest=m3u')
        return {
            '_type': 'url_transparent',
            'ie_key': 'ThePlatform',
            # The smuggled flag is passed along with the URL to the ThePlatform extractor
            'url': smuggle_url(theplatform_url, {'force_smil_url': True}),
            'id': video_id,
        }
from ..utils import int_or_none, parse_iso8601, str_or_none, url_or_none
from ..utils.traversal import traverse_obj
class CNBCVideoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?cnbc\.com(?P<path>/video/(?:[^/]+/)+(?P<id>[^./?#&]+)\.html)'
_TEST = {
'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
_VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/?#]+/)+(?P<id>[^./?#&]+)\.html'
_TESTS = [{
'url': 'https://www.cnbc.com/video/2023/12/07/mcdonalds-just-unveiled-cosmcsits-new-spinoff-brand.html',
'info_dict': {
'id': '7000031301',
'ext': 'mp4',
'title': "Trump: I don't necessarily agree with raising rates",
'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3',
'timestamp': 1531958400,
'upload_date': '20180719',
'uploader': 'NBCU-CNBC',
'id': '107344774',
'display_id': 'mcdonalds-just-unveiled-cosmcsits-new-spinoff-brand',
'modified_timestamp': 1702053483,
'timestamp': 1701977810,
'channel': 'News Videos',
'upload_date': '20231207',
'description': 'md5:882c001d85cb43d7579b514307b3e78b',
'release_timestamp': 1701977375,
'modified_date': '20231208',
'release_date': '20231207',
'duration': 65,
'creators': ['Sean Conlon'],
'title': 'Here\'s a first look at McDonald\'s new spinoff brand, CosMc\'s',
'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107344192-1701894812493-CosMcsskyHero_2336x1040_hero-desktop.jpg?v=1701894855',
},
'params': {
'skip_download': True,
'expected_warnings': ['Unable to download f4m manifest'],
}, {
'url': 'https://www.cnbc.com/video/2023/12/08/jim-cramer-shares-his-take-on-seattles-tech-scene.html',
'info_dict': {
'creators': ['Jim Cramer'],
'channel': 'Mad Money with Jim Cramer',
'description': 'md5:72925be21b952e95eba51178dddf4e3e',
'duration': 299.0,
'ext': 'mp4',
'id': '107345451',
'display_id': 'jim-cramer-shares-his-take-on-seattles-tech-scene',
'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107345481-1702079431MM-B-120823.jpg?v=1702079430',
'timestamp': 1702080139,
'title': 'Jim Cramer shares his take on Seattle\'s tech scene',
'release_date': '20231208',
'upload_date': '20231209',
'modified_timestamp': 1702080139,
'modified_date': '20231209',
'release_timestamp': 1702073551,
},
'skip': 'Dead link',
}
'expected_warnings': ['Unable to download f4m manifest'],
}, {
'url': 'https://www.cnbc.com/video/2023/12/08/the-epicenter-of-ai-is-in-seattle-says-jim-cramer.html',
'info_dict': {
'creators': ['Jim Cramer'],
'channel': 'Mad Money with Jim Cramer',
'description': 'md5:72925be21b952e95eba51178dddf4e3e',
'duration': 113.0,
'ext': 'mp4',
'id': '107345474',
'display_id': 'the-epicenter-of-ai-is-in-seattle-says-jim-cramer',
'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107345486-Screenshot_2023-12-08_at_70339_PM.png?v=1702080248',
'timestamp': 1702080535,
'title': 'The epicenter of AI is in Seattle, says Jim Cramer',
'release_timestamp': 1702077347,
'modified_timestamp': 1702080535,
'release_date': '20231208',
'upload_date': '20231209',
'modified_date': '20231209',
},
'expected_warnings': ['Unable to download f4m manifest'],
}]
def _real_extract(self, url):
path, display_id = self._match_valid_url(url).groups()
video_id = self._download_json(
'https://webql-redesign.cnbcfm.com/graphql', display_id, query={
'query': '''{
page(path: "%s") {
vcpsId
}
}''' % path,
})['data']['page']['vcpsId']
return self.url_result(
'http://video.cnbc.com/gallery/?video=%d' % video_id,
CNBCIE.ie_key())
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
data = self._search_json(r'window\.__s_data=', webpage, 'video data', display_id)
player_data = traverse_obj(data, (
'page', 'page', 'layout', ..., 'columns', ..., 'modules',
lambda _, v: v['name'] == 'clipPlayer', 'data', {dict}), get_all=False)
return {
'id': display_id,
'display_id': display_id,
'formats': self._extract_akamai_formats(player_data['playbackURL'], display_id),
**self._search_json_ld(webpage, display_id, fatal=False),
**traverse_obj(player_data, {
'id': ('id', {str_or_none}),
'title': ('title', {str}),
'description': ('description', {str}),
'creators': ('author', ..., 'name', {str}),
'timestamp': ('datePublished', {parse_iso8601}),
'release_timestamp': ('uploadDate', {parse_iso8601}),
'modified_timestamp': ('dateLastPublished', {parse_iso8601}),
'thumbnail': ('thumbnail', {url_or_none}),
'duration': ('duration', {int_or_none}),
'channel': ('section', 'title', {str}),
}),
}

View File

@ -247,6 +247,8 @@ class InfoExtractor:
(For internal use only)
* http_chunk_size Chunk size for HTTP downloads
* ffmpeg_args Extra arguments for ffmpeg downloader
* is_dash_periods Whether the format is a result of merging
multiple DASH periods.
RTMP formats can also have the additional fields: page_url,
app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn,
rtmp_protocol, rtmp_real_time
@ -260,7 +262,7 @@ class InfoExtractor:
direct: True if a direct video file was given (must only be set by GenericIE)
alt_title: A secondary title of the video.
display_id An alternative identifier for the video, not necessarily
display_id: An alternative identifier for the video, not necessarily
unique, but available before title. Typically, id is
something like "4234987", title "Dancing naked mole rats",
and display_id "dancing-naked-mole-rats"
@ -278,7 +280,7 @@ class InfoExtractor:
description: Full video description.
uploader: Full name of the video uploader.
license: License name the video is licensed under.
creator: The creator of the video.
creators: List of creators of the video.
timestamp: UNIX timestamp of the moment the video was uploaded
upload_date: Video upload date in UTC (YYYYMMDD).
If not explicitly set, calculated from timestamp
@ -422,16 +424,16 @@ class InfoExtractor:
track_number: Number of the track within an album or a disc, as an integer.
track_id: Id of the track (useful in case of custom indexing, e.g. 6.iii),
as a unicode string.
artist: Artist(s) of the track.
genre: Genre(s) of the track.
artists: List of artists of the track.
composers: List of composers of the piece.
genres: List of genres of the track.
album: Title of the album the track belongs to.
album_type: Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc).
album_artist: List of all artists appeared on the album (e.g.
"Ash Borer / Fell Voices" or "Various Artists", useful for splits
and compilations).
album_artists: List of all artists that appeared on the album.
E.g. ["Ash Borer", "Fell Voices"] or ["Various Artists"].
Useful for splits and compilations.
disc_number: Number of the disc or other physical medium the track belongs to,
as an integer.
composer: Composer of the piece
The following fields should only be set for clips that should be cut from the original video:
@ -442,6 +444,18 @@ class InfoExtractor:
rows: Number of rows in each storyboard fragment, as an integer
columns: Number of columns in each storyboard fragment, as an integer
The following fields are deprecated and should not be set by new code:
composer: Use "composers" instead.
Composer(s) of the piece, comma-separated.
artist: Use "artists" instead.
Artist(s) of the track, comma-separated.
genre: Use "genres" instead.
Genre(s) of the track, comma-separated.
album_artist: Use "album_artists" instead.
All artists that appeared on the album, comma-separated.
creator: Use "creators" instead.
The creator of the video.
Unless mentioned otherwise, the fields should be Unicode strings.
Unless mentioned otherwise, None is equivalent to absence of information.
@ -2530,7 +2544,11 @@ class InfoExtractor:
self._report_ignoring_subs('DASH')
return fmts
def _extract_mpd_formats_and_subtitles(
def _extract_mpd_formats_and_subtitles(self, *args, **kwargs):
periods = self._extract_mpd_periods(*args, **kwargs)
return self._merge_mpd_periods(periods)
def _extract_mpd_periods(
self, mpd_url, video_id, mpd_id=None, note=None, errnote=None,
fatal=True, data=None, headers={}, query={}):
@ -2543,17 +2561,16 @@ class InfoExtractor:
errnote='Failed to download MPD manifest' if errnote is None else errnote,
fatal=fatal, data=data, headers=headers, query=query)
if res is False:
return [], {}
return []
mpd_doc, urlh = res
if mpd_doc is None:
return [], {}
return []
# We could have been redirected to a new url when we retrieved our mpd file.
mpd_url = urlh.url
mpd_base_url = base_url(mpd_url)
return self._parse_mpd_formats_and_subtitles(
mpd_doc, mpd_id, mpd_base_url, mpd_url)
return self._parse_mpd_periods(mpd_doc, mpd_id, mpd_base_url, mpd_url)
def _parse_mpd_formats(self, *args, **kwargs):
fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs)
@ -2561,8 +2578,39 @@ class InfoExtractor:
self._report_ignoring_subs('DASH')
return fmts
def _parse_mpd_formats_and_subtitles(
self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
def _parse_mpd_formats_and_subtitles(self, *args, **kwargs):
periods = self._parse_mpd_periods(*args, **kwargs)
return self._merge_mpd_periods(periods)
def _merge_mpd_periods(self, periods):
    """
    Combine the formats and subtitles of all periods of an MPD manifest,
    concatenating the fragments of formats that are otherwise identical.

    @param periods  Iterable of period dicts, each with a 'formats' list
                    and a 'subtitles' mapping of language -> list of tracks
    @returns        (formats, subtitles): merged list of format dicts and
                    merged mapping of language -> list of subtitle tracks
    """
    # Fields that may legitimately differ between periods of the same stream
    ignored_keys = ('format_id', 'fragments', 'manifest_stream_number')

    merged_formats, merged_subtitles = {}, {}
    for period in periods:
        for fmt in period['formats']:
            assert 'is_dash_periods' not in fmt, 'format already processed'
            fmt['is_dash_periods'] = True
            # Two formats are considered the same stream when all remaining values match
            signature = tuple(
                value for key, value in fmt.items() if key not in ignored_keys)
            first_seen = merged_formats.setdefault(signature, fmt)
            if first_seen is not fmt and 'fragments' in fmt:
                # Later period of an already known stream: append its fragments
                first_seen.setdefault('fragments', []).extend(fmt['fragments'])

        period_subtitles = period['subtitles']
        if merged_subtitles and period_subtitles:
            self.report_warning(bug_reports_message(
                'Found subtitles in multiple periods in the DASH manifest; '
                'if part of the subtitles are missing,'
            ), only_once=True)

        for lang, tracks in period_subtitles.items():
            merged_subtitles.setdefault(lang, []).extend(tracks)

    return list(merged_formats.values()), merged_subtitles
def _parse_mpd_periods(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
"""
Parse formats from MPD manifest.
References:
@ -2641,9 +2689,13 @@ class InfoExtractor:
return ms_info
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
formats, subtitles = [], {}
stream_numbers = collections.defaultdict(int)
for period in mpd_doc.findall(_add_ns('Period')):
for period_idx, period in enumerate(mpd_doc.findall(_add_ns('Period'))):
period_entry = {
'id': period.get('id', f'period-{period_idx}'),
'formats': [],
'subtitles': collections.defaultdict(list),
}
period_duration = parse_duration(period.get('duration')) or mpd_duration
period_ms_info = extract_multisegment_info(period, {
'start_number': 1,
@ -2893,11 +2945,10 @@ class InfoExtractor:
if content_type in ('video', 'audio', 'image/jpeg'):
f['manifest_stream_number'] = stream_numbers[f['url']]
stream_numbers[f['url']] += 1
formats.append(f)
period_entry['formats'].append(f)
elif content_type == 'text':
subtitles.setdefault(lang or 'und', []).append(f)
return formats, subtitles
period_entry['subtitles'][lang or 'und'].append(f)
yield period_entry
def _extract_ism_formats(self, *args, **kwargs):
fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)

View File

@ -65,7 +65,7 @@ class CPACIE(InfoExtractor):
'title': title,
'description': str_or_none(content['details'].get('description_%s_t' % (url_lang, ))),
'timestamp': unified_timestamp(content['details'].get('liveDateTime')),
'category': [category] if category else None,
'categories': [category] if category else None,
'thumbnail': urljoin(url, str_or_none(content['details'].get('image_%s_s' % (url_lang, )))),
'is_live': is_live(content['details'].get('type')),
}

View File

@ -1,12 +1,13 @@
import json
from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
from ..utils import (
dict_get,
get_element_by_id,
js_to_json,
traverse_obj,
extract_attributes,
get_element_html_by_class,
get_element_text_and_html_by_tag,
)
from ..utils.traversal import traverse_obj
class CraftsyIE(InfoExtractor):
@ -41,28 +42,34 @@ class CraftsyIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_data = self._parse_json(self._search_regex(
r'class_video_player_vars\s*=\s*({.*})\s*;',
get_element_by_id('vidstore-classes_class-video-player-js-extra', webpage),
'video data'), video_id, transform_source=js_to_json)
video_player = get_element_html_by_class('class-video-player', webpage)
video_data = traverse_obj(video_player, (
{extract_attributes}, 'wire:snapshot', {json.loads}, 'data', {dict})) or {}
video_js = traverse_obj(video_player, (
{lambda x: get_element_text_and_html_by_tag('video-js', x)}, 1, {extract_attributes})) or {}
account_id = traverse_obj(video_data, ('video_player', 'bc_account_id'))
has_access = video_data.get('userHasAccess')
lessons = traverse_obj(video_data, ('lessons', ..., ..., lambda _, v: v['video_id']))
entries = []
class_preview = traverse_obj(video_data, ('video_player', 'class_preview'))
if class_preview:
v_id = class_preview.get('video_id')
entries.append(self.url_result(
f'http://players.brightcove.net/{account_id}/default_default/index.html?videoId={v_id}',
BrightcoveNewIE, v_id, class_preview.get('title')))
preview_id = video_js.get('data-video-id')
if preview_id and preview_id not in traverse_obj(lessons, (..., 'video_id')):
if not lessons and not has_access:
self.report_warning(
'Only extracting preview. For the full class, pass cookies '
+ f'from an account that has access. {self._login_hint()}')
lessons.append({'video_id': preview_id})
if dict_get(video_data, ('is_free', 'user_has_access')):
entries += [
self.url_result(
if not lessons and not has_access:
self.raise_login_required('You do not have access to this class')
account_id = video_data.get('accountId') or video_js['data-account']
def entries(lessons):
for lesson in lessons:
yield self.url_result(
f'http://players.brightcove.net/{account_id}/default_default/index.html?videoId={lesson["video_id"]}',
BrightcoveNewIE, lesson['video_id'], lesson.get('title'))
for lesson in video_data['lessons']]
return self.playlist_result(
entries, video_id, video_data.get('class_title'),
entries(lessons), video_id, self._html_search_meta(('og:title', 'twitter:title'), webpage),
self._html_search_meta(('og:description', 'description'), webpage, default=None))

View File

@ -514,7 +514,7 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
'track': 'Egaono Hana',
'artist': 'Goose house',
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
'genre': ['J-Pop'],
'genres': ['J-Pop'],
},
'params': {'skip_download': 'm3u8'},
}, {
@ -527,7 +527,7 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
'track': 'Crossing Field',
'artist': 'LiSA',
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
'genre': ['Anime'],
'genres': ['Anime'],
},
'params': {'skip_download': 'm3u8'},
}, {
@ -541,7 +541,7 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
'artist': 'LiSA',
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
'description': 'md5:747444e7e6300907b7a43f0a0503072e',
'genre': ['J-Pop'],
'genres': ['J-Pop'],
},
'params': {'skip_download': 'm3u8'},
}, {
@ -594,7 +594,7 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}),
}),
'genre': ('genres', ..., 'displayValue'),
'genres': ('genres', ..., 'displayValue'),
'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
}),
}
@ -611,7 +611,7 @@ class CrunchyrollArtistIE(CrunchyrollBaseIE):
'info_dict': {
'id': 'MA179CB50D',
'title': 'LiSA',
'genre': ['J-Pop', 'Anime', 'Rock'],
'genres': ['J-Pop', 'Anime', 'Rock'],
'description': 'md5:16d87de61a55c3f7d6c454b73285938e',
},
'playlist_mincount': 83,
@ -645,6 +645,6 @@ class CrunchyrollArtistIE(CrunchyrollBaseIE):
'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}),
}),
'genre': ('genres', ..., 'displayValue'),
'genres': ('genres', ..., 'displayValue'),
}),
}

View File

@ -114,7 +114,7 @@ class CybraryCourseIE(CybraryBaseIE):
_TESTS = [{
'url': 'https://app.cybrary.it/browse/course/az-500-microsoft-azure-security-technologies',
'info_dict': {
'id': 898,
'id': '898',
'title': 'AZ-500: Microsoft Azure Security Technologies',
'description': 'md5:69549d379c0fc1dec92926d4e8b6fbd4'
},
@ -122,7 +122,7 @@ class CybraryCourseIE(CybraryBaseIE):
}, {
'url': 'https://app.cybrary.it/browse/course/cybrary-orientation',
'info_dict': {
'id': 1245,
'id': '1245',
'title': 'Cybrary Orientation',
'description': 'md5:9e69ff66b32fe78744e0ad4babe2e88e'
},

View File

@ -1,6 +1,7 @@
import functools
import json
import re
import urllib.parse
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
@ -44,36 +45,41 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
self._FAMILY_FILTER = ff == 'on' if ff else age_restricted(18, self.get_param('age_limit'))
self._set_dailymotion_cookie('ff', 'on' if self._FAMILY_FILTER else 'off')
def _get_token(self, xid):
cookies = self._get_dailymotion_cookies()
token = self._get_cookie_value(cookies, 'access_token') or self._get_cookie_value(cookies, 'client_token')
if token:
return token
data = {
'client_id': 'f1a362d288c1b98099c7',
'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
}
username, password = self._get_login_info()
if username:
data.update({
'grant_type': 'password',
'password': password,
'username': username,
})
else:
data['grant_type'] = 'client_credentials'
try:
token = self._download_json(
'https://graphql.api.dailymotion.com/oauth/token',
None, 'Downloading Access Token',
data=urlencode_postdata(data))['access_token']
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
raise ExtractorError(self._parse_json(
e.cause.response.read().decode(), xid)['error_description'], expected=True)
raise
self._set_dailymotion_cookie('access_token' if username else 'client_token', token)
return token
def _call_api(self, object_type, xid, object_fields, note, filter_extra=None):
if not self._HEADERS.get('Authorization'):
cookies = self._get_dailymotion_cookies()
token = self._get_cookie_value(cookies, 'access_token') or self._get_cookie_value(cookies, 'client_token')
if not token:
data = {
'client_id': 'f1a362d288c1b98099c7',
'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
}
username, password = self._get_login_info()
if username:
data.update({
'grant_type': 'password',
'password': password,
'username': username,
})
else:
data['grant_type'] = 'client_credentials'
try:
token = self._download_json(
'https://graphql.api.dailymotion.com/oauth/token',
None, 'Downloading Access Token',
data=urlencode_postdata(data))['access_token']
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
raise ExtractorError(self._parse_json(
e.cause.response.read().decode(), xid)['error_description'], expected=True)
raise
self._set_dailymotion_cookie('access_token' if username else 'client_token', token)
self._HEADERS['Authorization'] = 'Bearer ' + token
self._HEADERS['Authorization'] = f'Bearer {self._get_token(xid)}'
resp = self._download_json(
'https://graphql.api.dailymotion.com/', xid, note, data=json.dumps({
@ -393,9 +399,55 @@ class DailymotionPlaylistIE(DailymotionPlaylistBaseIE):
yield '//dailymotion.com/playlist/%s' % p
class DailymotionSearchIE(DailymotionPlaylistBaseIE):
    """Extractor that turns a Dailymotion video-search URL into a playlist of results."""

    IE_NAME = 'dailymotion:search'
    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/search/(?P<id>[^/?#]+)/videos'
    # Used both as the ``limit`` sent to the search API and as the page size
    # passed to OnDemandPagedList, so the two cannot drift apart
    _PAGE_SIZE = 20
    _TESTS = [{
        'url': 'http://www.dailymotion.com/search/king of turtles/videos',
        'info_dict': {
            'id': 'king of turtles',
            'title': 'king of turtles',
        },
        'playlist_mincount': 90,
    }]
    _SEARCH_QUERY = 'query SEARCH_QUERY( $query: String! $page: Int $limit: Int ) { search { videos( query: $query first: $limit page: $page ) { edges { node { xid } } } } } '

    def _call_search_api(self, term, page, note):
        """Run the search query for one page and return the ``data.search`` dict.

        Sets the ``Authorization`` header from ``_get_token`` if it is not
        already present. Raises ExtractorError with the first API error
        message (or a generic message) when ``data.search`` is missing.
        """
        if not self._HEADERS.get('Authorization'):
            self._HEADERS['Authorization'] = f'Bearer {self._get_token(term)}'

        resp = self._download_json(
            'https://graphql.api.dailymotion.com/', None, note, data=json.dumps({
                'operationName': 'SEARCH_QUERY',
                'query': self._SEARCH_QUERY,
                'variables': {
                    # Keep the request size tied to the paging size
                    'limit': self._PAGE_SIZE,
                    'page': page,
                    'query': term,
                },
            }).encode(), headers=self._HEADERS)

        obj = traverse_obj(resp, ('data', 'search', {dict}))
        if not obj:
            raise ExtractorError(
                traverse_obj(resp, ('errors', 0, 'message', {str})) or 'Could not fetch search data')

        return obj

    def _fetch_page(self, term, page):
        """Yield a url_result for every video on the given results page.

        ``page`` is incremented before the API call, i.e. the API is queried
        with ``page + 1``.
        """
        page += 1
        response = self._call_search_api(term, page, f'Searching "{term}" page {page}')
        for xid in traverse_obj(response, ('videos', 'edges', ..., 'node', 'xid')):
            yield self.url_result(f'https://www.dailymotion.com/video/{xid}', DailymotionIE, xid)

    def _real_extract(self, url):
        """Return a lazily paged playlist whose ID and title are the decoded search term."""
        term = urllib.parse.unquote_plus(self._match_id(url))
        return self.playlist_result(
            OnDemandPagedList(functools.partial(self._fetch_page, term), self._PAGE_SIZE), term, term)
class DailymotionUserIE(DailymotionPlaylistBaseIE):
IE_NAME = 'dailymotion:user'
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<id>[^/]+)'
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search)/)(?:(?:old/)?user/)?(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.dailymotion.com/user/nqtv',
'info_dict': {

View File

@ -83,7 +83,6 @@ class DamtomoRecordIE(DamtomoBaseIE):
'info_dict': {
'id': '27376862',
'title': 'イカSUMMER [良音]',
'description': None,
'uploader': '',
'uploader_id': 'MzAyMDExNTY',
'upload_date': '20210721',

View File

@ -27,7 +27,7 @@ class DaumIE(DaumBaseIE):
'duration': 2117,
'view_count': int,
'comment_count': int,
'uploader_id': 186139,
'uploader_id': '186139',
'uploader': '콘간지',
'timestamp': 1387310323,
},
@ -44,7 +44,7 @@ class DaumIE(DaumBaseIE):
'view_count': int,
'comment_count': int,
'uploader': 'MBC 예능',
'uploader_id': 132251,
'uploader_id': '132251',
'timestamp': 1421604228,
},
}, {
@ -63,7 +63,7 @@ class DaumIE(DaumBaseIE):
'view_count': int,
'comment_count': int,
'uploader': '까칠한 墮落始祖 황비홍님의',
'uploader_id': 560824,
'uploader_id': '560824',
'timestamp': 1203770745,
},
}, {
@ -77,7 +77,7 @@ class DaumIE(DaumBaseIE):
'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
'upload_date': '20170129',
'uploader': '쇼! 음악중심',
'uploader_id': 2653210,
'uploader_id': '2653210',
'timestamp': 1485684628,
},
}]
@ -107,7 +107,7 @@ class DaumClipIE(DaumBaseIE):
'duration': 3868,
'view_count': int,
'uploader': 'GOMeXP',
'uploader_id': 6667,
'uploader_id': '6667',
'timestamp': 1377911092,
},
}, {

View File

@ -1,54 +0,0 @@
from .common import InfoExtractor
from ..utils import js_to_json
class DiggIE(InfoExtractor):
    """Extractor for digg.com video pages, which delegate to an embedded provider."""

    _VALID_URL = r'https?://(?:www\.)?digg\.com/video/(?P<id>[^/?#&]+)'
    _TESTS = [{
        # JWPlatform via provider
        'url': 'http://digg.com/video/sci-fi-short-jonah-daniel-kaluuya-get-out',
        'info_dict': {
            'id': 'LcqvmS0b',
            'ext': 'mp4',
            'title': "'Get Out' Star Daniel Kaluuya Goes On 'Moby Dick'-Like Journey In Sci-Fi Short 'Jonah'",
            'description': 'md5:541bb847648b6ee3d6514bc84b82efda',
            'upload_date': '20180109',
            'timestamp': 1515530551,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # Youtube via provider
        'url': 'http://digg.com/video/dog-boat-seal-play',
        'only_matching': True,
    }, {
        # vimeo as regular embed
        'url': 'http://digg.com/video/dream-girl-short-film',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the page to a YouTube or JWPlatform result, else hand off to Generic.

        The provider and video ID are read from the page's ``video_info``
        JavaScript object. Any page without a usable ``video_id`` and known
        provider is passed to the generic extractor.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # With fatal=False the parser may hand back a non-dict (None) for a
        # malformed object; fall back to an empty dict so the lookups below
        # cannot fail and the page still reaches the Generic fallback
        info = self._parse_json(
            self._search_regex(
                r'(?s)video_info\s*=\s*({.+?});\n', webpage, 'video info',
                default='{}'), display_id, transform_source=js_to_json,
            fatal=False) or {}

        video_id = info.get('video_id')

        if video_id:
            provider = info.get('provider_name')
            if provider == 'youtube':
                return self.url_result(
                    video_id, ie='Youtube', video_id=video_id)
            elif provider == 'jwplayer':
                return self.url_result(
                    'jwplatform:%s' % video_id, ie='JWPlatform',
                    video_id=video_id)

        return self.url_result(url, 'Generic')

View File

@ -9,6 +9,7 @@ from ..utils import (
class DTubeIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?d\.tube/(?:#!/)?v/(?P<uploader_id>[0-9a-z.-]+)/(?P<id>[0-9a-z]{8})'
_TEST = {
'url': 'https://d.tube/#!/v/broncnutz/x380jtr1',

View File

@ -8,9 +8,9 @@ from ..utils import (
class DumpertIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl(?:
/(?:mediabase|embed|item)/|
(?:/toppers|/latest|/?)\?selectedId=
(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl/(?:
(?:mediabase|embed|item)/|
[^#]*[?&]selectedId=
)(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'''
_TESTS = [{
'url': 'https://www.dumpert.nl/item/6646981_951bc60f',
@ -56,6 +56,9 @@ class DumpertIE(InfoExtractor):
}, {
'url': 'https://www.dumpert.nl/?selectedId=100031688_b317a185',
'only_matching': True,
}, {
'url': 'https://www.dumpert.nl/toppers/dag?selectedId=100086074_f5cef3ac',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@ -32,7 +32,7 @@ class DuoplayIE(InfoExtractor):
'season_number': 2,
'episode': 'Operatsioon "Öö"',
'episode_number': 12,
'episode_id': 24,
'episode_id': '24',
},
}, {
'note': 'Empty title',
@ -50,7 +50,7 @@ class DuoplayIE(InfoExtractor):
'series_id': '17',
'season': 'Season 2',
'season_number': 2,
'episode_id': 14,
'episode_id': '14',
'release_year': 2010,
},
}, {
@ -99,6 +99,6 @@ class DuoplayIE(InfoExtractor):
'season_number': ('season_id', {int_or_none}),
'episode': 'subtitle',
'episode_number': ('episode_nr', {int_or_none}),
'episode_id': ('episode_id', {int_or_none}),
'episode_id': ('episode_id', {str_or_none}),
}, get_all=False) if episode_attr.get('category') != 'movies' else {}),
}

View File

@ -8,6 +8,8 @@ from ..compat import compat_urlparse
class DWIE(InfoExtractor):
_WORKING = False
_ENABLED = None # XXX: pass through to GenericIE
IE_NAME = 'dw'
_VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+(?:av|e)-(?P<id>\d+)'
_TESTS = [{
@ -82,6 +84,8 @@ class DWIE(InfoExtractor):
class DWArticleIE(InfoExtractor):
_WORKING = False
_ENABLED = None # XXX: pass through to GenericIE
IE_NAME = 'dw:article'
_VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+a-(?P<id>\d+)'
_TEST = {

View File

@ -42,7 +42,6 @@ class EplusIbIE(InfoExtractor):
'live_status': 'was_live',
'release_date': '20210719',
'release_timestamp': 1626703200,
'description': None,
},
'params': {
'skip_download': True,

View File

@ -13,6 +13,7 @@ from ..utils import (
class EuropaIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://ec\.europa\.eu/avservices/(?:video/player|audio/audioDetails)\.cfm\?.*?\bref=(?P<id>[A-Za-z0-9-]+)'
_TESTS = [{
'url': 'http://ec.europa.eu/avservices/video/player.cfm?ref=I107758',

View File

@ -10,6 +10,7 @@ from ..utils import (
class FancodeVodIE(InfoExtractor):
_WORKING = False
IE_NAME = 'fancode:vod'
_VALID_URL = r'https?://(?:www\.)?fancode\.com/video/(?P<id>[0-9]+)\b'
@ -126,6 +127,7 @@ class FancodeVodIE(InfoExtractor):
class FancodeLiveIE(FancodeVodIE): # XXX: Do not subclass from concrete IE
_WORKING = False
IE_NAME = 'fancode:live'
_VALID_URL = r'https?://(www\.)?fancode\.com/match/(?P<id>[0-9]+).+'

View File

@ -1,69 +0,0 @@
from .common import InfoExtractor
from ..utils import int_or_none
class FilmmoduIE(InfoExtractor):
    """Extractor for filmmodu.org movie pages (dubbed and subtitled variants)."""

    _VALID_URL = r'https?://(?:www\.)?filmmodu\.org/(?P<id>[^/]+-(?:turkce-dublaj-izle|altyazili-izle))'
    _TESTS = [{
        'url': 'https://www.filmmodu.org/f9-altyazili-izle',
        'md5': 'aeefd955c2a508a5bdaa3bcec8eeb0d4',
        'info_dict': {
            'id': '10804',
            'ext': 'mp4',
            'title': 'F9',
            'description': 'md5:2713f584a4d65afa2611e2948d0b953c',
            'subtitles': {
                'tr': [{
                    'ext': 'vtt',
                }],
            },
            'thumbnail': r're:https://s[0-9]+.filmmodu.org/uploads/movie/cover/10804/xXHZeb1yhJvnSHPzZDqee0zfMb6.jpg',
        },
    }, {
        'url': 'https://www.filmmodu.org/the-godfather-turkce-dublaj-izle',
        'md5': '109f2fcb9c941330eed133971c035c00',
        'info_dict': {
            'id': '3646',
            'ext': 'mp4',
            'title': 'Baba',
            'description': 'md5:d43fd651937cd75cc650883ebd8d8461',
            'thumbnail': r're:https://s[0-9]+.filmmodu.org/uploads/movie/cover/3646/6xKCYgH16UuwEGAyroLU6p8HLIn.jpg',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict from the page's Open Graph tags and the get-source endpoint.

        The numeric movie ID and the video type are read from inline
        JavaScript variables on the page and sent to ``/get-source``, whose
        ``sources`` list supplies the formats and whose optional
        ``subtitle`` URL supplies a Turkish subtitle track.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        og_title = self._og_search_title(webpage, fatal=True)
        og_description = self._og_search_description(webpage)
        og_thumbnail = self._og_search_thumbnail(webpage)

        movie_id = self._search_regex(r'var\s*videoId\s*=\s*\'([0-9]+)\'', webpage, 'video_id')
        movie_type = self._search_regex(r'var\s*videoType\s*=\s*\'([a-z]+)\'', webpage, 'video_type')

        source_query = {
            'movie_id': movie_id,
            'type': movie_type,
        }
        data = self._download_json(
            'https://www.filmmodu.org/get-source', movie_id, query=source_query)

        formats = []
        for source in data['sources']:
            formats.append({
                'url': source['src'],
                'ext': 'mp4',
                'format_id': source['label'],
                'height': int_or_none(source.get('res')),
                'protocol': 'm3u8_native',
            })

        subtitle_url = data.get('subtitle')
        subtitles = {'tr': [{'url': subtitle_url}]} if subtitle_url else {}

        return {
            'id': movie_id,
            'display_id': display_id,
            'title': og_title,
            'description': og_description,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnail': og_thumbnail,
        }

View File

@ -1,60 +1,49 @@
import re
import urllib.parse
from .common import InfoExtractor
from .dailymotion import DailymotionIE
from ..networking import HEADRequest
from ..utils import (
ExtractorError,
determine_ext,
filter_dict,
format_field,
int_or_none,
join_nonempty,
parse_iso8601,
parse_qs,
smuggle_url,
unsmuggle_url,
url_or_none,
)
from ..utils.traversal import traverse_obj
class FranceTVBaseInfoExtractor(InfoExtractor):
def _make_url_result(self, video_or_full_id, catalog=None):
full_id = 'francetv:%s' % video_or_full_id
if '@' not in video_or_full_id and catalog:
full_id += '@%s' % catalog
return self.url_result(
full_id, ie=FranceTVIE.ie_key(),
video_id=video_or_full_id.split('@')[0])
def _make_url_result(self, video_id, url=None):
video_id = video_id.split('@')[0] # for compat with old @catalog IDs
full_id = f'francetv:{video_id}'
if url:
full_id = smuggle_url(full_id, {'hostname': urllib.parse.urlparse(url).hostname})
return self.url_result(full_id, FranceTVIE, video_id)
class FranceTVIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?:
https?://
sivideo\.webservices\.francetelevisions\.fr/tools/getInfosOeuvre/v2/\?
.*?\bidDiffusion=[^&]+|
(?:
https?://videos\.francetv\.fr/video/|
francetv:
)
(?P<id>[^@]+)(?:@(?P<catalog>.+))?
)
'''
_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1']
_VALID_URL = r'francetv:(?P<id>[^@#]+)'
_GEO_COUNTRIES = ['FR']
_GEO_BYPASS = False
_TESTS = [{
# without catalog
'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=162311093&callback=_jsonp_loader_callback_request_0',
'md5': 'c2248a8de38c4e65ea8fae7b5df2d84f',
'url': 'francetv:ec217ecc-0733-48cf-ac06-af1347b849d1',
'info_dict': {
'id': '162311093',
'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',
'ext': 'mp4',
'title': '13h15, le dimanche... - Les mystères de Jésus',
'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
'timestamp': 1502623500,
'duration': 2580,
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20170813',
},
}, {
# with catalog
'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=NI_1004933&catalogue=Zouzous&callback=_jsonp_loader_callback_request_4',
'only_matching': True,
}, {
'url': 'http://videos.francetv.fr/video/NI_657393@Regions',
'only_matching': True,
'params': {'skip_download': 'm3u8'},
}, {
'url': 'francetv:162311093',
'only_matching': True,
@ -76,10 +65,7 @@ class FranceTVIE(InfoExtractor):
'only_matching': True,
}]
def _extract_video(self, video_id, catalogue=None):
# Videos are identified by idDiffusion so catalogue part is optional.
# However when provided, some extra formats may be returned so we pass
# it if available.
def _extract_video(self, video_id, hostname=None):
is_live = None
videos = []
title = None
@ -91,18 +77,20 @@ class FranceTVIE(InfoExtractor):
timestamp = None
spritesheets = None
for device_type in ('desktop', 'mobile'):
# desktop+chrome returns dash; mobile+safari returns hls
for device_type, browser in [('desktop', 'chrome'), ('mobile', 'safari')]:
dinfo = self._download_json(
'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id,
video_id, 'Downloading %s video JSON' % device_type, query={
f'https://k7.ftven.fr/videos/{video_id}', video_id,
f'Downloading {device_type} {browser} video JSON', query=filter_dict({
'device_type': device_type,
'browser': 'chrome',
}, fatal=False)
'browser': browser,
'domain': hostname,
}), fatal=False)
if not dinfo:
continue
video = dinfo.get('video')
video = traverse_obj(dinfo, ('video', {dict}))
if video:
videos.append(video)
if duration is None:
@ -112,7 +100,7 @@ class FranceTVIE(InfoExtractor):
if spritesheets is None:
spritesheets = video.get('spritesheets')
meta = dinfo.get('meta')
meta = traverse_obj(dinfo, ('meta', {dict}))
if meta:
if title is None:
title = meta.get('title')
@ -126,43 +114,46 @@ class FranceTVIE(InfoExtractor):
if timestamp is None:
timestamp = parse_iso8601(meta.get('broadcasted_at'))
formats = []
subtitles = {}
for video in videos:
formats, subtitles, video_url = [], {}, None
for video in traverse_obj(videos, lambda _, v: url_or_none(v['url'])):
video_url = video['url']
format_id = video.get('format')
video_url = None
if video.get('workflow') == 'token-akamai':
token_url = video.get('token')
if token_url:
token_json = self._download_json(
token_url, video_id,
'Downloading signed %s manifest URL' % format_id)
if token_json:
video_url = token_json.get('url')
if not video_url:
video_url = video.get('url')
if token_url := url_or_none(video.get('token')):
tokenized_url = traverse_obj(self._download_json(
token_url, video_id, f'Downloading signed {format_id} manifest URL',
fatal=False, query={
'format': 'json',
'url': video_url,
}), ('url', {url_or_none}))
if tokenized_url:
video_url = tokenized_url
ext = determine_ext(video_url)
if ext == 'f4m':
formats.extend(self._extract_f4m_formats(
video_url, video_id, f4m_id=format_id, fatal=False))
video_url, video_id, f4m_id=format_id or ext, fatal=False))
elif ext == 'm3u8':
format_id = format_id or 'hls'
fmts, subs = self._extract_m3u8_formats_and_subtitles(
video_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id=format_id,
fatal=False)
video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None):
if mobj := re.match(rf'{format_id}-[Aa]udio-\w+-(?P<bitrate>\d+)', f['format_id']):
f.update({
'tbr': int_or_none(mobj.group('bitrate')),
'acodec': 'mp4a',
})
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
elif ext == 'mpd':
fmts, subs = self._extract_mpd_formats_and_subtitles(
video_url, video_id, mpd_id=format_id, fatal=False)
video_url, video_id, mpd_id=format_id or 'dash', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
elif video_url.startswith('rtmp'):
formats.append({
'url': video_url,
'format_id': 'rtmp-%s' % format_id,
'format_id': join_nonempty('rtmp', format_id),
'ext': 'flv',
})
else:
@ -174,6 +165,13 @@ class FranceTVIE(InfoExtractor):
# XXX: what is video['captions']?
if not formats and video_url:
urlh = self._request_webpage(
HEADRequest(video_url), video_id, 'Checking for geo-restriction',
fatal=False, expected_status=403)
if urlh and urlh.headers.get('x-errortype') == 'geo':
self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
for f in formats:
if f.get('acodec') != 'none' and f.get('language') in ('qtz', 'qad'):
f['language_preference'] = -10
@ -194,7 +192,7 @@ class FranceTVIE(InfoExtractor):
# a 10×10 grid of thumbnails corresponding to approximately
# 2 seconds of the video; the last spritesheet may be shorter
'duration': 200,
} for sheet in spritesheets]
} for sheet in traverse_obj(spritesheets, (..., {url_or_none}))]
})
return {
@ -210,21 +208,15 @@ class FranceTVIE(InfoExtractor):
'series': title if episode_number else None,
'episode_number': int_or_none(episode_number),
'season_number': int_or_none(season_number),
'_format_sort_fields': ('res', 'tbr', 'proto'), # prioritize m3u8 over dash
}
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id')
catalog = mobj.group('catalog')
url, smuggled_data = unsmuggle_url(url, {})
video_id = self._match_id(url)
hostname = smuggled_data.get('hostname') or 'www.france.tv'
if not video_id:
qs = parse_qs(url)
video_id = qs.get('idDiffusion', [None])[0]
catalog = qs.get('catalogue', [None])[0]
if not video_id:
raise ExtractorError('Invalid URL', expected=True)
return self._extract_video(video_id, catalog)
return self._extract_video(video_id, hostname=hostname)
class FranceTVSiteIE(FranceTVBaseInfoExtractor):
@ -246,6 +238,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
},
'add_ie': [FranceTVIE.ie_key()],
}, {
# geo-restricted
'url': 'https://www.france.tv/enfants/six-huit-ans/foot2rue/saison-1/3066387-duel-au-vieux-port.html',
'info_dict': {
'id': 'a9050959-eedd-4b4a-9b0d-de6eeaa73e44',
@ -261,6 +254,26 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1441,
},
}, {
# geo-restricted livestream (workflow == 'token-akamai')
'url': 'https://www.france.tv/france-4/direct.html',
'info_dict': {
'id': '9a6a7670-dde9-4264-adbc-55b89558594b',
'ext': 'mp4',
'title': r're:France 4 en direct .+',
'live_status': 'is_live',
},
'skip': 'geo-restricted livestream',
}, {
# livestream (workflow == 'dai')
'url': 'https://www.france.tv/france-2/direct.html',
'info_dict': {
'id': '006194ea-117d-4bcf-94a9-153d999c59ae',
'ext': 'mp4',
'title': r're:France 2 en direct .+',
'live_status': 'is_live',
},
'params': {'skip_download': 'livestream'},
}, {
# france3
'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',
@ -277,10 +290,6 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
# franceo
'url': 'https://www.france.tv/france-o/archipels/132249-mon-ancetre-l-esclave.html',
'only_matching': True,
}, {
# france2 live
'url': 'https://www.france.tv/france-2/direct.html',
'only_matching': True,
}, {
'url': 'https://www.france.tv/documentaires/histoire/136517-argentine-les-500-bebes-voles-de-la-dictature.html',
'only_matching': True,
@ -304,17 +313,16 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
webpage = self._download_webpage(url, display_id)
catalogue = None
video_id = self._search_regex(
r'(?:data-main-video\s*=|videoId["\']?\s*[:=])\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
webpage, 'video id', default=None, group='id')
if not video_id:
video_id, catalogue = self._html_search_regex(
r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
webpage, 'video ID').split('@')
video_id = self._html_search_regex(
r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@"]+@[^"]+)"',
webpage, 'video ID')
return self._make_url_result(video_id, catalogue)
return self._make_url_result(video_id, url=url)
class FranceTVInfoIE(FranceTVBaseInfoExtractor):
@ -328,8 +336,9 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
'ext': 'mp4',
'title': 'Soir 3',
'upload_date': '20190822',
'timestamp': 1566510900,
'description': 'md5:72d167097237701d6e8452ff03b83c00',
'timestamp': 1566510730,
'thumbnail': r're:^https?://.*\.jpe?g$',
'duration': 1637,
'subtitles': {
'fr': 'mincount:2',
},
@ -344,8 +353,8 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
'info_dict': {
'id': '7d204c9e-a2d3-11eb-9e4c-000d3a23d482',
'ext': 'mp4',
'title': 'Covid-19 : une situation catastrophique à New Dehli',
'thumbnail': str,
'title': 'Covid-19 : une situation catastrophique à New Dehli - Édition du mercredi 21 avril 2021',
'thumbnail': r're:^https?://.*\.jpe?g$',
'duration': 76,
'timestamp': 1619028518,
'upload_date': '20210421',
@ -371,11 +380,17 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
'id': 'x4iiko0',
'ext': 'mp4',
'title': 'NDDL, référendum, Brexit : Cécile Duflot répond à Patrick Cohen',
'description': 'Au lendemain de la victoire du "oui" au référendum sur l\'aéroport de Notre-Dame-des-Landes, l\'ancienne ministre écologiste est l\'invitée de Patrick Cohen. Plus d\'info : https://www.franceinter.fr/emissions/le-7-9/le-7-9-27-juin-2016',
'description': 'md5:fdcb582c370756293a65cdfbc6ecd90e',
'timestamp': 1467011958,
'upload_date': '20160627',
'uploader': 'France Inter',
'uploader_id': 'x2q2ez',
'upload_date': '20160627',
'view_count': int,
'tags': ['Politique', 'France Inter', '27 juin 2016', 'Linvité de 8h20', 'Cécile Duflot', 'Patrick Cohen'],
'age_limit': 0,
'duration': 640,
'like_count': int,
'thumbnail': r're:https://[^/?#]+/v/[^/?#]+/x1080',
},
'add_ie': ['Dailymotion'],
}, {
@ -405,4 +420,4 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
r'(?:data-id|<figure[^<]+\bid)=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
webpage, 'video id')
return self._make_url_result(video_id)
return self._make_url_result(video_id, url=url)

View File

@ -301,7 +301,7 @@ class FunimationShowIE(FunimationBaseIE):
_TESTS = [{
'url': 'https://www.funimation.com/en/shows/sk8-the-infinity',
'info_dict': {
'id': 1315000,
'id': '1315000',
'title': 'SK8 the Infinity'
},
'playlist_count': 13,
@ -312,7 +312,7 @@ class FunimationShowIE(FunimationBaseIE):
# without lang code
'url': 'https://www.funimation.com/shows/ouran-high-school-host-club/',
'info_dict': {
'id': 39643,
'id': '39643',
'title': 'Ouran High School Host Club'
},
'playlist_count': 26,
@ -339,7 +339,7 @@ class FunimationShowIE(FunimationBaseIE):
return {
'_type': 'playlist',
'id': show_info['id'],
'id': str_or_none(show_info['id']),
'title': show_info['name'],
'entries': orderedSet(
self.url_result(

View File

@ -19,7 +19,6 @@ class GabTVIE(InfoExtractor):
'id': '61217eacea5665de450d0488',
'ext': 'mp4',
'title': 'WHY WAS AMERICA IN AFGHANISTAN - AMERICA FIRST AGAINST AMERICAN OLIGARCHY',
'description': None,
'uploader': 'Wurzelroot',
'uploader_id': '608fb0a85738fd1974984f7d',
'thumbnail': 'https://tv.gab.com/image/61217eacea5665de450d0488',

View File

@ -1,46 +0,0 @@
from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
from ..utils import (
clean_html,
get_element_by_class,
get_element_by_id,
)
class GameInformerIE(InfoExtractor):
    """Extractor for gameinformer.com pages that embed a Brightcove video."""

    _VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P<id>[^.?&#]+)'
    _TESTS = [{
        # normal Brightcove embed code extracted with BrightcoveNewIE._extract_url
        'url': 'http://www.gameinformer.com/b/features/archive/2015/09/26/replay-animal-crossing.aspx',
        'md5': '292f26da1ab4beb4c9099f1304d2b071',
        'info_dict': {
            'id': '4515472681001',
            'ext': 'mp4',
            'title': 'Replay - Animal Crossing',
            'description': 'md5:2e211891b215c85d061adc7a4dd2d930',
            'timestamp': 1443457610,
            'upload_date': '20150928',
            'uploader_id': '694940074001',
        },
    }, {
        # Brightcove id inside unique element with field--name-field-brightcove-video-id class
        'url': 'https://www.gameinformer.com/video-feature/new-gameplay-today/2019/07/09/new-gameplay-today-streets-of-rogue',
        'info_dict': {
            'id': '6057111913001',
            'ext': 'mp4',
            'title': 'New Gameplay Today Streets Of Rogue',
            'timestamp': 1562699001,
            'upload_date': '20190709',
            'uploader_id': '694940074001',
        },
    }]
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/694940074001/default_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Return a BrightcoveNew url_result for the video embedded in the page.

        The Brightcove ID is taken from the element with class
        ``field--name-field-brightcove-video-id``, or failing that from the
        element with id ``video-source-content``. When neither yields an
        ID, the URL is obtained from ``BrightcoveNewIE._extract_url``.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(
            url, display_id, headers=self.geo_verification_headers())

        id_element = get_element_by_class('field--name-field-brightcove-video-id', webpage)
        if not id_element:
            id_element = get_element_by_id('video-source-content', webpage)
        brightcove_id = clean_html(id_element)

        if brightcove_id:
            brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id
        else:
            brightcove_url = BrightcoveNewIE._extract_url(self, webpage)

        return self.url_result(brightcove_url, 'BrightcoveNew', brightcove_id)

View File

@ -267,9 +267,9 @@ class GameJoltIE(GameJoltBaseIE):
'id': 'dszyjnwi',
'ext': 'webm',
'title': 'gif-presentacion-mejorado-dszyjnwi',
'n_entries': 1,
}
}]
}],
'playlist_count': 1,
}, {
# Multiple GIFs
'url': 'https://gamejolt.com/p/gif-yhsqkumq',
@ -374,7 +374,6 @@ class GameJoltGameSoundtrackIE(GameJoltBaseIE):
'info_dict': {
'id': '657899',
'title': 'Friday Night Funkin\': Vs Oswald',
'n_entries': None,
},
'playlist': [{
'info_dict': {
@ -384,7 +383,6 @@ class GameJoltGameSoundtrackIE(GameJoltBaseIE):
'url': r're:^https://.+vs-oswald-menu-music\.mp3$',
'release_timestamp': 1635190816,
'release_date': '20211025',
'n_entries': 3,
}
}, {
'info_dict': {
@ -394,7 +392,6 @@ class GameJoltGameSoundtrackIE(GameJoltBaseIE):
'url': r're:^https://.+rabbit-s-luck--full-version-\.mp3$',
'release_timestamp': 1635190841,
'release_date': '20211025',
'n_entries': 3,
}
}, {
'info_dict': {
@ -404,9 +401,9 @@ class GameJoltGameSoundtrackIE(GameJoltBaseIE):
'url': r're:^https://.+last-straw\.mp3$',
'release_timestamp': 1635881104,
'release_date': '20211102',
'n_entries': 3,
}
}]
}],
'playlist_count': 3,
}]
def _real_extract(self, url):

View File

@ -21,7 +21,6 @@ class GaskrankIE(InfoExtractor):
'display_id': 'strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden',
'uploader_id': 'Bikefun',
'upload_date': '20170110',
'uploader_url': None,
}
}, {
'url': 'http://www.gaskrank.tv/tv/racing/isle-of-man-tt-2011-michael-du-15920.htm',

View File

@ -2,6 +2,7 @@ from .common import InfoExtractor
class GazetaIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:main/)*(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
_TESTS = [{
'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml',

View File

@ -7,6 +7,7 @@ from ..utils import remove_start, smuggle_url, urlencode_postdata
class GDCVaultIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)(?:/(?P<name>[\w-]+))?'
_NETRC_MACHINE = 'gdcvault'
_TESTS = [

View File

@ -1,93 +0,0 @@
import itertools
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import parse_duration, parse_iso8601, qualities, str_to_int
class GigaIE(InfoExtractor):
    # Extractor for giga.de pages: the numeric video id is scraped from the
    # page and the formats come from the site's syndication playlist JSON.
    _VALID_URL = r'https?://(?:www\.)?giga\.de/(?:[^/]+/)*(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'http://www.giga.de/filme/anime-awesome/trailer/anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss/',
        'md5': '6bc5535e945e724640664632055a584f',
        'info_dict': {
            'id': '2622086',
            'display_id': 'anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss',
            'ext': 'mp4',
            'title': 'Anime Awesome: Chihiros Reise ins Zauberland Das Beste kommt zum Schluss',
            'description': 'md5:afdf5862241aded4718a30dff6a57baf',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 578,
            'timestamp': 1414749706,
            'upload_date': '20141031',
            'uploader': 'Robin Schweiger',
            'view_count': int,
        },
    }, {
        'url': 'http://www.giga.de/games/channel/giga-top-montag/giga-topmontag-die-besten-serien-2014/',
        'only_matching': True,
    }, {
        'url': 'http://www.giga.de/extra/netzkultur/videos/giga-games-tom-mats-robin-werden-eigene-wege-gehen-eine-ankuendigung/',
        'only_matching': True,
    }, {
        'url': 'http://www.giga.de/tv/jonas-liest-spieletitel-eingedeutscht-episode-2/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        video_id = self._search_regex(
            [r'data-video-id="(\d+)"', r'/api/video/jwplayer/#v=(\d+)'],
            webpage, 'video id')

        # Only the first element of the returned JSON array is used.
        playlist = self._download_json(
            'http://www.giga.de/api/syndication/video/video_id/%s/playlist.json?content=syndication/key/368b5f151da4ae05ced7fa296bdff65a/'
            % video_id, video_id)[0]

        quality = qualities(['normal', 'hd720'])

        # Formats are stored under the string keys "0", "1", ...; stop at
        # the first index that is missing or empty.
        formats = []
        for key in map(compat_str, itertools.count(0)):
            entry = playlist.get(key)
            if not entry:
                break
            formats.append({
                'url': entry['src'],
                'format_id': '%s-%s' % (entry['quality'], entry['type'].split('/')[-1]),
                'quality': quality(entry['quality']),
            })

        # Metadata is scraped from the page itself; only the title is fatal.
        return {
            'id': video_id,
            'display_id': display_id,
            'title': self._html_search_meta(
                'title', webpage, 'title', fatal=True),
            'description': self._html_search_meta(
                'description', webpage, 'description'),
            'thumbnail': self._og_search_thumbnail(webpage),
            'duration': parse_duration(self._search_regex(
                r'(?s)(?:data-video-id="{0}"|data-video="[^"]*/api/video/jwplayer/#v={0}[^"]*")[^>]*>.+?<span class="duration">([^<]+)</span>'.format(video_id),
                webpage, 'duration', fatal=False)),
            'timestamp': parse_iso8601(self._search_regex(
                r'datetime="([^"]+)"', webpage, 'upload date', fatal=False)),
            'uploader': self._search_regex(
                r'class="author">([^<]+)</a>', webpage, 'uploader', fatal=False),
            'view_count': str_to_int(self._search_regex(
                r'<span class="views"><strong>([\d.,]+)</strong>',
                webpage, 'view count', fatal=False)),
            'formats': formats,
        }

View File

@ -6,6 +6,7 @@ from ..utils import (
class GodTubeIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?godtube\.com/watch/\?v=(?P<id>[\da-zA-Z]+)'
_TESTS = [
{

View File

@ -40,6 +40,22 @@ class GoPlayIE(InfoExtractor):
'title': 'A Family for the Holidays',
},
'skip': 'This video is only available for registered users'
}, {
'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay',
'info_dict': {
'id': '03eb8f2f-153e-41cb-9805-0d3a29dab656',
'ext': 'mp4',
'title': 'S11 - Aflevering 1',
'episode': 'Episode 1',
'series': 'De Mol',
'season_number': 11,
'episode_number': 1,
'season': 'Season 11'
},
'params': {
'skip_download': True
},
'skip': 'This video is only available for registered users'
}]
_id_token = None
@ -77,16 +93,39 @@ class GoPlayIE(InfoExtractor):
api = self._download_json(
f'https://api.goplay.be/web/v1/videos/long-form/{video_id}',
video_id, headers={'Authorization': 'Bearer %s' % self._id_token})
video_id, headers={
'Authorization': 'Bearer %s' % self._id_token,
**self.geo_verification_headers(),
})
formats, subs = self._extract_m3u8_formats_and_subtitles(
api['manifestUrls']['hls'], video_id, ext='mp4', m3u8_id='HLS')
if 'manifestUrls' in api:
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
api['manifestUrls']['hls'], video_id, ext='mp4', m3u8_id='HLS')
else:
if 'ssai' not in api:
raise ExtractorError('expecting Google SSAI stream')
ssai_content_source_id = api['ssai']['contentSourceID']
ssai_video_id = api['ssai']['videoID']
dai = self._download_json(
f'https://dai.google.com/ondemand/dash/content/{ssai_content_source_id}/vid/{ssai_video_id}/streams',
video_id, data=b'{"api-key":"null"}',
headers={'content-type': 'application/json'})
periods = self._extract_mpd_periods(dai['stream_manifest'], video_id)
# skip pre-roll and mid-roll ads
periods = [p for p in periods if '-ad-' not in p['id']]
formats, subtitles = self._merge_mpd_periods(periods)
info_dict.update({
'id': video_id,
'formats': formats,
'subtitles': subtitles,
})
return info_dict

View File

@ -5,6 +5,7 @@ from ..utils import ExtractorError, urlencode_postdata
class HotNewHipHopIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
_TEST = {
'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',

View File

@ -115,11 +115,11 @@ class HotStarIE(HotStarBaseIE):
'upload_date': '20190501',
'duration': 1219,
'channel': 'StarPlus',
'channel_id': 3,
'channel_id': '3',
'series': 'Ek Bhram - Sarvagun Sampanna',
'season': 'Chapter 1',
'season_number': 1,
'season_id': 6771,
'season_id': '6771',
'episode': 'Janhvi Targets Suman',
'episode_number': 8,
}
@ -135,12 +135,12 @@ class HotStarIE(HotStarBaseIE):
'channel': 'StarPlus',
'series': 'Anupama',
'season_number': 1,
'season_id': 7399,
'season_id': '7399',
'upload_date': '20230307',
'episode': 'Anupama, Anuj Share a Moment',
'episode_number': 853,
'duration': 1272,
'channel_id': 3,
'channel_id': '3',
},
'skip': 'HTTP Error 504: Gateway Time-out', # XXX: Investigate 504 errors on some episodes
}, {
@ -155,12 +155,12 @@ class HotStarIE(HotStarBaseIE):
'channel': 'Hotstar Specials',
'series': 'Kana Kaanum Kaalangal',
'season_number': 1,
'season_id': 9441,
'season_id': '9441',
'upload_date': '20220421',
'episode': 'Back To School',
'episode_number': 1,
'duration': 1810,
'channel_id': 54,
'channel_id': '54',
},
}, {
'url': 'https://www.hotstar.com/in/clips/e3-sairat-kahani-pyaar-ki/1000262286',
@ -325,11 +325,11 @@ class HotStarIE(HotStarBaseIE):
'formats': formats,
'subtitles': subs,
'channel': video_data.get('channelName'),
'channel_id': video_data.get('channelId'),
'channel_id': str_or_none(video_data.get('channelId')),
'series': video_data.get('showName'),
'season': video_data.get('seasonName'),
'season_number': int_or_none(video_data.get('seasonNo')),
'season_id': video_data.get('seasonId'),
'season_id': str_or_none(video_data.get('seasonId')),
'episode': video_data.get('title'),
'episode_number': int_or_none(video_data.get('episodeNo')),
}

View File

@ -114,7 +114,6 @@ class HungamaSongIE(InfoExtractor):
'title': 'Lucky Ali - Kitni Haseen Zindagi',
'track': 'Kitni Haseen Zindagi',
'artist': 'Lucky Ali',
'album': None,
'release_year': 2000,
'thumbnail': 'https://stat2.hungama.ind.in/assets/images/default_images/da-200x200.png',
},

View File

@ -9,7 +9,7 @@ class MonsterSirenHypergryphMusicIE(InfoExtractor):
'info_dict': {
'id': '514562',
'ext': 'wav',
'artist': ['塞壬唱片-MSR'],
'artists': ['塞壬唱片-MSR'],
'album': 'Flame Shadow',
'title': 'Flame Shadow',
}
@ -27,6 +27,6 @@ class MonsterSirenHypergryphMusicIE(InfoExtractor):
'url': traverse_obj(json_data, ('player', 'songDetail', 'sourceUrl')),
'ext': 'wav',
'vcodec': 'none',
'artist': traverse_obj(json_data, ('player', 'songDetail', 'artists')),
'artists': traverse_obj(json_data, ('player', 'songDetail', 'artists', ...)),
'album': traverse_obj(json_data, ('musicPlay', 'albumDetail', 'name'))
}

View File

@ -617,6 +617,7 @@ class InstagramPlaylistBaseIE(InstagramBaseIE):
class InstagramUserIE(InstagramPlaylistBaseIE):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])'
IE_DESC = 'Instagram user profile'
IE_NAME = 'instagram:user'

View File

@ -2,6 +2,8 @@ from .common import InfoExtractor
class JeuxVideoIE(InfoExtractor):
_WORKING = False
_ENABLED = None # XXX: pass through to GenericIE
_VALID_URL = r'https?://.*?\.jeuxvideo\.com/.*/(.*?)\.htm'
_TESTS = [{

View File

@ -1,66 +0,0 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
join_nonempty,
traverse_obj,
unified_timestamp,
update_url_query,
)
class Kanal2IE(InfoExtractor):
    # Extractor for kanal2.postimees.ee: metadata comes from the player
    # playlist JSON, streams require a session registered at sts.postimees.ee.
    _VALID_URL = r'https?://kanal2\.postimees\.ee/[^?#]+\?([^#]+&)?id=(?P<id>\d+)'
    _TESTS = [{
        'note': 'Test standard url (#5575)',
        'url': 'https://kanal2.postimees.ee/pluss/video/?id=40792',
        'md5': '7ea7b16266ec1798743777df241883dd',
        'info_dict': {
            'id': '40792',
            'ext': 'mp4',
            'title': 'Aedniku aabits / Osa 53 (05.08.2016 20:00)',
            'thumbnail': r're:https?://.*\.jpg$',
            'description': 'md5:53cabf3c5d73150d594747f727431248',
            'upload_date': '20160805',
            'timestamp': 1470420000,
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        playlist = self._download_json(
            f'https://kanal2.postimees.ee/player/playlist/{video_id}',
            video_id, query={'type': 'episodes'},
            headers={'X-Requested-With': 'XMLHttpRequest'})

        title_parts = traverse_obj(playlist, ('info', ('title', 'subtitle')))
        result = {
            'id': video_id,
            'title': join_nonempty(*title_parts, delim=' / '),
            'description': traverse_obj(playlist, ('info', 'description')),
            'thumbnail': traverse_obj(playlist, ('data', 'image')),
        }
        result['formats'] = self.get_formats(playlist, video_id)

        # The air date is the trailing "(DD.MM.YYYY HH:MM)" of the subtitle;
        # a fixed " +0200" offset is appended before parsing.
        air_date = self._search_regex(
            r'\((\d{2}\.\d{2}\.\d{4}\s\d{2}:\d{2})\)$',
            traverse_obj(playlist, ('info', 'subtitle')), 'timestamp', default='')
        result['timestamp'] = unified_timestamp(air_date + ' +0200')
        return result

    def get_formats(self, playlist, video_id):
        # Registers a streaming session for the playlist's path and returns
        # the HLS formats of every stream, each URL carrying the session as
        # its "s" query parameter. Raises ExtractorError when the path is
        # missing or the session cannot be obtained.
        stream_path = traverse_obj(playlist, ('data', 'path'))
        if not stream_path:
            raise ExtractorError('Path value not found in playlist JSON response')

        session = self._download_json(
            'https://sts.postimees.ee/session/register',
            video_id, note='Creating session', errnote='Error creating session',
            headers={
                'X-Original-URI': stream_path,
                'Accept': 'application/json',
            })
        if not (session.get('reason') == 'OK' and session.get('session')):
            failure = session.get('reason', 'unknown error')
            raise ExtractorError(f'Unable to obtain session: {failure}')

        session_token = session['session']
        return [
            fmt
            for stream_url in traverse_obj(playlist, ('data', 'streams', ..., 'file'))
            for fmt in self._extract_m3u8_formats(
                update_url_query(stream_url, {'s': session_token}), video_id, 'mp4')
        ]

View File

@ -8,6 +8,7 @@ from .common import InfoExtractor
class KankaNewsIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?kankanews\.com/a/\d+\-\d+\-\d+/(?P<id>\d+)\.shtml'
_TESTS = [{
'url': 'https://www.kankanews.com/a/2022-11-08/00310276054.shtml?appid=1088227',

View File

@ -1,96 +0,0 @@
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
fix_xml_ampersands,
float_or_none,
xpath_with_ns,
xpath_text,
)
class KarriereVideosIE(InfoExtractor):
    # Extractor for karrierevideos.at: reads the player playlist XML
    # (jwplayer namespace) and returns a single RTMP/FLV format.
    _VALID_URL = r'https?://(?:www\.)?karrierevideos\.at(?:/[^/]+)+/(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'http://www.karrierevideos.at/berufsvideos/mittlere-hoehere-schulen/altenpflegerin',
        'info_dict': {
            'id': '32c91',
            'ext': 'flv',
            'title': 'AltenpflegerIn',
            'description': 'md5:dbadd1259fde2159a9b28667cb664ae2',
            'thumbnail': r're:^http://.*\.png',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }, {
        # broken ampersands
        'url': 'http://www.karrierevideos.at/orientierung/vaeterkarenz-und-neue-chancen-fuer-muetter-baby-was-nun',
        'info_dict': {
            'id': '5sniu',
            'ext': 'flv',
            'title': 'Väterkarenz und neue Chancen für Mütter - "Baby - was nun?"',
            'description': 'md5:97092c6ad1fd7d38e9d6a5fdeb2bcc33',
            'thumbnail': r're:^http://.*\.png',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_meta('title', webpage, default=None)
        if not title:
            title = self._search_regex(
                r'<h1 class="title">([^<]+)</h1>', webpage, 'video title')

        # From here on the id is the one named in the player config path,
        # not the URL slug matched above.
        video_id = self._search_regex(
            r'/config/video/(.+?)\.xml', webpage, 'video id')

        # Server returns malformed headers
        # Force Accept-Encoding: * to prevent gzipped results
        playlist = self._download_xml(
            'http://www.karrierevideos.at/player-playlist.xml.php?p=%s' % video_id,
            video_id, transform_source=fix_xml_ampersands,
            headers={'Accept-Encoding': '*'})

        jwplayer_ns = {
            'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'
        }
        item = playlist.find('./tracklist/item')

        def item_text(tag, name, **kwargs):
            # Text of the jwplayer-namespaced child <tag> of the playlist item.
            return xpath_text(
                item, xpath_with_ns('./jwplayer:' + tag, jwplayer_ns), name, **kwargs)

        video_file = item_text('file', 'video url', fatal=True)
        streamer = item_text('streamer', 'streamer', fatal=True)
        uploader = item_text('author', 'uploader')
        duration = float_or_none(item_text('duration', 'duration'))

        description = self._html_search_regex(
            r'(?s)<div class="leadtext">(.+?)</div>',
            webpage, 'description')

        thumbnail = self._html_search_meta(
            'thumbnail', webpage, 'thumbnail')

        return {
            'id': video_id,
            'url': streamer.replace('rtmpt', 'rtmp'),
            'play_path': 'mp4:%s' % video_file,
            'ext': 'flv',
            'title': title,
            'description': description,
            # Resolve the thumbnail against the page URL when one was found.
            'thumbnail': compat_urlparse.urljoin(url, thumbnail) if thumbnail else thumbnail,
            'uploader': uploader,
            'duration': duration,
        }

View File

@ -3,6 +3,7 @@ from ..utils import int_or_none
class KelbyOneIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://members\.kelbyone\.com/course/(?P<id>[^$&?#/]+)'
_TESTS = [{

View File

@ -1,119 +0,0 @@
from .common import InfoExtractor
from ..utils import (
determine_ext,
float_or_none,
int_or_none,
url_or_none,
)
class KonserthusetPlayIE(InfoExtractor):
    # Extractor for konserthusetplay.se / rspoplay.se. The page references a
    # Picsearch "rest" endpoint whose player config describes an HLS URL,
    # RTMP bitrates and an HTTP fallback URL.
    _VALID_URL = r'https?://(?:www\.)?(?:konserthusetplay|rspoplay)\.se/\?.*\bm=(?P<id>[^&]+)'
    _TESTS = [{
        'url': 'http://www.konserthusetplay.se/?m=CKDDnlCY-dhWAAqiMERd-A',
        'md5': 'e3fd47bf44e864bd23c08e487abe1967',
        'info_dict': {
            'id': 'CKDDnlCY-dhWAAqiMERd-A',
            'ext': 'mp4',
            'title': 'Orkesterns instrument: Valthornen',
            'description': 'md5:f10e1f0030202020396a4d712d2fa827',
            'thumbnail': 're:^https?://.*$',
            'duration': 398.76,
        },
    }, {
        'url': 'http://rspoplay.se/?m=elWuEH34SMKvaO4wO_cHBw',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        e = self._search_regex(
            r'https?://csp\.picsearch\.com/rest\?.*\be=(.+?)[&"\']', webpage, 'e')

        # The response is not bare JSON: keep only the text from the first
        # '{' through the last '}' before parsing.
        rest = self._download_json(
            'http://csp.picsearch.com/rest?e=%s&containerId=mediaplayer&i=object' % e,
            video_id, transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])

        media = rest['media']
        player_config = media['playerconfig']
        playlist = player_config['playlist']

        # First playlist entry that declares bitrates or a provider.
        # NOTE: raises StopIteration when no entry qualifies (unchanged).
        source = next(f for f in playlist if f.get('bitrates') or f.get('provider'))

        FORMAT_ID_REGEX = r'_([^_]+)_h264m\.mp4'

        formats = []

        m3u8_url = source.get('url')
        if m3u8_url and determine_ext(m3u8_url) == 'm3u8':
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False))

        fallback_url = source.get('fallbackUrl')
        fallback_format_id = None
        if fallback_url:
            fallback_format_id = self._search_regex(
                FORMAT_ID_REGEX, fallback_url, 'format id', default=None)

        # `or {}` (rather than a .get default) also tolerates keys that are
        # present but null.
        bwcheck = (player_config.get('plugins') or {}).get('bwcheck') or {}
        connection_url = (
            (player_config.get('rtmp') or {}).get('netConnectionUrl')
            or bwcheck.get('netConnectionUrl'))
        if connection_url:
            # The source may have been selected on 'provider' alone, so
            # 'bitrates' is not guaranteed to exist.
            for bitrate in source.get('bitrates') or []:
                video_url = bitrate.get('url')
                if not video_url:
                    continue
                format_id = self._search_regex(
                    FORMAT_ID_REGEX, video_url, 'format id', default=None)
                f_common = {
                    'vbr': int_or_none(bitrate.get('bitrate')),
                    'width': int_or_none(bitrate.get('width')),
                    'height': int_or_none(bitrate.get('height')),
                }
                formats.append({
                    **f_common,
                    'url': connection_url,
                    'play_path': video_url,
                    'format_id': 'rtmp-%s' % format_id if format_id else 'rtmp',
                    'ext': 'flv',
                })
                # The fallback URL is the HTTP variant of the bitrate whose
                # format id it shares; reuse that bitrate's dimensions.
                if format_id and format_id == fallback_format_id:
                    formats.append({
                        **f_common,
                        'url': fallback_url,
                        'format_id': 'http-%s' % format_id,
                    })

        if not formats and fallback_url:
            formats.append({
                'url': fallback_url,
            })

        title = player_config.get('title') or media['title']
        description = (player_config.get('mediaInfo') or {}).get('description')
        thumbnail = media.get('image')
        # `duration` is divided by 1000 (float_or_none scale argument).
        duration = float_or_none(media.get('duration'), 1000)

        subtitles = {}
        captions = source.get('captionsAvailableLanguages')
        if isinstance(captions, dict):
            for lang, subtitle_url in captions.items():
                subtitle_url = url_or_none(subtitle_url)
                if lang != 'none' and subtitle_url:
                    subtitles.setdefault(lang, []).append({'url': subtitle_url})

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }

View File

@ -6,6 +6,7 @@ from ..utils import (
class KooIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?kooapp\.com/koo/[^/]+/(?P<id>[^/&#$?]+)'
_TESTS = [{ # Test for video in the comments
'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/946c4189-bc2d-4524-b95b-43f641e2adde',

View File

@ -8,6 +8,7 @@ from ..utils import (
class KrasViewIE(InfoExtractor):
_WORKING = False
IE_DESC = 'Красвью'
_VALID_URL = r'https?://krasview\.ru/(?:video|embed)/(?P<id>\d+)'

View File

@ -1,83 +0,0 @@
import random
import urllib.parse
from .common import InfoExtractor
from ..utils import (
float_or_none,
int_or_none,
timeconvert,
update_url_query,
xpath_text,
)
class KUSIIE(InfoExtractor):
    # Extractor for kusi.com story and clip pages: clip metadata and formats
    # are read from the site's build.asp XML feed.
    _VALID_URL = r'https?://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))'
    _TESTS = [{
        'url': 'http://www.kusi.com/story/32849881/turko-files-refused-to-help-it-aint-right',
        'md5': '4e76ce8e53660ce9697d06c0ba6fc47d',
        'info_dict': {
            'id': '12689020',
            'ext': 'mp4',
            'title': "Turko Files: Refused to Help, It Ain't Right!",
            'duration': 223.586,
            'upload_date': '20160826',
            'timestamp': 1472233118,
            'thumbnail': r're:^https?://.*\.jpg$'
        },
    }, {
        'url': 'http://kusi.com/video?clipId=12203019',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        clip_id, path = self._match_valid_url(url).group('clipId', 'path')
        video_id = clip_id or path

        webpage = self._download_webpage(url, video_id)

        # Story URLs carry no clip id; take it from the page instead.
        if clip_id is None:
            clip_id = self._html_search_regex(
                r'"clipId"\s*,\s*"(\d+)"', webpage, 'clip id')
            video_id = clip_id

        affiliate_id = self._search_regex(
            r'affiliateId\s*:\s*\'([^\']+)\'', webpage, 'affiliate id')

        # See __Packages/worldnow/model/GalleryModel.as of WNGallery.swf
        feed_query = {
            'buildtype': 'buildfeaturexmlrequest',
            'featureType': 'Clip',
            'featureid': clip_id,
            'affiliateno': affiliate_id,
            'clientgroupid': '1',
            'rnd': int(round(random.random() * 1000000)),
        }
        doc = self._download_xml(
            update_url_query('http://www.kusi.com/build.asp', feed_query), video_id)

        video_title = xpath_text(doc, 'HEADLINE', fatal=True)
        # DURATION is divided by 1000 (float_or_none scale argument).
        duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000)
        description = xpath_text(doc, 'ABSTRACT')
        thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME')
        creation_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))

        # One format per <content> child of the first Media RSS <group>.
        mrss = '{http://search.yahoo.com/mrss/}'
        contents = doc.find(mrss + 'group').findall(mrss + 'content')
        formats = [{
            'url': urllib.parse.unquote_plus(content.attrib['url']),
            'height': int_or_none(content.attrib.get('height')),
            'width': int_or_none(content.attrib.get('width')),
            'vbr': float_or_none(content.attrib.get('bitratebits'), scale=1000),
        } for content in contents]

        return {
            'id': video_id,
            'title': video_title,
            'description': description,
            'duration': duration,
            'formats': formats,
            'thumbnail': thumbnail,
            'timestamp': creation_time,
        }

View File

@ -54,6 +54,7 @@ class KuwoBaseIE(InfoExtractor):
class KuwoIE(KuwoBaseIE):
_WORKING = False
IE_NAME = 'kuwo:song'
IE_DESC = '酷我音乐'
_VALID_URL = r'https?://(?:www\.)?kuwo\.cn/yinyue/(?P<id>\d+)'
@ -133,6 +134,7 @@ class KuwoIE(KuwoBaseIE):
class KuwoAlbumIE(InfoExtractor):
_WORKING = False
IE_NAME = 'kuwo:album'
IE_DESC = '酷我音乐 - 专辑'
_VALID_URL = r'https?://(?:www\.)?kuwo\.cn/album/(?P<id>\d+?)/'
@ -169,6 +171,7 @@ class KuwoAlbumIE(InfoExtractor):
class KuwoChartIE(InfoExtractor):
_WORKING = False
IE_NAME = 'kuwo:chart'
IE_DESC = '酷我音乐 - 排行榜'
_VALID_URL = r'https?://yinyue\.kuwo\.cn/billboard_(?P<id>[^.]+).htm'
@ -194,6 +197,7 @@ class KuwoChartIE(InfoExtractor):
class KuwoSingerIE(InfoExtractor):
_WORKING = False
IE_NAME = 'kuwo:singer'
IE_DESC = '酷我音乐 - 歌手'
_VALID_URL = r'https?://(?:www\.)?kuwo\.cn/mingxing/(?P<id>[^/]+)'
@ -251,6 +255,7 @@ class KuwoSingerIE(InfoExtractor):
class KuwoCategoryIE(InfoExtractor):
_WORKING = False
IE_NAME = 'kuwo:category'
IE_DESC = '酷我音乐 - 分类'
_VALID_URL = r'https?://yinyue\.kuwo\.cn/yy/cinfo_(?P<id>\d+?).htm'
@ -290,6 +295,7 @@ class KuwoCategoryIE(InfoExtractor):
class KuwoMvIE(KuwoBaseIE):
_WORKING = False
IE_NAME = 'kuwo:mv'
IE_DESC = '酷我音乐 - MV'
_VALID_URL = r'https?://(?:www\.)?kuwo\.cn/mv/(?P<id>\d+?)/'

View File

@ -231,7 +231,6 @@ class LBRYIE(LBRYBaseIE):
'release_timestamp': int,
'release_date': str,
'tags': list,
'duration': None,
'channel': 'RT',
'channel_id': 'fdd11cb3ab75f95efb7b3bc2d726aa13ac915b66',
'channel_url': 'https://odysee.com/@RT:fdd11cb3ab75f95efb7b3bc2d726aa13ac915b66',

View File

@ -10,6 +10,7 @@ from ..utils import (
class Lecture2GoIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://lecture2go\.uni-hamburg\.de/veranstaltungen/-/v/(?P<id>\d+)'
_TEST = {
'url': 'https://lecture2go.uni-hamburg.de/veranstaltungen/-/v/17473',

View File

@ -2,6 +2,7 @@ from .common import InfoExtractor
class LentaIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?lenta\.ru/[^/]+/\d+/\d+/\d+/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://lenta.ru/news/2018/03/22/savshenko_go/',

View File

@ -22,8 +22,6 @@ class LikeeIE(InfoExtractor):
'description': 'md5:9a7ebe816f0e78722ee5ed76f75983b4',
'thumbnail': r're:^https?://.+\.jpg',
'uploader': 'Huỳnh Hồng Qu&acirc;n ',
'play_count': int,
'download_count': int,
'artist': 'Huỳnh Hồng Qu&acirc;n ',
'timestamp': 1651571320,
'upload_date': '20220503',
@ -44,11 +42,9 @@ class LikeeIE(InfoExtractor):
'comment_count': int,
'like_count': int,
'uploader': 'Vương Phước Nhi',
'download_count': int,
'timestamp': 1651506835,
'upload_date': '20220502',
'duration': 60024,
'play_count': int,
'artist': 'Vương Phước Nhi',
'uploader_id': '649222262',
'view_count': int,
@ -65,9 +61,7 @@ class LikeeIE(InfoExtractor):
'duration': 9684,
'uploader_id': 'fernanda_rivasg',
'view_count': int,
'play_count': int,
'artist': 'La Cami La✨',
'download_count': int,
'like_count': int,
'uploader': 'Fernanda Rivas🎶',
'timestamp': 1614034308,
@ -83,13 +77,11 @@ class LikeeIE(InfoExtractor):
'thumbnail': r're:^https?://.+\.jpg',
'comment_count': int,
'duration': 18014,
'play_count': int,
'view_count': int,
'timestamp': 1611694774,
'like_count': int,
'uploader': 'Fernanda Rivas🎶',
'uploader_id': 'fernanda_rivasg',
'download_count': int,
'artist': 'ʟᴇʀɪᴋ_ɴɪʀɴ♡',
'upload_date': '20210126',
},
@ -128,8 +120,6 @@ class LikeeIE(InfoExtractor):
'description': info.get('share_desc'),
'view_count': int_or_none(info.get('video_count')),
'like_count': int_or_none(info.get('likeCount')),
'play_count': int_or_none(info.get('play_count')),
'download_count': int_or_none(info.get('download_count')),
'comment_count': int_or_none(info.get('comment_count')),
'uploader': str_or_none(info.get('nick_name')),
'uploader_id': str_or_none(info.get('likeeId')),

View File

@ -1,42 +0,0 @@
from .common import InfoExtractor
class LocalNews8IE(InfoExtractor):
    # Extractor for localnews8.com articles: scrapes the Kaltura partner and
    # entry ids from the page and defers to the 'Kaltura' extractor through a
    # url_transparent result.
    _VALID_URL = r'https?://(?:www\.)?localnews8\.com/(?:[^/]+/)*(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.localnews8.com/news/rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings/35183304',
        'md5': 'be4d48aea61aa2bde7be2ee47691ad20',
        'info_dict': {
            'id': '35183304',
            'display_id': 'rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings',
            'ext': 'mp4',
            'title': 'Rexburg business turns carbon fiber scraps into wedding ring',
            'description': 'The process was first invented by Lamborghini and less than a dozen companies around the world use it.',
            'duration': 153,
            'timestamp': 1441844822,
            'upload_date': '20150910',
            'uploader_id': 'api',
        }
    }

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, display_id)

        partner_id = self._search_regex(
            r'partnerId\s*[:=]\s*(["\'])(?P<id>\d+)\1',
            webpage, 'partner id', group='id')

        # The field name below is what the failure message reports when the
        # pattern does not match (was misspelled 'videl id').
        kaltura_id = self._search_regex(
            r'videoIdString\s*[:=]\s*(["\'])kaltura:(?P<id>[0-9a-z_]+)\1',
            webpage, 'video id', group='id')

        return {
            '_type': 'url_transparent',
            'url': 'kaltura:%s:%s' % (partner_id, kaltura_id),
            'ie_key': 'Kaltura',
            # Keep the article's own ids rather than the Kaltura entry id.
            'id': video_id,
            'display_id': display_id,
        }

View File

@ -1,8 +1,7 @@
from .common import InfoExtractor
from .francetv import FranceTVIE
from .francetv import FranceTVBaseInfoExtractor
class LumniIE(InfoExtractor):
class LumniIE(FranceTVBaseInfoExtractor):
_VALID_URL = r'https?://(?:www\.)?lumni\.fr/video/(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://www.lumni.fr/video/l-homme-et-son-environnement-dans-la-revolution-industrielle',
@ -21,4 +20,4 @@ class LumniIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
video_id = self._html_search_regex(
r'<div[^>]+data-factoryid\s*=\s*["\']([^"\']+)', webpage, 'video id')
return self.url_result(f'francetv:{video_id}', FranceTVIE, video_id)
return self._make_url_result(video_id, url=url)

View File

@ -1,107 +0,0 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
dict_get,
float_or_none,
int_or_none,
merge_dicts,
parse_duration,
try_get,
)
class MallTVIE(InfoExtractor):
    # Extractor for mall.tv (www and sk hosts): parses the page's
    # `videoObject` JSON for the HLS source, subtitles and counters, and
    # merges the result with any JSON-LD metadata found on the page.
    _VALID_URL = r'https?://(?:(?:www|sk)\.)?mall\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
        'md5': 'cd69ce29176f6533b65bff69ed9a5f2a',
        'info_dict': {
            'id': 't0zzt0',
            'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
            'ext': 'mp4',
            'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
            'description': 'md5:db7d5744a4bd4043d9d98324aa72ab35',
            'duration': 216,
            'timestamp': 1538870400,
            'upload_date': '20181007',
            'view_count': int,
            'comment_count': int,
            'thumbnail': 'https://cdn.vpplayer.tech/agmipnzv/encode/vjsnigfq/thumbnails/retina.jpg',
            'average_rating': 9.060869565217391,
            'dislike_count': int,
            'like_count': int,
        }
    }, {
        'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
        'only_matching': True,
    }, {
        'url': 'https://sk.mall.tv/gejmhaus/reklamacia-nehreje-vyrobnik-tepla-alebo-spekacka',
        'only_matching': True,
    }, {
        'url': 'https://www.mall.tv/zivoty-slavnych/nadeje-vychodu-i-zapadu-jak-michail-gorbacov-zmenil-politickou-mapu-sveta-a-ziskal-za-to-nobelovu-cenu-miru',
        'info_dict': {
            'id': 'yx010y',
            'ext': 'mp4',
            'dislike_count': int,
            'description': 'md5:aee02bee5a8d072c6a8207b91d1905a9',
            'thumbnail': 'https://cdn.vpplayer.tech/agmipnzv/encode/vjsnjdeu/thumbnails/retina.jpg',
            'comment_count': int,
            'display_id': 'md5:0ec2afa94d2e2b7091c019cef2a43a9b',
            'like_count': int,
            'duration': 752,
            'timestamp': 1646956800,
            'title': 'md5:fe79385daaf16d74c12c1ec4a26687af',
            'view_count': int,
            'upload_date': '20220311',
            'average_rating': 9.685714285714285,
        }
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)

        webpage = self._download_webpage(
            url, display_id, headers=self.geo_verification_headers())

        video_object = self._search_regex(
            r'videoObject\s*=\s*JSON\.parse\(JSON\.stringify\(({.+?})\)\);',
            webpage, 'video object')
        video = self._parse_json(video_object, display_id)

        video_id = self._search_regex(
            r'<input\s*id\s*=\s*player-id-name\s*[^>]+value\s*=\s*(\w+)', webpage, 'video id')

        formats = self._extract_m3u8_formats(
            video['VideoSource'], video_id, 'mp4', 'm3u8_native')

        # Group subtitle URLs by language; entries without a URL are ignored
        # and a missing language is filed under 'cz'.
        subtitles = {}
        for track in (video.get('Subtitles') or {}):
            track_url = track.get('Url')
            if track_url:
                subtitles.setdefault(track.get('Language') or 'cz', []).append({
                    'url': track_url,
                })

        entity_counts = video.get('EntityCounts') or {}

        def get_count(kind):
            # Counters are keyed by the plural name ("Views", "Likes", ...).
            counter = entity_counts.get(kind + 's') or {}
            return int_or_none(dict_get(counter, ('Count', 'StrCount')))

        info = self._search_json_ld(webpage, video_id, default={})

        duration = (
            int_or_none(video.get('DurationSeconds'))
            or parse_duration(video.get('Duration')))
        extracted = {
            'id': str(video_id),
            'display_id': display_id,
            'title': video.get('Title'),
            'description': clean_html(video.get('Description')),
            'thumbnail': video.get('ThumbnailUrl'),
            'formats': formats,
            'subtitles': subtitles,
            'duration': duration,
            'view_count': get_count('View'),
            'like_count': get_count('Like'),
            'dislike_count': get_count('Dislike'),
            'average_rating': float_or_none(try_get(video, lambda x: x['EntityRating']['AvarageRate'])),
            'comment_count': get_count('Comment'),
        }
        return merge_dicts(extracted, info)

View File

@ -12,6 +12,7 @@ from ..utils import (
class ManyVidsIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
_TESTS = [{
# preview video

View File

@ -10,6 +10,7 @@ from ..utils import (
class MarkizaIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?videoarchiv\.markiza\.sk/(?:video/(?:[^/]+/)*|embed/)(?P<id>\d+)(?:[_/]|$)'
_TESTS = [{
'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723_oteckovia-109',
@ -68,6 +69,7 @@ class MarkizaIE(InfoExtractor):
class MarkizaPageIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?(?:(?:[^/]+\.)?markiza|tvnoviny)\.sk/(?:[^/]+/)*(?P<id>\d+)_'
_TESTS = [{
'url': 'http://www.markiza.sk/soubiz/zahranicny/1923705_oteckovia-maju-svoj-den-ti-slavni-nie-su-o-nic-menej-rozkosni',

View File

@ -8,15 +8,15 @@ class MegaphoneIE(InfoExtractor):
_VALID_URL = r'https://player\.megaphone\.fm/(?P<id>[A-Z0-9]+)'
_EMBED_REGEX = [rf'<iframe[^>]*?\ssrc=["\'](?P<url>{_VALID_URL})']
_TEST = {
'url': 'https://player.megaphone.fm/GLT9749789991?"',
'url': 'https://player.megaphone.fm/GLT9749789991',
'md5': '4816a0de523eb3e972dc0dda2c191f96',
'info_dict': {
'id': 'GLT9749789991',
'ext': 'mp3',
'title': '#97 What Kind Of Idiot Gets Phished?',
'thumbnail': r're:^https://.*\.png.*$',
'duration': 1776.26375,
'author': 'Reply All',
'duration': 1998.36,
'creators': ['Reply All'],
},
}
@ -40,7 +40,7 @@ class MegaphoneIE(InfoExtractor):
'id': video_id,
'thumbnail': thumbnail,
'title': title,
'author': author,
'creators': [author] if author else None,
'duration': episode_data['duration'],
'formats': formats,
}

View File

@ -1,36 +0,0 @@
from .common import InfoExtractor
class MiaoPaiIE(InfoExtractor):
    """Extractor for miaopai.com ``/show/<id>`` pages."""

    _VALID_URL = r'https?://(?:www\.)?miaopai\.com/show/(?P<id>[-A-Za-z0-9~_]+)'
    _TEST = {
        'url': 'http://www.miaopai.com/show/n~0hO7sfV1nBEw4Y29-Hqg__.htm',
        'md5': '095ed3f1cd96b821add957bdc29f845b',
        'info_dict': {
            'id': 'n~0hO7sfV1nBEw4Y29-Hqg__',
            'ext': 'mp4',
            'title': '西游记音乐会的秒拍视频',
            'thumbnail': 're:^https?://.*/n~0hO7sfV1nBEw4Y29-Hqg___m.jpg',
        },
    }

    # User agent sent with the page request in _real_extract
    _USER_AGENT_IPAD = 'Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1'

    def _real_extract(self, url):
        """Fetch the page and return its first HTML5 media entry.

        The entry's ``id``, ``title`` and ``thumbnail`` are overridden with
        values taken from the URL and the page markup.
        """
        video_id = self._match_id(url)
        request_headers = {'User-Agent': self._USER_AGENT_IPAD}
        page = self._download_webpage(url, video_id, headers=request_headers)

        # Title lookup is mandatory, thumbnail lookup is best-effort (fatal=False)
        overrides = {
            'id': video_id,
            'title': self._html_extract_title(page),
            'thumbnail': self._html_search_regex(
                r'<div[^>]+class=(?P<q1>[\'"]).*\bvideo_img\b.*(?P=q1)[^>]+data-url=(?P<q2>[\'"])(?P<url>[^\'"]+)(?P=q2)',
                page, 'thumbnail', fatal=False, group='url'),
        }

        media_entries = self._parse_html5_media_entries(url, page, video_id)
        first_entry = media_entries[0]
        first_entry.update(overrides)
        return first_entry

View File

@ -1,55 +0,0 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
smuggle_url,
)
class MinistryGridIE(InfoExtractor):
    """Extractor for ministrygrid.com pages.

    The page embeds its player inside one of several portlets; each portlet
    is rendered separately and searched for an ``<iframe>`` whose URL is then
    handed off to another extractor.
    """

    _VALID_URL = r'https?://(?:www\.)?ministrygrid\.com/([^/?#]*/)*(?P<id>[^/#?]+)/?(?:$|[?#])'
    _TEST = {
        'url': 'http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers',
        'md5': '844be0d2a1340422759c2a9101bab017',
        'info_dict': {
            'id': '3453494717001',
            'ext': 'mp4',
            'title': 'The Gospel by Numbers',
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20140410',
            'description': 'Coming soon from T4G 2014!',
            'uploader_id': '2034960640001',
            'timestamp': 1397145591,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['TDSLifeway'],
    }

    def _real_extract(self, url):
        """Locate the video iframe among the page's portlets.

        Returns a ``url_result`` for the first iframe found, with the page's
        video id smuggled in as ``force_videoid``.

        Raises:
            ExtractorError: if no portlet contains an iframe.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        portlets = self._parse_json(self._search_regex(
            r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list'),
            video_id)
        pl_id = self._search_regex(
            r'getPlid:function\(\){return"(\d+)"}', webpage, 'p_l_id')

        for i, portlet in enumerate(portlets, 1):
            portlet_url = 'http://www.ministrygrid.com/c/portal/render_portlet?p_l_id=%s&p_p_id=%s' % (pl_id, portlet)
            portlet_code = self._download_webpage(
                portlet_url, video_id,
                note='Looking in portlet %s (%d/%d)' % (portlet, i, len(portlets)),
                fatal=False)
            # With fatal=False a failed download yields a falsy value rather
            # than raising; skip it so the regex search below is never run
            # on a non-string and the remaining portlets are still tried.
            if not portlet_code:
                continue
            video_iframe_url = self._search_regex(
                r'<iframe.*?src="([^"]+)"', portlet_code, 'video iframe',
                default=None)
            if video_iframe_url:
                return self.url_result(
                    smuggle_url(video_iframe_url, {'force_videoid': video_id}),
                    video_id=video_id)

        raise ExtractorError('Could not find video iframe in any portlets')

View File

@ -1,45 +0,0 @@
from .common import InfoExtractor
class MorningstarIE(InfoExtractor):
    """Extractor for morningstar.com video-center pages (``videocenter.aspx?id=N``)."""

    IE_DESC = 'morningstar.com'
    _VALID_URL = r'https?://(?:(?:www|news)\.)morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)'
    _TESTS = [
        {
            'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869',
            'md5': '6c0acface7a787aadc8391e4bbf7b0f5',
            'info_dict': {
                'id': '615869',
                'ext': 'mp4',
                'title': 'Get Ahead of the Curve on 2013 Taxes',
                'description': "Vanguard's Joel Dickson on managing higher tax rates for high-income earners and fund capital-gain distributions in 2013.",
                'thumbnail': r're:^https?://.*m(?:orning)?star\.com/.+thumb\.jpg$',
            },
        },
        {
            'url': 'http://news.morningstar.com/cover/videocenter.aspx?id=825556',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Scrape title, media URL, thumbnail and description from the page.

        Title and media URL are required; thumbnail and description are
        looked up with ``fatal=False``.
        """
        video_id = self._match_valid_url(url).group('id')
        page = self._download_webpage(url, video_id)

        # Dict values are evaluated top to bottom, so the two mandatory
        # lookups (title, url) still run before the optional ones.
        return {
            'id': video_id,
            'title': self._html_search_regex(
                r'<h1 id="titleLink">(.*?)</h1>', page, 'title'),
            'url': self._html_search_regex(
                r'<input type="hidden" id="hidVideoUrl" value="([^"]+)"',
                page, 'video URL'),
            'thumbnail': self._html_search_regex(
                r'<input type="hidden" id="hidSnapshot" value="([^"]+)"',
                page, 'thumbnail', fatal=False),
            'description': self._html_search_regex(
                r'<div id="mstarDeck".*?>(.*?)</div>',
                page, 'description', fatal=False),
        }

View File

@ -5,6 +5,7 @@ from ..compat import (
class MotorsportIE(InfoExtractor):
_WORKING = False
IE_DESC = 'motorsport.com'
_VALID_URL = r'https?://(?:www\.)?motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])'
_TEST = {

View File

@ -451,6 +451,7 @@ class MTVVideoIE(MTVServicesInfoExtractor):
class MTVDEIE(MTVServicesInfoExtractor):
_WORKING = False
IE_NAME = 'mtv.de'
_VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:musik/videoclips|folgen|news)/(?P<id>[0-9a-z]+)'
_TESTS = [{

View File

@ -9,6 +9,7 @@ from ..utils import (
class MuenchenTVIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?muenchen\.tv/livestream'
IE_DESC = 'münchen.tv'
_TEST = {

View File

@ -17,11 +17,11 @@ class MusicdexBaseIE(InfoExtractor):
'track_number': track_json.get('number'),
'url': format_field(track_json, 'url', 'https://www.musicdex.org/%s'),
'duration': track_json.get('duration'),
'genre': [genre.get('name') for genre in track_json.get('genres') or []],
'genres': [genre.get('name') for genre in track_json.get('genres') or []],
'like_count': track_json.get('likes_count'),
'view_count': track_json.get('plays'),
'artist': [artist.get('name') for artist in track_json.get('artists') or []],
'album_artist': [artist.get('name') for artist in album_json.get('artists') or []],
'artists': [artist.get('name') for artist in track_json.get('artists') or []],
'album_artists': [artist.get('name') for artist in album_json.get('artists') or []],
'thumbnail': format_field(album_json, 'image', 'https://www.musicdex.org/%s'),
'album': album_json.get('name'),
'release_year': try_get(album_json, lambda x: date_from_str(unified_strdate(x['release_date'])).year),
@ -43,11 +43,11 @@ class MusicdexSongIE(MusicdexBaseIE):
'track': 'dual existence',
'track_number': 1,
'duration': 266000,
'genre': ['Anime'],
'genres': ['Anime'],
'like_count': int,
'view_count': int,
'artist': ['fripSide'],
'album_artist': ['fripSide'],
'artists': ['fripSide'],
'album_artists': ['fripSide'],
'thumbnail': 'https://www.musicdex.org/storage/album/9iDIam1DHTVqUG4UclFIEq1WAFGXfPW4y0TtZa91.png',
'album': 'To Aru Kagaku no Railgun T OP2 Single - dual existence',
'release_year': 2020
@ -69,9 +69,9 @@ class MusicdexAlbumIE(MusicdexBaseIE):
'playlist_mincount': 28,
'info_dict': {
'id': '56',
'genre': ['OST'],
'genres': ['OST'],
'view_count': int,
'artist': ['TENMON & Eiichiro Yanagi / minori'],
'artists': ['TENMON & Eiichiro Yanagi / minori'],
'title': 'ef - a tale of memories Original Soundtrack 2 ~fortissimo~',
'release_year': 2008,
'thumbnail': 'https://www.musicdex.org/storage/album/2rSHkyYBYfB7sbvElpEyTMcUn6toY7AohOgJuDlE.jpg',
@ -88,9 +88,9 @@ class MusicdexAlbumIE(MusicdexBaseIE):
'id': id,
'title': data_json.get('name'),
'description': data_json.get('description'),
'genre': [genre.get('name') for genre in data_json.get('genres') or []],
'genres': [genre.get('name') for genre in data_json.get('genres') or []],
'view_count': data_json.get('plays'),
'artist': [artist.get('name') for artist in data_json.get('artists') or []],
'artists': [artist.get('name') for artist in data_json.get('artists') or []],
'thumbnail': format_field(data_json, 'image', 'https://www.musicdex.org/%s'),
'release_year': try_get(data_json, lambda x: date_from_str(unified_strdate(x['release_date'])).year),
'entries': entries,

View File

@ -5,6 +5,7 @@ from ..utils import parse_duration, remove_end, unified_strdate, urljoin
class NDTVIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:[^/]+\.)?ndtv\.com/(?:[^/]+/)*videos?/?(?:[^/]+/)*[^/?^&]+-(?P<id>\d+)'
_TESTS = [

View File

@ -1,6 +1,7 @@
import itertools
import json
from .art19 import Art19IE
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
@ -112,7 +113,8 @@ class NebulaBaseIE(InfoExtractor):
class NebulaIE(NebulaBaseIE):
_VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[-\w]+)'
IE_NAME = 'nebula:video'
_VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
'info_dict': {
@ -236,8 +238,8 @@ class NebulaIE(NebulaBaseIE):
class NebulaClassIE(NebulaBaseIE):
IE_NAME = 'nebula:class'
_VALID_URL = rf'{_BASE_URL_RE}/(?P<id>[-\w]+)/(?P<ep>\d+)'
IE_NAME = 'nebula:media'
_VALID_URL = rf'{_BASE_URL_RE}/(?!(?:myshows|library|videos)/)(?P<id>[\w-]+)/(?P<ep>[\w-]+)/?(?:$|[?#])'
_TESTS = [{
'url': 'https://nebula.tv/copyright-for-fun-and-profit/14',
'info_dict': {
@ -253,6 +255,46 @@ class NebulaClassIE(NebulaBaseIE):
'title': 'Photos, Sculpture, and Video',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://nebula.tv/extremitiespodcast/pyramiden-the-high-arctic-soviet-ghost-town',
'info_dict': {
'ext': 'mp3',
'id': '018f65f0-0033-4021-8f87-2d132beb19aa',
'description': 'md5:05d2b23ab780c955e2511a2b9127acff',
'series_id': '335e8159-d663-491a-888f-1732285706ac',
'modified_timestamp': 1599091504,
'episode_id': '018f65f0-0033-4021-8f87-2d132beb19aa',
'series': 'Extremities',
'modified_date': '20200903',
'upload_date': '20200902',
'title': 'Pyramiden: The High-Arctic Soviet Ghost Town',
'release_timestamp': 1571237958,
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'duration': 1546.05714,
'timestamp': 1599085608,
'release_date': '20191016',
},
}, {
'url': 'https://nebula.tv/thelayover/the-layover-episode-1',
'info_dict': {
'ext': 'mp3',
'id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
'episode_number': 1,
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'release_date': '20230304',
'modified_date': '20230403',
'series': 'The Layover',
'episode_id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
'modified_timestamp': 1680554566,
'duration': 3130.46401,
'release_timestamp': 1677943800,
'title': 'The Layover — Episode 1',
'series_id': '874303a5-4900-4626-a4b6-2aacac34466a',
'upload_date': '20230303',
'episode': 'Episode 1',
'timestamp': 1677883672,
'description': 'md5:002cca89258e3bc7c268d5b8c24ba482',
},
}]
def _real_extract(self, url):
@ -268,16 +310,38 @@ class NebulaClassIE(NebulaBaseIE):
metadata = self._call_api(
f'https://content.api.nebula.app/content/{slug}/{episode}/?include=lessons',
slug, note='Fetching video metadata')
return {
**self._extract_video_metadata(metadata),
**self._extract_formats(metadata['id'], slug),
}
slug, note='Fetching class/podcast metadata')
content_type = metadata.get('type')
if content_type == 'lesson':
return {
**self._extract_video_metadata(metadata),
**self._extract_formats(metadata['id'], slug),
}
elif content_type == 'podcast_episode':
episode_url = metadata['episode_url']
if not episode_url and metadata.get('premium'):
self.raise_login_required()
if Art19IE.suitable(episode_url):
return self.url_result(episode_url, Art19IE)
return traverse_obj(metadata, {
'id': ('id', {str}),
'url': ('episode_url', {url_or_none}),
'title': ('title', {str}),
'description': ('description', {str}),
'timestamp': ('published_at', {parse_iso8601}),
'duration': ('duration', {int_or_none}),
'channel_id': ('channel_id', {str}),
'chnanel': ('channel_title', {str}),
'thumbnail': ('assets', 'regular', {url_or_none}),
})
raise ExtractorError(f'Unexpected content type {content_type!r}')
class NebulaSubscriptionsIE(NebulaBaseIE):
IE_NAME = 'nebula:subscriptions'
_VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)'
_VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)/?(?:$|[?#])'
_TESTS = [{
'url': 'https://nebula.tv/myshows',
'playlist_mincount': 1,
@ -310,7 +374,7 @@ class NebulaSubscriptionsIE(NebulaBaseIE):
class NebulaChannelIE(NebulaBaseIE):
IE_NAME = 'nebula:channel'
_VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|library|videos/)(?P<id>[-\w]+)/?(?:$|[?#])'
_VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|library|videos)(?P<id>[\w-]+)/?(?:$|[?#])'
_TESTS = [{
'url': 'https://nebula.tv/tom-scott-presents-money',
'info_dict': {
@ -343,6 +407,14 @@ class NebulaChannelIE(NebulaBaseIE):
'description': 'md5:6690248223eed044a9f11cd5a24f9742',
},
'playlist_count': 23,
}, {
'url': 'https://nebula.tv/trussissuespodcast',
'info_dict': {
'id': 'trussissuespodcast',
'title': 'The TLDR News Podcast',
'description': 'md5:a08c4483bc0b705881d3e0199e721385',
},
'playlist_mincount': 80,
}]
def _generate_playlist_entries(self, collection_id, collection_slug):
@ -365,6 +437,17 @@ class NebulaChannelIE(NebulaBaseIE):
lesson.get('share_url') or f'https://nebula.tv/{metadata["class_slug"]}/{metadata["slug"]}',
{'id': lesson['id']}), NebulaClassIE, url_transparent=True, **metadata)
def _generate_podcast_entries(self, collection_id, collection_slug):
    """Yield a url_result for every podcast episode of a channel.

    Pages are requested one after another, following the ``next`` link of
    each API response until a response has none. Only episodes whose
    ``share_url`` passes ``url_or_none`` are yielded.
    """
    page_url = f'https://content.api.nebula.app/podcast_channels/{collection_id}/podcast_episodes/?ordering=-published_at&premium=true'
    page_num = 0
    while True:
        page_num += 1
        page = self._call_api(page_url, collection_slug, note=f'Retrieving podcast page {page_num}')
        valid_episodes = traverse_obj(page, ('results', lambda _, v: url_or_none(v['share_url'])))
        for item in valid_episodes:
            yield self.url_result(item['share_url'], NebulaClassIE)
        page_url = page.get('next')
        if not page_url:
            return
def _real_extract(self, url):
collection_slug = self._match_id(url)
channel = self._call_api(
@ -373,6 +456,8 @@ class NebulaChannelIE(NebulaBaseIE):
if channel.get('type') == 'class':
entries = self._generate_class_entries(channel)
elif channel.get('type') == 'podcast_channel':
entries = self._generate_podcast_entries(channel['id'], collection_slug)
else:
entries = self._generate_playlist_entries(channel['id'], collection_slug)

View File

@ -118,7 +118,6 @@ class NekoHackerIE(InfoExtractor):
'artist': 'Neko Hacker',
'track': 'md5:1a5fcbc96ca3c3265b1c6f9f79f30fd0',
'track_number': 1,
'duration': None
}
},
{
@ -136,7 +135,6 @@ class NekoHackerIE(InfoExtractor):
'artist': 'Neko Hacker',
'track': 'むじな de なじむ feat. 六科なじむ (CV: 日高里菜 )',
'track_number': 2,
'duration': None
}
},
{
@ -154,7 +152,6 @@ class NekoHackerIE(InfoExtractor):
'artist': 'Neko Hacker',
'track': '進め!むじなカンパニー (instrumental)',
'track_number': 3,
'duration': None
}
},
{
@ -172,7 +169,6 @@ class NekoHackerIE(InfoExtractor):
'artist': 'Neko Hacker',
'track': 'むじな de なじむ (instrumental)',
'track_number': 4,
'duration': None
}
}
]

View File

@ -1,33 +1,38 @@
import datetime
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import parse_iso8601, url_or_none
from ..utils.traversal import traverse_obj
class NerdCubedFeedIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/feed\.json'
_VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/?(?:$|[#?])'
_TEST = {
'url': 'http://www.nerdcubed.co.uk/feed.json',
'url': 'http://www.nerdcubed.co.uk/',
'info_dict': {
'id': 'nerdcubed-feed',
'title': 'nerdcubed.co.uk feed',
},
'playlist_mincount': 1300,
'playlist_mincount': 5500,
}
def _extract_video(self, feed_entry):
    """Turn one feed entry into a url-transparent result for YoutubeIE.

    The watch URL is built from the entry's ``id``; the remaining entry
    fields are passed along as extra metadata on the result.
    """
    # Build the URL first so a missing 'id' fails before any metadata work
    watch_url = f'https://www.youtube.com/watch?v={feed_entry["id"]}'
    field_paths = {
        'id': ('id', {str}),
        'title': ('title', {str}),
        'description': ('description', {str}),
        'timestamp': ('publishedAt', {parse_iso8601}),
        'channel': ('source', 'name', {str}),
        'channel_id': ('source', 'id', {str}),
        'channel_url': ('source', 'url', {str}),
        'thumbnail': ('thumbnail', 'source', {url_or_none}),
    }
    metadata = traverse_obj(feed_entry, field_paths)
    return self.url_result(watch_url, YoutubeIE, url_transparent=True, **metadata)
def _real_extract(self, url):
feed = self._download_json(url, url, 'Downloading NerdCubed JSON feed')
video_id = 'nerdcubed-feed'
feed = self._download_json('https://www.nerdcubed.co.uk/_/cdn/videos.json', video_id)
entries = [{
'_type': 'url',
'title': feed_entry['title'],
'uploader': feed_entry['source']['name'] if feed_entry['source'] else None,
'upload_date': datetime.datetime.strptime(feed_entry['date'], '%Y-%m-%d').strftime('%Y%m%d'),
'url': 'http://www.youtube.com/watch?v=' + feed_entry['youtube_id'],
} for feed_entry in feed]
return {
'_type': 'playlist',
'title': 'nerdcubed.co.uk feed',
'id': 'nerdcubed-feed',
'entries': entries,
}
return self.playlist_result(
map(self._extract_video, traverse_obj(feed, ('videos', lambda _, v: v['id']))),
video_id, 'nerdcubed.co.uk feed')

Some files were not shown because too many files have changed in this diff Show More