Update to ytdl-2021.02.04.1 except youtube

This commit is contained in:
pukkandan 2021-02-04 13:26:01 +05:30
parent e29663c644
commit 2181983a0c
24 changed files with 663 additions and 434 deletions

View file

@ -1,14 +1,15 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import calendar
import re import re
import time
from .amp import AMPIE from .amp import AMPIE
from .common import InfoExtractor from .common import InfoExtractor
from .youtube import YoutubeIE from ..utils import (
from ..compat import compat_urlparse parse_duration,
parse_iso8601,
try_get,
)
class AbcNewsVideoIE(AMPIE): class AbcNewsVideoIE(AMPIE):
@ -18,8 +19,8 @@ class AbcNewsVideoIE(AMPIE):
(?: (?:
abcnews\.go\.com/ abcnews\.go\.com/
(?: (?:
[^/]+/video/(?P<display_id>[0-9a-z-]+)-| (?:[^/]+/)*video/(?P<display_id>[0-9a-z-]+)-|
video/embed\?.*?\bid= video/(?:embed|itemfeed)\?.*?\bid=
)| )|
fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/ fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
) )
@ -36,6 +37,8 @@ class AbcNewsVideoIE(AMPIE):
'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.', 'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
'duration': 180, 'duration': 180,
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1380454200,
'upload_date': '20130929',
}, },
'params': { 'params': {
# m3u8 download # m3u8 download
@ -47,6 +50,12 @@ class AbcNewsVideoIE(AMPIE):
}, { }, {
'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478', 'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://abcnews.go.com/video/itemfeed?id=46979033',
'only_matching': True,
}, {
'url': 'https://abcnews.go.com/GMA/News/video/history-christmas-story-67894761',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -67,28 +76,23 @@ class AbcNewsIE(InfoExtractor):
_VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)' _VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY', # Youtube Embeds
'url': 'https://abcnews.go.com/Entertainment/peter-billingsley-child-actor-christmas-story-hollywood-power/story?id=51286501',
'info_dict': { 'info_dict': {
'id': '10505354', 'id': '51286501',
'ext': 'flv', 'title': "Peter Billingsley: From child actor in 'A Christmas Story' to Hollywood power player",
'display_id': 'dramatic-video-rare-death-job-america', 'description': 'Billingsley went from a child actor to Hollywood power player.',
'title': 'Occupational Hazards',
'description': 'Nightline investigates the dangers that lurk at various jobs.',
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20100428',
'timestamp': 1272412800,
}, },
'add_ie': ['AbcNewsVideo'], 'playlist_count': 5,
}, { }, {
'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818', 'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
'info_dict': { 'info_dict': {
'id': '38897857', 'id': '38897857',
'ext': 'mp4', 'ext': 'mp4',
'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
'title': 'Justin Timberlake Drops Hints For Secret Single', 'title': 'Justin Timberlake Drops Hints For Secret Single',
'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.', 'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
'upload_date': '20160515', 'upload_date': '20160505',
'timestamp': 1463329500, 'timestamp': 1462442280,
}, },
'params': { 'params': {
# m3u8 download # m3u8 download
@ -100,49 +104,55 @@ class AbcNewsIE(InfoExtractor):
}, { }, {
'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343', 'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
'only_matching': True, 'only_matching': True,
}, {
# inline.type == 'video'
'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) story_id = self._match_id(url)
display_id = mobj.group('display_id') webpage = self._download_webpage(url, story_id)
video_id = mobj.group('id') story = self._parse_json(self._search_regex(
r"window\['__abcnews__'\]\s*=\s*({.+?});",
webpage, 'data'), story_id)['page']['content']['story']['everscroll'][0]
article_contents = story.get('articleContents') or {}
webpage = self._download_webpage(url, video_id) def entries():
video_url = self._search_regex( featured_video = story.get('featuredVideo') or {}
r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL') feed = try_get(featured_video, lambda x: x['video']['feed'])
full_video_url = compat_urlparse.urljoin(url, video_url) if feed:
yield {
youtube_url = YoutubeIE._extract_url(webpage) '_type': 'url',
'id': featured_video.get('id'),
timestamp = None 'title': featured_video.get('name'),
date_str = self._html_search_regex( 'url': feed,
r'<span[^>]+class="timestamp">([^<]+)</span>', 'thumbnail': featured_video.get('images'),
webpage, 'timestamp', fatal=False) 'description': featured_video.get('description'),
if date_str: 'timestamp': parse_iso8601(featured_video.get('uploadDate')),
tz_offset = 0 'duration': parse_duration(featured_video.get('duration')),
if date_str.endswith(' ET'): # Eastern Time
tz_offset = -5
date_str = date_str[:-3]
date_formats = ['%b. %d, %Y', '%b %d, %Y, %I:%M %p']
for date_format in date_formats:
try:
timestamp = calendar.timegm(time.strptime(date_str.strip(), date_format))
except ValueError:
continue
if timestamp is not None:
timestamp -= tz_offset * 3600
entry = {
'_type': 'url_transparent',
'ie_key': AbcNewsVideoIE.ie_key(), 'ie_key': AbcNewsVideoIE.ie_key(),
'url': full_video_url,
'id': video_id,
'display_id': display_id,
'timestamp': timestamp,
} }
if youtube_url: for inline in (article_contents.get('inlines') or []):
entries = [entry, self.url_result(youtube_url, ie=YoutubeIE.ie_key())] inline_type = inline.get('type')
return self.playlist_result(entries) if inline_type == 'iframe':
iframe_url = try_get(inline, lambda x: x['attrs']['src'])
if iframe_url:
yield self.url_result(iframe_url)
elif inline_type == 'video':
video_id = inline.get('id')
if video_id:
yield {
'_type': 'url',
'id': video_id,
'url': 'http://abcnews.go.com/video/embed?id=' + video_id,
'thumbnail': inline.get('imgSrc') or inline.get('imgDefault'),
'description': inline.get('description'),
'duration': parse_duration(inline.get('duration')),
'ie_key': AbcNewsVideoIE.ie_key(),
}
return entry return self.playlist_result(
entries(), story_id, article_contents.get('headline'),
article_contents.get('subHead'))

View file

@ -26,6 +26,7 @@
strip_or_none, strip_or_none,
try_get, try_get,
unified_strdate, unified_strdate,
urlencode_postdata,
) )
@ -51,9 +52,12 @@ class ADNIE(InfoExtractor):
} }
} }
_NETRC_MACHINE = 'animedigitalnetwork'
_BASE_URL = 'http://animedigitalnetwork.fr' _BASE_URL = 'http://animedigitalnetwork.fr'
_API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/' _API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
_PLAYER_BASE_URL = _API_BASE_URL + 'player/' _PLAYER_BASE_URL = _API_BASE_URL + 'player/'
_HEADERS = {}
_LOGIN_ERR_MESSAGE = 'Unable to log in'
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537) _RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
_POS_ALIGN_MAP = { _POS_ALIGN_MAP = {
'start': 1, 'start': 1,
@ -129,19 +133,42 @@ def _get_subtitles(self, sub_url, video_id):
}]) }])
return subtitles return subtitles
def _real_initialize(self):
username, password = self._get_login_info()
if not username:
return
try:
access_token = (self._download_json(
self._API_BASE_URL + 'authentication/login', None,
'Logging in', self._LOGIN_ERR_MESSAGE, fatal=False,
data=urlencode_postdata({
'password': password,
'rememberMe': False,
'source': 'Web',
'username': username,
})) or {}).get('accessToken')
if access_token:
self._HEADERS = {'authorization': 'Bearer ' + access_token}
except ExtractorError as e:
message = None
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
resp = self._parse_json(
e.cause.read().decode(), None, fatal=False) or {}
message = resp.get('message') or resp.get('code')
self.report_warning(message or self._LOGIN_ERR_MESSAGE)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
player = self._download_json( player = self._download_json(
video_base_url + 'configuration', video_id, video_base_url + 'configuration', video_id,
'Downloading player config JSON metadata')['player'] 'Downloading player config JSON metadata',
headers=self._HEADERS)['player']
options = player['options'] options = player['options']
user = options['user'] user = options['user']
if not user.get('hasAccess'): if not user.get('hasAccess'):
raise ExtractorError( self.raise_login_required()
'This video is only available for paying users', expected=True)
# self.raise_login_required() # FIXME: Login is not implemented
token = self._download_json( token = self._download_json(
user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'), user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
@ -188,7 +215,6 @@ def _real_extract(self, url):
message = error.get('message') message = error.get('message')
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country': if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
self.raise_geo_restricted(msg=message) self.raise_geo_restricted(msg=message)
else:
raise ExtractorError(message) raise ExtractorError(message)
else: else:
raise ExtractorError('Giving up retrying') raise ExtractorError('Giving up retrying')

View file

@ -252,7 +252,7 @@ class AENetworksShowIE(AENetworksListBaseIE):
_TESTS = [{ _TESTS = [{
'url': 'http://www.history.com/shows/ancient-aliens', 'url': 'http://www.history.com/shows/ancient-aliens',
'info_dict': { 'info_dict': {
'id': 'SH012427480000', 'id': 'SERIES1574',
'title': 'Ancient Aliens', 'title': 'Ancient Aliens',
'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f', 'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
}, },

View file

@ -8,6 +8,7 @@
int_or_none, int_or_none,
mimetype2ext, mimetype2ext,
parse_iso8601, parse_iso8601,
unified_timestamp,
url_or_none, url_or_none,
) )
@ -88,7 +89,7 @@ def get_media_node(name, default=None):
self._sort_formats(formats) self._sort_formats(formats)
timestamp = parse_iso8601(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date')) timestamp = unified_timestamp(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
return { return {
'id': video_id, 'id': video_id,

View file

@ -48,6 +48,7 @@ def _parse_video_data(self, video_data, video_id, is_live):
'duration': int_or_none(video_data.get('duration')), 'duration': int_or_none(video_data.get('duration')),
'timestamp': parse_iso8601(video_data.get('create_time'), ' '), 'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
'is_live': is_live, 'is_live': is_live,
'uploader_id': video_data.get('user_id'),
} }
@ -107,6 +108,7 @@ class AWAANLiveIE(AWAANBaseIE):
'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'upload_date': '20150107', 'upload_date': '20150107',
'timestamp': 1420588800, 'timestamp': 1420588800,
'uploader_id': '71',
}, },
'params': { 'params': {
# m3u8 download # m3u8 download

View file

@ -47,7 +47,7 @@ class AZMedienIE(InfoExtractor):
'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1', 'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
'only_matching': True 'only_matching': True
}] }]
_API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/cb9f2f81ed22e9b47f4ca64ea3cc5a5d13e88d1d' _API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/a4016f65fe62b81dc6664dd9f4910e4ab40383be'
_PARTNER_ID = '1719221' _PARTNER_ID = '1719221'
def _real_extract(self, url): def _real_extract(self, url):

View file

@ -90,13 +90,19 @@ class BleacherReportCMSIE(AMPIE):
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})' _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})'
_TESTS = [{ _TESTS = [{
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms', 'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms',
'md5': '2e4b0a997f9228ffa31fada5c53d1ed1', 'md5': '670b2d73f48549da032861130488c681',
'info_dict': { 'info_dict': {
'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1', 'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
'ext': 'flv', 'ext': 'mp4',
'title': 'Cena vs. Rollins Would Expose the Heavyweight Division', 'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e', 'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
'upload_date': '20150723',
'timestamp': 1437679032,
}, },
'expected_warnings': [
'Unable to download f4m manifest'
]
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View file

@ -12,7 +12,7 @@
class BravoTVIE(AdobePassIE): class BravoTVIE(AdobePassIE):
_VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?:www\.)?(?P<req_id>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is', 'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
'md5': 'e34684cfea2a96cd2ee1ef3a60909de9', 'md5': 'e34684cfea2a96cd2ee1ef3a60909de9',
@ -28,10 +28,13 @@ class BravoTVIE(AdobePassIE):
}, { }, {
'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1', 'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) site, display_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
settings = self._parse_json(self._search_regex( settings = self._parse_json(self._search_regex(
r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'), r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'),
@ -53,11 +56,14 @@ def _real_extract(self, url):
tp_path = release_pid = tve['release_pid'] tp_path = release_pid = tve['release_pid']
if tve.get('entitlement') == 'auth': if tve.get('entitlement') == 'auth':
adobe_pass = settings.get('tve_adobe_auth', {}) adobe_pass = settings.get('tve_adobe_auth', {})
if site == 'bravotv':
site = 'bravo'
resource = self._get_mvpd_resource( resource = self._get_mvpd_resource(
adobe_pass.get('adobePassResourceId', 'bravo'), adobe_pass.get('adobePassResourceId') or site,
tve['title'], release_pid, tve.get('rating')) tve['title'], release_pid, tve.get('rating'))
query['auth'] = self._extract_mvpd_auth( query['auth'] = self._extract_mvpd_auth(
url, release_pid, adobe_pass.get('adobePassRequestorId', 'bravo'), resource) url, release_pid,
adobe_pass.get('adobePassRequestorId') or site, resource)
else: else:
shared_playlist = settings['ls_playlist'] shared_playlist = settings['ls_playlist']
account_pid = shared_playlist['account_pid'] account_pid = shared_playlist['account_pid']

View file

@ -1,6 +1,7 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import datetime
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@ -8,8 +9,8 @@
clean_html, clean_html,
int_or_none, int_or_none,
parse_duration, parse_duration,
parse_iso8601,
parse_resolution, parse_resolution,
try_get,
url_or_none, url_or_none,
) )
@ -24,8 +25,9 @@ class CCMAIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'L\'espot de La Marató de TV3', 'title': 'L\'espot de La Marató de TV3',
'description': 'md5:f12987f320e2f6e988e9908e4fe97765', 'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
'timestamp': 1470918540, 'timestamp': 1478608140,
'upload_date': '20160811', 'upload_date': '20161108',
'age_limit': 0,
} }
}, { }, {
'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/', 'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
@ -35,8 +37,24 @@ class CCMAIE(InfoExtractor):
'ext': 'mp3', 'ext': 'mp3',
'title': 'El Consell de Savis analitza el derbi', 'title': 'El Consell de Savis analitza el derbi',
'description': 'md5:e2a3648145f3241cb9c6b4b624033e53', 'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
'upload_date': '20171205', 'upload_date': '20170512',
'timestamp': 1512507300, 'timestamp': 1494622500,
'vcodec': 'none',
'categories': ['Esports'],
}
}, {
'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/',
'md5': 'b43c3d3486f430f3032b5b160d80cbc3',
'info_dict': {
'id': '6031387',
'ext': 'mp4',
'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)',
'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
'timestamp': 1582577700,
'upload_date': '20200224',
'subtitles': 'mincount:4',
'age_limit': 16,
'series': 'Crims',
} }
}] }]
@ -72,17 +90,27 @@ def _real_extract(self, url):
informacio = media['informacio'] informacio = media['informacio']
title = informacio['titol'] title = informacio['titol']
durada = informacio.get('durada', {}) durada = informacio.get('durada') or {}
duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text')) duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
timestamp = parse_iso8601(informacio.get('data_emissio', {}).get('utc')) tematica = try_get(informacio, lambda x: x['tematica']['text'])
timestamp = None
data_utc = try_get(informacio, lambda x: x['data_emissio']['utc'])
try:
timestamp = datetime.datetime.strptime(
data_utc, '%Y-%d-%mT%H:%M:%S%z').timestamp()
except TypeError:
pass
subtitles = {} subtitles = {}
subtitols = media.get('subtitols', {}) subtitols = media.get('subtitols') or []
if subtitols: if isinstance(subtitols, dict):
sub_url = subtitols.get('url') subtitols = [subtitols]
for st in subtitols:
sub_url = st.get('url')
if sub_url: if sub_url:
subtitles.setdefault( subtitles.setdefault(
subtitols.get('iso') or subtitols.get('text') or 'ca', []).append({ st.get('iso') or st.get('text') or 'ca', []).append({
'url': sub_url, 'url': sub_url,
}) })
@ -97,6 +125,16 @@ def _real_extract(self, url):
'height': int_or_none(imatges.get('alcada')), 'height': int_or_none(imatges.get('alcada')),
}] }]
age_limit = None
codi_etic = try_get(informacio, lambda x: x['codi_etic']['id'])
if codi_etic:
codi_etic_s = codi_etic.split('_')
if len(codi_etic_s) == 2:
if codi_etic_s[1] == 'TP':
age_limit = 0
else:
age_limit = int_or_none(codi_etic_s[1])
return { return {
'id': media_id, 'id': media_id,
'title': title, 'title': title,
@ -106,4 +144,9 @@ def _real_extract(self, url):
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'subtitles': subtitles, 'subtitles': subtitles,
'formats': formats, 'formats': formats,
'age_limit': age_limit,
'alt_title': informacio.get('titol_complet'),
'episode_number': int_or_none(informacio.get('capitol')),
'categories': [tematica] if tematica else None,
'series': informacio.get('programa'),
} }

View file

@ -96,7 +96,7 @@ def _real_extract(self, url):
raise ExtractorError('This video is only available for premium users.', expected=True) raise ExtractorError('This video is only available for premium users.', expected=True)
need_confirm_age = False need_confirm_age = False
if self._html_search_regex(r'(<form[^>]+action="/a/validatebirth")', if self._html_search_regex(r'(<form[^>]+action="[^"]*/a/validatebirth[^"]*")',
webpage, 'birthday validate form', default=None): webpage, 'birthday validate form', default=None):
webpage = self._download_age_confirm_page( webpage = self._download_age_confirm_page(
url, video_id, note='Confirming age') url, video_id, note='Confirming age')

View file

@ -12,7 +12,14 @@
) )
class EggheadCourseIE(InfoExtractor): class EggheadBaseIE(InfoExtractor):
def _call_api(self, path, video_id, resource, fatal=True):
return self._download_json(
'https://app.egghead.io/api/v1/' + path,
video_id, 'Downloading %s JSON' % resource, fatal=fatal)
class EggheadCourseIE(EggheadBaseIE):
IE_DESC = 'egghead.io course' IE_DESC = 'egghead.io course'
IE_NAME = 'egghead:course' IE_NAME = 'egghead:course'
_VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)' _VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
@ -28,10 +35,9 @@ class EggheadCourseIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
playlist_id = self._match_id(url) playlist_id = self._match_id(url)
series_path = 'series/' + playlist_id
lessons = self._download_json( lessons = self._call_api(
'https://egghead.io/api/v1/series/%s/lessons' % playlist_id, series_path + '/lessons', playlist_id, 'course lessons')
playlist_id, 'Downloading course lessons JSON')
entries = [] entries = []
for lesson in lessons: for lesson in lessons:
@ -44,9 +50,8 @@ def _real_extract(self, url):
entries.append(self.url_result( entries.append(self.url_result(
lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id)) lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))
course = self._download_json( course = self._call_api(
'https://egghead.io/api/v1/series/%s' % playlist_id, series_path, playlist_id, 'course', False) or {}
playlist_id, 'Downloading course JSON', fatal=False) or {}
playlist_id = course.get('id') playlist_id = course.get('id')
if playlist_id: if playlist_id:
@ -57,7 +62,7 @@ def _real_extract(self, url):
course.get('description')) course.get('description'))
class EggheadLessonIE(InfoExtractor): class EggheadLessonIE(EggheadBaseIE):
IE_DESC = 'egghead.io lesson' IE_DESC = 'egghead.io lesson'
IE_NAME = 'egghead:lesson' IE_NAME = 'egghead:lesson'
_VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)' _VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
@ -74,7 +79,7 @@ class EggheadLessonIE(InfoExtractor):
'upload_date': '20161209', 'upload_date': '20161209',
'duration': 304, 'duration': 304,
'view_count': 0, 'view_count': 0,
'tags': ['javascript', 'free'], 'tags': 'count:2',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -88,8 +93,8 @@ class EggheadLessonIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
lesson = self._download_json( lesson = self._call_api(
'https://egghead.io/api/v1/lessons/%s' % display_id, display_id) 'lessons/' + display_id, display_id, 'lesson')
lesson_id = compat_str(lesson['id']) lesson_id = compat_str(lesson['id'])
title = lesson['title'] title = lesson['title']

View file

@ -1308,6 +1308,7 @@
TV2IE, TV2IE,
TV2ArticleIE, TV2ArticleIE,
KatsomoIE, KatsomoIE,
MTVUutisetArticleIE,
) )
from .tv2dk import ( from .tv2dk import (
TV2DKIE, TV2DKIE,
@ -1448,7 +1449,6 @@
VidmeUserIE, VidmeUserIE,
VidmeUserLikesIE, VidmeUserLikesIE,
) )
from .vidzi import VidziIE
from .vier import VierIE, VierVideosIE from .vier import VierIE, VierVideosIE
from .viewlift import ( from .viewlift import (
ViewLiftIE, ViewLiftIE,
@ -1508,6 +1508,7 @@
VRVSeriesIE, VRVSeriesIE,
) )
from .vshare import VShareIE from .vshare import VShareIE
from .vtm import VTMIE
from .medialaan import MedialaanIE from .medialaan import MedialaanIE
from .vube import VubeIE from .vube import VubeIE
from .vuclip import VuClipIE from .vuclip import VuClipIE

View file

@ -131,6 +131,7 @@
from .rcs import RCSEmbedsIE from .rcs import RCSEmbedsIE
from .bitchute import BitChuteIE from .bitchute import BitChuteIE
from .arcpublishing import ArcPublishingIE from .arcpublishing import ArcPublishingIE
from .medialaan import MedialaanIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -2224,6 +2225,20 @@ class GenericIE(InfoExtractor):
'duration': 1581, 'duration': 1581,
}, },
}, },
{
# MyChannels SDK embed
# https://www.24kitchen.nl/populair/deskundige-dit-waarom-sommigen-gevoelig-zijn-voor-voedselallergieen
'url': 'https://www.demorgen.be/nieuws/burgemeester-rotterdam-richt-zich-in-videoboodschap-tot-relschoppers-voelt-het-goed~b0bcfd741/',
'md5': '90c0699c37006ef18e198c032d81739c',
'info_dict': {
'id': '194165',
'ext': 'mp4',
'title': 'Burgemeester Aboutaleb spreekt relschoppers toe',
'timestamp': 1611740340,
'upload_date': '20210127',
'duration': 159,
},
},
] ]
def report_following_redirect(self, new_url): def report_following_redirect(self, new_url):
@ -2463,6 +2478,9 @@ def _real_extract(self, url):
webpage = self._webpage_read_content( webpage = self._webpage_read_content(
full_response, url, video_id, prefix=first_bytes) full_response, url, video_id, prefix=first_bytes)
if '<title>DPG Media Privacy Gate</title>' in webpage:
webpage = self._download_webpage(url, video_id)
self.report_extraction(video_id) self.report_extraction(video_id)
# Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest? # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
@ -2594,6 +2612,11 @@ def _real_extract(self, url):
if arc_urls: if arc_urls:
return self.playlist_from_matches(arc_urls, video_id, video_title, ie=ArcPublishingIE.ie_key()) return self.playlist_from_matches(arc_urls, video_id, video_title, ie=ArcPublishingIE.ie_key())
mychannels_urls = MedialaanIE._extract_urls(webpage)
if mychannels_urls:
return self.playlist_from_matches(
mychannels_urls, video_id, video_title, ie=MedialaanIE.ie_key())
# Look for embedded rtl.nl player # Look for embedded rtl.nl player
matches = re.findall( matches = re.findall(
r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"', r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',

View file

@ -7,6 +7,7 @@
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
get_element_by_class,
int_or_none, int_or_none,
lowercase_escape, lowercase_escape,
try_get, try_get,
@ -237,7 +238,7 @@ def add_source_format(urlh):
if confirmation_webpage: if confirmation_webpage:
confirm = self._search_regex( confirm = self._search_regex(
r'confirm=([^&"\']+)', confirmation_webpage, r'confirm=([^&"\']+)', confirmation_webpage,
'confirmation code', fatal=False) 'confirmation code', default=None)
if confirm: if confirm:
confirmed_source_url = update_url_query(source_url, { confirmed_source_url = update_url_query(source_url, {
'confirm': confirm, 'confirm': confirm,
@ -245,6 +246,11 @@ def add_source_format(urlh):
urlh = request_source_file(confirmed_source_url, 'confirmed source') urlh = request_source_file(confirmed_source_url, 'confirmed source')
if urlh and urlh.headers.get('Content-Disposition'): if urlh and urlh.headers.get('Content-Disposition'):
add_source_format(urlh) add_source_format(urlh)
else:
self.report_warning(
get_element_by_class('uc-error-subcaption', confirmation_webpage)
or get_element_by_class('uc-error-caption', confirmation_webpage)
or 'unable to extract confirmation code')
if not formats and reason: if not formats and reason:
raise ExtractorError(reason, expected=True) raise ExtractorError(reason, expected=True)

View file

@ -2,268 +2,113 @@
import re import re
from .gigya import GigyaBaseIE from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
extract_attributes,
int_or_none, int_or_none,
parse_duration, mimetype2ext,
try_get, parse_iso8601,
unified_timestamp,
) )
class MedialaanIE(GigyaBaseIE): class MedialaanIE(InfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?:www\.|nieuws\.)?
(?: (?:
(?P<site_id>vtm|q2|vtmkzoom)\.be/ (?:embed\.)?mychannels.video/embed/|
embed\.mychannels\.video/(?:s(?:dk|cript)/)?production/|
(?:www\.)?(?:
(?: (?:
video(?:/[^/]+/id/|/?\?.*?\baid=)| 7sur7|
(?:[^/]+/)* demorgen|
hln|
joe|
qmusic
)\.be|
(?:
[abe]d|
bndestem|
destentor|
gelderlander|
pzc|
tubantia|
volkskrant
)\.nl
)/video/(?:[^/]+/)*[^/?&#]+~p
) )
) (?P<id>\d+)
(?P<id>[^/?#&]+)
''' '''
_NETRC_MACHINE = 'medialaan'
_APIKEY = '3_HZ0FtkMW_gOyKlqQzW5_0FHRC7Nd5XpXJZcDdXY4pk5eES2ZWmejRW5egwVm4ug-'
_SITE_TO_APP_ID = {
'vtm': 'vtm_watch',
'q2': 'q2',
'vtmkzoom': 'vtmkzoom',
}
_TESTS = [{ _TESTS = [{
# vod 'url': 'https://www.bndestem.nl/video/de-terugkeer-van-ally-de-aap-en-wie-vertrekt-er-nog-bij-nac~p193993',
'url': 'http://vtm.be/video/volledige-afleveringen/id/vtm_20170219_VM0678361_vtmwatch',
'info_dict': { 'info_dict': {
'id': 'vtm_20170219_VM0678361_vtmwatch', 'id': '193993',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Allemaal Chris afl. 6', 'title': 'De terugkeer van Ally de Aap en wie vertrekt er nog bij NAC?',
'description': 'md5:4be86427521e7b07e0adb0c9c554ddb2', 'timestamp': 1611663540,
'timestamp': 1487533280, 'upload_date': '20210126',
'upload_date': '20170219', 'duration': 238,
'duration': 2562,
'series': 'Allemaal Chris',
'season': 'Allemaal Chris',
'season_number': 1,
'season_id': '256936078124527',
'episode': 'Allemaal Chris afl. 6',
'episode_number': 6,
'episode_id': '256936078591527',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Requires account credentials',
}, { }, {
# clip 'url': 'https://www.gelderlander.nl/video/kanalen/degelderlander~c320/series/snel-nieuws~s984/noodbevel-in-doetinchem-politie-stuurt-mensen-centrum-uit~p194093',
'url': 'http://vtm.be/video?aid=168332',
'info_dict': {
'id': '168332',
'ext': 'mp4',
'title': '"Veronique liegt!"',
'description': 'md5:1385e2b743923afe54ba4adc38476155',
'timestamp': 1489002029,
'upload_date': '20170308',
'duration': 96,
},
}, {
# vod
'url': 'http://vtm.be/video/volledige-afleveringen/id/257107153551000',
'only_matching': True, 'only_matching': True,
}, { }, {
# vod 'url': 'https://embed.mychannels.video/sdk/production/193993?options=TFTFF_default',
'url': 'http://vtm.be/video?aid=163157',
'only_matching': True, 'only_matching': True,
}, { }, {
# vod 'url': 'https://embed.mychannels.video/script/production/193993',
'url': 'http://www.q2.be/video/volledige-afleveringen/id/2be_20170301_VM0684442_q2',
'only_matching': True, 'only_matching': True,
}, { }, {
# clip 'url': 'https://embed.mychannels.video/production/193993',
'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio',
'only_matching': True, 'only_matching': True,
}, { }, {
# http/s redirect 'url': 'https://mychannels.video/embed/193993',
'url': 'https://vtmkzoom.be/video?aid=45724', 'only_matching': True,
'info_dict': {
'id': '257136373657000',
'ext': 'mp4',
'title': 'K3 Dansstudio Ushuaia afl.6',
},
'params': {
'skip_download': True,
},
'skip': 'Requires account credentials',
}, { }, {
# nieuws.vtm.be 'url': 'https://embed.mychannels.video/embed/193993',
'url': 'https://nieuws.vtm.be/stadion/stadion/genk-nog-moeilijk-programma',
'only_matching': True, 'only_matching': True,
}] }]
def _real_initialize(self): @staticmethod
self._logged_in = False def _extract_urls(webpage):
entries = []
def _login(self): for element in re.findall(r'(<div[^>]+data-mychannels-type="video"[^>]*>)', webpage):
username, password = self._get_login_info() mychannels_id = extract_attributes(element).get('data-mychannels-id')
if username is None: if mychannels_id:
self.raise_login_required() entries.append('https://mychannels.video/embed/' + mychannels_id)
return entries
auth_data = {
'APIKey': self._APIKEY,
'sdk': 'js_6.1',
'format': 'json',
'loginID': username,
'password': password,
}
auth_info = self._gigya_login(auth_data)
self._uid = auth_info['UID']
self._uid_signature = auth_info['UIDSignature']
self._signature_timestamp = auth_info['signatureTimestamp']
self._logged_in = True
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) production_id = self._match_id(url)
video_id, site_id = mobj.group('id', 'site_id') production = self._download_json(
'https://embed.mychannels.video/sdk/production/' + production_id,
production_id, query={'options': 'UUUU_default'})['productions'][0]
title = production['title']
webpage = self._download_webpage(url, video_id) formats = []
for source in (production.get('sources') or []):
config = self._parse_json( src = source.get('src')
self._search_regex( if not src:
r'videoJSConfig\s*=\s*JSON\.parse\(\'({.+?})\'\);', continue
webpage, 'config', default='{}'), video_id, ext = mimetype2ext(source.get('type'))
transform_source=lambda s: s.replace( if ext == 'm3u8':
'\\\\', '\\').replace(r'\"', '"').replace(r"\'", "'")) formats.extend(self._extract_m3u8_formats(
src, production_id, 'mp4', 'm3u8_native',
vod_id = config.get('vodId') or self._search_regex( m3u8_id='hls', fatal=False))
(r'\\"vodId\\"\s*:\s*\\"(.+?)\\"',
r'"vodId"\s*:\s*"(.+?)"',
r'<[^>]+id=["\']vod-(\d+)'),
webpage, 'video_id', default=None)
# clip, no authentication required
if not vod_id:
player = self._parse_json(
self._search_regex(
r'vmmaplayer\(({.+?})\);', webpage, 'vmma player',
default=''),
video_id, transform_source=lambda s: '[%s]' % s, fatal=False)
if player:
video = player[-1]
if video['videoUrl'] in ('http', 'https'):
return self.url_result(video['url'], MedialaanIE.ie_key())
info = {
'id': video_id,
'url': video['videoUrl'],
'title': video['title'],
'thumbnail': video.get('imageUrl'),
'timestamp': int_or_none(video.get('createdDate')),
'duration': int_or_none(video.get('duration')),
}
else: else:
info = self._parse_html5_media_entries( formats.append({
url, webpage, video_id, m3u8_id='hls')[0] 'ext': ext,
info.update({ 'url': src,
'id': video_id,
'title': self._html_search_meta('description', webpage),
'duration': parse_duration(self._html_search_meta('duration', webpage)),
}) })
# vod, authentication required
else:
if not self._logged_in:
self._login()
settings = self._parse_json(
self._search_regex(
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
webpage, 'drupal settings', default='{}'),
video_id)
def get(container, item):
return try_get(
settings, lambda x: x[container][item],
compat_str) or self._search_regex(
r'"%s"\s*:\s*"([^"]+)' % item, webpage, item,
default=None)
app_id = get('vod', 'app_id') or self._SITE_TO_APP_ID.get(site_id, 'vtm_watch')
sso = get('vod', 'gigyaDatabase') or 'vtm-sso'
data = self._download_json(
'http://vod.medialaan.io/api/1.0/item/%s/video' % vod_id,
video_id, query={
'app_id': app_id,
'user_network': sso,
'UID': self._uid,
'UIDSignature': self._uid_signature,
'signatureTimestamp': self._signature_timestamp,
})
formats = self._extract_m3u8_formats(
data['response']['uri'], video_id, entry_protocol='m3u8_native',
ext='mp4', m3u8_id='hls')
self._sort_formats(formats) self._sort_formats(formats)
info = { return {
'id': vod_id, 'id': production_id,
'title': title,
'formats': formats, 'formats': formats,
'thumbnail': production.get('posterUrl'),
'timestamp': parse_iso8601(production.get('publicationDate'), ' '),
'duration': int_or_none(production.get('duration')) or None,
} }
api_key = get('vod', 'apiKey')
channel = get('medialaanGigya', 'channel')
if api_key:
videos = self._download_json(
'http://vod.medialaan.io/vod/v2/videos', video_id, fatal=False,
query={
'channels': channel,
'ids': vod_id,
'limit': 1,
'apikey': api_key,
})
if videos:
video = try_get(
videos, lambda x: x['response']['videos'][0], dict)
if video:
def get(container, item, expected_type=None):
return try_get(
video, lambda x: x[container][item], expected_type)
def get_string(container, item):
return get(container, item, compat_str)
info.update({
'series': get_string('program', 'title'),
'season': get_string('season', 'title'),
'season_number': int_or_none(get('season', 'number')),
'season_id': get_string('season', 'id'),
'episode': get_string('episode', 'title'),
'episode_number': int_or_none(get('episode', 'number')),
'episode_id': get_string('episode', 'id'),
'duration': int_or_none(
video.get('duration')) or int_or_none(
video.get('durationMillis'), scale=1000),
'title': get_string('episode', 'title'),
'description': get_string('episode', 'text'),
'timestamp': unified_timestamp(get_string(
'publication', 'begin')),
})
if not info.get('title'):
info['title'] = try_get(
config, lambda x: x['videoConfig']['title'],
compat_str) or self._html_search_regex(
r'\\"title\\"\s*:\s*\\"(.+?)\\"', webpage, 'title',
default=None) or self._og_search_title(webpage)
if not info.get('description'):
info['description'] = self._html_search_regex(
r'<div[^>]+class="field-item\s+even">\s*<p>(.+?)</p>',
webpage, 'description', default=None)
return info

View file

@ -22,11 +22,15 @@
orderedSet, orderedSet,
remove_quotes, remove_quotes,
str_to_int, str_to_int,
update_url_query,
urlencode_postdata,
url_or_none, url_or_none,
) )
class PornHubBaseIE(InfoExtractor): class PornHubBaseIE(InfoExtractor):
_NETRC_MACHINE = 'pornhub'
def _download_webpage_handle(self, *args, **kwargs): def _download_webpage_handle(self, *args, **kwargs):
def dl(*args, **kwargs): def dl(*args, **kwargs):
return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs) return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
@ -52,6 +56,66 @@ def dl(*args, **kwargs):
return webpage, urlh return webpage, urlh
# Initialise the login flag to False. _login() reads this flag to skip
# repeated logins and sets it to True once it confirms a logged-in session.
def _real_initialize(self):
self._logged_in = False
def _login(self, host):
if self._logged_in:
return
site = host.split('.')[0]
# Both sites pornhub and pornhubpremium have separate accounts
# so there should be an option to provide credentials for both.
# At the same time some videos are available under the same video id
# on both sites so that we have to identify them as the same video.
# For that purpose we have to keep both in the same extractor
# but under different netrc machines.
username, password = self._get_login_info(netrc_machine=site)
if username is None:
return
login_url = 'https://www.%s/%slogin' % (host, 'premium/' if 'premium' in host else '')
login_page = self._download_webpage(
login_url, None, 'Downloading %s login page' % site)
def is_logged(webpage):
return any(re.search(p, webpage) for p in (
r'class=["\']signOut',
r'>Sign\s+[Oo]ut\s*<'))
if is_logged(login_page):
self._logged_in = True
return
login_form = self._hidden_inputs(login_page)
login_form.update({
'username': username,
'password': password,
})
response = self._download_json(
'https://www.%s/front/authenticate' % host, None,
'Logging in to %s' % site,
data=urlencode_postdata(login_form),
headers={
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
'Referer': login_url,
'X-Requested-With': 'XMLHttpRequest',
})
if response.get('success') == '1':
self._logged_in = True
return
message = response.get('message')
if message is not None:
raise ExtractorError(
'Unable to login: %s' % message, expected=True)
raise ExtractorError('Unable to log in')
class PornHubIE(PornHubBaseIE): class PornHubIE(PornHubBaseIE):
IE_DESC = 'PornHub and Thumbzilla' IE_DESC = 'PornHub and Thumbzilla'
@ -163,12 +227,20 @@ class PornHubIE(PornHubBaseIE):
}, { }, {
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82', 'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
'only_matching': True, 'only_matching': True,
}, {
# Some videos are available with the same id on both premium
# and non-premium sites (e.g. this and the following test)
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5f75b0f4b18e3',
'only_matching': True,
}, {
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
'only_matching': True,
}] }]
@staticmethod @staticmethod
def _extract_urls(webpage): def _extract_urls(webpage):
return re.findall( return re.findall(
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net|org)/embed/[\da-z]+)', r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)',
webpage) webpage)
def _extract_count(self, pattern, webpage, name): def _extract_count(self, pattern, webpage, name):
@ -180,12 +252,7 @@ def _real_extract(self, url):
host = mobj.group('host') or 'pornhub.com' host = mobj.group('host') or 'pornhub.com'
video_id = mobj.group('id') video_id = mobj.group('id')
if 'premium' in host: self._login(host)
if not self._downloader.params.get('cookiefile'):
raise ExtractorError(
'PornHub Premium requires authentication.'
' You may want to use --cookies.',
expected=True)
self._set_cookie(host, 'age_verified', '1') self._set_cookie(host, 'age_verified', '1')
@ -405,6 +472,10 @@ def extract_list(meta_key):
class PornHubPlaylistBaseIE(PornHubBaseIE): class PornHubPlaylistBaseIE(PornHubBaseIE):
def _extract_page(self, url):
    """Extract the number following ``page=`` in *url*.

    The regex lookup defaults to None when *url* has no ``page=<digits>``
    part; the match is passed through ``int_or_none`` before returning.
    """
    page_match = self._search_regex(
        r'\bpage=(\d+)', url, 'page', default=None)
    return int_or_none(page_match)
def _extract_entries(self, webpage, host): def _extract_entries(self, webpage, host):
# Only process container div with main playlist content skipping # Only process container div with main playlist content skipping
# drop-down menu that uses similar pattern for videos (see # drop-down menu that uses similar pattern for videos (see
@ -422,26 +493,6 @@ def _extract_entries(self, webpage, host):
container)) container))
] ]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
host = mobj.group('host')
playlist_id = mobj.group('id')
webpage = self._download_webpage(url, playlist_id)
entries = self._extract_entries(webpage, host)
playlist = self._parse_json(
self._search_regex(
r'(?:playlistObject|PLAYLIST_VIEW)\s*=\s*({.+?});', webpage,
'playlist', default='{}'),
playlist_id, fatal=False)
title = playlist.get('title') or self._search_regex(
r'>Videos\s+in\s+(.+?)\s+[Pp]laylist<', webpage, 'title', fatal=False)
return self.playlist_result(
entries, playlist_id, title, playlist.get('description'))
class PornHubUserIE(PornHubPlaylistBaseIE): class PornHubUserIE(PornHubPlaylistBaseIE):
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
@ -463,14 +514,27 @@ class PornHubUserIE(PornHubPlaylistBaseIE):
}, { }, {
'url': 'https://www.pornhub.com/model/zoe_ph?abc=1', 'url': 'https://www.pornhub.com/model/zoe_ph?abc=1',
'only_matching': True, 'only_matching': True,
}, {
# Unavailable via /videos page, but available with direct pagination
# on pornstar page (see [1]), requires premium
# 1. https://github.com/ytdl-org/youtube-dl/issues/27853
'url': 'https://www.pornhubpremium.com/pornstar/sienna-west',
'only_matching': True,
}, {
# Same as before, multi page
'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
user_id = mobj.group('id') user_id = mobj.group('id')
videos_url = '%s/videos' % mobj.group('url')
page = self._extract_page(url)
if page:
videos_url = update_url_query(videos_url, {'page': page})
return self.url_result( return self.url_result(
'%s/videos' % mobj.group('url'), ie=PornHubPagedVideoListIE.ie_key(), videos_url, ie=PornHubPagedVideoListIE.ie_key(), video_id=user_id)
video_id=user_id)
class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
@ -483,32 +547,55 @@ def _has_more(webpage):
<button[^>]+\bid=["\']moreDataBtn <button[^>]+\bid=["\']moreDataBtn
''', webpage) is not None ''', webpage) is not None
def _real_extract(self, url): def _entries(self, url, host, item_id):
mobj = re.match(self._VALID_URL, url) page = self._extract_page(url)
host = mobj.group('host')
item_id = mobj.group('id')
page = int_or_none(self._search_regex( VIDEOS = '/videos'
r'\bpage=(\d+)', url, 'page', default=None))
entries = [] def download_page(base_url, num, fallback=False):
for page_num in (page, ) if page is not None else itertools.count(1): note = 'Downloading page %d%s' % (num, ' (switch to fallback)' if fallback else '')
return self._download_webpage(
base_url, item_id, note, query={'page': num})
def is_404(e):
return isinstance(e.cause, compat_HTTPError) and e.cause.code == 404
base_url = url
has_page = page is not None
first_page = page if has_page else 1
for page_num in (first_page, ) if has_page else itertools.count(first_page):
try: try:
webpage = self._download_webpage( try:
url, item_id, 'Downloading page %d' % page_num, webpage = download_page(base_url, page_num)
query={'page': page_num})
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: # Some sources may not be available via /videos page,
# trying to fallback to main page pagination (see [1])
# 1. https://github.com/ytdl-org/youtube-dl/issues/27853
if is_404(e) and page_num == first_page and VIDEOS in base_url:
base_url = base_url.replace(VIDEOS, '')
webpage = download_page(base_url, page_num, fallback=True)
else:
raise
except ExtractorError as e:
if is_404(e) and page_num != first_page:
break break
raise raise
page_entries = self._extract_entries(webpage, host) page_entries = self._extract_entries(webpage, host)
if not page_entries: if not page_entries:
break break
entries.extend(page_entries) for e in page_entries:
yield e
if not self._has_more(webpage): if not self._has_more(webpage):
break break
return self.playlist_result(orderedSet(entries), item_id) def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
host = mobj.group('host')
item_id = mobj.group('id')
self._login(host)
return self.playlist_result(self._entries(url, host, item_id), item_id)
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):

View file

@ -255,8 +255,10 @@ def _real_extract(self, url):
svt_id = self._search_regex( svt_id = self._search_regex(
(r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)', (r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
r'["\']videoSvtId["\']\s*:\s*["\']([\da-zA-Z-]+)', r'["\']videoSvtId["\']\s*:\s*["\']([\da-zA-Z-]+)',
r'["\']videoSvtId\\?["\']\s*:\s*\\?["\']([\da-zA-Z-]+)',
r'"content"\s*:\s*{.*?"id"\s*:\s*"([\da-zA-Z-]+)"', r'"content"\s*:\s*{.*?"id"\s*:\s*"([\da-zA-Z-]+)"',
r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'), r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)',
r'["\']svtId\\?["\']\s*:\s*\\?["\']([\da-zA-Z-]+)'),
webpage, 'video id') webpage, 'video id')
info_dict = self._extract_by_video_id(svt_id, webpage) info_dict = self._extract_by_video_id(svt_id, webpage)

View file

@ -20,7 +20,7 @@
class TV2IE(InfoExtractor): class TV2IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tv2\.no/v/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
_TEST = { _TESTS = [{
'url': 'http://www.tv2.no/v/916509/', 'url': 'http://www.tv2.no/v/916509/',
'info_dict': { 'info_dict': {
'id': '916509', 'id': '916509',
@ -33,7 +33,7 @@ class TV2IE(InfoExtractor):
'view_count': int, 'view_count': int,
'categories': list, 'categories': list,
}, },
} }]
_API_DOMAIN = 'sumo.tv2.no' _API_DOMAIN = 'sumo.tv2.no'
_PROTOCOLS = ('HDS', 'HLS', 'DASH') _PROTOCOLS = ('HDS', 'HLS', 'DASH')
_GEO_COUNTRIES = ['NO'] _GEO_COUNTRIES = ['NO']
@ -42,6 +42,12 @@ def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
api_base = 'http://%s/api/web/asset/%s' % (self._API_DOMAIN, video_id) api_base = 'http://%s/api/web/asset/%s' % (self._API_DOMAIN, video_id)
asset = self._download_json(
api_base + '.json', video_id,
'Downloading metadata JSON')['asset']
title = asset.get('subtitle') or asset['title']
is_live = asset.get('live') is True
formats = [] formats = []
format_urls = [] format_urls = []
for protocol in self._PROTOCOLS: for protocol in self._PROTOCOLS:
@ -81,7 +87,8 @@ def _real_extract(self, url):
elif ext == 'm3u8': elif ext == 'm3u8':
if not data.get('drmProtected'): if not data.get('drmProtected'):
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native', video_url, video_id, 'mp4',
'm3u8' if is_live else 'm3u8_native',
m3u8_id=format_id, fatal=False)) m3u8_id=format_id, fatal=False))
elif ext == 'mpd': elif ext == 'mpd':
formats.extend(self._extract_mpd_formats( formats.extend(self._extract_mpd_formats(
@ -99,11 +106,6 @@ def _real_extract(self, url):
raise ExtractorError('This video is DRM protected.', expected=True) raise ExtractorError('This video is DRM protected.', expected=True)
self._sort_formats(formats) self._sort_formats(formats)
asset = self._download_json(
api_base + '.json', video_id,
'Downloading metadata JSON')['asset']
title = asset['title']
thumbnails = [{ thumbnails = [{
'id': thumbnail.get('@type'), 'id': thumbnail.get('@type'),
'url': thumbnail.get('url'), 'url': thumbnail.get('url'),
@ -112,7 +114,7 @@ def _real_extract(self, url):
return { return {
'id': video_id, 'id': video_id,
'url': video_url, 'url': video_url,
'title': title, 'title': self._live_title(title) if is_live else title,
'description': strip_or_none(asset.get('description')), 'description': strip_or_none(asset.get('description')),
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'timestamp': parse_iso8601(asset.get('createTime')), 'timestamp': parse_iso8601(asset.get('createTime')),
@ -120,6 +122,7 @@ def _real_extract(self, url):
'view_count': int_or_none(asset.get('views')), 'view_count': int_or_none(asset.get('views')),
'categories': asset.get('keywords', '').split(','), 'categories': asset.get('keywords', '').split(','),
'formats': formats, 'formats': formats,
'is_live': is_live,
} }
@ -168,13 +171,13 @@ def _real_extract(self, url):
class KatsomoIE(TV2IE): class KatsomoIE(TV2IE):
_VALID_URL = r'https?://(?:www\.)?(?:katsomo|mtv)\.fi/(?:#!/)?(?:[^/]+/[0-9a-z-]+-\d+/[0-9a-z-]+-|[^/]+/\d+/[^/]+/)(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?(?:katsomo|mtv(uutiset)?)\.fi/(?:sarja/[0-9a-z-]+-\d+/[0-9a-z-]+-|(?:#!/)?jakso/(?:\d+/[^/]+/)?|video/prog)(?P<id>\d+)'
_TEST = { _TESTS = [{
'url': 'https://www.mtv.fi/sarja/mtv-uutiset-live-33001002003/lahden-pelicans-teki-kovan-ratkaisun-ville-nieminen-pihalle-1181321', 'url': 'https://www.mtv.fi/sarja/mtv-uutiset-live-33001002003/lahden-pelicans-teki-kovan-ratkaisun-ville-nieminen-pihalle-1181321',
'info_dict': { 'info_dict': {
'id': '1181321', 'id': '1181321',
'ext': 'mp4', 'ext': 'mp4',
'title': 'MTV Uutiset Live', 'title': 'Lahden Pelicans teki kovan ratkaisun Ville Nieminen pihalle',
'description': 'Päätöksen teki Pelicansin hallitus.', 'description': 'Päätöksen teki Pelicansin hallitus.',
'timestamp': 1575116484, 'timestamp': 1575116484,
'upload_date': '20191130', 'upload_date': '20191130',
@ -186,7 +189,60 @@ class KatsomoIE(TV2IE):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
} }, {
'url': 'http://www.katsomo.fi/#!/jakso/33001005/studio55-fi/658521/jukka-kuoppamaki-tekee-yha-lauluja-vaikka-lentokoneessa',
'only_matching': True,
}, {
'url': 'https://www.mtvuutiset.fi/video/prog1311159',
'only_matching': True,
}, {
'url': 'https://www.katsomo.fi/#!/jakso/1311159',
'only_matching': True,
}]
_API_DOMAIN = 'api.katsomo.fi' _API_DOMAIN = 'api.katsomo.fi'
_PROTOCOLS = ('HLS', 'MPD') _PROTOCOLS = ('HLS', 'MPD')
_GEO_COUNTRIES = ['FI'] _GEO_COUNTRIES = ['FI']
class MTVUutisetArticleIE(InfoExtractor):
    # Article pages on mtvuutiset.fi. The videos embedded in an article are
    # listed by the article JSON API and each one is delegated to the
    # extractor named after its type (Katsomo or Youtube).
    #
    # The "www." prefix is optional, so bare-domain article links match too.
    _VALID_URL = r'https?://(?:www\.)?mtvuutiset\.fi/artikkeli/[^/]+/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.mtvuutiset.fi/artikkeli/tallaisia-vaurioita-viking-amorellassa-on-useamman-osaston-alla-vetta/7931384',
        'info_dict': {
            'id': '1311159',
            'ext': 'mp4',
            'title': 'Viking Amorellan matkustajien evakuointi on alkanut tältä operaatio näyttää laivalla',
            'description': 'Viking Amorellan matkustajien evakuointi on alkanut tältä operaatio näyttää laivalla',
            'timestamp': 1600608966,
            'upload_date': '20200920',
            'duration': 153.7886666,
            'view_count': int,
            'categories': list,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # multiple Youtube embeds
        'url': 'https://www.mtvuutiset.fi/artikkeli/50-vuotta-subarun-vastaiskua/6070962',
        'only_matching': True,
    }, {
        # no "www." prefix
        'url': 'https://mtvuutiset.fi/artikkeli/50-vuotta-subarun-vastaiskua/6070962',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        article_id = self._match_id(url)
        article = self._download_json(
            'http://api.mtvuutiset.fi/mtvuutiset/api/json/' + article_id,
            article_id)

        def entries():
            # Lazily yield one url result per supported embedded video.
            for video in (article.get('videos') or []):
                video_type = video.get('videotype')
                video_url = video.get('url')
                # Skip embeds without a URL or of any other type.
                if not (video_url and video_type in ('katsomo', 'youtube')):
                    continue
                # 'katsomo' -> 'Katsomo', 'youtube' -> 'Youtube': the
                # capitalized type is passed as the extractor key.
                yield self.url_result(
                    video_url, video_type.capitalize(), video.get('video_id'))

        return self.playlist_result(
            entries(), article_id, article.get('title'), article.get('description'))

View file

@ -17,7 +17,7 @@ class TV4IE(InfoExtractor):
tv4\.se/(?:[^/]+)/klipp/(?:.*)-| tv4\.se/(?:[^/]+)/klipp/(?:.*)-|
tv4play\.se/ tv4play\.se/
(?: (?:
(?:program|barn)/(?:[^/]+/|(?:[^\?]+)\?video_id=)| (?:program|barn)/(?:(?:[^/]+/){1,2}|(?:[^\?]+)\?video_id=)|
iframe/video/| iframe/video/|
film/| film/|
sport/| sport/|
@ -65,6 +65,10 @@ class TV4IE(InfoExtractor):
{ {
'url': 'http://www.tv4play.se/program/farang/3922081', 'url': 'http://www.tv4play.se/program/farang/3922081',
'only_matching': True, 'only_matching': True,
},
{
'url': 'https://www.tv4play.se/program/nyheterna/avsnitt/13315940',
'only_matching': True,
} }
] ]

View file

@ -4,7 +4,13 @@
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none from ..utils import (
int_or_none,
parse_iso8601,
str_or_none,
strip_or_none,
try_get,
)
class VidioIE(InfoExtractor): class VidioIE(InfoExtractor):
@ -21,57 +27,63 @@ class VidioIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 149, 'duration': 149,
'like_count': int, 'like_count': int,
'uploader': 'TWELVE Pic',
'timestamp': 1444902800,
'upload_date': '20151015',
'uploader_id': 'twelvepictures',
'channel': 'Cover Music Video',
'channel_id': '280236',
'view_count': int,
'dislike_count': int,
'comment_count': int,
'tags': 'count:4',
}, },
}, { }, {
'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north', 'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north',
'only_matching': True, 'only_matching': True,
}] }]
def _real_initialize(self):
self._api_key = self._download_json(
'https://www.vidio.com/auth', None, data=b'')['api_key']
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id, display_id = re.match(self._VALID_URL, url).groups()
video_id, display_id = mobj.group('id', 'display_id') data = self._download_json(
'https://api.vidio.com/videos/' + video_id, display_id, headers={
'Content-Type': 'application/vnd.api+json',
'X-API-KEY': self._api_key,
})
video = data['videos'][0]
title = video['title'].strip()
webpage = self._download_webpage(url, display_id)
title = self._og_search_title(webpage)
m3u8_url, duration, thumbnail = [None] * 3
clips = self._parse_json(
self._html_search_regex(
r'data-json-clips\s*=\s*(["\'])(?P<data>\[.+?\])\1',
webpage, 'video data', default='[]', group='data'),
display_id, fatal=False)
if clips:
clip = clips[0]
m3u8_url = clip.get('sources', [{}])[0].get('file')
duration = clip.get('clip_duration')
thumbnail = clip.get('image')
m3u8_url = m3u8_url or self._search_regex(
r'data(?:-vjs)?-clip-hls-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
webpage, 'hls url', group='url')
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native') data['clips'][0]['hls_url'], display_id, 'mp4', 'm3u8_native')
self._sort_formats(formats) self._sort_formats(formats)
duration = int_or_none(duration or self._search_regex( get_first = lambda x: try_get(data, lambda y: y[x + 's'][0], dict) or {}
r'data-video-duration=(["\'])(?P<duration>\d+)\1', webpage, channel = get_first('channel')
'duration', fatal=False, group='duration')) user = get_first('user')
thumbnail = thumbnail or self._og_search_thumbnail(webpage) username = user.get('username')
get_count = lambda x: int_or_none(video.get('total_' + x))
like_count = int_or_none(self._search_regex(
(r'<span[^>]+data-comment-vote-count=["\'](\d+)',
r'<span[^>]+class=["\'].*?\blike(?:__|-)count\b.*?["\'][^>]*>\s*(\d+)'),
webpage, 'like count', fatal=False))
return { return {
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'title': title, 'title': title,
'description': self._og_search_description(webpage), 'description': strip_or_none(video.get('description')),
'thumbnail': thumbnail, 'thumbnail': video.get('image_url_medium'),
'duration': duration, 'duration': int_or_none(video.get('duration')),
'like_count': like_count, 'like_count': get_count('likes'),
'formats': formats, 'formats': formats,
'uploader': user.get('name'),
'timestamp': parse_iso8601(video.get('created_at')),
'uploader_id': username,
'uploader_url': 'https://www.vidio.com/@' + username if username else None,
'channel': channel.get('name'),
'channel_id': str_or_none(channel.get('id')),
'view_count': get_count('view_count'),
'dislike_count': get_count('dislikes'),
'comment_count': get_count('comments'),
'tags': video.get('tag_list'),
} }

View file

@ -125,7 +125,7 @@ def _call_api(self, path_template, video_id, fields=None, limit=None):
headers={'Referer': 'https://www.vlive.tv/'}, query=query) headers={'Referer': 'https://www.vlive.tv/'}, query=query)
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
self.raise_login_required(json.loads(e.cause.read().decode())['message']) self.raise_login_required(json.loads(e.cause.read().decode('utf-8'))['message'])
raise raise
def _real_extract(self, url): def _real_extract(self, url):

View file

@ -0,0 +1,62 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
try_get,
)
class VTMIE(InfoExtractor):
    # vtm.be video pages: the URL ends in "~v<uuid>". The UUID is looked up
    # through a GraphQL endpoint and the result is handed over to the
    # Medialaan extractor via a mychannels.video embed URL.
    _VALID_URL = r'https?://(?:www\.)?vtm\.be/([^/?&#]+)~v(?P<id>[0-9a-f]{8}(?:-[0-9a-f]{4}){3}-[0-9a-f]{12})'
    _TEST = {
        'url': 'https://vtm.be/gast-vernielt-genkse-hotelkamer~ve7534523-279f-4b4d-a5c9-a33ffdbe23e1',
        'md5': '37dca85fbc3a33f2de28ceb834b071f8',
        'info_dict': {
            'id': '192445',
            'ext': 'mp4',
            'title': 'Gast vernielt Genkse hotelkamer',
            'timestamp': 1611060180,
            'upload_date': '20210119',
            'duration': 74,
            # TODO: fix url _type result processing
            # 'series': 'Op Interventie',
        }
    }

    def _real_extract(self, url):
        video_uuid = self._match_id(url)

        # GraphQL document asking for the Video component with this UUID.
        graphql_query = '''{
getComponent(type: Video, uuid: "%s") {
... on Video {
description
duration
myChannelsVideo
program {
title
}
publishedAt
title
}
}
}''' % video_uuid

        response = self._download_json(
            'https://omc4vm23offuhaxx6hekxtzspi.appsync-api.eu-west-1.amazonaws.com/graphql',
            video_uuid,
            query={'query': graphql_query},
            headers={'x-api-key': 'da2-lz2cab4tfnah3mve6wiye4n77e'})
        component = response['data']['getComponent']

        # A plain 'url' result: extraction is delegated to the Medialaan
        # extractor through the embed URL built from myChannelsVideo.
        return {
            '_type': 'url',
            'id': video_uuid,
            'title': component.get('title'),
            'url': 'http://mychannels.video/embed/%d' % component['myChannelsVideo'],
            'description': component.get('description'),
            'timestamp': parse_iso8601(component.get('publishedAt')),
            'duration': int_or_none(component.get('duration')),
            'series': try_get(component, lambda x: x['program']['title']),
            'ie_key': 'Medialaan',
        }

View file

@ -4,6 +4,7 @@
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
@ -47,6 +48,22 @@ class VVVVIDIE(InfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, {
# video_type == 'video/youtube'
'url': 'https://www.vvvvid.it/show/404/one-punch-man/406/486683/trailer',
'md5': '33e0edfba720ad73a8782157fdebc648',
'info_dict': {
'id': 'RzmFKUDOUgw',
'ext': 'mp4',
'title': 'Trailer',
'upload_date': '20150906',
'description': 'md5:a5e802558d35247fee285875328c0b80',
'uploader_id': 'BandaiVisual',
'uploader': 'BANDAI NAMCO Arts Channel',
},
'params': {
'skip_download': True,
},
}, { }, {
'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048', 'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048',
'only_matching': True 'only_matching': True
@ -154,12 +171,13 @@ def metadata_from_url(r_url):
if season_number: if season_number:
info['season_number'] = int(season_number) info['season_number'] = int(season_number)
for quality in ('_sd', ''): video_type = video_data.get('video_type')
is_youtube = False
for quality in ('', '_sd'):
embed_code = video_data.get('embed_info' + quality) embed_code = video_data.get('embed_info' + quality)
if not embed_code: if not embed_code:
continue continue
embed_code = ds(embed_code) embed_code = ds(embed_code)
video_type = video_data.get('video_type')
if video_type in ('video/rcs', 'video/kenc'): if video_type in ('video/rcs', 'video/kenc'):
if video_type == 'video/kenc': if video_type == 'video/kenc':
kenc = self._download_json( kenc = self._download_json(
@ -172,19 +190,28 @@ def metadata_from_url(r_url):
if kenc_message: if kenc_message:
embed_code += '?' + ds(kenc_message) embed_code += '?' + ds(kenc_message)
formats.extend(self._extract_akamai_formats(embed_code, video_id)) formats.extend(self._extract_akamai_formats(embed_code, video_id))
elif video_type == 'video/youtube':
info.update({
'_type': 'url_transparent',
'ie_key': YoutubeIE.ie_key(),
'url': embed_code,
})
is_youtube = True
break
else: else:
formats.extend(self._extract_wowza_formats( formats.extend(self._extract_wowza_formats(
'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id)) 'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))
metadata_from_url(embed_code) metadata_from_url(embed_code)
if not is_youtube:
self._sort_formats(formats) self._sort_formats(formats)
info['formats'] = formats
metadata_from_url(video_data.get('thumbnail')) metadata_from_url(video_data.get('thumbnail'))
info.update(self._extract_common_video_info(video_data)) info.update(self._extract_common_video_info(video_data))
info.update({ info.update({
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'formats': formats,
'duration': int_or_none(video_data.get('length')), 'duration': int_or_none(video_data.get('length')),
'series': video_data.get('show_title'), 'series': video_data.get('show_title'),
'season_id': season_id, 'season_id': season_id,

View file

@ -87,11 +87,16 @@ def _real_extract(self, url):
r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1', r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1',
body, 'm3u8 url', group='url', default=None) body, 'm3u8 url', group='url', default=None)
if not m3u8_url: if not m3u8_url:
source = self._parse_json(self._search_regex( source = self._search_regex(
r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', body, r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', body, 'source')
'source'), video_id, js_to_json)
if source.get('integration') == 'verizon-media': def get_attr(key):
m3u8_url = 'https://content.uplynk.com/%s.m3u8' % source['id'] return self._search_regex(
r'\b%s\s*:\s*([\'"])(?P<val>(?:(?!\1).)+)\1' % key,
source, key, group='val')
if get_attr('integration') == 'verizon-media':
m3u8_url = 'https://content.uplynk.com/%s.m3u8' % get_attr('id')
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls') m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
text_tracks = self._search_regex( text_tracks = self._search_regex(