mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-30 12:01:28 +00:00
Update to ytdl-2021.02.04.1 except youtube
This commit is contained in:
parent
e29663c644
commit
2181983a0c
|
@ -1,14 +1,15 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import calendar
|
|
||||||
import re
|
import re
|
||||||
import time
|
|
||||||
|
|
||||||
from .amp import AMPIE
|
from .amp import AMPIE
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .youtube import YoutubeIE
|
from ..utils import (
|
||||||
from ..compat import compat_urlparse
|
parse_duration,
|
||||||
|
parse_iso8601,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class AbcNewsVideoIE(AMPIE):
|
class AbcNewsVideoIE(AMPIE):
|
||||||
|
@ -18,8 +19,8 @@ class AbcNewsVideoIE(AMPIE):
|
||||||
(?:
|
(?:
|
||||||
abcnews\.go\.com/
|
abcnews\.go\.com/
|
||||||
(?:
|
(?:
|
||||||
[^/]+/video/(?P<display_id>[0-9a-z-]+)-|
|
(?:[^/]+/)*video/(?P<display_id>[0-9a-z-]+)-|
|
||||||
video/embed\?.*?\bid=
|
video/(?:embed|itemfeed)\?.*?\bid=
|
||||||
)|
|
)|
|
||||||
fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
|
fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
|
||||||
)
|
)
|
||||||
|
@ -36,6 +37,8 @@ class AbcNewsVideoIE(AMPIE):
|
||||||
'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
|
'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
|
||||||
'duration': 180,
|
'duration': 180,
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'timestamp': 1380454200,
|
||||||
|
'upload_date': '20130929',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
|
@ -47,6 +50,12 @@ class AbcNewsVideoIE(AMPIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
|
'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://abcnews.go.com/video/itemfeed?id=46979033',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://abcnews.go.com/GMA/News/video/history-christmas-story-67894761',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -67,28 +76,23 @@ class AbcNewsIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
|
_VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
|
# Youtube Embeds
|
||||||
|
'url': 'https://abcnews.go.com/Entertainment/peter-billingsley-child-actor-christmas-story-hollywood-power/story?id=51286501',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '10505354',
|
'id': '51286501',
|
||||||
'ext': 'flv',
|
'title': "Peter Billingsley: From child actor in 'A Christmas Story' to Hollywood power player",
|
||||||
'display_id': 'dramatic-video-rare-death-job-america',
|
'description': 'Billingsley went from a child actor to Hollywood power player.',
|
||||||
'title': 'Occupational Hazards',
|
|
||||||
'description': 'Nightline investigates the dangers that lurk at various jobs.',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'upload_date': '20100428',
|
|
||||||
'timestamp': 1272412800,
|
|
||||||
},
|
},
|
||||||
'add_ie': ['AbcNewsVideo'],
|
'playlist_count': 5,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
|
'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '38897857',
|
'id': '38897857',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
|
|
||||||
'title': 'Justin Timberlake Drops Hints For Secret Single',
|
'title': 'Justin Timberlake Drops Hints For Secret Single',
|
||||||
'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
|
'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
|
||||||
'upload_date': '20160515',
|
'upload_date': '20160505',
|
||||||
'timestamp': 1463329500,
|
'timestamp': 1462442280,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
|
@ -100,49 +104,55 @@ class AbcNewsIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
|
'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# inline.type == 'video'
|
||||||
|
'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
story_id = self._match_id(url)
|
||||||
display_id = mobj.group('display_id')
|
webpage = self._download_webpage(url, story_id)
|
||||||
video_id = mobj.group('id')
|
story = self._parse_json(self._search_regex(
|
||||||
|
r"window\['__abcnews__'\]\s*=\s*({.+?});",
|
||||||
|
webpage, 'data'), story_id)['page']['content']['story']['everscroll'][0]
|
||||||
|
article_contents = story.get('articleContents') or {}
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
def entries():
|
||||||
video_url = self._search_regex(
|
featured_video = story.get('featuredVideo') or {}
|
||||||
r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
|
feed = try_get(featured_video, lambda x: x['video']['feed'])
|
||||||
full_video_url = compat_urlparse.urljoin(url, video_url)
|
if feed:
|
||||||
|
yield {
|
||||||
|
'_type': 'url',
|
||||||
|
'id': featured_video.get('id'),
|
||||||
|
'title': featured_video.get('name'),
|
||||||
|
'url': feed,
|
||||||
|
'thumbnail': featured_video.get('images'),
|
||||||
|
'description': featured_video.get('description'),
|
||||||
|
'timestamp': parse_iso8601(featured_video.get('uploadDate')),
|
||||||
|
'duration': parse_duration(featured_video.get('duration')),
|
||||||
|
'ie_key': AbcNewsVideoIE.ie_key(),
|
||||||
|
}
|
||||||
|
|
||||||
youtube_url = YoutubeIE._extract_url(webpage)
|
for inline in (article_contents.get('inlines') or []):
|
||||||
|
inline_type = inline.get('type')
|
||||||
|
if inline_type == 'iframe':
|
||||||
|
iframe_url = try_get(inline, lambda x: x['attrs']['src'])
|
||||||
|
if iframe_url:
|
||||||
|
yield self.url_result(iframe_url)
|
||||||
|
elif inline_type == 'video':
|
||||||
|
video_id = inline.get('id')
|
||||||
|
if video_id:
|
||||||
|
yield {
|
||||||
|
'_type': 'url',
|
||||||
|
'id': video_id,
|
||||||
|
'url': 'http://abcnews.go.com/video/embed?id=' + video_id,
|
||||||
|
'thumbnail': inline.get('imgSrc') or inline.get('imgDefault'),
|
||||||
|
'description': inline.get('description'),
|
||||||
|
'duration': parse_duration(inline.get('duration')),
|
||||||
|
'ie_key': AbcNewsVideoIE.ie_key(),
|
||||||
|
}
|
||||||
|
|
||||||
timestamp = None
|
return self.playlist_result(
|
||||||
date_str = self._html_search_regex(
|
entries(), story_id, article_contents.get('headline'),
|
||||||
r'<span[^>]+class="timestamp">([^<]+)</span>',
|
article_contents.get('subHead'))
|
||||||
webpage, 'timestamp', fatal=False)
|
|
||||||
if date_str:
|
|
||||||
tz_offset = 0
|
|
||||||
if date_str.endswith(' ET'): # Eastern Time
|
|
||||||
tz_offset = -5
|
|
||||||
date_str = date_str[:-3]
|
|
||||||
date_formats = ['%b. %d, %Y', '%b %d, %Y, %I:%M %p']
|
|
||||||
for date_format in date_formats:
|
|
||||||
try:
|
|
||||||
timestamp = calendar.timegm(time.strptime(date_str.strip(), date_format))
|
|
||||||
except ValueError:
|
|
||||||
continue
|
|
||||||
if timestamp is not None:
|
|
||||||
timestamp -= tz_offset * 3600
|
|
||||||
|
|
||||||
entry = {
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'ie_key': AbcNewsVideoIE.ie_key(),
|
|
||||||
'url': full_video_url,
|
|
||||||
'id': video_id,
|
|
||||||
'display_id': display_id,
|
|
||||||
'timestamp': timestamp,
|
|
||||||
}
|
|
||||||
|
|
||||||
if youtube_url:
|
|
||||||
entries = [entry, self.url_result(youtube_url, ie=YoutubeIE.ie_key())]
|
|
||||||
return self.playlist_result(entries)
|
|
||||||
|
|
||||||
return entry
|
|
||||||
|
|
|
@ -26,6 +26,7 @@
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -51,9 +52,12 @@ class ADNIE(InfoExtractor):
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_NETRC_MACHINE = 'animedigitalnetwork'
|
||||||
_BASE_URL = 'http://animedigitalnetwork.fr'
|
_BASE_URL = 'http://animedigitalnetwork.fr'
|
||||||
_API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
|
_API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
|
||||||
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
|
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
|
||||||
|
_HEADERS = {}
|
||||||
|
_LOGIN_ERR_MESSAGE = 'Unable to log in'
|
||||||
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
|
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
|
||||||
_POS_ALIGN_MAP = {
|
_POS_ALIGN_MAP = {
|
||||||
'start': 1,
|
'start': 1,
|
||||||
|
@ -129,19 +133,42 @@ def _get_subtitles(self, sub_url, video_id):
|
||||||
}])
|
}])
|
||||||
return subtitles
|
return subtitles
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
username, password = self._get_login_info()
|
||||||
|
if not username:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
access_token = (self._download_json(
|
||||||
|
self._API_BASE_URL + 'authentication/login', None,
|
||||||
|
'Logging in', self._LOGIN_ERR_MESSAGE, fatal=False,
|
||||||
|
data=urlencode_postdata({
|
||||||
|
'password': password,
|
||||||
|
'rememberMe': False,
|
||||||
|
'source': 'Web',
|
||||||
|
'username': username,
|
||||||
|
})) or {}).get('accessToken')
|
||||||
|
if access_token:
|
||||||
|
self._HEADERS = {'authorization': 'Bearer ' + access_token}
|
||||||
|
except ExtractorError as e:
|
||||||
|
message = None
|
||||||
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||||
|
resp = self._parse_json(
|
||||||
|
e.cause.read().decode(), None, fatal=False) or {}
|
||||||
|
message = resp.get('message') or resp.get('code')
|
||||||
|
self.report_warning(message or self._LOGIN_ERR_MESSAGE)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
|
video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
|
||||||
player = self._download_json(
|
player = self._download_json(
|
||||||
video_base_url + 'configuration', video_id,
|
video_base_url + 'configuration', video_id,
|
||||||
'Downloading player config JSON metadata')['player']
|
'Downloading player config JSON metadata',
|
||||||
|
headers=self._HEADERS)['player']
|
||||||
options = player['options']
|
options = player['options']
|
||||||
|
|
||||||
user = options['user']
|
user = options['user']
|
||||||
if not user.get('hasAccess'):
|
if not user.get('hasAccess'):
|
||||||
raise ExtractorError(
|
self.raise_login_required()
|
||||||
'This video is only available for paying users', expected=True)
|
|
||||||
# self.raise_login_required() # FIXME: Login is not implemented
|
|
||||||
|
|
||||||
token = self._download_json(
|
token = self._download_json(
|
||||||
user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
|
user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
|
||||||
|
@ -188,8 +215,7 @@ def _real_extract(self, url):
|
||||||
message = error.get('message')
|
message = error.get('message')
|
||||||
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
|
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
|
||||||
self.raise_geo_restricted(msg=message)
|
self.raise_geo_restricted(msg=message)
|
||||||
else:
|
raise ExtractorError(message)
|
||||||
raise ExtractorError(message)
|
|
||||||
else:
|
else:
|
||||||
raise ExtractorError('Giving up retrying')
|
raise ExtractorError('Giving up retrying')
|
||||||
|
|
||||||
|
|
|
@ -252,7 +252,7 @@ class AENetworksShowIE(AENetworksListBaseIE):
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.history.com/shows/ancient-aliens',
|
'url': 'http://www.history.com/shows/ancient-aliens',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'SH012427480000',
|
'id': 'SERIES1574',
|
||||||
'title': 'Ancient Aliens',
|
'title': 'Ancient Aliens',
|
||||||
'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
|
'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
|
||||||
},
|
},
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
int_or_none,
|
int_or_none,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
unified_timestamp,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -88,7 +89,7 @@ def get_media_node(name, default=None):
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
timestamp = parse_iso8601(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
|
timestamp = unified_timestamp(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
|
|
@ -48,6 +48,7 @@ def _parse_video_data(self, video_data, video_id, is_live):
|
||||||
'duration': int_or_none(video_data.get('duration')),
|
'duration': int_or_none(video_data.get('duration')),
|
||||||
'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
|
'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
|
||||||
'is_live': is_live,
|
'is_live': is_live,
|
||||||
|
'uploader_id': video_data.get('user_id'),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -107,6 +108,7 @@ class AWAANLiveIE(AWAANBaseIE):
|
||||||
'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
'upload_date': '20150107',
|
'upload_date': '20150107',
|
||||||
'timestamp': 1420588800,
|
'timestamp': 1420588800,
|
||||||
|
'uploader_id': '71',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
|
|
|
@ -47,7 +47,7 @@ class AZMedienIE(InfoExtractor):
|
||||||
'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
|
'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
}]
|
}]
|
||||||
_API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/cb9f2f81ed22e9b47f4ca64ea3cc5a5d13e88d1d'
|
_API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/a4016f65fe62b81dc6664dd9f4910e4ab40383be'
|
||||||
_PARTNER_ID = '1719221'
|
_PARTNER_ID = '1719221'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -90,13 +90,19 @@ class BleacherReportCMSIE(AMPIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})'
|
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms',
|
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms',
|
||||||
'md5': '2e4b0a997f9228ffa31fada5c53d1ed1',
|
'md5': '670b2d73f48549da032861130488c681',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
|
'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
|
'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
|
||||||
'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
|
'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
|
||||||
|
'upload_date': '20150723',
|
||||||
|
'timestamp': 1437679032,
|
||||||
|
|
||||||
},
|
},
|
||||||
|
'expected_warnings': [
|
||||||
|
'Unable to download f4m manifest'
|
||||||
|
]
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -12,7 +12,7 @@
|
||||||
|
|
||||||
|
|
||||||
class BravoTVIE(AdobePassIE):
|
class BravoTVIE(AdobePassIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?P<req_id>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
|
'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
|
||||||
'md5': 'e34684cfea2a96cd2ee1ef3a60909de9',
|
'md5': 'e34684cfea2a96cd2ee1ef3a60909de9',
|
||||||
|
@ -28,10 +28,13 @@ class BravoTVIE(AdobePassIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
|
'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
site, display_id = re.match(self._VALID_URL, url).groups()
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
settings = self._parse_json(self._search_regex(
|
settings = self._parse_json(self._search_regex(
|
||||||
r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'),
|
r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'),
|
||||||
|
@ -53,11 +56,14 @@ def _real_extract(self, url):
|
||||||
tp_path = release_pid = tve['release_pid']
|
tp_path = release_pid = tve['release_pid']
|
||||||
if tve.get('entitlement') == 'auth':
|
if tve.get('entitlement') == 'auth':
|
||||||
adobe_pass = settings.get('tve_adobe_auth', {})
|
adobe_pass = settings.get('tve_adobe_auth', {})
|
||||||
|
if site == 'bravotv':
|
||||||
|
site = 'bravo'
|
||||||
resource = self._get_mvpd_resource(
|
resource = self._get_mvpd_resource(
|
||||||
adobe_pass.get('adobePassResourceId', 'bravo'),
|
adobe_pass.get('adobePassResourceId') or site,
|
||||||
tve['title'], release_pid, tve.get('rating'))
|
tve['title'], release_pid, tve.get('rating'))
|
||||||
query['auth'] = self._extract_mvpd_auth(
|
query['auth'] = self._extract_mvpd_auth(
|
||||||
url, release_pid, adobe_pass.get('adobePassRequestorId', 'bravo'), resource)
|
url, release_pid,
|
||||||
|
adobe_pass.get('adobePassRequestorId') or site, resource)
|
||||||
else:
|
else:
|
||||||
shared_playlist = settings['ls_playlist']
|
shared_playlist = settings['ls_playlist']
|
||||||
account_pid = shared_playlist['account_pid']
|
account_pid = shared_playlist['account_pid']
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import datetime
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
@ -8,8 +9,8 @@
|
||||||
clean_html,
|
clean_html,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
|
||||||
parse_resolution,
|
parse_resolution,
|
||||||
|
try_get,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -24,8 +25,9 @@ class CCMAIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'L\'espot de La Marató de TV3',
|
'title': 'L\'espot de La Marató de TV3',
|
||||||
'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
|
'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
|
||||||
'timestamp': 1470918540,
|
'timestamp': 1478608140,
|
||||||
'upload_date': '20160811',
|
'upload_date': '20161108',
|
||||||
|
'age_limit': 0,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
|
'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
|
||||||
|
@ -35,8 +37,24 @@ class CCMAIE(InfoExtractor):
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'El Consell de Savis analitza el derbi',
|
'title': 'El Consell de Savis analitza el derbi',
|
||||||
'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
|
'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
|
||||||
'upload_date': '20171205',
|
'upload_date': '20170512',
|
||||||
'timestamp': 1512507300,
|
'timestamp': 1494622500,
|
||||||
|
'vcodec': 'none',
|
||||||
|
'categories': ['Esports'],
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/',
|
||||||
|
'md5': 'b43c3d3486f430f3032b5b160d80cbc3',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6031387',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)',
|
||||||
|
'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
|
||||||
|
'timestamp': 1582577700,
|
||||||
|
'upload_date': '20200224',
|
||||||
|
'subtitles': 'mincount:4',
|
||||||
|
'age_limit': 16,
|
||||||
|
'series': 'Crims',
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@ -72,17 +90,27 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
informacio = media['informacio']
|
informacio = media['informacio']
|
||||||
title = informacio['titol']
|
title = informacio['titol']
|
||||||
durada = informacio.get('durada', {})
|
durada = informacio.get('durada') or {}
|
||||||
duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
|
duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
|
||||||
timestamp = parse_iso8601(informacio.get('data_emissio', {}).get('utc'))
|
tematica = try_get(informacio, lambda x: x['tematica']['text'])
|
||||||
|
|
||||||
|
timestamp = None
|
||||||
|
data_utc = try_get(informacio, lambda x: x['data_emissio']['utc'])
|
||||||
|
try:
|
||||||
|
timestamp = datetime.datetime.strptime(
|
||||||
|
data_utc, '%Y-%d-%mT%H:%M:%S%z').timestamp()
|
||||||
|
except TypeError:
|
||||||
|
pass
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
subtitols = media.get('subtitols', {})
|
subtitols = media.get('subtitols') or []
|
||||||
if subtitols:
|
if isinstance(subtitols, dict):
|
||||||
sub_url = subtitols.get('url')
|
subtitols = [subtitols]
|
||||||
|
for st in subtitols:
|
||||||
|
sub_url = st.get('url')
|
||||||
if sub_url:
|
if sub_url:
|
||||||
subtitles.setdefault(
|
subtitles.setdefault(
|
||||||
subtitols.get('iso') or subtitols.get('text') or 'ca', []).append({
|
st.get('iso') or st.get('text') or 'ca', []).append({
|
||||||
'url': sub_url,
|
'url': sub_url,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
@ -97,6 +125,16 @@ def _real_extract(self, url):
|
||||||
'height': int_or_none(imatges.get('alcada')),
|
'height': int_or_none(imatges.get('alcada')),
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
age_limit = None
|
||||||
|
codi_etic = try_get(informacio, lambda x: x['codi_etic']['id'])
|
||||||
|
if codi_etic:
|
||||||
|
codi_etic_s = codi_etic.split('_')
|
||||||
|
if len(codi_etic_s) == 2:
|
||||||
|
if codi_etic_s[1] == 'TP':
|
||||||
|
age_limit = 0
|
||||||
|
else:
|
||||||
|
age_limit = int_or_none(codi_etic_s[1])
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': media_id,
|
'id': media_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
@ -106,4 +144,9 @@ def _real_extract(self, url):
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'age_limit': age_limit,
|
||||||
|
'alt_title': informacio.get('titol_complet'),
|
||||||
|
'episode_number': int_or_none(informacio.get('capitol')),
|
||||||
|
'categories': [tematica] if tematica else None,
|
||||||
|
'series': informacio.get('programa'),
|
||||||
}
|
}
|
||||||
|
|
|
@ -96,7 +96,7 @@ def _real_extract(self, url):
|
||||||
raise ExtractorError('This video is only available for premium users.', expected=True)
|
raise ExtractorError('This video is only available for premium users.', expected=True)
|
||||||
|
|
||||||
need_confirm_age = False
|
need_confirm_age = False
|
||||||
if self._html_search_regex(r'(<form[^>]+action="/a/validatebirth")',
|
if self._html_search_regex(r'(<form[^>]+action="[^"]*/a/validatebirth[^"]*")',
|
||||||
webpage, 'birthday validate form', default=None):
|
webpage, 'birthday validate form', default=None):
|
||||||
webpage = self._download_age_confirm_page(
|
webpage = self._download_age_confirm_page(
|
||||||
url, video_id, note='Confirming age')
|
url, video_id, note='Confirming age')
|
||||||
|
|
|
@ -12,7 +12,14 @@
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class EggheadCourseIE(InfoExtractor):
|
class EggheadBaseIE(InfoExtractor):
|
||||||
|
def _call_api(self, path, video_id, resource, fatal=True):
|
||||||
|
return self._download_json(
|
||||||
|
'https://app.egghead.io/api/v1/' + path,
|
||||||
|
video_id, 'Downloading %s JSON' % resource, fatal=fatal)
|
||||||
|
|
||||||
|
|
||||||
|
class EggheadCourseIE(EggheadBaseIE):
|
||||||
IE_DESC = 'egghead.io course'
|
IE_DESC = 'egghead.io course'
|
||||||
IE_NAME = 'egghead:course'
|
IE_NAME = 'egghead:course'
|
||||||
_VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
|
||||||
|
@ -28,10 +35,9 @@ class EggheadCourseIE(InfoExtractor):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id = self._match_id(url)
|
playlist_id = self._match_id(url)
|
||||||
|
series_path = 'series/' + playlist_id
|
||||||
lessons = self._download_json(
|
lessons = self._call_api(
|
||||||
'https://egghead.io/api/v1/series/%s/lessons' % playlist_id,
|
series_path + '/lessons', playlist_id, 'course lessons')
|
||||||
playlist_id, 'Downloading course lessons JSON')
|
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for lesson in lessons:
|
for lesson in lessons:
|
||||||
|
@ -44,9 +50,8 @@ def _real_extract(self, url):
|
||||||
entries.append(self.url_result(
|
entries.append(self.url_result(
|
||||||
lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))
|
lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))
|
||||||
|
|
||||||
course = self._download_json(
|
course = self._call_api(
|
||||||
'https://egghead.io/api/v1/series/%s' % playlist_id,
|
series_path, playlist_id, 'course', False) or {}
|
||||||
playlist_id, 'Downloading course JSON', fatal=False) or {}
|
|
||||||
|
|
||||||
playlist_id = course.get('id')
|
playlist_id = course.get('id')
|
||||||
if playlist_id:
|
if playlist_id:
|
||||||
|
@ -57,7 +62,7 @@ def _real_extract(self, url):
|
||||||
course.get('description'))
|
course.get('description'))
|
||||||
|
|
||||||
|
|
||||||
class EggheadLessonIE(InfoExtractor):
|
class EggheadLessonIE(EggheadBaseIE):
|
||||||
IE_DESC = 'egghead.io lesson'
|
IE_DESC = 'egghead.io lesson'
|
||||||
IE_NAME = 'egghead:lesson'
|
IE_NAME = 'egghead:lesson'
|
||||||
_VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
|
||||||
|
@ -74,7 +79,7 @@ class EggheadLessonIE(InfoExtractor):
|
||||||
'upload_date': '20161209',
|
'upload_date': '20161209',
|
||||||
'duration': 304,
|
'duration': 304,
|
||||||
'view_count': 0,
|
'view_count': 0,
|
||||||
'tags': ['javascript', 'free'],
|
'tags': 'count:2',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
|
@ -88,8 +93,8 @@ class EggheadLessonIE(InfoExtractor):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
lesson = self._download_json(
|
lesson = self._call_api(
|
||||||
'https://egghead.io/api/v1/lessons/%s' % display_id, display_id)
|
'lessons/' + display_id, display_id, 'lesson')
|
||||||
|
|
||||||
lesson_id = compat_str(lesson['id'])
|
lesson_id = compat_str(lesson['id'])
|
||||||
title = lesson['title']
|
title = lesson['title']
|
||||||
|
|
|
@ -1308,6 +1308,7 @@
|
||||||
TV2IE,
|
TV2IE,
|
||||||
TV2ArticleIE,
|
TV2ArticleIE,
|
||||||
KatsomoIE,
|
KatsomoIE,
|
||||||
|
MTVUutisetArticleIE,
|
||||||
)
|
)
|
||||||
from .tv2dk import (
|
from .tv2dk import (
|
||||||
TV2DKIE,
|
TV2DKIE,
|
||||||
|
@ -1448,7 +1449,6 @@
|
||||||
VidmeUserIE,
|
VidmeUserIE,
|
||||||
VidmeUserLikesIE,
|
VidmeUserLikesIE,
|
||||||
)
|
)
|
||||||
from .vidzi import VidziIE
|
|
||||||
from .vier import VierIE, VierVideosIE
|
from .vier import VierIE, VierVideosIE
|
||||||
from .viewlift import (
|
from .viewlift import (
|
||||||
ViewLiftIE,
|
ViewLiftIE,
|
||||||
|
@ -1508,6 +1508,7 @@
|
||||||
VRVSeriesIE,
|
VRVSeriesIE,
|
||||||
)
|
)
|
||||||
from .vshare import VShareIE
|
from .vshare import VShareIE
|
||||||
|
from .vtm import VTMIE
|
||||||
from .medialaan import MedialaanIE
|
from .medialaan import MedialaanIE
|
||||||
from .vube import VubeIE
|
from .vube import VubeIE
|
||||||
from .vuclip import VuClipIE
|
from .vuclip import VuClipIE
|
||||||
|
|
|
@ -131,6 +131,7 @@
|
||||||
from .rcs import RCSEmbedsIE
|
from .rcs import RCSEmbedsIE
|
||||||
from .bitchute import BitChuteIE
|
from .bitchute import BitChuteIE
|
||||||
from .arcpublishing import ArcPublishingIE
|
from .arcpublishing import ArcPublishingIE
|
||||||
|
from .medialaan import MedialaanIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
|
@ -2224,6 +2225,20 @@ class GenericIE(InfoExtractor):
|
||||||
'duration': 1581,
|
'duration': 1581,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# MyChannels SDK embed
|
||||||
|
# https://www.24kitchen.nl/populair/deskundige-dit-waarom-sommigen-gevoelig-zijn-voor-voedselallergieen
|
||||||
|
'url': 'https://www.demorgen.be/nieuws/burgemeester-rotterdam-richt-zich-in-videoboodschap-tot-relschoppers-voelt-het-goed~b0bcfd741/',
|
||||||
|
'md5': '90c0699c37006ef18e198c032d81739c',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '194165',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Burgemeester Aboutaleb spreekt relschoppers toe',
|
||||||
|
'timestamp': 1611740340,
|
||||||
|
'upload_date': '20210127',
|
||||||
|
'duration': 159,
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def report_following_redirect(self, new_url):
|
def report_following_redirect(self, new_url):
|
||||||
|
@ -2463,6 +2478,9 @@ def _real_extract(self, url):
|
||||||
webpage = self._webpage_read_content(
|
webpage = self._webpage_read_content(
|
||||||
full_response, url, video_id, prefix=first_bytes)
|
full_response, url, video_id, prefix=first_bytes)
|
||||||
|
|
||||||
|
if '<title>DPG Media Privacy Gate</title>' in webpage:
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
# Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
|
# Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
|
||||||
|
@ -2594,6 +2612,11 @@ def _real_extract(self, url):
|
||||||
if arc_urls:
|
if arc_urls:
|
||||||
return self.playlist_from_matches(arc_urls, video_id, video_title, ie=ArcPublishingIE.ie_key())
|
return self.playlist_from_matches(arc_urls, video_id, video_title, ie=ArcPublishingIE.ie_key())
|
||||||
|
|
||||||
|
mychannels_urls = MedialaanIE._extract_urls(webpage)
|
||||||
|
if mychannels_urls:
|
||||||
|
return self.playlist_from_matches(
|
||||||
|
mychannels_urls, video_id, video_title, ie=MedialaanIE.ie_key())
|
||||||
|
|
||||||
# Look for embedded rtl.nl player
|
# Look for embedded rtl.nl player
|
||||||
matches = re.findall(
|
matches = re.findall(
|
||||||
r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
|
r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
get_element_by_class,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
lowercase_escape,
|
lowercase_escape,
|
||||||
try_get,
|
try_get,
|
||||||
|
@ -237,7 +238,7 @@ def add_source_format(urlh):
|
||||||
if confirmation_webpage:
|
if confirmation_webpage:
|
||||||
confirm = self._search_regex(
|
confirm = self._search_regex(
|
||||||
r'confirm=([^&"\']+)', confirmation_webpage,
|
r'confirm=([^&"\']+)', confirmation_webpage,
|
||||||
'confirmation code', fatal=False)
|
'confirmation code', default=None)
|
||||||
if confirm:
|
if confirm:
|
||||||
confirmed_source_url = update_url_query(source_url, {
|
confirmed_source_url = update_url_query(source_url, {
|
||||||
'confirm': confirm,
|
'confirm': confirm,
|
||||||
|
@ -245,6 +246,11 @@ def add_source_format(urlh):
|
||||||
urlh = request_source_file(confirmed_source_url, 'confirmed source')
|
urlh = request_source_file(confirmed_source_url, 'confirmed source')
|
||||||
if urlh and urlh.headers.get('Content-Disposition'):
|
if urlh and urlh.headers.get('Content-Disposition'):
|
||||||
add_source_format(urlh)
|
add_source_format(urlh)
|
||||||
|
else:
|
||||||
|
self.report_warning(
|
||||||
|
get_element_by_class('uc-error-subcaption', confirmation_webpage)
|
||||||
|
or get_element_by_class('uc-error-caption', confirmation_webpage)
|
||||||
|
or 'unable to extract confirmation code')
|
||||||
|
|
||||||
if not formats and reason:
|
if not formats and reason:
|
||||||
raise ExtractorError(reason, expected=True)
|
raise ExtractorError(reason, expected=True)
|
||||||
|
|
|
@ -2,268 +2,113 @@
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .gigya import GigyaBaseIE
|
from .common import InfoExtractor
|
||||||
|
|
||||||
from ..compat import compat_str
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
extract_attributes,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
mimetype2ext,
|
||||||
try_get,
|
parse_iso8601,
|
||||||
unified_timestamp,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class MedialaanIE(GigyaBaseIE):
|
class MedialaanIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:www\.|nieuws\.)?
|
|
||||||
(?:
|
(?:
|
||||||
(?P<site_id>vtm|q2|vtmkzoom)\.be/
|
(?:embed\.)?mychannels.video/embed/|
|
||||||
(?:
|
embed\.mychannels\.video/(?:s(?:dk|cript)/)?production/|
|
||||||
video(?:/[^/]+/id/|/?\?.*?\baid=)|
|
(?:www\.)?(?:
|
||||||
(?:[^/]+/)*
|
(?:
|
||||||
)
|
7sur7|
|
||||||
|
demorgen|
|
||||||
|
hln|
|
||||||
|
joe|
|
||||||
|
qmusic
|
||||||
|
)\.be|
|
||||||
|
(?:
|
||||||
|
[abe]d|
|
||||||
|
bndestem|
|
||||||
|
destentor|
|
||||||
|
gelderlander|
|
||||||
|
pzc|
|
||||||
|
tubantia|
|
||||||
|
volkskrant
|
||||||
|
)\.nl
|
||||||
|
)/video/(?:[^/]+/)*[^/?&#]+~p
|
||||||
)
|
)
|
||||||
(?P<id>[^/?#&]+)
|
(?P<id>\d+)
|
||||||
'''
|
'''
|
||||||
_NETRC_MACHINE = 'medialaan'
|
|
||||||
_APIKEY = '3_HZ0FtkMW_gOyKlqQzW5_0FHRC7Nd5XpXJZcDdXY4pk5eES2ZWmejRW5egwVm4ug-'
|
|
||||||
_SITE_TO_APP_ID = {
|
|
||||||
'vtm': 'vtm_watch',
|
|
||||||
'q2': 'q2',
|
|
||||||
'vtmkzoom': 'vtmkzoom',
|
|
||||||
}
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# vod
|
'url': 'https://www.bndestem.nl/video/de-terugkeer-van-ally-de-aap-en-wie-vertrekt-er-nog-bij-nac~p193993',
|
||||||
'url': 'http://vtm.be/video/volledige-afleveringen/id/vtm_20170219_VM0678361_vtmwatch',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'vtm_20170219_VM0678361_vtmwatch',
|
'id': '193993',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Allemaal Chris afl. 6',
|
'title': 'De terugkeer van Ally de Aap en wie vertrekt er nog bij NAC?',
|
||||||
'description': 'md5:4be86427521e7b07e0adb0c9c554ddb2',
|
'timestamp': 1611663540,
|
||||||
'timestamp': 1487533280,
|
'upload_date': '20210126',
|
||||||
'upload_date': '20170219',
|
'duration': 238,
|
||||||
'duration': 2562,
|
|
||||||
'series': 'Allemaal Chris',
|
|
||||||
'season': 'Allemaal Chris',
|
|
||||||
'season_number': 1,
|
|
||||||
'season_id': '256936078124527',
|
|
||||||
'episode': 'Allemaal Chris afl. 6',
|
|
||||||
'episode_number': 6,
|
|
||||||
'episode_id': '256936078591527',
|
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'Requires account credentials',
|
|
||||||
}, {
|
}, {
|
||||||
# clip
|
'url': 'https://www.gelderlander.nl/video/kanalen/degelderlander~c320/series/snel-nieuws~s984/noodbevel-in-doetinchem-politie-stuurt-mensen-centrum-uit~p194093',
|
||||||
'url': 'http://vtm.be/video?aid=168332',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '168332',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': '"Veronique liegt!"',
|
|
||||||
'description': 'md5:1385e2b743923afe54ba4adc38476155',
|
|
||||||
'timestamp': 1489002029,
|
|
||||||
'upload_date': '20170308',
|
|
||||||
'duration': 96,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
# vod
|
|
||||||
'url': 'http://vtm.be/video/volledige-afleveringen/id/257107153551000',
|
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# vod
|
'url': 'https://embed.mychannels.video/sdk/production/193993?options=TFTFF_default',
|
||||||
'url': 'http://vtm.be/video?aid=163157',
|
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# vod
|
'url': 'https://embed.mychannels.video/script/production/193993',
|
||||||
'url': 'http://www.q2.be/video/volledige-afleveringen/id/2be_20170301_VM0684442_q2',
|
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# clip
|
'url': 'https://embed.mychannels.video/production/193993',
|
||||||
'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio',
|
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# http/s redirect
|
'url': 'https://mychannels.video/embed/193993',
|
||||||
'url': 'https://vtmkzoom.be/video?aid=45724',
|
'only_matching': True,
|
||||||
'info_dict': {
|
|
||||||
'id': '257136373657000',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'K3 Dansstudio Ushuaia afl.6',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
'skip': 'Requires account credentials',
|
|
||||||
}, {
|
}, {
|
||||||
# nieuws.vtm.be
|
'url': 'https://embed.mychannels.video/embed/193993',
|
||||||
'url': 'https://nieuws.vtm.be/stadion/stadion/genk-nog-moeilijk-programma',
|
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_initialize(self):
|
@staticmethod
|
||||||
self._logged_in = False
|
def _extract_urls(webpage):
|
||||||
|
entries = []
|
||||||
def _login(self):
|
for element in re.findall(r'(<div[^>]+data-mychannels-type="video"[^>]*>)', webpage):
|
||||||
username, password = self._get_login_info()
|
mychannels_id = extract_attributes(element).get('data-mychannels-id')
|
||||||
if username is None:
|
if mychannels_id:
|
||||||
self.raise_login_required()
|
entries.append('https://mychannels.video/embed/' + mychannels_id)
|
||||||
|
return entries
|
||||||
auth_data = {
|
|
||||||
'APIKey': self._APIKEY,
|
|
||||||
'sdk': 'js_6.1',
|
|
||||||
'format': 'json',
|
|
||||||
'loginID': username,
|
|
||||||
'password': password,
|
|
||||||
}
|
|
||||||
|
|
||||||
auth_info = self._gigya_login(auth_data)
|
|
||||||
|
|
||||||
self._uid = auth_info['UID']
|
|
||||||
self._uid_signature = auth_info['UIDSignature']
|
|
||||||
self._signature_timestamp = auth_info['signatureTimestamp']
|
|
||||||
|
|
||||||
self._logged_in = True
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
production_id = self._match_id(url)
|
||||||
video_id, site_id = mobj.group('id', 'site_id')
|
production = self._download_json(
|
||||||
|
'https://embed.mychannels.video/sdk/production/' + production_id,
|
||||||
|
production_id, query={'options': 'UUUU_default'})['productions'][0]
|
||||||
|
title = production['title']
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
formats = []
|
||||||
|
for source in (production.get('sources') or []):
|
||||||
config = self._parse_json(
|
src = source.get('src')
|
||||||
self._search_regex(
|
if not src:
|
||||||
r'videoJSConfig\s*=\s*JSON\.parse\(\'({.+?})\'\);',
|
continue
|
||||||
webpage, 'config', default='{}'), video_id,
|
ext = mimetype2ext(source.get('type'))
|
||||||
transform_source=lambda s: s.replace(
|
if ext == 'm3u8':
|
||||||
'\\\\', '\\').replace(r'\"', '"').replace(r"\'", "'"))
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
src, production_id, 'mp4', 'm3u8_native',
|
||||||
vod_id = config.get('vodId') or self._search_regex(
|
m3u8_id='hls', fatal=False))
|
||||||
(r'\\"vodId\\"\s*:\s*\\"(.+?)\\"',
|
|
||||||
r'"vodId"\s*:\s*"(.+?)"',
|
|
||||||
r'<[^>]+id=["\']vod-(\d+)'),
|
|
||||||
webpage, 'video_id', default=None)
|
|
||||||
|
|
||||||
# clip, no authentication required
|
|
||||||
if not vod_id:
|
|
||||||
player = self._parse_json(
|
|
||||||
self._search_regex(
|
|
||||||
r'vmmaplayer\(({.+?})\);', webpage, 'vmma player',
|
|
||||||
default=''),
|
|
||||||
video_id, transform_source=lambda s: '[%s]' % s, fatal=False)
|
|
||||||
if player:
|
|
||||||
video = player[-1]
|
|
||||||
if video['videoUrl'] in ('http', 'https'):
|
|
||||||
return self.url_result(video['url'], MedialaanIE.ie_key())
|
|
||||||
info = {
|
|
||||||
'id': video_id,
|
|
||||||
'url': video['videoUrl'],
|
|
||||||
'title': video['title'],
|
|
||||||
'thumbnail': video.get('imageUrl'),
|
|
||||||
'timestamp': int_or_none(video.get('createdDate')),
|
|
||||||
'duration': int_or_none(video.get('duration')),
|
|
||||||
}
|
|
||||||
else:
|
else:
|
||||||
info = self._parse_html5_media_entries(
|
formats.append({
|
||||||
url, webpage, video_id, m3u8_id='hls')[0]
|
'ext': ext,
|
||||||
info.update({
|
'url': src,
|
||||||
'id': video_id,
|
|
||||||
'title': self._html_search_meta('description', webpage),
|
|
||||||
'duration': parse_duration(self._html_search_meta('duration', webpage)),
|
|
||||||
})
|
})
|
||||||
# vod, authentication required
|
self._sort_formats(formats)
|
||||||
else:
|
|
||||||
if not self._logged_in:
|
|
||||||
self._login()
|
|
||||||
|
|
||||||
settings = self._parse_json(
|
return {
|
||||||
self._search_regex(
|
'id': production_id,
|
||||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
'title': title,
|
||||||
webpage, 'drupal settings', default='{}'),
|
'formats': formats,
|
||||||
video_id)
|
'thumbnail': production.get('posterUrl'),
|
||||||
|
'timestamp': parse_iso8601(production.get('publicationDate'), ' '),
|
||||||
def get(container, item):
|
'duration': int_or_none(production.get('duration')) or None,
|
||||||
return try_get(
|
}
|
||||||
settings, lambda x: x[container][item],
|
|
||||||
compat_str) or self._search_regex(
|
|
||||||
r'"%s"\s*:\s*"([^"]+)' % item, webpage, item,
|
|
||||||
default=None)
|
|
||||||
|
|
||||||
app_id = get('vod', 'app_id') or self._SITE_TO_APP_ID.get(site_id, 'vtm_watch')
|
|
||||||
sso = get('vod', 'gigyaDatabase') or 'vtm-sso'
|
|
||||||
|
|
||||||
data = self._download_json(
|
|
||||||
'http://vod.medialaan.io/api/1.0/item/%s/video' % vod_id,
|
|
||||||
video_id, query={
|
|
||||||
'app_id': app_id,
|
|
||||||
'user_network': sso,
|
|
||||||
'UID': self._uid,
|
|
||||||
'UIDSignature': self._uid_signature,
|
|
||||||
'signatureTimestamp': self._signature_timestamp,
|
|
||||||
})
|
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
data['response']['uri'], video_id, entry_protocol='m3u8_native',
|
|
||||||
ext='mp4', m3u8_id='hls')
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
info = {
|
|
||||||
'id': vod_id,
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
|
|
||||||
api_key = get('vod', 'apiKey')
|
|
||||||
channel = get('medialaanGigya', 'channel')
|
|
||||||
|
|
||||||
if api_key:
|
|
||||||
videos = self._download_json(
|
|
||||||
'http://vod.medialaan.io/vod/v2/videos', video_id, fatal=False,
|
|
||||||
query={
|
|
||||||
'channels': channel,
|
|
||||||
'ids': vod_id,
|
|
||||||
'limit': 1,
|
|
||||||
'apikey': api_key,
|
|
||||||
})
|
|
||||||
if videos:
|
|
||||||
video = try_get(
|
|
||||||
videos, lambda x: x['response']['videos'][0], dict)
|
|
||||||
if video:
|
|
||||||
def get(container, item, expected_type=None):
|
|
||||||
return try_get(
|
|
||||||
video, lambda x: x[container][item], expected_type)
|
|
||||||
|
|
||||||
def get_string(container, item):
|
|
||||||
return get(container, item, compat_str)
|
|
||||||
|
|
||||||
info.update({
|
|
||||||
'series': get_string('program', 'title'),
|
|
||||||
'season': get_string('season', 'title'),
|
|
||||||
'season_number': int_or_none(get('season', 'number')),
|
|
||||||
'season_id': get_string('season', 'id'),
|
|
||||||
'episode': get_string('episode', 'title'),
|
|
||||||
'episode_number': int_or_none(get('episode', 'number')),
|
|
||||||
'episode_id': get_string('episode', 'id'),
|
|
||||||
'duration': int_or_none(
|
|
||||||
video.get('duration')) or int_or_none(
|
|
||||||
video.get('durationMillis'), scale=1000),
|
|
||||||
'title': get_string('episode', 'title'),
|
|
||||||
'description': get_string('episode', 'text'),
|
|
||||||
'timestamp': unified_timestamp(get_string(
|
|
||||||
'publication', 'begin')),
|
|
||||||
})
|
|
||||||
|
|
||||||
if not info.get('title'):
|
|
||||||
info['title'] = try_get(
|
|
||||||
config, lambda x: x['videoConfig']['title'],
|
|
||||||
compat_str) or self._html_search_regex(
|
|
||||||
r'\\"title\\"\s*:\s*\\"(.+?)\\"', webpage, 'title',
|
|
||||||
default=None) or self._og_search_title(webpage)
|
|
||||||
|
|
||||||
if not info.get('description'):
|
|
||||||
info['description'] = self._html_search_regex(
|
|
||||||
r'<div[^>]+class="field-item\s+even">\s*<p>(.+?)</p>',
|
|
||||||
webpage, 'description', default=None)
|
|
||||||
|
|
||||||
return info
|
|
||||||
|
|
|
@ -22,11 +22,15 @@
|
||||||
orderedSet,
|
orderedSet,
|
||||||
remove_quotes,
|
remove_quotes,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
|
update_url_query,
|
||||||
|
urlencode_postdata,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class PornHubBaseIE(InfoExtractor):
|
class PornHubBaseIE(InfoExtractor):
|
||||||
|
_NETRC_MACHINE = 'pornhub'
|
||||||
|
|
||||||
def _download_webpage_handle(self, *args, **kwargs):
|
def _download_webpage_handle(self, *args, **kwargs):
|
||||||
def dl(*args, **kwargs):
|
def dl(*args, **kwargs):
|
||||||
return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
|
return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
|
||||||
|
@ -52,6 +56,66 @@ def dl(*args, **kwargs):
|
||||||
|
|
||||||
return webpage, urlh
|
return webpage, urlh
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
self._logged_in = False
|
||||||
|
|
||||||
|
def _login(self, host):
|
||||||
|
if self._logged_in:
|
||||||
|
return
|
||||||
|
|
||||||
|
site = host.split('.')[0]
|
||||||
|
|
||||||
|
# Both sites pornhub and pornhubpremium have separate accounts
|
||||||
|
# so there should be an option to provide credentials for both.
|
||||||
|
# At the same time some videos are available under the same video id
|
||||||
|
# on both sites so that we have to identify them as the same video.
|
||||||
|
# For that purpose we have to keep both in the same extractor
|
||||||
|
# but under different netrc machines.
|
||||||
|
username, password = self._get_login_info(netrc_machine=site)
|
||||||
|
if username is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
login_url = 'https://www.%s/%slogin' % (host, 'premium/' if 'premium' in host else '')
|
||||||
|
login_page = self._download_webpage(
|
||||||
|
login_url, None, 'Downloading %s login page' % site)
|
||||||
|
|
||||||
|
def is_logged(webpage):
|
||||||
|
return any(re.search(p, webpage) for p in (
|
||||||
|
r'class=["\']signOut',
|
||||||
|
r'>Sign\s+[Oo]ut\s*<'))
|
||||||
|
|
||||||
|
if is_logged(login_page):
|
||||||
|
self._logged_in = True
|
||||||
|
return
|
||||||
|
|
||||||
|
login_form = self._hidden_inputs(login_page)
|
||||||
|
|
||||||
|
login_form.update({
|
||||||
|
'username': username,
|
||||||
|
'password': password,
|
||||||
|
})
|
||||||
|
|
||||||
|
response = self._download_json(
|
||||||
|
'https://www.%s/front/authenticate' % host, None,
|
||||||
|
'Logging in to %s' % site,
|
||||||
|
data=urlencode_postdata(login_form),
|
||||||
|
headers={
|
||||||
|
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||||
|
'Referer': login_url,
|
||||||
|
'X-Requested-With': 'XMLHttpRequest',
|
||||||
|
})
|
||||||
|
|
||||||
|
if response.get('success') == '1':
|
||||||
|
self._logged_in = True
|
||||||
|
return
|
||||||
|
|
||||||
|
message = response.get('message')
|
||||||
|
if message is not None:
|
||||||
|
raise ExtractorError(
|
||||||
|
'Unable to login: %s' % message, expected=True)
|
||||||
|
|
||||||
|
raise ExtractorError('Unable to log in')
|
||||||
|
|
||||||
|
|
||||||
class PornHubIE(PornHubBaseIE):
|
class PornHubIE(PornHubBaseIE):
|
||||||
IE_DESC = 'PornHub and Thumbzilla'
|
IE_DESC = 'PornHub and Thumbzilla'
|
||||||
|
@ -163,12 +227,20 @@ class PornHubIE(PornHubBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
|
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# Some videos are available with the same id on both premium
|
||||||
|
# and non-premium sites (e.g. this and the following test)
|
||||||
|
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5f75b0f4b18e3',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(webpage):
|
||||||
return re.findall(
|
return re.findall(
|
||||||
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net|org)/embed/[\da-z]+)',
|
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)',
|
||||||
webpage)
|
webpage)
|
||||||
|
|
||||||
def _extract_count(self, pattern, webpage, name):
|
def _extract_count(self, pattern, webpage, name):
|
||||||
|
@ -180,12 +252,7 @@ def _real_extract(self, url):
|
||||||
host = mobj.group('host') or 'pornhub.com'
|
host = mobj.group('host') or 'pornhub.com'
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
if 'premium' in host:
|
self._login(host)
|
||||||
if not self._downloader.params.get('cookiefile'):
|
|
||||||
raise ExtractorError(
|
|
||||||
'PornHub Premium requires authentication.'
|
|
||||||
' You may want to use --cookies.',
|
|
||||||
expected=True)
|
|
||||||
|
|
||||||
self._set_cookie(host, 'age_verified', '1')
|
self._set_cookie(host, 'age_verified', '1')
|
||||||
|
|
||||||
|
@ -405,6 +472,10 @@ def extract_list(meta_key):
|
||||||
|
|
||||||
|
|
||||||
class PornHubPlaylistBaseIE(PornHubBaseIE):
|
class PornHubPlaylistBaseIE(PornHubBaseIE):
|
||||||
|
def _extract_page(self, url):
|
||||||
|
return int_or_none(self._search_regex(
|
||||||
|
r'\bpage=(\d+)', url, 'page', default=None))
|
||||||
|
|
||||||
def _extract_entries(self, webpage, host):
|
def _extract_entries(self, webpage, host):
|
||||||
# Only process container div with main playlist content skipping
|
# Only process container div with main playlist content skipping
|
||||||
# drop-down menu that uses similar pattern for videos (see
|
# drop-down menu that uses similar pattern for videos (see
|
||||||
|
@ -422,26 +493,6 @@ def _extract_entries(self, webpage, host):
|
||||||
container))
|
container))
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
host = mobj.group('host')
|
|
||||||
playlist_id = mobj.group('id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
|
||||||
|
|
||||||
entries = self._extract_entries(webpage, host)
|
|
||||||
|
|
||||||
playlist = self._parse_json(
|
|
||||||
self._search_regex(
|
|
||||||
r'(?:playlistObject|PLAYLIST_VIEW)\s*=\s*({.+?});', webpage,
|
|
||||||
'playlist', default='{}'),
|
|
||||||
playlist_id, fatal=False)
|
|
||||||
title = playlist.get('title') or self._search_regex(
|
|
||||||
r'>Videos\s+in\s+(.+?)\s+[Pp]laylist<', webpage, 'title', fatal=False)
|
|
||||||
|
|
||||||
return self.playlist_result(
|
|
||||||
entries, playlist_id, title, playlist.get('description'))
|
|
||||||
|
|
||||||
|
|
||||||
class PornHubUserIE(PornHubPlaylistBaseIE):
|
class PornHubUserIE(PornHubPlaylistBaseIE):
|
||||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
|
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
|
||||||
|
@ -463,14 +514,27 @@ class PornHubUserIE(PornHubPlaylistBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.pornhub.com/model/zoe_ph?abc=1',
|
'url': 'https://www.pornhub.com/model/zoe_ph?abc=1',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# Unavailable via /videos page, but available with direct pagination
|
||||||
|
# on pornstar page (see [1]), requires premium
|
||||||
|
# 1. https://github.com/ytdl-org/youtube-dl/issues/27853
|
||||||
|
'url': 'https://www.pornhubpremium.com/pornstar/sienna-west',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# Same as before, multi page
|
||||||
|
'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
user_id = mobj.group('id')
|
user_id = mobj.group('id')
|
||||||
|
videos_url = '%s/videos' % mobj.group('url')
|
||||||
|
page = self._extract_page(url)
|
||||||
|
if page:
|
||||||
|
videos_url = update_url_query(videos_url, {'page': page})
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
'%s/videos' % mobj.group('url'), ie=PornHubPagedVideoListIE.ie_key(),
|
videos_url, ie=PornHubPagedVideoListIE.ie_key(), video_id=user_id)
|
||||||
video_id=user_id)
|
|
||||||
|
|
||||||
|
|
||||||
class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
|
class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
|
||||||
|
@ -483,32 +547,55 @@ def _has_more(webpage):
|
||||||
<button[^>]+\bid=["\']moreDataBtn
|
<button[^>]+\bid=["\']moreDataBtn
|
||||||
''', webpage) is not None
|
''', webpage) is not None
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _entries(self, url, host, item_id):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
page = self._extract_page(url)
|
||||||
host = mobj.group('host')
|
|
||||||
item_id = mobj.group('id')
|
|
||||||
|
|
||||||
page = int_or_none(self._search_regex(
|
VIDEOS = '/videos'
|
||||||
r'\bpage=(\d+)', url, 'page', default=None))
|
|
||||||
|
|
||||||
entries = []
|
def download_page(base_url, num, fallback=False):
|
||||||
for page_num in (page, ) if page is not None else itertools.count(1):
|
note = 'Downloading page %d%s' % (num, ' (switch to fallback)' if fallback else '')
|
||||||
|
return self._download_webpage(
|
||||||
|
base_url, item_id, note, query={'page': num})
|
||||||
|
|
||||||
|
def is_404(e):
|
||||||
|
return isinstance(e.cause, compat_HTTPError) and e.cause.code == 404
|
||||||
|
|
||||||
|
base_url = url
|
||||||
|
has_page = page is not None
|
||||||
|
first_page = page if has_page else 1
|
||||||
|
for page_num in (first_page, ) if has_page else itertools.count(first_page):
|
||||||
try:
|
try:
|
||||||
webpage = self._download_webpage(
|
try:
|
||||||
url, item_id, 'Downloading page %d' % page_num,
|
webpage = download_page(base_url, page_num)
|
||||||
query={'page': page_num})
|
except ExtractorError as e:
|
||||||
|
# Some sources may not be available via /videos page,
|
||||||
|
# trying to fallback to main page pagination (see [1])
|
||||||
|
# 1. https://github.com/ytdl-org/youtube-dl/issues/27853
|
||||||
|
if is_404(e) and page_num == first_page and VIDEOS in base_url:
|
||||||
|
base_url = base_url.replace(VIDEOS, '')
|
||||||
|
webpage = download_page(base_url, page_num, fallback=True)
|
||||||
|
else:
|
||||||
|
raise
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
if is_404(e) and page_num != first_page:
|
||||||
break
|
break
|
||||||
raise
|
raise
|
||||||
page_entries = self._extract_entries(webpage, host)
|
page_entries = self._extract_entries(webpage, host)
|
||||||
if not page_entries:
|
if not page_entries:
|
||||||
break
|
break
|
||||||
entries.extend(page_entries)
|
for e in page_entries:
|
||||||
|
yield e
|
||||||
if not self._has_more(webpage):
|
if not self._has_more(webpage):
|
||||||
break
|
break
|
||||||
|
|
||||||
return self.playlist_result(orderedSet(entries), item_id)
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
host = mobj.group('host')
|
||||||
|
item_id = mobj.group('id')
|
||||||
|
|
||||||
|
self._login(host)
|
||||||
|
|
||||||
|
return self.playlist_result(self._entries(url, host, item_id), item_id)
|
||||||
|
|
||||||
|
|
||||||
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
||||||
|
|
|
@ -255,8 +255,10 @@ def _real_extract(self, url):
|
||||||
svt_id = self._search_regex(
|
svt_id = self._search_regex(
|
||||||
(r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
|
(r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
|
||||||
r'["\']videoSvtId["\']\s*:\s*["\']([\da-zA-Z-]+)',
|
r'["\']videoSvtId["\']\s*:\s*["\']([\da-zA-Z-]+)',
|
||||||
|
r'["\']videoSvtId\\?["\']\s*:\s*\\?["\']([\da-zA-Z-]+)',
|
||||||
r'"content"\s*:\s*{.*?"id"\s*:\s*"([\da-zA-Z-]+)"',
|
r'"content"\s*:\s*{.*?"id"\s*:\s*"([\da-zA-Z-]+)"',
|
||||||
r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'),
|
r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)',
|
||||||
|
r'["\']svtId\\?["\']\s*:\s*\\?["\']([\da-zA-Z-]+)'),
|
||||||
webpage, 'video id')
|
webpage, 'video id')
|
||||||
|
|
||||||
info_dict = self._extract_by_video_id(svt_id, webpage)
|
info_dict = self._extract_by_video_id(svt_id, webpage)
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
|
|
||||||
class TV2IE(InfoExtractor):
|
class TV2IE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.tv2.no/v/916509/',
|
'url': 'http://www.tv2.no/v/916509/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '916509',
|
'id': '916509',
|
||||||
|
@ -33,7 +33,7 @@ class TV2IE(InfoExtractor):
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'categories': list,
|
'categories': list,
|
||||||
},
|
},
|
||||||
}
|
}]
|
||||||
_API_DOMAIN = 'sumo.tv2.no'
|
_API_DOMAIN = 'sumo.tv2.no'
|
||||||
_PROTOCOLS = ('HDS', 'HLS', 'DASH')
|
_PROTOCOLS = ('HDS', 'HLS', 'DASH')
|
||||||
_GEO_COUNTRIES = ['NO']
|
_GEO_COUNTRIES = ['NO']
|
||||||
|
@ -42,6 +42,12 @@ def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
api_base = 'http://%s/api/web/asset/%s' % (self._API_DOMAIN, video_id)
|
api_base = 'http://%s/api/web/asset/%s' % (self._API_DOMAIN, video_id)
|
||||||
|
|
||||||
|
asset = self._download_json(
|
||||||
|
api_base + '.json', video_id,
|
||||||
|
'Downloading metadata JSON')['asset']
|
||||||
|
title = asset.get('subtitle') or asset['title']
|
||||||
|
is_live = asset.get('live') is True
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
format_urls = []
|
format_urls = []
|
||||||
for protocol in self._PROTOCOLS:
|
for protocol in self._PROTOCOLS:
|
||||||
|
@ -81,7 +87,8 @@ def _real_extract(self, url):
|
||||||
elif ext == 'm3u8':
|
elif ext == 'm3u8':
|
||||||
if not data.get('drmProtected'):
|
if not data.get('drmProtected'):
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
video_url, video_id, 'mp4',
|
||||||
|
'm3u8' if is_live else 'm3u8_native',
|
||||||
m3u8_id=format_id, fatal=False))
|
m3u8_id=format_id, fatal=False))
|
||||||
elif ext == 'mpd':
|
elif ext == 'mpd':
|
||||||
formats.extend(self._extract_mpd_formats(
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
@ -99,11 +106,6 @@ def _real_extract(self, url):
|
||||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
asset = self._download_json(
|
|
||||||
api_base + '.json', video_id,
|
|
||||||
'Downloading metadata JSON')['asset']
|
|
||||||
title = asset['title']
|
|
||||||
|
|
||||||
thumbnails = [{
|
thumbnails = [{
|
||||||
'id': thumbnail.get('@type'),
|
'id': thumbnail.get('@type'),
|
||||||
'url': thumbnail.get('url'),
|
'url': thumbnail.get('url'),
|
||||||
|
@ -112,7 +114,7 @@ def _real_extract(self, url):
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'title': title,
|
'title': self._live_title(title) if is_live else title,
|
||||||
'description': strip_or_none(asset.get('description')),
|
'description': strip_or_none(asset.get('description')),
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'timestamp': parse_iso8601(asset.get('createTime')),
|
'timestamp': parse_iso8601(asset.get('createTime')),
|
||||||
|
@ -120,6 +122,7 @@ def _real_extract(self, url):
|
||||||
'view_count': int_or_none(asset.get('views')),
|
'view_count': int_or_none(asset.get('views')),
|
||||||
'categories': asset.get('keywords', '').split(','),
|
'categories': asset.get('keywords', '').split(','),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'is_live': is_live,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -168,13 +171,13 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
|
|
||||||
class KatsomoIE(TV2IE):
|
class KatsomoIE(TV2IE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:katsomo|mtv)\.fi/(?:#!/)?(?:[^/]+/[0-9a-z-]+-\d+/[0-9a-z-]+-|[^/]+/\d+/[^/]+/)(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?(?:katsomo|mtv(uutiset)?)\.fi/(?:sarja/[0-9a-z-]+-\d+/[0-9a-z-]+-|(?:#!/)?jakso/(?:\d+/[^/]+/)?|video/prog)(?P<id>\d+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://www.mtv.fi/sarja/mtv-uutiset-live-33001002003/lahden-pelicans-teki-kovan-ratkaisun-ville-nieminen-pihalle-1181321',
|
'url': 'https://www.mtv.fi/sarja/mtv-uutiset-live-33001002003/lahden-pelicans-teki-kovan-ratkaisun-ville-nieminen-pihalle-1181321',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1181321',
|
'id': '1181321',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'MTV Uutiset Live',
|
'title': 'Lahden Pelicans teki kovan ratkaisun – Ville Nieminen pihalle',
|
||||||
'description': 'Päätöksen teki Pelicansin hallitus.',
|
'description': 'Päätöksen teki Pelicansin hallitus.',
|
||||||
'timestamp': 1575116484,
|
'timestamp': 1575116484,
|
||||||
'upload_date': '20191130',
|
'upload_date': '20191130',
|
||||||
|
@ -186,7 +189,60 @@ class KatsomoIE(TV2IE):
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.katsomo.fi/#!/jakso/33001005/studio55-fi/658521/jukka-kuoppamaki-tekee-yha-lauluja-vaikka-lentokoneessa',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.mtvuutiset.fi/video/prog1311159',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.katsomo.fi/#!/jakso/1311159',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
_API_DOMAIN = 'api.katsomo.fi'
|
_API_DOMAIN = 'api.katsomo.fi'
|
||||||
_PROTOCOLS = ('HLS', 'MPD')
|
_PROTOCOLS = ('HLS', 'MPD')
|
||||||
_GEO_COUNTRIES = ['FI']
|
_GEO_COUNTRIES = ['FI']
|
||||||
|
|
||||||
|
|
||||||
|
class MTVUutisetArticleIE(InfoExtractor):
    """Extractor for news articles on mtvuutiset.fi.

    The article's JSON description is downloaded from the mtvuutiset API and
    each embedded Katsomo or YouTube video becomes one entry of the
    resulting playlist.
    """

    # The "www." prefix is optional: the group needs the trailing "?",
    # otherwise bare-domain article URLs are never matched.
    _VALID_URL = r'https?://(?:www\.)?mtvuutiset\.fi/artikkeli/[^/]+/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.mtvuutiset.fi/artikkeli/tallaisia-vaurioita-viking-amorellassa-on-useamman-osaston-alla-vetta/7931384',
        'info_dict': {
            'id': '1311159',
            'ext': 'mp4',
            'title': 'Viking Amorellan matkustajien evakuointi on alkanut – tältä operaatio näyttää laivalla',
            'description': 'Viking Amorellan matkustajien evakuointi on alkanut – tältä operaatio näyttää laivalla',
            'timestamp': 1600608966,
            'upload_date': '20200920',
            'duration': 153.7886666,
            'view_count': int,
            'categories': list,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # multiple Youtube embeds
        'url': 'https://www.mtvuutiset.fi/artikkeli/50-vuotta-subarun-vastaiskua/6070962',
        'only_matching': True,
    }, {
        # no "www." prefix
        'url': 'https://mtvuutiset.fi/artikkeli/50-vuotta-subarun-vastaiskua/6070962',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of the supported videos embedded in the article.

        The numeric article id is taken from the URL, the article JSON is
        fetched, and the article's ``title`` and ``description`` are used as
        the playlist metadata.
        """
        article_id = self._match_id(url)
        article = self._download_json(
            'http://api.mtvuutiset.fi/mtvuutiset/api/json/' + article_id,
            article_id)

        def entries():
            # A missing or null "videos" key is treated as "no videos".
            for video in (article.get('videos') or []):
                video_type = video.get('videotype')
                video_url = video.get('url')
                # Only Katsomo and YouTube embeds that carry a URL are handled.
                if not (video_url and video_type in ('katsomo', 'youtube')):
                    continue
                # The capitalised type ('Katsomo' / 'Youtube') is handed to
                # url_result as the name of the extractor to delegate to.
                yield self.url_result(
                    video_url, video_type.capitalize(), video.get('video_id'))

        return self.playlist_result(
            entries(), article_id, article.get('title'), article.get('description'))
|
||||||
|
|
|
@ -17,7 +17,7 @@ class TV4IE(InfoExtractor):
|
||||||
tv4\.se/(?:[^/]+)/klipp/(?:.*)-|
|
tv4\.se/(?:[^/]+)/klipp/(?:.*)-|
|
||||||
tv4play\.se/
|
tv4play\.se/
|
||||||
(?:
|
(?:
|
||||||
(?:program|barn)/(?:[^/]+/|(?:[^\?]+)\?video_id=)|
|
(?:program|barn)/(?:(?:[^/]+/){1,2}|(?:[^\?]+)\?video_id=)|
|
||||||
iframe/video/|
|
iframe/video/|
|
||||||
film/|
|
film/|
|
||||||
sport/|
|
sport/|
|
||||||
|
@ -65,6 +65,10 @@ class TV4IE(InfoExtractor):
|
||||||
{
|
{
|
||||||
'url': 'http://www.tv4play.se/program/farang/3922081',
|
'url': 'http://www.tv4play.se/program/farang/3922081',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://www.tv4play.se/program/nyheterna/avsnitt/13315940',
|
||||||
|
'only_matching': True,
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,13 @@
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import int_or_none
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
str_or_none,
|
||||||
|
strip_or_none,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class VidioIE(InfoExtractor):
|
class VidioIE(InfoExtractor):
|
||||||
|
@ -21,57 +27,63 @@ class VidioIE(InfoExtractor):
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'duration': 149,
|
'duration': 149,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
|
'uploader': 'TWELVE Pic',
|
||||||
|
'timestamp': 1444902800,
|
||||||
|
'upload_date': '20151015',
|
||||||
|
'uploader_id': 'twelvepictures',
|
||||||
|
'channel': 'Cover Music Video',
|
||||||
|
'channel_id': '280236',
|
||||||
|
'view_count': int,
|
||||||
|
'dislike_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'tags': 'count:4',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north',
|
'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _real_initialize(self):
    """Fetch an API key from the Vidio auth endpoint and store it on the instance.

    The key is kept in ``self._api_key``; a response without an
    ``api_key`` entry raises ``KeyError``.
    """
    # An empty bytes body is passed as ``data`` (presumably to make the
    # request a POST - confirm against _download_json).
    auth_response = self._download_json(
        'https://www.vidio.com/auth', None, data=b'')
    self._api_key = auth_response['api_key']
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||||
video_id, display_id = mobj.group('id', 'display_id')
|
data = self._download_json(
|
||||||
|
'https://api.vidio.com/videos/' + video_id, display_id, headers={
|
||||||
|
'Content-Type': 'application/vnd.api+json',
|
||||||
|
'X-API-KEY': self._api_key,
|
||||||
|
})
|
||||||
|
video = data['videos'][0]
|
||||||
|
title = video['title'].strip()
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
|
||||||
|
|
||||||
m3u8_url, duration, thumbnail = [None] * 3
|
|
||||||
|
|
||||||
clips = self._parse_json(
|
|
||||||
self._html_search_regex(
|
|
||||||
r'data-json-clips\s*=\s*(["\'])(?P<data>\[.+?\])\1',
|
|
||||||
webpage, 'video data', default='[]', group='data'),
|
|
||||||
display_id, fatal=False)
|
|
||||||
if clips:
|
|
||||||
clip = clips[0]
|
|
||||||
m3u8_url = clip.get('sources', [{}])[0].get('file')
|
|
||||||
duration = clip.get('clip_duration')
|
|
||||||
thumbnail = clip.get('image')
|
|
||||||
|
|
||||||
m3u8_url = m3u8_url or self._search_regex(
|
|
||||||
r'data(?:-vjs)?-clip-hls-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
|
||||||
webpage, 'hls url', group='url')
|
|
||||||
formats = self._extract_m3u8_formats(
|
formats = self._extract_m3u8_formats(
|
||||||
m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native')
|
data['clips'][0]['hls_url'], display_id, 'mp4', 'm3u8_native')
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
duration = int_or_none(duration or self._search_regex(
|
get_first = lambda x: try_get(data, lambda y: y[x + 's'][0], dict) or {}
|
||||||
r'data-video-duration=(["\'])(?P<duration>\d+)\1', webpage,
|
channel = get_first('channel')
|
||||||
'duration', fatal=False, group='duration'))
|
user = get_first('user')
|
||||||
thumbnail = thumbnail or self._og_search_thumbnail(webpage)
|
username = user.get('username')
|
||||||
|
get_count = lambda x: int_or_none(video.get('total_' + x))
|
||||||
like_count = int_or_none(self._search_regex(
|
|
||||||
(r'<span[^>]+data-comment-vote-count=["\'](\d+)',
|
|
||||||
r'<span[^>]+class=["\'].*?\blike(?:__|-)count\b.*?["\'][^>]*>\s*(\d+)'),
|
|
||||||
webpage, 'like count', fatal=False))
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': self._og_search_description(webpage),
|
'description': strip_or_none(video.get('description')),
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': video.get('image_url_medium'),
|
||||||
'duration': duration,
|
'duration': int_or_none(video.get('duration')),
|
||||||
'like_count': like_count,
|
'like_count': get_count('likes'),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'uploader': user.get('name'),
|
||||||
|
'timestamp': parse_iso8601(video.get('created_at')),
|
||||||
|
'uploader_id': username,
|
||||||
|
'uploader_url': 'https://www.vidio.com/@' + username if username else None,
|
||||||
|
'channel': channel.get('name'),
|
||||||
|
'channel_id': str_or_none(channel.get('id')),
|
||||||
|
'view_count': get_count('view_count'),
|
||||||
|
'dislike_count': get_count('dislikes'),
|
||||||
|
'comment_count': get_count('comments'),
|
||||||
|
'tags': video.get('tag_list'),
|
||||||
}
|
}
|
||||||
|
|
|
@ -125,7 +125,7 @@ def _call_api(self, path_template, video_id, fields=None, limit=None):
|
||||||
headers={'Referer': 'https://www.vlive.tv/'}, query=query)
|
headers={'Referer': 'https://www.vlive.tv/'}, query=query)
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||||
self.raise_login_required(json.loads(e.cause.read().decode())['message'])
|
self.raise_login_required(json.loads(e.cause.read().decode('utf-8'))['message'])
|
||||||
raise
|
raise
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
62
youtube_dlc/extractor/vtm.py
Normal file
62
youtube_dlc/extractor/vtm.py
Normal file
|
@ -0,0 +1,62 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class VTMIE(InfoExtractor):
    """Extractor for vtm.be video pages.

    The page URL carries a video UUID; the video's metadata is looked up
    through a GraphQL endpoint and the actual media extraction is delegated
    to the Medialaan extractor via a mychannels.video embed URL.
    """

    _VALID_URL = r'https?://(?:www\.)?vtm\.be/([^/?&#]+)~v(?P<id>[0-9a-f]{8}(?:-[0-9a-f]{4}){3}-[0-9a-f]{12})'
    _TEST = {
        'url': 'https://vtm.be/gast-vernielt-genkse-hotelkamer~ve7534523-279f-4b4d-a5c9-a33ffdbe23e1',
        'md5': '37dca85fbc3a33f2de28ceb834b071f8',
        'info_dict': {
            'id': '192445',
            'ext': 'mp4',
            'title': 'Gast vernielt Genkse hotelkamer',
            'timestamp': 1611060180,
            'upload_date': '20210119',
            'duration': 74,
            # TODO: fix url _type result processing
            # 'series': 'Op Interventie',
        }
    }

    def _real_extract(self, url):
        """Look up the video by UUID and return a ``url`` result for Medialaan."""
        uuid = self._match_id(url)

        # GraphQL document asking for the metadata fields used below.
        graphql_query = '''{
getComponent(type: Video, uuid: "%s") {
... on Video {
description
duration
myChannelsVideo
program {
title
}
publishedAt
title
}
}
}''' % uuid

        response = self._download_json(
            'https://omc4vm23offuhaxx6hekxtzspi.appsync-api.eu-west-1.amazonaws.com/graphql',
            uuid,
            query={'query': graphql_query},
            headers={'x-api-key': 'da2-lz2cab4tfnah3mve6wiye4n77e'})
        video = response['data']['getComponent']

        # "myChannelsVideo" is mandatory: it identifies the embed to delegate to.
        embed_url = 'http://mychannels.video/embed/%d' % video['myChannelsVideo']

        result = {
            '_type': 'url',
            'ie_key': 'Medialaan',
            'id': uuid,
            'url': embed_url,
        }
        # Optional metadata; each helper tolerates a missing field.
        result.update({
            'title': video.get('title'),
            'description': video.get('description'),
            'timestamp': parse_iso8601(video.get('publishedAt')),
            'duration': int_or_none(video.get('duration')),
            'series': try_get(video, lambda x: x['program']['title']),
        })
        return result
|
|
@ -4,6 +4,7 @@
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from .youtube import YoutubeIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
@ -47,6 +48,22 @@ class VVVVIDIE(InfoExtractor):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# video_type == 'video/youtube'
|
||||||
|
'url': 'https://www.vvvvid.it/show/404/one-punch-man/406/486683/trailer',
|
||||||
|
'md5': '33e0edfba720ad73a8782157fdebc648',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'RzmFKUDOUgw',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Trailer',
|
||||||
|
'upload_date': '20150906',
|
||||||
|
'description': 'md5:a5e802558d35247fee285875328c0b80',
|
||||||
|
'uploader_id': 'BandaiVisual',
|
||||||
|
'uploader': 'BANDAI NAMCO Arts Channel',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048',
|
'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048',
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
|
@ -154,12 +171,13 @@ def metadata_from_url(r_url):
|
||||||
if season_number:
|
if season_number:
|
||||||
info['season_number'] = int(season_number)
|
info['season_number'] = int(season_number)
|
||||||
|
|
||||||
for quality in ('_sd', ''):
|
video_type = video_data.get('video_type')
|
||||||
|
is_youtube = False
|
||||||
|
for quality in ('', '_sd'):
|
||||||
embed_code = video_data.get('embed_info' + quality)
|
embed_code = video_data.get('embed_info' + quality)
|
||||||
if not embed_code:
|
if not embed_code:
|
||||||
continue
|
continue
|
||||||
embed_code = ds(embed_code)
|
embed_code = ds(embed_code)
|
||||||
video_type = video_data.get('video_type')
|
|
||||||
if video_type in ('video/rcs', 'video/kenc'):
|
if video_type in ('video/rcs', 'video/kenc'):
|
||||||
if video_type == 'video/kenc':
|
if video_type == 'video/kenc':
|
||||||
kenc = self._download_json(
|
kenc = self._download_json(
|
||||||
|
@ -172,19 +190,28 @@ def metadata_from_url(r_url):
|
||||||
if kenc_message:
|
if kenc_message:
|
||||||
embed_code += '?' + ds(kenc_message)
|
embed_code += '?' + ds(kenc_message)
|
||||||
formats.extend(self._extract_akamai_formats(embed_code, video_id))
|
formats.extend(self._extract_akamai_formats(embed_code, video_id))
|
||||||
|
elif video_type == 'video/youtube':
|
||||||
|
info.update({
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'ie_key': YoutubeIE.ie_key(),
|
||||||
|
'url': embed_code,
|
||||||
|
})
|
||||||
|
is_youtube = True
|
||||||
|
break
|
||||||
else:
|
else:
|
||||||
formats.extend(self._extract_wowza_formats(
|
formats.extend(self._extract_wowza_formats(
|
||||||
'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))
|
'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))
|
||||||
metadata_from_url(embed_code)
|
metadata_from_url(embed_code)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
if not is_youtube:
|
||||||
|
self._sort_formats(formats)
|
||||||
|
info['formats'] = formats
|
||||||
|
|
||||||
metadata_from_url(video_data.get('thumbnail'))
|
metadata_from_url(video_data.get('thumbnail'))
|
||||||
info.update(self._extract_common_video_info(video_data))
|
info.update(self._extract_common_video_info(video_data))
|
||||||
info.update({
|
info.update({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
|
||||||
'duration': int_or_none(video_data.get('length')),
|
'duration': int_or_none(video_data.get('length')),
|
||||||
'series': video_data.get('show_title'),
|
'series': video_data.get('show_title'),
|
||||||
'season_id': season_id,
|
'season_id': season_id,
|
||||||
|
|
|
@ -87,11 +87,16 @@ def _real_extract(self, url):
|
||||||
r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1',
|
r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1',
|
||||||
body, 'm3u8 url', group='url', default=None)
|
body, 'm3u8 url', group='url', default=None)
|
||||||
if not m3u8_url:
|
if not m3u8_url:
|
||||||
source = self._parse_json(self._search_regex(
|
source = self._search_regex(
|
||||||
r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', body,
|
r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', body, 'source')
|
||||||
'source'), video_id, js_to_json)
|
|
||||||
if source.get('integration') == 'verizon-media':
|
def get_attr(key):
|
||||||
m3u8_url = 'https://content.uplynk.com/%s.m3u8' % source['id']
|
return self._search_regex(
|
||||||
|
r'\b%s\s*:\s*([\'"])(?P<val>(?:(?!\1).)+)\1' % key,
|
||||||
|
source, key, group='val')
|
||||||
|
|
||||||
|
if get_attr('integration') == 'verizon-media':
|
||||||
|
m3u8_url = 'https://content.uplynk.com/%s.m3u8' % get_attr('id')
|
||||||
formats = self._extract_m3u8_formats(
|
formats = self._extract_m3u8_formats(
|
||||||
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
|
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
|
||||||
text_tracks = self._search_regex(
|
text_tracks = self._search_regex(
|
||||||
|
|
Loading…
Reference in a new issue