mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-21 00:06:37 +00:00
ITV BTCC new pages' URL update (articles instead of races)
Not my changes, but from @franhp that didn't get merged on yt-dl on time It supports BTCC new pages' schema from 2019 an on (/articles/ instead of /races/)
This commit is contained in:
parent
651bae3d23
commit
6857df609b
|
@ -20,6 +20,7 @@
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
|
try_get,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
xpath_element,
|
xpath_element,
|
||||||
|
@ -280,12 +281,12 @@ def extract_subtitle(sub_url):
|
||||||
class ITVBTCCIE(InfoExtractor):
|
class ITVBTCCIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?itv\.com/btcc/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?itv\.com/btcc/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.itv.com/btcc/races/btcc-2018-all-the-action-from-brands-hatch',
|
'url': 'https://www.itv.com/btcc/articles/btcc-2019-brands-hatch-gp-race-action',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'btcc-2018-all-the-action-from-brands-hatch',
|
'id': 'btcc-2019-brands-hatch-gp-race-action',
|
||||||
'title': 'BTCC 2018: All the action from Brands Hatch',
|
'title': 'BTCC 2019: Brands Hatch GP race action',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 9,
|
'playlist_mincount': 12,
|
||||||
}
|
}
|
||||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1582188683001/HkiHLnNRx_default/index.html?videoId=%s'
|
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1582188683001/HkiHLnNRx_default/index.html?videoId=%s'
|
||||||
|
|
||||||
|
@ -294,6 +295,16 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
|
json_map = try_get(self._parse_json(self._html_search_regex(
|
||||||
|
'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[^>]*>([^<]+)</script>', webpage, 'json_map'), playlist_id),
|
||||||
|
lambda x: x['props']['pageProps']['article']['body']['content']) or []
|
||||||
|
|
||||||
|
# Discard empty objects
|
||||||
|
video_ids = []
|
||||||
|
for video in json_map:
|
||||||
|
if video['data'].get('id'):
|
||||||
|
video_ids.append(video['data']['id'])
|
||||||
|
|
||||||
entries = [
|
entries = [
|
||||||
self.url_result(
|
self.url_result(
|
||||||
smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {
|
smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {
|
||||||
|
@ -305,7 +316,7 @@ def _real_extract(self, url):
|
||||||
'referrer': url,
|
'referrer': url,
|
||||||
}),
|
}),
|
||||||
ie=BrightcoveNewIE.ie_key(), video_id=video_id)
|
ie=BrightcoveNewIE.ie_key(), video_id=video_id)
|
||||||
for video_id in re.findall(r'data-video-id=["\'](\d+)', webpage)]
|
for video_id in video_ids]
|
||||||
|
|
||||||
title = self._og_search_title(webpage, fatal=False)
|
title = self._og_search_title(webpage, fatal=False)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue