[extractor/mediastream] Improve WinSports support (#6401)

Closes #6360
Authored by: bashonly
This commit is contained in:
bashonly 2023-03-03 03:37:23 -06:00 committed by GitHub
parent 77d6d13646
commit 2d5a8c5db2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 32 additions and 9 deletions

View File

@ -1,7 +1,13 @@
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import clean_html, get_element_html_by_class from ..utils import (
remove_end,
str_or_none,
strip_or_none,
traverse_obj,
urljoin,
)
class MediaStreamIE(InfoExtractor): class MediaStreamIE(InfoExtractor):
@ -117,39 +123,56 @@ class MediaStreamIE(InfoExtractor):
class WinSportsVideoIE(InfoExtractor): class WinSportsVideoIE(InfoExtractor):
_VALID_URL = r'https?://www\.winsports\.co/videos/(?P<display_id>[\w-]+)-(?P<id>\d+)' _VALID_URL = r'https?://www\.winsports\.co/videos/(?P<id>[\w-]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.winsports.co/videos/siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco-60536', 'url': 'https://www.winsports.co/videos/siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco-60536',
'info_dict': { 'info_dict': {
'id': '62dc8357162c4b0821fcfb3c', 'id': '62dc8357162c4b0821fcfb3c',
'display_id': 'siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco', 'display_id': 'siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco-60536',
'title': '¡Siempre Castellanos! Gran atajada del portero \'cardenal\' para evitar la caída de su arco', 'title': '¡Siempre Castellanos! Gran atajada del portero \'cardenal\' para evitar la caída de su arco',
'description': 'md5:eb811b2b2882bdc59431732c06b905f2', 'description': 'md5:eb811b2b2882bdc59431732c06b905f2',
'thumbnail': r're:^https?://[^?#]+62dc8357162c4b0821fcfb3c', 'thumbnail': r're:^https?://[^?#]+62dc8357162c4b0821fcfb3c',
'ext': 'mp4', 'ext': 'mp4',
}, },
'params': {'skip_download': 'm3u8'},
}, { }, {
'url': 'https://www.winsports.co/videos/observa-aqui-los-goles-del-empate-entre-tolima-y-nacional-60548', 'url': 'https://www.winsports.co/videos/observa-aqui-los-goles-del-empate-entre-tolima-y-nacional-60548',
'info_dict': { 'info_dict': {
'id': '62dcb875ef12a5526790b552', 'id': '62dcb875ef12a5526790b552',
'display_id': 'observa-aqui-los-goles-del-empate-entre-tolima-y-nacional', 'display_id': 'observa-aqui-los-goles-del-empate-entre-tolima-y-nacional-60548',
'title': 'Observa aquí los goles del empate entre Tolima y Nacional', 'title': 'Observa aquí los goles del empate entre Tolima y Nacional',
'description': 'md5:b19402ba6e46558b93fd24b873eea9c9', 'description': 'md5:b19402ba6e46558b93fd24b873eea9c9',
'thumbnail': r're:^https?://[^?#]+62dcb875ef12a5526790b552', 'thumbnail': r're:^https?://[^?#]+62dcb875ef12a5526790b552',
'ext': 'mp4', 'ext': 'mp4',
}, },
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.winsports.co/videos/equidad-vuelve-defender-su-arco-de-remates-de-junior',
'info_dict': {
'id': '63fa7eca72f1741ad3a4d515',
'display_id': 'equidad-vuelve-defender-su-arco-de-remates-de-junior',
'title': '⚽ Equidad vuelve a defender su arco de remates de Junior',
'description': 'Remate de Sierra',
'thumbnail': r're:^https?://[^?#]+63fa7eca72f1741ad3a4d515',
'ext': 'mp4',
},
'params': {'skip_download': 'm3u8'},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
display_id, video_id = self._match_valid_url(url).group('display_id', 'id') display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
json_ld = self._search_json_ld(webpage, display_id, expected_type='VideoObject', default={})
media_setting_json = self._search_json( media_setting_json = self._search_json(
r'<script\s*[^>]+data-drupal-selector="drupal-settings-json">', webpage, 'drupal-setting-json', display_id) r'<script\s*[^>]+data-drupal-selector="drupal-settings-json">', webpage, 'drupal-setting-json', display_id)
mediastream_id = media_setting_json['settings']['mediastream_formatter'][video_id]['mediastream_id'] mediastream_id = traverse_obj(
media_setting_json, ('settings', 'mediastream_formatter', ..., 'mediastream_id', {str_or_none}),
get_all=False) or json_ld.get('url')
if not mediastream_id:
self.raise_no_formats('No MediaStream embed found in webpage')
return self.url_result( return self.url_result(
f'https://mdstrm.com/embed/{mediastream_id}', MediaStreamIE, video_id, url_transparent=True, urljoin('https://mdstrm.com/embed/', mediastream_id), MediaStreamIE, display_id, url_transparent=True,
display_id=display_id, video_title=clean_html(get_element_html_by_class('title-news', webpage))) display_id=display_id, video_title=strip_or_none(remove_end(json_ld.get('title'), '| Win Sports')))