mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-02 06:27:21 +00:00
[ie/atptour] add extractor and updated data extraction function
This commit is contained in:
parent
ad9a8115aa
commit
485cbe4990
|
@ -169,6 +169,10 @@
|
|||
AsobiChannelTagURLIE,
|
||||
)
|
||||
from .asobistage import AsobiStageIE
|
||||
from .atptour import (
|
||||
ATPTourNewsIE,
|
||||
ATPTourVideoIE,
|
||||
)
|
||||
from .atresplayer import AtresPlayerIE
|
||||
from .atscaleconf import AtScaleConfEventIE
|
||||
from .atvat import ATVAtIE
|
||||
|
|
106
yt_dlp/extractor/atptour.py
Normal file
106
yt_dlp/extractor/atptour.py
Normal file
|
@ -0,0 +1,106 @@
|
|||
import re

from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    base_url,
    extract_attributes,
    get_element_html_by_id,
    traverse_obj,
    urljoin,
)
|
||||
|
||||
|
||||
class ATPTourVideoIE(InfoExtractor):
    """Extractor for single video pages under atptour.com/en/video/.

    The page carries a hidden ``<input>`` (looked up by its ``class``
    attribute) whose value is the path of a JSON endpoint. The first item of
    that endpoint's ``content`` list provides the Brightcove account, player
    and video IDs; the actual extraction is delegated to BrightcoveNewIE.
    """

    IE_NAME = 'atptour:video'
    # Video slugs contain hyphens, so the ID group must accept '-' as well as \w
    _VALID_URL = r'https?://(?:www\.)?atptour\.com/en/video/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://www.atptour.com/en/video/challenger-highlights-nishikori-wins-in-como-2024',
        'md5': '4721002227d98fe89afafa40eba3068d',
        'info_dict': {
            'id': '6361099221112',
            'ext': 'mp4',
            'description': 'md5:ef8afed21c52cbe4ad3409045d59f413',
            'upload_date': '20240827',
            'duration': 105.152,
            'tags': 'count:6',
            'thumbnail': r're:^https?://.*\.jpg$',
            'title': 'Challenger Highlights: Nishikori wins in Como 2024',
            'uploader_id': '6057277721001',
            'timestamp': 1724775281,
        },
    }, {
        'url': 'https://www.atptour.com/en/video/highlights-svajda-earns-highestranked-win-of-career-vs-cerundolo-winstonsalem-2024',
        'md5': 'a3829d10bdcb1829568fd88b9e6ecb15',
        'info_dict': {
            'id': '6360716257112',
            'ext': 'mp4',
            'description': 'md5:a334aeb73eac631ffab8249b1e68194c',
            'upload_date': '20240820',
            'duration': 139.691,
            'tags': 'count:5',
            'thumbnail': r're:^https?://.*\.jpg$',
            'title': 'Highlights: Svajda earns highest-ranked win of career vs. Cerundolo Winston-Salem 2024',
            'uploader_id': '6057277721001',
            'timestamp': 1724183755,
        },
    }, {
        'url': 'https://www.atptour.com/en/video/highlights-sonego-dominates-michelsen-for-winston-salem-open-title-2024',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the page to a Brightcove player URL.

        Raises ExtractorError if the endpoint or any of the Brightcove IDs
        cannot be found; download failures are fatal as well, since nothing
        can be extracted without the webpage or the JSON feed.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id, impersonate=True)

        # The endpoint path is stored in the value of a hidden input that is
        # identified by its class attribute rather than by name/id
        endpoint = self._hidden_inputs(webpage, 'class').get('atp_featured-videos-endpoint')
        if not endpoint:
            raise ExtractorError('Unable to find the featured videos endpoint')

        json_data = self._download_json(
            urljoin(base_url(url), endpoint), display_id, impersonate=True)
        video_data = traverse_obj(json_data, ('content', 0))

        account_id = traverse_obj(video_data, 'videoAccountId')
        player_id = traverse_obj(video_data, 'videoPlayerId')
        video_id = traverse_obj(video_data, 'videoId')
        # Never build a player URL containing a literal "None"
        if not (account_id and player_id and video_id):
            raise ExtractorError('Unable to extract Brightcove video information')

        return self.url_result(
            f'https://players.brightcove.net/{account_id}/{player_id}/index.html?videoId={video_id}', BrightcoveNewIE)
|
||||
|
||||
|
||||
class ATPTourNewsIE(InfoExtractor):
    """Extractor for news articles under atptour.com/en/news/.

    An article is returned as a playlist of its embedded Brightcove videos:
    the element with id ``articleVideoJSPlayer`` (described by ``data-*``
    attributes) followed by every ``<iframe>`` whose ``src`` points at
    players.brightcove.net. Each entry is delegated to BrightcoveNewIE.
    """

    IE_NAME = 'atptour:news'
    _VALID_URL = r'https?://(?:www\.)?atptour\.com/en/news/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://www.atptour.com/en/news/sinner-zverev-cincinnati-2024-sf',
        'playlist_mincount': 2,
        'info_dict': {
            'id': 'sinner-zverev-cincinnati-2024-sf',
            'title': 'Jannik Sinner battles past Alexander Zverev to reach Cincinnati final | ATP Tour | Tennis',
            'description': 'md5:30cd3df666c8a5d45731d1e85d8d43ae',
        },
    }, {
        'url': 'https://www.atptour.com/en/news/borges-us-open-2024-this-is-tennis',
        'playlist_mincount': 1,
        'info_dict': {
            'id': 'borges-us-open-2024-this-is-tennis',
            'title': 'Nuno Borges: Building legos, facing Nadal, Cirque du Soleil & more | ATP Tour | Tennis',
            'description': 'md5:aaef866660c4e3ced69118c0f6ed237a',
        },
    }]

    def _real_extract(self, url):
        """Collect all Brightcove embeds of the article into a playlist.

        The webpage download is fatal: without the page neither the metadata
        nor any embed can be extracted.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id, impersonate=True)

        video_urls = []

        # Main article player: an element carrying the Brightcove IDs as data attributes
        player_html = get_element_html_by_id('articleVideoJSPlayer', webpage)
        if player_html:
            attributes = extract_attributes(player_html)
            account_id = attributes.get('data-account')
            player_id = attributes.get('data-player')
            video_id = attributes.get('data-video-id')
            # Skip the element if it is incomplete rather than emit a ".../None/..." URL
            if account_id and player_id and video_id:
                video_urls.append(
                    f'https://players.brightcove.net/{account_id}/{player_id}/index.html?videoId={video_id}')

        # Further videos are embedded as iframes; allow any attributes before src
        video_urls.extend(re.findall(
            r'<iframe[^>]*\ssrc="(https://players\.brightcove\.net/[^"]+)"', webpage))

        return self.playlist_result(
            [self.url_result(video_url, BrightcoveNewIE) for video_url in video_urls],
            display_id, self._html_extract_title(webpage), self._og_search_description(webpage))
|
|
@ -1779,7 +1779,7 @@ def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal
|
|||
return traverse_obj(ret, traverse) or {}
|
||||
|
||||
@staticmethod
|
||||
def _hidden_inputs(html):
|
||||
def _hidden_inputs(html, attr_list=('name', 'id')):
|
||||
html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
|
||||
hidden_inputs = {}
|
||||
for input_el in re.findall(r'(?i)(<input[^>]+>)', html):
|
||||
|
@ -1788,7 +1788,10 @@ def _hidden_inputs(html):
|
|||
continue
|
||||
if attrs.get('type') not in ('hidden', 'submit'):
|
||||
continue
|
||||
name = attrs.get('name') or attrs.get('id')
|
||||
for attr in variadic(attr_list):
|
||||
name = attrs.get(attr)
|
||||
if name is not None:
|
||||
break
|
||||
value = attrs.get('value')
|
||||
if name and value is not None:
|
||||
hidden_inputs[name] = value
|
||||
|
|
Loading…
Reference in a new issue