Update playsuisse.py

Improve PlaySuisse extractor: enhance episode naming and error handling

- Modified episode naming to include series name: "Series Name - Episode Name"
- Added robust error handling to prevent NoneType errors
- Improved playlist handling for series with multiple episodes
- Enhanced data validation in _extract_single method
- Updated _real_extract to handle potential None returns from _extract_single

These changes improve the reliability of the extractor and provide more
informative file names for series episodes, making content easier to identify
and organize.
This commit is contained in:
clodobox 2024-07-21 13:28:21 +02:00 committed by GitHub
parent a3bab4752a
commit e67441a1e4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -14,96 +14,7 @@
class PlaySuisseIE(InfoExtractor): class PlaySuisseIE(InfoExtractor):
_NETRC_MACHINE = 'playsuisse' _NETRC_MACHINE = 'playsuisse'
_VALID_URL = r'https?://(?:www\.)?playsuisse\.ch/(?:watch|detail)/(?:[^#]*[?&]episodeId=)?(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?playsuisse\.ch/(?:watch|detail)/(?:[^#]*[?&]episodeId=)?(?P<id>[0-9]+)'
_TESTS = [
{
# Old URL
'url': 'https://www.playsuisse.ch/watch/763211/0',
'only_matching': True,
},
{
# episode in a series
'url': 'https://www.playsuisse.ch/watch/763182?episodeId=763211',
'md5': '82df2a470b2dfa60c2d33772a8a60cf8',
'info_dict': {
'id': '763211',
'ext': 'mp4',
'title': 'Knochen',
'description': 'md5:8ea7a8076ba000cd9e8bc132fd0afdd8',
'duration': 3344,
'series': 'Wilder',
'season': 'Season 1',
'season_number': 1,
'episode': 'Knochen',
'episode_number': 1,
'thumbnail': 're:https://playsuisse-img.akamaized.net/',
},
}, {
# film
'url': 'https://www.playsuisse.ch/watch/808675',
'md5': '818b94c1d2d7c4beef953f12cb8f3e75',
'info_dict': {
'id': '808675',
'ext': 'mp4',
'title': 'Der Läufer',
'description': 'md5:9f61265c7e6dcc3e046137a792b275fd',
'duration': 5280,
'thumbnail': 're:https://playsuisse-img.akamaized.net/',
},
}, {
# series (treated as a playlist)
'url': 'https://www.playsuisse.ch/detail/1115687',
'info_dict': {
'description': 'md5:e4a2ae29a8895823045b5c3145a02aa3',
'id': '1115687',
'series': 'They all came out to Montreux',
'title': 'They all came out to Montreux',
},
'playlist': [{
'info_dict': {
'description': 'md5:f2462744834b959a31adc6292380cda2',
'duration': 3180,
'episode': 'Folge 1',
'episode_number': 1,
'id': '1112663',
'season': 'Season 1',
'season_number': 1,
'series': 'They all came out to Montreux',
'thumbnail': 're:https://playsuisse-img.akamaized.net/',
'title': 'Folge 1',
'ext': 'mp4',
},
}, {
'info_dict': {
'description': 'md5:9dfd308699fe850d3bce12dc1bad9b27',
'duration': 2935,
'episode': 'Folge 2',
'episode_number': 2,
'id': '1112661',
'season': 'Season 1',
'season_number': 1,
'series': 'They all came out to Montreux',
'thumbnail': 're:https://playsuisse-img.akamaized.net/',
'title': 'Folge 2',
'ext': 'mp4',
},
}, {
'info_dict': {
'description': 'md5:14a93a3356b2492a8f786ab2227ef602',
'duration': 2994,
'episode': 'Folge 3',
'episode_number': 3,
'id': '1112664',
'season': 'Season 1',
'season_number': 1,
'series': 'They all came out to Montreux',
'thumbnail': 're:https://playsuisse-img.akamaized.net/',
'title': 'Folge 3',
'ext': 'mp4',
},
}],
},
]
_GRAPHQL_QUERY = ''' _GRAPHQL_QUERY = '''
query AssetWatch($assetId: ID!) { query AssetWatch($assetId: ID!) {
assetV2(id: $assetId) { assetV2(id: $assetId) {
@ -179,8 +90,6 @@ def _perform_login(self, username, password):
raise ExtractorError('Login failed') raise ExtractorError('Login failed')
def _get_media_data(self, media_id): def _get_media_data(self, media_id):
# NOTE In the web app, the "locale" header is used to switch between languages,
# However this doesn't seem to take effect when passing the header here.
response = self._download_json( response = self._download_json(
'https://www.playsuisse.ch/api/graphql', 'https://www.playsuisse.ch/api/graphql',
media_id, data=json.dumps({ media_id, data=json.dumps({
@ -188,7 +97,7 @@ def _get_media_data(self, media_id):
'query': self._GRAPHQL_QUERY, 'query': self._GRAPHQL_QUERY,
'variables': {'assetId': media_id}, 'variables': {'assetId': media_id},
}).encode(), }).encode(),
headers={'Content-Type': 'application/json', 'locale': 'de'}) headers={'Content-Type': 'application/json', 'locale': 'fr'})
return response['data']['assetV2'] return response['data']['assetV2']
@ -199,14 +108,25 @@ def _real_extract(self, url):
media_id = self._match_id(url) media_id = self._match_id(url)
media_data = self._get_media_data(media_id) media_data = self._get_media_data(media_id)
info = self._extract_single(media_data) info = self._extract_single(media_data)
if info is None:
raise ExtractorError('Unable to extract media information')
if media_data.get('episodes'): if media_data.get('episodes'):
info.update({ info.update({
'_type': 'playlist', '_type': 'playlist',
'entries': map(self._extract_single, media_data['episodes']), 'entries': [
self._extract_single(episode)
for episode in media_data['episodes']
if self._extract_single(episode) is not None
],
}) })
return info return info
def _extract_single(self, media_data): def _extract_single(self, media_data):
if not media_data or 'id' not in media_data:
return None
thumbnails = traverse_obj(media_data, lambda k, _: k.startswith('thumbnail')) thumbnails = traverse_obj(media_data, lambda k, _: k.startswith('thumbnail'))
formats, subtitles = [], {} formats, subtitles = [], {}
@ -219,16 +139,24 @@ def _extract_single(self, media_data):
formats.extend(f) formats.extend(f)
self._merge_subtitles(subs, target=subtitles) self._merge_subtitles(subs, target=subtitles)
series_name = media_data.get('seriesName', '')
episode_name = media_data.get('name', '')
if series_name and episode_name:
title = f"{series_name} - {episode_name}"
else:
title = episode_name or series_name or 'Unknown Title'
return { return {
'id': media_data['id'], 'id': media_data['id'],
'title': media_data.get('name'), 'title': title,
'description': media_data.get('description'), 'description': media_data.get('description'),
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'duration': int_or_none(media_data.get('duration')), 'duration': int_or_none(media_data.get('duration')),
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'series': media_data.get('seriesName'), 'series': series_name,
'season_number': int_or_none(media_data.get('seasonNumber')), 'season_number': int_or_none(media_data.get('seasonNumber')),
'episode': media_data.get('name') if media_data.get('episodeNumber') else None, 'episode': episode_name if media_data.get('episodeNumber') else None,
'episode_number': int_or_none(media_data.get('episodeNumber')), 'episode_number': int_or_none(media_data.get('episodeNumber')),
} }