From fd97fa7bfc59983d315892c26f861842820a9579 Mon Sep 17 00:00:00 2001
From: Mattias Wadman
Date: Fri, 30 Mar 2018 20:02:09 +0200
Subject: [PATCH] [svtplay:series] Add extractor

Related to #11130
---
NOTE(review): line breaks and indentation in this patch were reconstructed
from a copy whose whitespace had been collapsed. Added lines follow the
hunk counts exactly; the indentation of unchanged context lines is assumed
and should be confirmed against the parent revision (svt.py 48bc4529e,
extractors.py bded6e144). The author's e-mail address and the "<id>" group
name in _VALID_URL had been stripped from that copy; only the latter could
be restored.

 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/svt.py        | 57 ++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index bded6e144..b46a304ac 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1031,6 +1031,7 @@
 from .svt import (
     SVTIE,
     SVTPlayIE,
+    SVTPlaylistIE,
 )
 from .swrmediathek import SWRMediathekIE
 from .syfy import SyfyIE
diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py
index 48bc4529e..d02fd9450 100644
--- a/youtube_dl/extractor/svt.py
+++ b/youtube_dl/extractor/svt.py
@@ -9,6 +9,8 @@
     dict_get,
     int_or_none,
     try_get,
+    urljoin,
+    compat_str,
 )
 
 
@@ -189,3 +191,58 @@ def _real_extract(self, url):
                     r'\s*\|\s*.+?$', '',
                     info_dict.get('episode') or self._og_search_title(webpage))
             return info_dict
+
+
+class SVTPlaylistIE(InfoExtractor):
+    IE_DESC = 'SVT Play serie'
+    _VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P<id>[^/?&#]+)'
+    IE_NAME = 'svtplay:serie'
+    _TESTS = [{
+        'url': 'https://www.svtplay.se/rederiet',
+        'info_dict': {
+            'id': 'rederiet',
+            'title': 'Rederiet',
+            'description': 'md5:505d491a58f4fcf6eb418ecab947e69e',
+        },
+        'playlist_mincount': 318,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTPlaylistIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        page = self._download_webpage(
+            url, video_id,
+            note='Downloading serie page',
+            errnote='unable to fetch serie page')
+
+        root_json = self._search_regex(
+            r'root\[\'__svtplay\'\]\s*=(.+);\n',
+            page, 'root')
+        root = self._parse_json(root_json, video_id)
+
+        metadata = root.get('metaData', {})
+        related_videos_accordion = root['relatedVideoContent']['relatedVideosAccordion']
+
+        entries = []
+        for season in related_videos_accordion:
+            videos = season.get('videos')
+            if not isinstance(videos, list):
+                continue
+
+            for video in videos:
+                content_url = video.get('contentUrl')
+                if not isinstance(content_url, compat_str):
+                    continue
+                entries.append(
+                    self.url_result(
+                        urljoin(url, content_url),
+                        ie=SVTPlayIE.ie_key(),
+                        video_title=video.get('title')
+                    ))
+
+        return self.playlist_result(
+            entries, video_id, metadata.get('title'), metadata.get('description'))