From 81f46ac573dc443ad48560f308582a26784d3015 Mon Sep 17 00:00:00 2001 From: Sebastian Koch Date: Sun, 17 Sep 2023 22:54:00 +0200 Subject: [PATCH] [ie/massengeschmack.tv] Fix title extraction (#7813) Authored by: sb0stn --- yt_dlp/extractor/massengeschmacktv.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/massengeschmacktv.py b/yt_dlp/extractor/massengeschmacktv.py index 7dacb43e0..1490e9b21 100644 --- a/yt_dlp/extractor/massengeschmacktv.py +++ b/yt_dlp/extractor/massengeschmacktv.py @@ -17,11 +17,12 @@ class MassengeschmackTVIE(InfoExtractor): _TEST = { 'url': 'https://massengeschmack.tv/play/fktv202', - 'md5': 'a9e054db9c2b5a08f0a0527cc201e8d3', + 'md5': '9996f314994a49fefe5f39aa1b07ae21', 'info_dict': { 'id': 'fktv202', 'ext': 'mp4', - 'title': 'Fernsehkritik-TV - Folge 202', + 'title': 'Fernsehkritik-TV #202', + 'thumbnail': 'https://cache.massengeschmack.tv/img/mag/fktv202.jpg' }, } @@ -29,9 +30,6 @@ def _real_extract(self, url): episode = self._match_id(url) webpage = self._download_webpage(url, episode) - title = clean_html(self._html_search_regex( - '

<h3>([^<]+)</h3>

', webpage, 'title')) - thumbnail = self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False) sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json) formats = [] @@ -67,7 +65,8 @@ def _real_extract(self, url): return { 'id': episode, - 'title': title, + 'title': clean_html(self._html_search_regex( + r'<span[^>]+\bid=["\']clip-title["\'][^>]*>([^<]+)', webpage, 'title', fatal=False)), 'formats': formats, - 'thumbnail': thumbnail, + 'thumbnail': self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False), }