From 237d07f1141c18a40c1179a82fd072d7bde56f66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 8 Feb 2018 03:58:35 +0700 Subject: [PATCH] [pokemon] Relax _VALID_URL and extend title extraction (closes #15518) --- youtube_dl/extractor/pokemon.py | 37 ++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/pokemon.py b/youtube_dl/extractor/pokemon.py index 2d87e7e70..4ff617163 100644 --- a/youtube_dl/extractor/pokemon.py +++ b/youtube_dl/extractor/pokemon.py @@ -11,19 +11,34 @@ class PokemonIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/[^/]+/\d+_\d+-(?P<display_id>[^/?#]+))' + _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/(?:[^/]+/)+(?P<display_id>[^/?#&]+))' _TESTS = [{ - 'url': 'http://www.pokemon.com/us/pokemon-episodes/19_01-from-a-to-z/?play=true', - 'md5': '9fb209ae3a569aac25de0f5afc4ee08f', + 'url': 'https://www.pokemon.com/us/pokemon-episodes/20_30-the-ol-raise-and-switch/', + 'md5': '2fe8eaec69768b25ef898cda9c43062e', 'info_dict': { - 'id': 'd0436c00c3ce4071ac6cee8130ac54a1', + 'id': 'afe22e30f01c41f49d4f1d9eab5cd9a4', 'ext': 'mp4', - 'title': 'From A to Z!', - 'description': 'Bonnie makes a new friend, Ash runs into an old friend, and a terrifying premonition begins to unfold!', - 'timestamp': 1460478136, - 'upload_date': '20160412', + 'title': 'The Ol’ Raise and Switch!', + 'description': 'md5:7db77f7107f98ba88401d3adc80ff7af', + 'timestamp': 1511824728, + 'upload_date': '20171127', + }, + 'add_id': ['LimelightMedia'], + }, { + # no data-video-title + 'url': 'https://www.pokemon.com/us/pokemon-episodes/pokemon-movies/pokemon-the-rise-of-darkrai-2008', + 'info_dict': { + 'id': '99f3bae270bf4e5097274817239ce9c8', + 'ext': 'mp4', + 'title': 'Pokémon: The Rise of Darkrai', + 'description': 'md5:ea8fbbf942e1e497d54b19025dd57d9d', + 'timestamp': 1417778347, + 'upload_date': '20141205', + }, + 'add_id': 
['LimelightMedia'], + 'params': { + 'skip_download': True, }, - 'add_id': ['LimelightMedia'] }, { 'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2', 'only_matching': True, @@ -42,7 +57,9 @@ def _real_extract(self, url): r'(<[^>]+data-video-id="%s"[^>]*>)' % (video_id if video_id else '[a-z0-9]{32}'), webpage, 'video data element')) video_id = video_data['data-video-id'] - title = video_data['data-video-title'] + title = video_data.get('data-video-title') or self._html_search_meta( + 'pkm-title', webpage,' title', default=None) or self._search_regex( + r'<h1[^>]+\bclass=["\']us-title[^>]+>([^<]+)', webpage, 'title') return { '_type': 'url_transparent', 'id': video_id,