From d34e79492d1d9d9babf85af3737b90c4fe55eb42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 Mar 2015 16:54:11 +0600 Subject: [PATCH 01/16] [twitch] Fix live streams (Closes #5158) --- youtube_dl/extractor/twitch.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index b058891bdd..cbdaf9c7ab 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -358,13 +358,12 @@ def _real_extract(self, url): 'p': random.randint(1000000, 10000000), 'player': 'twitchweb', 'segment_preference': '4', - 'sig': access_token['sig'], - 'token': access_token['token'], + 'sig': access_token['sig'].encode('utf-8'), + 'token': access_token['token'].encode('utf-8'), } - formats = self._extract_m3u8_formats( '%s/api/channel/hls/%s.m3u8?%s' - % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query).encode('utf-8')), + % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query)), channel_id, 'mp4') self._prefer_source(formats) From 1132eae56d0f693b5dc55529d5cfadf32b32700d Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Sun, 8 Mar 2015 13:54:01 +0200 Subject: [PATCH 02/16] [gazeta] Add new extractor (Closes #4222) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/gazeta.py | 35 ++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 youtube_dl/extractor/gazeta.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b489d5770d..e2475a6340 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -175,6 +175,7 @@ from .gamespot import GameSpotIE from .gamestar import GameStarIE from .gametrailers import GametrailersIE +from .gazeta import GazetaIE from .gdcvault import GDCVaultIE from .generic import GenericIE from .giantbomb import GiantBombIE diff --git a/youtube_dl/extractor/gazeta.py b/youtube_dl/extractor/gazeta.py new file mode 100644 index 0000000000..3f9e580619 --- /dev/null +++ b/youtube_dl/extractor/gazeta.py @@ -0,0 +1,35 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class GazetaIE(InfoExtractor): + _VALID_URL = r'(?Phttps?://(?:www\.)?gazeta\.ru/(?:(?P[^/]*)/)?video/(?:main/)?(?P[A-Za-z0-9-_]+)\.s?html)' + _TEST = { + 'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml', + 'md5': 'd49c9bdc6e5a7888f27475dc215ee789', + 'info_dict': { + 'id': '205566', + 'ext': 'mp4', + 'title': '«70–80 процентов гражданских в Донецке на грани голода»', + 'description': 'md5:38617526050bd17b234728e7f9620a71', + 'thumbnail': 're:^https?://.*\.jpg', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + display_id = mobj.group('id') + embed_url = '%s?p=embed' % mobj.group('url') + embed_page = self._download_webpage( + embed_url, display_id, 'Downloading embed page') + + video_id = self._search_regex( + r']*?class="eagleplayer"[^>]*?data-id="([^"]+)"', embed_page, 'video id') + + return self.url_result( + 'eagleplatform:gazeta.media.eagleplatform.com:%s' % video_id, 'EaglePlatform') From 28778d6bae2ff2a0ad57d2c5a694e22ac4ead749 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 Mar 2015 18:03:12 +0600 Subject: [PATCH 03/16] [pladform] Add extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/pladform.py | 86 ++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 youtube_dl/extractor/pladform.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e2475a6340..14172ca565 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -364,6 +364,7 @@ from .phoenix import PhoenixIE from .photobucket import PhotobucketIE from .planetaplay import PlanetaPlayIE +from .pladform import PladformIE from .played import PlayedIE from .playfm import PlayFMIE from .playvid import PlayvidIE diff --git a/youtube_dl/extractor/pladform.py b/youtube_dl/extractor/pladform.py new file mode 100644 index 0000000000..926e368a27 --- /dev/null +++ b/youtube_dl/extractor/pladform.py @@ -0,0 +1,86 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + xpath_text, +) + + +class PladformIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?: + (?: + out\.pladform\.ru/player| + static\.pladform\.ru/player\.swf + ) + \?.*\bvideoid=| + video\.pladform\.ru/catalog/video/videoid/ + ) + (?P\d+) + ''' + _TESTS = [{ + # http://muz-tv.ru/kinozal/view/7400/ + 'url': 'http://out.pladform.ru/player?pl=24822&videoid=100183293', + 'md5': '61f37b575dd27f1bb2e1854777fe31f4', + 'info_dict': { + 'id': '100183293', + 'ext': 'mp4', + 'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть', + 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 694, + 'age_limit': 0, + }, + }, { + 'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0', + 'only_matching': True, + }, { + 'url': 'http://video.pladform.ru/catalog/video/videoid/100183293/vkcid/0', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + video = self._download_xml( + 'http://out.pladform.ru/getVideo?pl=1&videoid=%s' % video_id, + video_id) + + if video.tag == 'error': + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, video.text), + expected=True) + + formats = [{ + 'url': src.text, + 'format_id': src.get('quality'), + } for src in video.findall('./src')] + self._sort_formats(formats) + + webpage = self._download_webpage( + 'http://video.pladform.ru/catalog/video/videoid/%s' % video_id, + video_id) + + title = self._og_search_title(webpage, fatal=False) or xpath_text( + video, './/title', 'title', fatal=True) + description = self._search_regex( + r'\s*

([^<]+)

', webpage, 'description', fatal=False) + thumbnail = self._og_search_thumbnail(webpage) or xpath_text( + video, './/cover', 'cover') + + duration = int_or_none(xpath_text(video, './/time', 'duration')) + age_limit = int_or_none(xpath_text(video, './/age18', 'age limit')) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'age_limit': age_limit, + 'formats': formats, + } From f8388757269de8a5e87b9a507db55021b3090980 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 Mar 2015 18:07:10 +0600 Subject: [PATCH 04/16] [pladform] Add support for embeds --- youtube_dl/extractor/generic.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 013198b0df..4e6927b08b 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -596,6 +596,19 @@ class GenericIE(InfoExtractor): 'view_count': int, }, }, + # Pladform embed + { + 'url': 'http://muz-tv.ru/kinozal/view/7400/', + 'info_dict': { + 'id': '100183293', + 'ext': 'mp4', + 'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть', + 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 694, + 'age_limit': 0, + }, + }, # RSS feed with enclosure { 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', @@ -1193,6 +1206,12 @@ def _playlist_from_matches(matches, getter=None, ie=None): if mobj is not None: return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform') + # Look for Pladform embeds + mobj = re.search( + r']+src="(?Phttps?://out\.pladform\.ru/player\?.+?)"', webpage) + if mobj is not None: + return self.url_result(mobj.group('url'), 'Pladform') + def check_video(vurl): if YoutubeIE.suitable(vurl): return True From 11101076a1b0eb36dd56b1b5aa1b7559f2d230b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 Mar 2015 18:09:47 +0600 Subject: [PATCH 05/16] [pladform] Fix format quality sorting --- youtube_dl/extractor/pladform.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/pladform.py b/youtube_dl/extractor/pladform.py index 926e368a27..abde34b946 100644 --- a/youtube_dl/extractor/pladform.py +++ b/youtube_dl/extractor/pladform.py @@ -6,6 +6,7 @@ ExtractorError, int_or_none, xpath_text, + qualities, ) @@ -55,9 +56,12 @@ def _real_extract(self, url): '%s returned error: %s' % (self.IE_NAME, video.text), expected=True) + quality = qualities(('ld', 'sd', 'hd')) + formats = [{ 'url': src.text, 'format_id': src.get('quality'), + 'quality': quality(src.get('quality')), } for src in video.findall('./src')] self._sort_formats(formats) From 24993e3b396407af9ccdf8dd893588df31aff8af Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Sun, 8 Mar 2015 14:08:45 +0200 Subject: [PATCH 06/16] [vidme] Fix view_count extraction and remove comment_count extraction (Fixes #5133) Comment counts seem to no longer be listed on vid.me --- youtube_dl/extractor/vidme.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py index 339c3d897b..bd953fb4cc 100644 --- a/youtube_dl/extractor/vidme.py +++ b/youtube_dl/extractor/vidme.py @@ -41,13 +41,10 @@ def _real_extract(self, url): duration = float_or_none(self._html_search_regex( r'data-duration="([^"]+)"', webpage, 'duration', fatal=False)) view_count = str_to_int(self._html_search_regex( - r'\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False)) + r'<(?:li|span) class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False)) like_count = str_to_int(self._html_search_regex( r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">', webpage, 'like count', fatal=False)) - comment_count = str_to_int(self._html_search_regex( - r'class="js-comment-count"[^>]+data-count="([\d,\.\s]+)">', - webpage, 'comment count', fatal=False)) return { 'id': video_id, @@ -61,5 +58,4 @@ def _real_extract(self, url): 'duration': duration, 'view_count': view_count, 'like_count': like_count, - 'comment_count': comment_count, } From 8b910bda0cdba859c1971a410aa5d2a51c6e4918 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Sun, 8 Mar 2015 14:28:53 +0200 Subject: [PATCH 07/16] [teamcoco] Fix extraction --- youtube_dl/extractor/teamcoco.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 5793dbc108..7cb06f351e 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -53,10 +53,10 @@ def _real_extract(self, url): embed = self._download_webpage( embed_url, video_id, 'Downloading embed page') - encoded_data = self._search_regex( - r'"preload"\s*:\s*"([^"]+)"', embed, 'encoded data') + player_data = self._parse_json(self._search_regex( + r'Y\.Ginger\.Module\.Player\((\{.*?\})\);', embed, 'player data'), video_id) data = self._parse_json( - base64.b64decode(encoded_data.encode('ascii')).decode('utf-8'), video_id) + base64.b64decode(player_data['preload'].encode('ascii')).decode('utf-8'), video_id) formats = [] get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p']) From bdf6eee0aeed8df586569982eaaac04eecc0062d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 Mar 2015 19:17:54 +0600 Subject: [PATCH 08/16] [gazeta] Extend _VALID_URL --- youtube_dl/extractor/gazeta.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/gazeta.py b/youtube_dl/extractor/gazeta.py index 3f9e580619..ea32b621c3 100644 --- a/youtube_dl/extractor/gazeta.py +++ b/youtube_dl/extractor/gazeta.py @@ -7,8 +7,8 @@ class GazetaIE(InfoExtractor): - _VALID_URL = r'(?Phttps?://(?:www\.)?gazeta\.ru/(?:(?P[^/]*)/)?video/(?:main/)?(?P[A-Za-z0-9-_]+)\.s?html)' - _TEST = { + _VALID_URL = r'(?Phttps?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:(?:main|\d{4}/\d{2}/\d{2})/)?(?P[A-Za-z0-9-_.]+)\.s?html)' + _TESTS = [{ 'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml', 'md5': 'd49c9bdc6e5a7888f27475dc215ee789', 'info_dict': { @@ -18,7 +18,10 @@ class GazetaIE(InfoExtractor): 'description': 'md5:38617526050bd17b234728e7f9620a71', 'thumbnail': 're:^https?://.*\.jpg', }, - } + }, { + 'url': 'http://www.gazeta.ru/lifestyle/video/2015/03/08/master-klass_krasivoi_byt._delaem_vesennii_makiyazh.shtml', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From a2aaf4dbc6e5f5d345329b5a845111851453b6a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 Mar 2015 20:55:22 +0600 Subject: [PATCH 09/16] [utils] Add sanitize_path --- test/test_utils.py | 21 +++++++++++++++++++++ youtube_dl/utils.py | 18 ++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index 64fad58ade..5ebb8d498e 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -38,6 +38,7 @@ parse_iso8601, read_batch_urls, sanitize_filename, + sanitize_path, shell_quote, smuggle_url, str_to_int, @@ -131,6 +132,26 @@ def test_sanitize_ids(self): self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw') self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI') + def test_sanitize_path(self): + if sys.platform != 'win32': + return + + self.assertEqual(sanitize_path('abc'), 'abc') + self.assertEqual(sanitize_path('abc/def'), 'abc\\def') + self.assertEqual(sanitize_path('abc\\def'), 'abc\\def') + self.assertEqual(sanitize_path('abc|def'), 'abc#def') + self.assertEqual(sanitize_path('<>:"|?*'), '#######') + self.assertEqual(sanitize_path('C:/abc/def'), 'C:\\abc\\def') + self.assertEqual(sanitize_path('C?:/abc/def'), 'C##\\abc\\def') + + self.assertEqual(sanitize_path('\\\\?\\UNC\\ComputerName\\abc'), '\\\\?\\UNC\\ComputerName\\abc') + self.assertEqual(sanitize_path('\\\\?\\UNC/ComputerName/abc'), '\\\\?\\UNC\\ComputerName\\abc') + + self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc') + self.assertEqual(sanitize_path('\\\\?\\C:/abc'), '\\\\?\\C:\\abc') + self.assertEqual(sanitize_path('\\\\?\\C:\\ab?c\\de:f'), '\\\\?\\C:\\ab#c\\de#f') + self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc') + def test_ordered_set(self): self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7]) self.assertEqual(orderedSet([]), []) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 7426e2a1ff..0f49d602ea 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -311,6 +311,24 @@ def replace_insane(char): return result +def sanitize_path(s): + """Sanitizes and normalizes path on Windows""" + if sys.platform != 'win32': + return s + drive, _ = os.path.splitdrive(s) + unc, _ = os.path.splitunc(s) + unc_or_drive = unc or drive + norm_path = os.path.normpath(remove_start(s, unc_or_drive)).split(os.path.sep) + if unc_or_drive: + norm_path.pop(0) + sanitized_path = [ + re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part) + for path_part in norm_path] + if unc_or_drive: + sanitized_path.insert(0, unc_or_drive + os.path.sep) + return os.path.join(*sanitized_path) + + def orderedSet(iterable): """ Remove all duplicates from the input iterable """ res = [] From d55de57b67bceca5d9116ddcc2ada2fa1957d89d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 Mar 2015 20:56:28 +0600 Subject: [PATCH 10/16] [utils] Fix sanitize_open --- youtube_dl/utils.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 0f49d602ea..e511232caa 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -252,15 +252,12 @@ def sanitize_open(filename, open_mode): raise # In case of error, try to remove win32 forbidden chars - alt_filename = os.path.join( - re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part) - for path_part in os.path.split(filename) - ) + alt_filename = sanitize_path(filename) if alt_filename == filename: raise else: # An exception here should be caught in the caller - stream = open(encodeFilename(filename), open_mode) + stream = open(encodeFilename(alt_filename), open_mode) return (stream, alt_filename) From 1bb5c511a551ba7c5d3179d2d3a124f0977e74b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 Mar 2015 20:57:30 +0600 Subject: [PATCH 11/16] [YoutubeDL] Sanitize outtmpl as path --- youtube_dl/YoutubeDL.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index bae52e9c72..4b91511630 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -61,6 +61,7 @@ render_table, SameFileError, sanitize_filename, + sanitize_path, std_headers, subtitles_filename, takewhile_inclusive, @@ -553,20 +554,16 @@ def prepare_filename(self, info_dict): elif template_dict.get('width'): template_dict['resolution'] = '?x%d' % template_dict['width'] - restrict_filenames = self.params.get('restrictfilenames') - sanitize = lambda k, v: sanitize_filename( compat_str(v), - restricted=restrict_filenames, + restricted=self.params.get('restrictfilenames'), is_id=(k == 'id')) template_dict = dict((k, sanitize(k, v)) for k, v in template_dict.items() if v is not None) template_dict = collections.defaultdict(lambda: 'NA', template_dict) - outtmpl = sanitize_filename( - self.params.get('outtmpl', DEFAULT_OUTTMPL), - restricted=restrict_filenames) + outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL)) tmpl = compat_expanduser(outtmpl) filename = tmpl % template_dict # Temporary fix for #4787 From f18ef2d14463a13d80e967d1b18ece6a076f60fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 Mar 2015 22:08:48 +0600 Subject: [PATCH 12/16] [utils] Disallow trailing dot in sanitize_path for a path part --- test/test_utils.py | 11 +++++++++++ youtube_dl/utils.py | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index 5ebb8d498e..28bda654e2 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -152,6 +152,17 @@ def test_sanitize_path(self): self.assertEqual(sanitize_path('\\\\?\\C:\\ab?c\\de:f'), '\\\\?\\C:\\ab#c\\de#f') self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc') + self.assertEqual( + sanitize_path('youtube/%(uploader)s/%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s'), + 'youtube\\%(uploader)s\\%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s') + + self.assertEqual( + sanitize_path('youtube/TheWreckingYard ./00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part'), + 'youtube\\TheWreckingYard #\\00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part') + self.assertEqual(sanitize_path('abc/def...'), 'abc\\def..#') + self.assertEqual(sanitize_path('abc.../def'), 'abc..#\\def') + self.assertEqual(sanitize_path('abc.../def...'), 'abc..#\\def..#') + def test_ordered_set(self): self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7]) self.assertEqual(orderedSet([]), []) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e511232caa..d5597d514d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -319,7 +319,7 @@ def sanitize_path(s): if unc_or_drive: norm_path.pop(0) sanitized_path = [ - re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part) + re.sub('(?:[/<>:"\\|\\\\?\\*]|\.$)', '#', path_part) for path_part in norm_path] if unc_or_drive: sanitized_path.insert(0, unc_or_drive + os.path.sep) From e5a11a2293069b1acfa5eb5d2694ad4082dd9755 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 Mar 2015 22:09:42 +0600 Subject: [PATCH 13/16] [YoutubeDL] Sanitize path before creating non-existent paths (Closes #4324) --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 4b91511630..bce7587fd8 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1262,7 +1262,7 @@ def process_info(self, info_dict): return try: - dn = os.path.dirname(encodeFilename(filename)) + dn = os.path.dirname(sanitize_path(encodeFilename(filename))) if dn and not os.path.exists(dn): os.makedirs(dn) except (OSError, IOError) as err: From 43d6280d0a03335ec5143383f15e2ca9a49f4046 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 8 Mar 2015 18:25:11 +0100 Subject: [PATCH 14/16] [downloader/f4m] Fix use of base64 in python 3.2 (fixes #5132) b64decode needs a byte string, but on 3.4 it also accepts strings. --- youtube_dl/downloader/f4m.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 3dc796faaf..4ab000d673 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -281,7 +281,7 @@ def _parse_bootstrap_node(self, node, base_url): boot_info = self._get_bootstrap_from_url(bootstrap_url) else: bootstrap_url = None - bootstrap = base64.b64decode(node.text) + bootstrap = base64.b64decode(node.text.encode('ascii')) boot_info = read_bootstrap_info(bootstrap) return (boot_info, bootstrap_url) @@ -308,7 +308,7 @@ def real_download(self, filename, info_dict): live = boot_info['live'] metadata_node = media.find(_add_ns('metadata')) if metadata_node is not None: - metadata = base64.b64decode(metadata_node.text) + metadata = base64.b64decode(metadata_node.text.encode('ascii')) else: metadata = None From cc08b11d16efd125d765cc1a69ac795a592f012c Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Sun, 8 Mar 2015 21:32:42 +0200 Subject: [PATCH 15/16] [adultswim] Improve video_info extraction (Fixes #5152) Look for video_info inside `slugged_video`, if slug is not found among collections. Also, simplify a bit. --- youtube_dl/extractor/adultswim.py | 44 ++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index 34b8b01157..39335b8272 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -2,13 +2,12 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor from ..utils import ( ExtractorError, - xpath_text, float_or_none, + xpath_text, ) @@ -60,6 +59,24 @@ class AdultSwimIE(InfoExtractor): 'title': 'American Dad - Putting Francine Out of Business', 'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].' }, + }, { + 'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/', + 'playlist': [ + { + 'md5': '3e346a2ab0087d687a05e1e7f3b3e529', + 'info_dict': { + 'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0', + 'ext': 'flv', + 'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine', + 'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n', + }, + } + ], + 'info_dict': { + 'id': 'sY3cMUR_TbuE4YmdjzbIcQ', + 'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine', + 'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n', + }, }] @staticmethod @@ -80,6 +97,7 @@ def find_collection_containing_video(collections, slug): for video in collection.get('videos'): if video.get('slug') == slug: return collection, video + return None, None def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -90,28 +108,30 @@ def _real_extract(self, url): webpage = self._download_webpage(url, episode_path) # Extract the value of `bootstrappedData` from the Javascript in the page. - bootstrappedDataJS = self._search_regex(r'var bootstrappedData = ({.*});', webpage, episode_path) - - try: - bootstrappedData = json.loads(bootstrappedDataJS) - except ValueError as ve: - errmsg = '%s: Failed to parse JSON ' % episode_path - raise ExtractorError(errmsg, cause=ve) + bootstrapped_data = self._parse_json(self._search_regex( + r'var bootstrappedData = ({.*});', webpage, 'bootstraped data'), episode_path) # Downloading videos from a /videos/playlist/ URL needs to be handled differently. # NOTE: We are only downloading one video (the current one) not the playlist if is_playlist: - collections = bootstrappedData['playlists']['collections'] + collections = bootstrapped_data['playlists']['collections'] collection = self.find_collection_by_linkURL(collections, show_path) video_info = self.find_video_info(collection, episode_path) show_title = video_info['showTitle'] segment_ids = [video_info['videoPlaybackID']] else: - collections = bootstrappedData['show']['collections'] + collections = bootstrapped_data['show']['collections'] collection, video_info = self.find_collection_containing_video(collections, episode_path) - show = bootstrappedData['show'] + # Video wasn't found in the collections, let's try `slugged_video`. + if video_info is None: + if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path: + video_info = bootstrapped_data['slugged_video'] + else: + raise ExtractorError('Unable to find video info') + + show = bootstrapped_data['show'] show_title = show['title'] segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']] From dd7831fe94a0fb8270e7fa3699677c7476a5cd83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 9 Mar 2015 04:55:35 +0600 Subject: [PATCH 16/16] [breakcom] Process only play purpose media formats (Closes #5164) --- youtube_dl/extractor/breakcom.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/breakcom.py b/youtube_dl/extractor/breakcom.py index 4bcc897c95..809287d144 100644 --- a/youtube_dl/extractor/breakcom.py +++ b/youtube_dl/extractor/breakcom.py @@ -41,7 +41,7 @@ def _real_extract(self, url): 'tbr': media['bitRate'], 'width': media['width'], 'height': media['height'], - } for media in info['media']] + } for media in info['media'] if media.get('mediaPurpose') == 'play'] if not formats: formats.append({