From 059006292523264f4e7c7e03df3729612af8099c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 7 Jan 2015 07:20:20 +0100 Subject: [PATCH 1/2] Respect age_limit when listing extractors (Fixes #4653) --- test/helper.py | 14 ++------------ test/test_utils.py | 8 ++++++++ youtube_dl/YoutubeDL.py | 10 +++------- youtube_dl/__init__.py | 8 +++----- youtube_dl/extractor/__init__.py | 13 +++++++++++++ youtube_dl/extractor/common.py | 30 ++++++++++++++++++++++++++++++ youtube_dl/extractor/xtube.py | 2 ++ youtube_dl/utils.py | 10 ++++++++++ 8 files changed, 71 insertions(+), 24 deletions(-) diff --git a/test/helper.py b/test/helper.py index 96d58b7c1..77225e4f7 100644 --- a/test/helper.py +++ b/test/helper.py @@ -82,18 +82,8 @@ def report_warning(self, message): def gettestcases(include_onlymatching=False): for ie in youtube_dl.extractor.gen_extractors(): - t = getattr(ie, '_TEST', None) - if t: - assert not hasattr(ie, '_TESTS'), \ - '%s has _TEST and _TESTS' % type(ie).__name__ - tests = [t] - else: - tests = getattr(ie, '_TESTS', []) - for t in tests: - if not include_onlymatching and t.get('only_matching', False): - continue - t['name'] = type(ie).__name__[:-len('IE')] - yield t + for tc in ie.get_testcases(include_onlymatching): + yield tc md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() diff --git a/test/test_utils.py b/test/test_utils.py index dd49a6d17..16e1a1ddf 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -16,6 +16,7 @@ import xml.etree.ElementTree from youtube_dl.utils import ( + age_restricted, args_to_str, clean_html, DateRange, @@ -402,5 +403,12 @@ def test_detect_exe_version(self): Success at /dev/dri/renderD128. 
ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') + def test_age_restricted(self): + self.assertFalse(age_restricted(None, 10)) # unrestricted content + self.assertFalse(age_restricted(1, None)) # unrestricted policy + self.assertFalse(age_restricted(8, 10)) + self.assertTrue(age_restricted(18, 14)) + self.assertFalse(age_restricted(18, 18)) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 806e7b239..36c71f947 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -63,6 +63,7 @@ YoutubeDLHandler, prepend_extension, args_to_str, + age_restricted, ) from .cache import Cache from .extractor import get_info_extractor, gen_extractors @@ -550,13 +551,8 @@ def _match_entry(self, info_dict): max_views = self.params.get('max_views') if max_views is not None and view_count > max_views: return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views) - age_limit = self.params.get('age_limit') - if age_limit is not None: - actual_age_limit = info_dict.get('age_limit') - if actual_age_limit is None: - actual_age_limit = 0 - if age_limit < actual_age_limit: - return 'Skipping "' + title + '" because it is age restricted' + if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')): + return 'Skipping "%s" because it is age restricted' % title if self.in_download_archive(info_dict): return '%s has already been recorded in archive' % video_title return None diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 56f560d26..4c21188a9 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -38,7 +38,7 @@ from .downloader import ( FileDownloader, ) -from .extractor import gen_extractors +from .extractor import list_extractors from .YoutubeDL import YoutubeDL @@ -95,17 +95,15 @@ def _real_main(argv=None): _enc = preferredencoding() all_urls = [url.decode(_enc, 'ignore') if isinstance(url, 
bytes) else url for url in all_urls] - extractors = gen_extractors() - if opts.list_extractors: - for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()): + for ie in list_extractors(opts.age_limit): compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '')) matchedUrls = [url for url in all_urls if ie.suitable(url)] for mu in matchedUrls: compat_print(' ' + mu) sys.exit(0) if opts.list_extractor_descriptions: - for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()): + for ie in list_extractors(opts.age_limit): if not ie._WORKING: continue desc = getattr(ie, 'IE_DESC', ie.IE_NAME) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b523e9644..0145e350d 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -560,6 +560,8 @@ ZingMp3AlbumIE, ) +from ..utils import age_restricted + _ALL_CLASSES = [ klass for name, klass in globals().items() @@ -575,6 +577,17 @@ def gen_extractors(): return [klass() for klass in _ALL_CLASSES] +def list_extractors(age_limit): + """ + Return a list of extractors that are suitable for the given age, + sorted by extractor ID. 
+ """ + + return sorted( + filter(lambda ie: ie.is_suitable(age_limit), gen_extractors()), + key=lambda ie: ie.IE_NAME.lower()) + + def get_info_extractor(ie_name): """Returns the info extractor class with the given ie_name""" return globals()[ie_name + 'IE'] diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 562e656e0..df32b5ca0 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -21,6 +21,7 @@ compat_str, ) from ..utils import ( + age_restricted, clean_html, compiled_regex_type, ExtractorError, @@ -877,6 +878,35 @@ def _set_cookie(self, domain, name, value, expire_time=None): None, '/', True, False, expire_time, '', None, None, None) self._downloader.cookiejar.set_cookie(cookie) + def get_testcases(self, include_onlymatching=False): + t = getattr(self, '_TEST', None) + if t: + assert not hasattr(self, '_TESTS'), \ + '%s has _TEST and _TESTS' % type(self).__name__ + tests = [t] + else: + tests = getattr(self, '_TESTS', []) + for t in tests: + if not include_onlymatching and t.get('only_matching', False): + continue + t['name'] = type(self).__name__[:-len('IE')] + yield t + + def is_suitable(self, age_limit): + """ Test whether the extractor is generally suitable for the given + age limit (i.e. 
pornographic sites are not, all others usually are) """ + + any_restricted = False + for tc in self.get_testcases(include_onlymatching=False): + if 'playlist' in tc: + tc = tc['playlist'][0] + is_restricted = age_restricted( + tc.get('info_dict', {}).get('age_limit'), age_limit) + if not is_restricted: + return True + any_restricted = any_restricted or is_restricted + return not any_restricted + class SearchInfoExtractor(InfoExtractor): """ diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index 95f1c8f3c..e8490b028 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -95,6 +95,7 @@ class XTubeUserIE(InfoExtractor): 'url': 'http://www.xtube.com/community/profile.php?user=greenshowers', 'info_dict': { 'id': 'greenshowers', + 'age_limit': 18, }, 'playlist_mincount': 155, } @@ -124,6 +125,7 @@ def _real_extract(self, url): return { '_type': 'playlist', 'id': username, + 'age_limit': 18, 'entries': [{ '_type': 'url', 'url': eurl, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d4951c406..29739a483 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1560,3 +1560,13 @@ def urlhandle_detect_ext(url_handle): getheader = url_handle.info().getheader return getheader('Content-Type').split("/")[1] + + +def age_restricted(content_limit, age_limit): + """ Returns True iff the content should be blocked """ + + if age_limit is None: # No limit set + return False + if content_limit is None: + return False # Content available for everyone + return age_limit < content_limit From 8ee341500dea885c79316fb8f12adde2028c55a5 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 7 Jan 2015 07:21:24 +0100 Subject: [PATCH 2/2] [viki] Modernize --- youtube_dl/extractor/viki.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 15f315298..944901e14 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ 
-17,7 +17,6 @@ class VikiIE(SubtitlesInfoExtractor): _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)' _TEST = { 'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14', - 'md5': 'a21454021c2646f5433514177e2caa5f', 'info_dict': { 'id': '1023585v', 'ext': 'mp4', @@ -31,8 +30,7 @@ class VikiIE(SubtitlesInfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group(1) + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) title = self._og_search_title(webpage)