mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-30 12:01:28 +00:00
Match --download-archive during playlist processing (Fixes #1745)
This commit is contained in:
parent
50123be421
commit
7012b23c94
|
@ -84,16 +84,16 @@ def test_youtube_channel(self):
|
|||
dl = FakeYDL()
|
||||
ie = YoutubeChannelIE(dl)
|
||||
#test paginated channel
|
||||
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')[0]
|
||||
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
|
||||
self.assertTrue(len(result['entries']) > 90)
|
||||
#test autogenerated channel
|
||||
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')[0]
|
||||
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
||||
self.assertTrue(len(result['entries']) >= 18)
|
||||
|
||||
def test_youtube_user(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubeUserIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0]
|
||||
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
|
||||
self.assertTrue(len(result['entries']) >= 320)
|
||||
|
||||
def test_youtube_safe_search(self):
|
||||
|
|
|
@ -355,15 +355,17 @@ def prepare_filename(self, info_dict):
|
|||
def _match_entry(self, info_dict):
|
||||
""" Returns None iff the file should be downloaded """
|
||||
|
||||
title = info_dict['title']
|
||||
matchtitle = self.params.get('matchtitle', False)
|
||||
if matchtitle:
|
||||
if not re.search(matchtitle, title, re.IGNORECASE):
|
||||
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
|
||||
rejecttitle = self.params.get('rejecttitle', False)
|
||||
if rejecttitle:
|
||||
if re.search(rejecttitle, title, re.IGNORECASE):
|
||||
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
|
||||
if 'title' in info_dict:
|
||||
# This can happen when we're just evaluating the playlist
|
||||
title = info_dict['title']
|
||||
matchtitle = self.params.get('matchtitle', False)
|
||||
if matchtitle:
|
||||
if not re.search(matchtitle, title, re.IGNORECASE):
|
||||
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
|
||||
rejecttitle = self.params.get('rejecttitle', False)
|
||||
if rejecttitle:
|
||||
if re.search(rejecttitle, title, re.IGNORECASE):
|
||||
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
|
||||
date = info_dict.get('upload_date', None)
|
||||
if date is not None:
|
||||
dateRange = self.params.get('daterange', DateRange())
|
||||
|
@ -374,8 +376,8 @@ def _match_entry(self, info_dict):
|
|||
if age_limit < info_dict.get('age_limit', 0):
|
||||
return u'Skipping "' + title + '" because it is age restricted'
|
||||
if self.in_download_archive(info_dict):
|
||||
return (u'%(title)s has already been recorded in archive'
|
||||
% info_dict)
|
||||
return (u'%s has already been recorded in archive'
|
||||
% info_dict.get('title', info_dict.get('id', u'video')))
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
|
@ -454,7 +456,7 @@ def process_ie_result(self, ie_result, download=True, extra_info={}):
|
|||
ie_key=ie_result.get('ie_key'),
|
||||
extra_info=extra_info)
|
||||
elif result_type == 'playlist':
|
||||
self.add_extra_info(ie_result, extra_info)
|
||||
|
||||
# We process each entry in the playlist
|
||||
playlist = ie_result.get('title', None) or ie_result.get('id', None)
|
||||
self.to_screen(u'[download] Downloading playlist: %s' % playlist)
|
||||
|
@ -484,6 +486,12 @@ def process_ie_result(self, ie_result, download=True, extra_info={}):
|
|||
'webpage_url': ie_result['webpage_url'],
|
||||
'extractor_key': ie_result['extractor_key'],
|
||||
}
|
||||
|
||||
reason = self._match_entry(entry)
|
||||
if reason is not None:
|
||||
self.to_screen(u'[download] ' + reason)
|
||||
continue
|
||||
|
||||
entry_result = self.process_ie_result(entry,
|
||||
download=download,
|
||||
extra_info=extra)
|
||||
|
@ -810,7 +818,16 @@ def in_download_archive(self, info_dict):
|
|||
fn = self.params.get('download_archive')
|
||||
if fn is None:
|
||||
return False
|
||||
vid_id = info_dict['extractor'] + u' ' + info_dict['id']
|
||||
extractor = info_dict.get('extractor_id')
|
||||
if extractor is None:
|
||||
if 'id' in info_dict:
|
||||
extractor = info_dict.get('ie_key') # key in a playlist
|
||||
if extractor is None:
|
||||
return False # Incomplete video information
|
||||
# Future-proof against any change in case
|
||||
# and backwards compatibility with prior versions
|
||||
extractor = extractor.lower()
|
||||
vid_id = extractor + u' ' + info_dict['id']
|
||||
try:
|
||||
with locked_file(fn, 'r', encoding='utf-8') as archive_file:
|
||||
for line in archive_file:
|
||||
|
|
|
@ -229,12 +229,14 @@ def report_login(self):
|
|||
self.to_screen(u'Logging in')
|
||||
|
||||
#Methods for following #608
|
||||
def url_result(self, url, ie=None):
|
||||
def url_result(self, url, ie=None, video_id=None):
|
||||
"""Returns a url that points to a page that should be processed"""
|
||||
#TODO: ie should be the class used for getting the info
|
||||
video_info = {'_type': 'url',
|
||||
'url': url,
|
||||
'ie_key': ie}
|
||||
if video_id is not None:
|
||||
video_info['id'] = video_id
|
||||
return video_info
|
||||
def playlist_result(self, entries, playlist_id=None, playlist_title=None):
|
||||
"""Returns a playlist"""
|
||||
|
|
|
@ -1552,7 +1552,7 @@ def _real_extract(self, url):
|
|||
video_id = query_dict['v'][0]
|
||||
if self._downloader.params.get('noplaylist'):
|
||||
self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
|
||||
return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube')
|
||||
return self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||
else:
|
||||
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
||||
|
||||
|
@ -1571,7 +1571,8 @@ def _real_extract(self, url):
|
|||
|
||||
playlist_title = self._og_search_title(page)
|
||||
|
||||
url_results = [self.url_result(vid, 'Youtube') for vid in ids]
|
||||
url_results = [self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||
for video_id in ids]
|
||||
return self.playlist_result(url_results, playlist_id, playlist_title)
|
||||
|
||||
|
||||
|
@ -1626,9 +1627,9 @@ def _real_extract(self, url):
|
|||
|
||||
self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
|
||||
|
||||
urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
|
||||
url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
|
||||
return [self.playlist_result(url_entries, channel_id)]
|
||||
url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||
for video_id in video_ids]
|
||||
return self.playlist_result(url_entries, channel_id)
|
||||
|
||||
|
||||
class YoutubeUserIE(InfoExtractor):
|
||||
|
@ -1692,9 +1693,11 @@ def _real_extract(self, url):
|
|||
if len(ids_in_page) < self._GDATA_PAGE_SIZE:
|
||||
break
|
||||
|
||||
urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
|
||||
url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
|
||||
return [self.playlist_result(url_results, playlist_title = username)]
|
||||
url_results = [
|
||||
self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||
for video_id in video_ids]
|
||||
return self.playlist_result(url_results, playlist_title=username)
|
||||
|
||||
|
||||
class YoutubeSearchIE(SearchInfoExtractor):
|
||||
IE_DESC = u'YouTube.com searches'
|
||||
|
@ -1735,7 +1738,8 @@ def _get_n_results(self, query, n):
|
|||
|
||||
if len(video_ids) > n:
|
||||
video_ids = video_ids[:n]
|
||||
videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
|
||||
videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||
for video_id in video_ids]
|
||||
return self.playlist_result(videos, query)
|
||||
|
||||
class YoutubeSearchDateIE(YoutubeSearchIE):
|
||||
|
@ -1795,7 +1799,9 @@ def _real_extract(self, url):
|
|||
feed_html = info['feed_html']
|
||||
m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
|
||||
ids = orderedSet(m.group(1) for m in m_ids)
|
||||
feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
|
||||
feed_entries.extend(
|
||||
self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||
for video_id in ids)
|
||||
if info['paging'] is None:
|
||||
break
|
||||
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
|
||||
|
|
Loading…
Reference in a new issue