[youtube] Add filesystem signature cache

This commit is contained in:
Philipp Hagemeister 2013-09-22 00:35:03 +02:00
parent 2f2ffea9ca
commit c4417ddb61
2 changed files with 30 additions and 7 deletions

View file

@@ -39,6 +39,8 @@ class FileDownloader(object):
test: Download only first bytes to test the downloader. test: Download only first bytes to test the downloader.
min_filesize: Skip files smaller than this size min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size max_filesize: Skip files larger than this size
cachedir: Location of the cache files in the filesystem.
False to disable filesystem cache.
""" """
params = None params = None

View file

@@ -4,8 +4,10 @@
import itertools import itertools
import io import io
import json import json
import netrc import operator
import os.path
import re import re
import shutil
import socket import socket
import string import string
import struct import struct
@@ -422,13 +424,28 @@ def report_rtmp_download(self):
"""Indicate the download will use the RTMP protocol.""" """Indicate the download will use the RTMP protocol."""
self.to_screen(u'RTMP download detected') self.to_screen(u'RTMP download detected')
def _extract_signature_function(self, video_id, player_url): def _extract_signature_function(self, video_id, player_url, slen):
id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9]+)\.(?P<ext>[a-z]+)$', id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
player_url) player_url)
player_type = id_m.group('ext') player_type = id_m.group('ext')
player_id = id_m.group('id') player_id = id_m.group('id')
# TODO read from filesystem cache # Read from filesystem cache
func_id = '%s_%s_%d' % (player_type, player_id, slen)
assert os.path.basename(func_id) == func_id
cache_dir = self.downloader.params.get('cachedir',
u'~/.youtube-dl/cache')
if cache_dir is not False:
cache_fn = os.path.join(os.path.expanduser(cache_dir),
u'youtube-sigfuncs',
func_id + '.json')
try:
with io.open(cache_fn, '', encoding='utf-8') as cachef:
cache_spec = json.load(cachef)
return lambda s: u''.join(s[i] for i in cache_spec)
except OSError:
pass # No cache available
if player_type == 'js': if player_type == 'js':
code = self._download_webpage( code = self._download_webpage(
@@ -436,7 +453,7 @@ def _extract_signature_function(self, video_id, player_url):
note=u'Downloading %s player %s' % (player_type, player_id), note=u'Downloading %s player %s' % (player_type, player_id),
errnote=u'Download of %s failed' % player_url) errnote=u'Download of %s failed' % player_url)
res = self._parse_sig_js(code) res = self._parse_sig_js(code)
elif player_tpye == 'swf': elif player_type == 'swf':
urlh = self._request_webpage( urlh = self._request_webpage(
player_url, video_id, player_url, video_id,
note=u'Downloading %s player %s' % (player_type, player_id), note=u'Downloading %s player %s' % (player_type, player_id),
@@ -446,7 +463,11 @@ def _extract_signature_function(self, video_id, player_url):
else: else:
assert False, 'Invalid player type %r' % player_type assert False, 'Invalid player type %r' % player_type
# TODO write cache if cache_dir is not False:
cache_res = res(map(compat_chr, range(slen)))
cache_spec = [ord(c) for c in cache_res]
shutil.makedirs(os.path.dirname(cache_fn))
write_json_file(cache_spec, cache_fn)
return res return res
@@ -983,7 +1004,7 @@ def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
try: try:
if player_url not in self._player_cache: if player_url not in self._player_cache:
func = self._extract_signature_function( func = self._extract_signature_function(
video_id, player_url video_id, player_url, len(s)
) )
self._player_cache[player_url] = func self._player_cache[player_url] = func
return self._player_cache[player_url](s) return self._player_cache[player_url](s)