[utils] Improve cookie files support

+ Add support for UTF-8 in cookie files
* Skip malformed cookie file entries instead of crashing (invalid entry len, invalid expires at)
This commit is contained in:
Sergey M․ 2020-05-05 04:19:33 +07:00
parent f7f304910d
commit c380cc28c4
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D
3 changed files with 93 additions and 5 deletions

View file

@ -39,6 +39,13 @@ def assert_cookie_has_value(key):
assert_cookie_has_value('HTTPONLY_COOKIE') assert_cookie_has_value('HTTPONLY_COOKIE')
assert_cookie_has_value('JS_ACCESSIBLE_COOKIE') assert_cookie_has_value('JS_ACCESSIBLE_COOKIE')
def test_malformed_cookies(self):
    """Check that a file holding only malformed entries loads as an empty jar."""
    jar = YoutubeDLCookieJar('./test/testdata/cookies/malformed_cookies.txt')
    jar.load(ignore_expires=True, ignore_discard=True)
    # Every entry in the fixture is malformed and is expected to be
    # ignored on load, so the jar must end up without any cookies
    self.assertFalse(jar._cookies)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View file

@ -0,0 +1,9 @@
# Netscape HTTP Cookie File
# http://curl.haxx.se/rfc/cookie_spec.html
# This is a generated file! Do not edit.
# Cookie file entry with invalid number of fields - 6 instead of 7
www.foobar.foobar FALSE / FALSE 0 COOKIE
# Cookie file entry with invalid expires at
www.foobar.foobar FALSE / FALSE 1.7976931348623157e+308 COOKIE VALUE

View file

@ -7,6 +7,7 @@
import binascii import binascii
import calendar import calendar
import codecs import codecs
import collections
import contextlib import contextlib
import ctypes import ctypes
import datetime import datetime
@ -30,6 +31,7 @@
import subprocess import subprocess
import sys import sys
import tempfile import tempfile
import time
import traceback import traceback
import xml.etree.ElementTree import xml.etree.ElementTree
import zlib import zlib
@ -2735,14 +2737,66 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
1. https://curl.haxx.se/docs/http-cookies.html 1. https://curl.haxx.se/docs/http-cookies.html
""" """
_HTTPONLY_PREFIX = '#HttpOnly_' _HTTPONLY_PREFIX = '#HttpOnly_'
_ENTRY_LEN = 7
_HEADER = '''# Netscape HTTP Cookie File
# This file is generated by youtube-dl. Do not edit.
'''
_CookieFileEntry = collections.namedtuple(
'CookieFileEntry',
('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
def save(self, filename=None, ignore_discard=False, ignore_expires=False):
    """
    Write the cookies of this jar to a cookie file.

    Largely borrowed from CPython 3.8 and lightly adapted so that
    cookie files containing UTF-8 work in both python 2 and 3.

    filename falls back to self.filename; ValueError is raised when
    neither is set. Cookies marked as discard or already expired are
    left out unless ignore_discard / ignore_expires is true.
    """
    if filename is None:
        filename = self.filename
        if filename is None:
            raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

    # Session cookies get `expires` set to 0 rather than an empty
    # string
    for cookie in self:
        if cookie.expires is None:
            cookie.expires = 0

    def flag(condition):
        # Textual boolean used by the cookie file format
        return 'TRUE' if condition else 'FALSE'

    with io.open(filename, 'w', encoding='utf-8') as f:
        f.write(self._HEADER)
        now = time.time()
        for cookie in self:
            if cookie.discard and not ignore_discard:
                continue
            if not ignore_expires and cookie.is_expired(now):
                continue
            if cookie.value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', cookie.name
            else:
                name, value = cookie.name, cookie.value
            expires = '' if cookie.expires is None else compat_str(cookie.expires)
            fields = (
                cookie.domain,
                flag(cookie.domain.startswith('.')),
                cookie.path,
                flag(cookie.secure),
                expires,
                name,
                value,
            )
            f.write('\t'.join(fields) + '\n')
def load(self, filename=None, ignore_discard=False, ignore_expires=False): def load(self, filename=None, ignore_discard=False, ignore_expires=False):
"""Load cookies from a file.""" """Load cookies from a file."""
@ -2752,12 +2806,30 @@ def load(self, filename=None, ignore_discard=False, ignore_expires=False):
else: else:
raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT) raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
cf = io.StringIO() def prepare_line(line):
with open(filename) as f:
for line in f:
if line.startswith(self._HTTPONLY_PREFIX): if line.startswith(self._HTTPONLY_PREFIX):
line = line[len(self._HTTPONLY_PREFIX):] line = line[len(self._HTTPONLY_PREFIX):]
cf.write(compat_str(line)) # comments and empty lines are fine
if line.startswith('#') or not line.strip():
return line
cookie_list = line.split('\t')
if len(cookie_list) != self._ENTRY_LEN:
raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
cookie = self._CookieFileEntry(*cookie_list)
if cookie.expires_at and not cookie.expires_at.isdigit():
raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
return line
cf = io.StringIO()
with io.open(filename, encoding='utf-8') as f:
for line in f:
try:
cf.write(prepare_line(line))
except compat_cookiejar.LoadError as e:
write_string(
'WARNING: skipping cookie file entry due to %s: %r\n'
% (e, line), sys.stderr)
continue
cf.seek(0) cf.seek(0)
self._really_load(cf, filename, ignore_discard, ignore_expires) self._really_load(cf, filename, ignore_discard, ignore_expires)
# Session cookies are denoted by either `expires` field set to # Session cookies are denoted by either `expires` field set to