Deal with implicitly UTF-16 encoded webpages

These webpages don't specify an encoding and rely on the BOM (byte order mark) at the start of the content to signal it.
This commit is contained in:
Philipp Hagemeister 2014-01-21 01:39:39 +01:00
parent 5aafe895fc
commit b60016e831

View file

@@ -220,6 +220,8 @@ def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=
webpage_bytes[:1024]) webpage_bytes[:1024])
if m: if m:
encoding = m.group(1).decode('ascii') encoding = m.group(1).decode('ascii')
elif webpage_bytes.startswith(b'\xff\xfe'):
encoding = 'utf-16'
else: else:
encoding = 'utf-8' encoding = 'utf-8'
if self._downloader.params.get('dump_intermediate_pages', False): if self._downloader.params.get('dump_intermediate_pages', False):