Correct distinction between unicode and bytes (Closes: #257)

This commit is contained in:
Philipp Hagemeister 2012-01-05 10:46:21 +01:00
parent c92e184f75
commit 1413cd87eb

View file

@ -290,6 +290,15 @@ def _orderedSet(iterable):
res.append(el) res.append(el)
return res return res
def _unescapeHTML(s):
"""
@param s a string (of type unicode)
"""
assert type(s) == type(u'')
htmlParser = HTMLParser.HTMLParser()
return htmlParser.unescape(s)
class DownloadError(Exception): class DownloadError(Exception):
"""Download Error exception. """Download Error exception.
@ -1590,8 +1599,6 @@ def report_extraction(self, video_id):
self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id) self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
def _real_extract(self, url): def _real_extract(self, url):
htmlParser = HTMLParser.HTMLParser()
# Extract id and simplified title from URL # Extract id and simplified title from URL
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
if mobj is None: if mobj is None:
@ -1635,7 +1642,7 @@ def _real_extract(self, url):
if mobj is None: if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract title') self._downloader.trouble(u'ERROR: unable to extract title')
return return
video_title = htmlParser.unescape(mobj.group('title')).decode('utf-8') video_title = _unescapeHTML(mobj.group('title').decode('utf-8'))
video_title = sanitize_title(video_title) video_title = sanitize_title(video_title)
simple_title = _simplify_title(video_title) simple_title = _simplify_title(video_title)