diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index 8308142211..cf0ab54788 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -7,7 +7,7 @@ import urllib2 from .common import InfoExtractor -from ..utils import compat_urllib_request +from ..utils import compat_urllib_request, clean_html class SohuIE(InfoExtractor): @@ -22,16 +22,6 @@ class SohuIE(InfoExtractor): }, } - def _clearn_html(self, string): - tags = re.findall(r'<.+?>', string) - for t in tags: - string = string.replace(t, ' ') - for i in range(2): - spaces = re.findall(r'\s+', string) - for s in spaces: - string = string.replace(s, ' ') - string = string.strip() - return string def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -40,7 +30,7 @@ def _real_extract(self, url): pattern = r'