From d5b00ee6e0ba70fd5d87752e8772fc1c39e4bd59 Mon Sep 17 00:00:00 2001 From: huohuarong Date: Tue, 6 Aug 2013 10:26:57 +0800 Subject: [PATCH] improve sohu extractor --- youtube_dl/extractor/sohu.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index cd049b6f0..24fc3a5d7 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -31,6 +31,7 @@ def _real_extract(self, url): compiled = re.compile(pattern, re.DOTALL) title = self._search_regex(compiled, webpage, u'video title') title = clean_html(title).split('-')[0].strip() + self.to_screen('Title: %s' % title) pattern = re.compile(r'var vid="(\d+)"') result = re.search(pattern, webpage) if not result: @@ -70,6 +71,7 @@ def _real_extract(self, url): base_url_3 = 'http://allot/?prot=prot&file=clipsURL[i]&new=su[i]' files_info = [] for i in range(num_of_parts): + self.to_screen('Geting json infomation of part %s/%s' % (i + 1, num_of_parts)) middle_url = 'http://%s/?prot=%s&file=%s&new=%s' % (allot, prot, clipsURL[i], su[i]) logging.info('middle url part %d: %s' % (i, middle_url)) middle_info = urllib2.urlopen(middle_url).read().split('|')