From 40634747f74d2c85b28ee33f11672378c9b30949 Mon Sep 17 00:00:00 2001 From: Johny Mo Swag Date: Wed, 6 Mar 2013 21:09:55 -0800 Subject: [PATCH 1/8] Support for WorldStarHipHop.com --- youtube_dl/InfoExtractors.py | 63 +++++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 6b03bf307..8be2f160c 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -2557,7 +2557,7 @@ def _real_extract(self, url): 'uploader': showName, 'upload_date': None, 'title': showName, - 'ext': 'mp4', + 'ext': 'flv', 'thumbnail': imgUrl, 'description': description, 'player_url': playerUrl, @@ -3654,6 +3654,66 @@ def _real_extract(self, url): } return [info] +class WorldStarHipHopIE(InfoExtractor): + _VALID_URL = r"""(http://(?:www|m).worldstar(?:candy|hiphop)\.com.*)""" + IE_NAME = u'WorldStarHipHop' + + def _real_extract(self, url): + results = [] + + _src_url = r"""(http://hw-videos.*(?:mp4|flv))""" + + webpage_src = compat_urllib_request.urlopen(str(url)).read() + + mobj = re.search(_src_url, webpage_src) + + if mobj is not None: + video_url = mobj.group() + if 'mp4' in video_url: + ext = '.mp4' + else: + ext = '.flv' + else: + video_url = None + ext = None + + _title = r"""(.*)""" + + mobj = re.search(_title, webpage_src) + + if mobj is not None: + title = mobj.group(1) + title = title.replace("'", "") + title = title.replace("'", "") + title = title.replace('Video: ', '') + title = title.replace('"', '"') + title = title.replace('&', 'n') + else: + title = None + + _thumbnail = r"""rel="image_src" href="(.*)" />""" + + mobj = re.search(_thumbnail, webpage_src) + + # Getting thumbnail and if not thumbnail sets correct title for WSHH candy video. + if mobj is not None: + thumbnail = mobj.group(1) + else: + _title = r"""candytitles.*>(.*)""" + mobj = re.search(_title, webpage_src) + if mobj is not None: + title = mobj.group(1) + thumbnail = None + + results.append({ + 'url' : video_url, + 'title' : title, + 'thumbnail' : thumbnail, + 'ext' : ext + }) + + return results + class RBMARadioIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P[^/]+)$' @@ -4133,6 +4193,7 @@ def gen_extractors(): GooglePlusIE(), ArteTvIE(), NBAIE(), + WorldStarHipHopIE(), JustinTVIE(), FunnyOrDieIE(), SteamIE(), From 61e40c88a989d31b6f06d7001f614d62f06941a5 Mon Sep 17 00:00:00 2001 From: Johny Mo Swag Date: Wed, 6 Mar 2013 21:14:46 -0800 Subject: [PATCH 2/8] fixed typo --- youtube_dl/InfoExtractors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 8be2f160c..58803c48a 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -2557,7 +2557,7 @@ def _real_extract(self, url): 'uploader': showName, 'upload_date': None, 'title': showName, - 'ext': 'flv', + 'ext': 'mp4', 'thumbnail': imgUrl, 'description': description, 'player_url': playerUrl, From b3bcca0844cc8197cbb5e1e8127b1b8164304940 Mon Sep 17 00:00:00 2001 From: Johny Mo Swag Date: Thu, 7 Mar 2013 15:39:17 -0800 Subject: [PATCH 3/8] clean up --- youtube_dl/InfoExtractors.py | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 58803c48a..178b0beed 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -3659,20 +3659,19 @@ class WorldStarHipHopIE(InfoExtractor): IE_NAME = u'WorldStarHipHop' def _real_extract(self, url): - results = [] - _src_url = r"""(http://hw-videos.*(?:mp4|flv))""" webpage_src = compat_urllib_request.urlopen(str(url)).read() + webpage_src = webpage_src.decode('utf-8') mobj = re.search(_src_url, webpage_src) if mobj is not None: video_url = mobj.group() if 'mp4' in video_url: - ext = '.mp4' + ext = 'mp4' else: - ext = '.flv' + ext = 'flv' else: video_url = None ext = None @@ -3683,16 +3682,12 @@ def _real_extract(self, url): if mobj is not None: title = mobj.group(1) - title = title.replace("'", "") - title = title.replace("'", "") - title = title.replace('Video: ', '') - title = title.replace('"', '"') - title = title.replace('&', 'n') else: - title = None + title = 'World Start Hip Hop - %s' % time.ctime() _thumbnail = r"""rel="image_src" href="(.*)" />""" + print title mobj = re.search(_thumbnail, webpage_src) # Getting thumbnail and if not thumbnail sets correct title for WSHH candy video. @@ -3705,13 +3700,12 @@ def _real_extract(self, url): title = mobj.group(1) thumbnail = None - results.append({ - 'url' : video_url, - 'title' : title, - 'thumbnail' : thumbnail, - 'ext' : ext - }) - + results = [{ + 'url' : video_url, + 'title' : title, + 'thumbnail' : thumbnail, + 'ext' : ext, + }] return results class RBMARadioIE(InfoExtractor): From 64c78d50ccf05f34e27b652530fc8b702aa54122 Mon Sep 17 00:00:00 2001 From: Johny Mo Swag Date: Thu, 7 Mar 2013 16:27:21 -0800 Subject: [PATCH 4/8] working - worldstarhiphop IE Support for WorldStarHipHop --- .gitignore | 2 ++ youtube_dl/InfoExtractors.py | 8 +++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 77469b8a7..328fed8bd 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,5 @@ youtube-dl.tar.gz cover/ updates_key.pem *.egg-info + +*.flv diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 178b0beed..f69bad4f3 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -3655,7 +3655,7 @@ def _real_extract(self, url): return [info] class WorldStarHipHopIE(InfoExtractor): - _VALID_URL = r"""(http://(?:www|m).worldstar(?:candy|hiphop)\.com.*)""" + _VALID_URL = r'http://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P.*)' IE_NAME = u'WorldStarHipHop' def _real_extract(self, url): @@ -3686,8 +3686,6 @@ def _real_extract(self, url): title = 'World Start Hip Hop - %s' % time.ctime() _thumbnail = r"""rel="image_src" href="(.*)" />""" - - print title mobj = re.search(_thumbnail, webpage_src) # Getting thumbnail and if not thumbnail sets correct title for WSHH candy video. @@ -3700,7 +3698,11 @@ def _real_extract(self, url): title = mobj.group(1) thumbnail = None + m = re.match(self._VALID_URL, url) + video_id = m.group('id') + results = [{ + 'id': video_id, 'url' : video_url, 'title' : title, 'thumbnail' : thumbnail, From 3b221c540640f7df9e4dc453a736dd25fe2505c4 Mon Sep 17 00:00:00 2001 From: Johny Mo Swag Date: Fri, 8 Mar 2013 22:39:45 -0800 Subject: [PATCH 5/8] removed str used for other project. --- youtube_dl/InfoExtractors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index f69bad4f3..c2e3c8983 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -3661,7 +3661,7 @@ class WorldStarHipHopIE(InfoExtractor): def _real_extract(self, url): _src_url = r"""(http://hw-videos.*(?:mp4|flv))""" - webpage_src = compat_urllib_request.urlopen(str(url)).read() + webpage_src = compat_urllib_request.urlopen(url).read() webpage_src = webpage_src.decode('utf-8') mobj = re.search(_src_url, webpage_src) From 08ec0af7c69f5da0f8c75c84886694877b9b08bf Mon Sep 17 00:00:00 2001 From: Johny Mo Swag Date: Fri, 8 Mar 2013 22:48:05 -0800 Subject: [PATCH 6/8] catch fatal error --- youtube_dl/InfoExtractors.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index c2e3c8983..a31aa759e 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -3666,6 +3666,9 @@ def _real_extract(self, url): mobj = re.search(_src_url, webpage_src) + m = re.match(self._VALID_URL, url) + video_id = m.group('id') + if mobj is not None: video_url = mobj.group() if 'mp4' in video_url: @@ -3673,8 +3676,8 @@ def _real_extract(self, url): else: ext = 'flv' else: - video_url = None - ext = None + self._downloader.trouble(u'ERROR: Cannot find video url for %s' % video_id) + return _title = r"""(.*)""" @@ -3697,9 +3700,6 @@ def _real_extract(self, url): if mobj is not None: title = mobj.group(1) thumbnail = None - - m = re.match(self._VALID_URL, url) - video_id = m.group('id') results = [{ 'id': video_id, From 51af426d89f9a9e720d70f3cac1ce24b3b8e4d8f Mon Sep 17 00:00:00 2001 From: Johny Mo Swag Date: Fri, 8 Mar 2013 22:52:17 -0800 Subject: [PATCH 7/8] forgot to fix this. --- .gitignore | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 328fed8bd..ca4e8f353 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,4 @@ youtube-dl.tar.gz .coverage cover/ updates_key.pem -*.egg-info - -*.flv +*.egg-info \ No newline at end of file From 44e939514ebb37f002bc9a2663e8669c3a201da8 Mon Sep 17 00:00:00 2001 From: Johny Mo Swag Date: Thu, 28 Mar 2013 20:05:28 -0700 Subject: [PATCH 8/8] Added test for WorldStarHipHop --- test/tests.json | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/tests.json b/test/tests.json index 7af3c2892..4190c5387 100644 --- a/test/tests.json +++ b/test/tests.json @@ -293,5 +293,14 @@ "info_dict": { "title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2" } + }, + { + "name": "WorldStarHipHop", + "url": "http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO", + "file": "wshh6a7q1ny0G34ZwuIO.mp4", + "md5": "9d04de741161603bf7071bbf4e883186", + "info_dict": { + "title": "Video: KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick! " + } } ]