From 64102296818f94d3814a8183daa5d92cbdd952fd Mon Sep 17 00:00:00 2001 From: newtonelectron Date: Sun, 5 Apr 2015 12:50:21 -0700 Subject: [PATCH 1/5] [SpankBang] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/spankbang.py | 38 +++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 youtube_dl/extractor/spankbang.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 0f7d44616..e6fdf1297 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -471,6 +471,7 @@ SouthparkDeIE, ) from .space import SpaceIE +from .spankbang import SpankBangIE from .spankwire import SpankwireIE from .spiegel import SpiegelIE, SpiegelArticleIE from .spiegeltv import SpiegeltvIE diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py new file mode 100644 index 000000000..8e845ef26 --- /dev/null +++ b/youtube_dl/extractor/spankbang.py @@ -0,0 +1,38 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +import re + +class SpankBangIE(InfoExtractor): + """Extractor for http://spankbang.com""" + + _VALID_URL = r"https?://(?:www\.)?spankbang\.com/(?P<id>\w+)/video/.*" + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex(r"

(?:)?(.*?)

", webpage, "title") + + stream_key = self._html_search_regex(r"""var\s+stream_key\s*[=]\s*['"](.+?)['"]\s*;""", webpage, "stream_key") + + qualities = re.findall(r"([0-9]+p).*?", webpage) + + formats = [] + for q in sorted(qualities): + formats.append({ + "format_id": q, + "format": q, + "ext": "mp4", + "url": "http://spankbang.com/_{}/{}/title/{}__mp4".format(video_id, stream_key, q) + }) + + return { + "id": video_id, + "title": title, + "description": self._og_search_description(webpage), + "formats": formats + } + +# vim: tabstop=4 expandtab From 2e7daef50220ee90e8a2e2b979600f8bd4a3e40e Mon Sep 17 00:00:00 2001 From: newtonelectron Date: Sun, 5 Apr 2015 13:43:21 -0700 Subject: [PATCH 2/5] [SpankBang] Use python2.6 compatible string formatting spec --- youtube_dl/extractor/spankbang.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index 8e845ef26..61fd64d17 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -25,7 +25,7 @@ def _real_extract(self, url): "format_id": q, "format": q, "ext": "mp4", - "url": "http://spankbang.com/_{}/{}/title/{}__mp4".format(video_id, stream_key, q) + "url": "http://spankbang.com/_{0}/{1}/title/{2}__mp4".format(video_id, stream_key, q) }) return { From 5c1d459ae91d2681be88023e9056dcae3f48a70a Mon Sep 17 00:00:00 2001 From: newtonelectron Date: Sun, 5 Apr 2015 13:57:59 -0700 Subject: [PATCH 3/5] [SpankBang] Add test --- youtube_dl/extractor/spankbang.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index 61fd64d17..2e20a5ad5 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -8,6 +8,20 @@ class SpankBangIE(InfoExtractor): """Extractor for http://spankbang.com""" _VALID_URL = r"https?://(?:www\.)?spankbang\.com/(?P<id>\w+)/video/.*" + + _TEST = { + "url":
"http://spankbang.com/3vvn/video/fantasy+solo", + "md5": "1cc433e1d6aa14bc376535b8679302f7", + "info_dict": { + "id": "3vvn", + "title": "fantasy solo", + "description": "Watch fantasy solo free HD porn video - 05 minutes - dillion harper masturbates on a bed free adult movies.", + "format": "720p", + "format_id": "720p", + "ext": "mp4", + "url": "re:http://spankbang.com/_3vvn/IjE0MjgyNjY5MTcuMzUi.IaGrcF-vDrvktMhjd-1fWixiCzU/title/720p__mp4" + } + } def _real_extract(self, url): video_id = self._match_id(url) From c7ac5dce8c692f82f10363e40a7085ac53113bc8 Mon Sep 17 00:00:00 2001 From: newtonelectron Date: Sun, 5 Apr 2015 14:02:05 -0700 Subject: [PATCH 4/5] [SpankBang] Remove regexp type prefix from _TEST url. --- youtube_dl/extractor/spankbang.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index 2e20a5ad5..d0b5ba278 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -19,7 +19,7 @@ class SpankBangIE(InfoExtractor): "format": "720p", "format_id": "720p", "ext": "mp4", - "url": "re:http://spankbang.com/_3vvn/IjE0MjgyNjY5MTcuMzUi.IaGrcF-vDrvktMhjd-1fWixiCzU/title/720p__mp4" + "url": "http://spankbang.com/_3vvn/IjE0MjgyNjY5MTcuMzUi.IaGrcF-vDrvktMhjd-1fWixiCzU/title/720p__mp4" } } From d97aae75724fc301243a00c5a71ac93b235d62fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 6 Apr 2015 21:24:17 +0600 Subject: [PATCH 5/5] [spankbang] Improve and simplify --- youtube_dl/extractor/spankbang.py | 80 +++++++++++++++++-------------- 1 file changed, 44 insertions(+), 36 deletions(-) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index d0b5ba278..7f060b15b 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -1,25 +1,23 @@ -# coding: utf-8 from __future__ import unicode_literals -from .common import InfoExtractor import re +from .common import 
InfoExtractor + + + class SpankBangIE(InfoExtractor): - """Extractor for http://spankbang.com""" - - _VALID_URL = r"https?://(?:www\.)?spankbang\.com/(?P<id>\w+)/video/.*" - + _VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video' _TEST = { - "url": "http://spankbang.com/3vvn/video/fantasy+solo", - "md5": "1cc433e1d6aa14bc376535b8679302f7", - "info_dict": { - "id": "3vvn", - "title": "fantasy solo", - "description": "Watch fantasy solo free HD porn video - 05 minutes - dillion harper masturbates on a bed free adult movies.", - "format": "720p", - "format_id": "720p", - "ext": "mp4", - "url": "http://spankbang.com/_3vvn/IjE0MjgyNjY5MTcuMzUi.IaGrcF-vDrvktMhjd-1fWixiCzU/title/720p__mp4" + 'url': 'http://spankbang.com/3vvn/video/fantasy+solo', + 'md5': '1cc433e1d6aa14bc376535b8679302f7', + 'info_dict': { + 'id': '3vvn', + 'ext': 'mp4', + 'title': 'fantasy solo', + 'description': 'dillion harper masturbates on a bed', + 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader': 'silly2587', + 'age_limit': 18, } } @@ -27,26 +25,36 @@ def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r"

(?:)?(.*?)

", webpage, "title") - - stream_key = self._html_search_regex(r"""var\s+stream_key\s*[=]\s*['"](.+?)['"]\s*;""", webpage, "stream_key") - - qualities = re.findall(r"([0-9]+p).*?", webpage) - - formats = [] - for q in sorted(qualities): - formats.append({ - "format_id": q, - "format": q, - "ext": "mp4", - "url": "http://spankbang.com/_{0}/{1}/title/{2}__mp4".format(video_id, stream_key, q) - }) + stream_key = self._html_search_regex( + r'''var\s+stream_key\s*=\s*['"](.+?)['"]''', + webpage, 'stream key') + + formats = [{ + 'url': 'http://spankbang.com/_%s/%s/title/%sp__mp4' % (video_id, stream_key, height), + 'ext': 'mp4', + 'format_id': '%sp' % height, + 'height': int(height), + } for height in re.findall(r']+q_(\d+)p', webpage)] + self._sort_formats(formats) + + title = self._html_search_regex( + r'(?s)

(.+?)

', webpage, 'title') + description = self._search_regex( + r'class="desc"[^>]*>([^<]+)', + webpage, 'description', default=None) + thumbnail = self._og_search_thumbnail(webpage) + uploader = self._search_regex( + r'class="user"[^>]*>([^<]+)', + webpage, 'uploader', fatal=False) + + age_limit = self._rta_search(webpage) return { - "id": video_id, - "title": title, - "description": self._og_search_description(webpage), - "formats": formats + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'uploader': uploader, + 'formats': formats, + 'age_limit': age_limit, } - -# vim: tabstop=4 expandtab