Merge remote-tracking branch 'peugeot/sexu'

2024-11-17 21:59:17 +00:00 · 2014-11-13 15:18:38 +01:00 · 2014-11-13 15:18:38 +01:00 · c47ec62b83
parent e4bdb37ec6 437f68d868
commit c47ec62b83
2 changed files with 62 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -324,6 +324,7 @@
 from .scivee import SciVeeIE
 from .screencast import ScreencastIE
 from .servingsys import ServingSysIE
 from .sexu import SexuIE
 from .sexykarma import SexyKarmaIE
 from .shared import SharedIE
 from .sharesix import ShareSixIE
--- a/youtube_dl/extractor/sexu.py
+++ b/youtube_dl/extractor/sexu.py
@@ -0,0 +1,61 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 class SexuIE(InfoExtractor):
    """Extractor for video pages on sexu.com.

    The formats are scraped from the ``sources: [...]`` array found in the
    page; title, description, thumbnail and categories are read from the
    page markup. Every result is marked with ``age_limit`` 18.
    """
    _VALID_URL = r'https?://(?:www\.)?sexu\.com/(?P<id>\d+)'
    _TEST = {
        'url': 'http://sexu.com/961791/',
        'md5': 'ff615aca9691053c94f8f10d96cd7884',
        'info_dict': {
            'id': '961791',
            'ext': 'mp4',
            'title': 'md5:4d05a19a5fc049a63dbbaf05fb71d91b',
            'description': 'md5:c5ed8625eb386855d5a7967bd7b77a54',
            'categories': list,  # NSFW
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:https?://.*\.jpg$',
            'age_limit': 18,
        }
    }

    @staticmethod
    def _parse_height(label):
        """Return the leading integer of a quality label, or None.

        All leading digits are parsed rather than a fixed-width slice, so a
        four-digit height such as '1080p' yields 1080 (not 108), and a label
        without leading digits yields None instead of raising ValueError.
        """
        mobj = re.match(r'(\d+)', label)
        return int(mobj.group(1)) if mobj else None

    def _real_extract(self, url):
        """Download the page at ``url`` and return its info dictionary.

        The returned dict carries the keys 'id', 'title', 'description',
        'thumbnail', 'categories', 'formats' and 'age_limit'.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        # Body of the player's "sources: [...]" array (text between brackets).
        quality_arr = self._search_regex(
            r'sources:\s*\[([^\]]+)\]', webpage, 'quality formats')
        formats = [{
            # Backslashes are stripped from the URL (presumably the page
            # embeds it with escaped slashes -- confirm against the site).
            'url': file_url.replace('\\', ''),
            'format_id': label,
            'height': self._parse_height(label),
        } for file_url, label in re.findall(
            r'"file":"([^"]+)","label":"([^"]+)"', quality_arr)]
        self._sort_formats(formats)

        title = self._html_search_regex(
            r'<title>([^<]+)\s*-\s*Sexu.Com</title>', webpage, 'title')

        description = self._html_search_meta(
            'description', webpage, 'description')

        # Thumbnail and categories are looked up with fatal=False.
        thumbnail = self._html_search_regex(
            r'image:\s*"([^"]+)"',
            webpage, 'thumbnail', fatal=False)

        categories_str = self._html_search_meta(
            'keywords', webpage, 'categories', fatal=False)
        # The keywords meta value is split on commas; None stays None.
        categories = (
            None if categories_str is None
            else categories_str.split(','))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'categories': categories,
            'formats': formats,
            'age_limit': 18,
        }