yt-dlp/yt_dlp/extractor/extremetube.py

49 lines
1.7 KiB
Python
Raw Normal View History

2016-08-16 23:02:12 +00:00
from ..utils import str_to_int
from .keezmovies import KeezMoviesIE
2016-08-16 23:02:12 +00:00
class ExtremeTubeIE(KeezMoviesIE):
    """Extractor for extremetube.com video pages.

    The bulk of the metadata is produced by ``self._extract_info`` (not
    defined in this class; presumably inherited from ``KeezMoviesIE``).
    This class adds a title fallback plus the uploader name and view
    count, all scraped from the page HTML.
    """

    # Accepts /video/<id> with an optional single leading path segment
    # (e.g. /gay/video/<id>); <id> runs up to the next '/', '#', '?' or '&'.
    _VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?P<id>[^/#?&]+)'
    _TESTS = [
        {
            'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
            'md5': '92feaafa4b58e82f261e5419f39c60cb',
            'info_dict': {
                'id': 'music-video-14-british-euro-brit-european-cumshots-swallow-652431',
                'ext': 'mp4',
                'title': 'Music Video 14 british euro brit european cumshots swallow',
                'uploader': 'anonim',
                'view_count': int,
                'age_limit': 18,
            },
        },
        {
            'url': 'http://www.extremetube.com/gay/video/abcde-1234',
            'only_matching': True,
        },
        {
            'url': 'http://www.extremetube.com/video/latina-slut-fucked-by-fat-black-dick',
            'only_matching': True,
        },
        {
            'url': 'http://www.extremetube.com/video/652431',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the info dict for *url*, enriched with uploader and views.

        ``self._extract_info(url)`` supplies a ``(webpage, info)`` pair;
        ``info`` is updated in place and returned.
        """
        webpage, info = self._extract_info(url)

        # Only scrape the <h1 title="..."> attribute when the shared
        # extraction step left the title empty.
        if not info['title']:
            info['title'] = self._search_regex(
                r'<h1[^>]+title="([^"]+)"[^>]*>', webpage, 'title')

        # Both lookups pass fatal=False, so a layout change is presumably
        # tolerated rather than aborting the extraction.
        uploader_name = self._html_search_regex(
            r'Uploaded by:\s*</[^>]+>\s*<a[^>]+>(.+?)</a>',
            webpage, 'uploader', fatal=False)
        views_text = self._search_regex(
            r'Views:\s*</[^>]+>\s*<[^>]+>([\d,\.]+)</',
            webpage, 'view count', fatal=False)
        # The matched text may carry ',' or '.' separators; str_to_int is
        # relied on to turn it into a number.
        views = str_to_int(views_text)

        info.update(uploader=uploader_name, view_count=views)
        return info