added YouJizz extractor

This commit is contained in:
Jeff Crouse 2012-12-16 00:26:27 -05:00
parent 9a2cf56d51
commit 187da2c093
3 changed files with 93 additions and 9 deletions

View file

@ -25,7 +25,7 @@ # OPTIONS
--list-extractors List all supported extractors and the URLs they --list-extractors List all supported extractors and the URLs they
would handle would handle
## Video Selection: Video Selection:
--playlist-start NUMBER playlist video to start at (default is 1) --playlist-start NUMBER playlist video to start at (default is 1)
--playlist-end NUMBER playlist video to end at (default is last) --playlist-end NUMBER playlist video to end at (default is last)
--match-title REGEX download only matching titles (regex or caseless --match-title REGEX download only matching titles (regex or caseless
@ -34,7 +34,7 @@ ## Video Selection:
caseless sub-string) caseless sub-string)
--max-downloads NUMBER Abort after downloading NUMBER files --max-downloads NUMBER Abort after downloading NUMBER files
## Filesystem Options: Filesystem Options:
-t, --title use title in file name -t, --title use title in file name
--id use video ID in file name --id use video ID in file name
-l, --literal use literal title in file name -l, --literal use literal title in file name
@ -59,7 +59,7 @@ ## Filesystem Options:
--write-description write video description to a .description file --write-description write video description to a .description file
--write-info-json write video metadata to a .info.json file --write-info-json write video metadata to a .info.json file
## Verbosity / Simulation Options: Verbosity / Simulation Options:
-q, --quiet activates quiet mode -q, --quiet activates quiet mode
-s, --simulate do not download the video and do not write anything -s, --simulate do not download the video and do not write anything
to disk to disk
@ -74,7 +74,7 @@ ## Verbosity / Simulation Options:
--console-title display progress in console titlebar --console-title display progress in console titlebar
-v, --verbose print various debugging information -v, --verbose print various debugging information
## Video Format Options: Video Format Options:
-f, --format FORMAT video format code -f, --format FORMAT video format code
--all-formats download all available video formats --all-formats download all available video formats
--prefer-free-formats prefer free video formats unless a specific one is --prefer-free-formats prefer free video formats unless a specific one is
@ -86,12 +86,12 @@ ## Video Format Options:
--srt-lang LANG language of the closed captions to download --srt-lang LANG language of the closed captions to download
(optional) use IETF language tags like 'en' (optional) use IETF language tags like 'en'
## Authentication Options: Authentication Options:
-u, --username USERNAME account username -u, --username USERNAME account username
-p, --password PASSWORD account password -p, --password PASSWORD account password
-n, --netrc use .netrc authentication data -n, --netrc use .netrc authentication data
## Post-processing Options: Post-processing Options:
-x, --extract-audio convert video files to audio-only files (requires -x, --extract-audio convert video files to audio-only files (requires
ffmpeg or avconv and ffprobe or avprobe) ffmpeg or avconv and ffprobe or avprobe)
--audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", or "wav"; --audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", or "wav";
@ -133,7 +133,7 @@ ### SyntaxError: Non-ASCII character ###
The error The error
File "youtube-dl", line 2 File "youtube-dl", line 2
SyntaxError: Non-ASCII character '\x93' ... SyntaxError: Non-ASCII character '\x93' ...
means you're using an outdated version of Python. Please update to Python 2.6 or 2.7. means you're using an outdated version of Python. Please update to Python 2.6 or 2.7.

View file

@ -3439,7 +3439,7 @@ def _real_extract(self, url):
return return
self.report_webpage(url) self.report_webpage(url)
# Get the video URL # Get the video title
result = re.search(self.VIDEO_TITLE_RE, webpage) result = re.search(self.VIDEO_TITLE_RE, webpage)
if result is None: if result is None:
self._downloader.trouble(u'ERROR: unable to extract video title') self._downloader.trouble(u'ERROR: unable to extract video title')
@ -3610,3 +3610,87 @@ def _real_extract(self, url):
return [info] return [info]
class YouJizzIE(InfoExtractor):
    """Information extractor for youjizz.com.

    Extraction is a two-step scrape: the video page supplies the title and
    a link to an embed page, and the embed page carries the actual media
    URL inside a ``so.addVariable("file", ...)`` call.
    """

    # Literal dots are escaped so that e.g. "Xhtml" is not accepted as ".html".
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/([^.]+)\.html$'
    IE_NAME = u'youjizz'
    # Non-greedy so a second "</title>" on the same line is not swallowed.
    VIDEO_TITLE_RE = r'<title>(?P<title>.*?)</title>'
    EMBED_PAGE_RE = r'http://www\.youjizz\.com/videos/embed/(?P<videoid>[0-9]+)'
    SOURCE_RE = r'so\.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);'

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)

    @staticmethod
    def _as_unicode(value):
        """Return value as text, decoding UTF-8 only if it is a byte string.

        Calling .decode() on text that is already unicode makes Python 2
        encode it to ASCII first, which raises UnicodeEncodeError for any
        non-ASCII character; hence the type check.
        """
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    def report_extract_entry(self, url):
        """Report the media URL that was extracted."""
        self._downloader.to_screen(u'[youjizz] Downloading entry: %s' % self._as_unicode(url))

    def report_webpage(self, url):
        """Report that the video page has been downloaded."""
        self._downloader.to_screen(u'[youjizz] Downloaded page: %s' % self._as_unicode(url))

    def report_title(self, video_title):
        """Report the extracted video title."""
        self._downloader.to_screen(u'[youjizz] Title: %s' % self._as_unicode(video_title))

    def report_embed_page(self, embed_page):
        """Report the embed page URL that was found."""
        self._downloader.to_screen(u'[youjizz] Embed Page: %s' % self._as_unicode(embed_page))

    def _fetch_page(self, url, description):
        """Download url and return its raw body, or None on failure.

        description is inserted into the error message passed to
        self._downloader.trouble() when the download fails.
        """
        try:
            handle = urllib2.urlopen(url)
            try:
                return handle.read()
            finally:
                # Release the connection even if read() fails.
                handle.close()
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            self._downloader.trouble(u'ERROR: unable to download %s: %s' % (description, err))
            return None

    def _real_extract(self, url):
        """Extract the video information for a youjizz.com video page.

        Returns a one-element list holding the info dictionary, or None
        when any step fails (the failure is reported through
        self._downloader.trouble()).
        """
        # Get webpage content
        webpage = self._fetch_page(url, u'video webpage')
        if webpage is None:
            return
        self.report_webpage(url)

        # Get the video title
        result = re.search(self.VIDEO_TITLE_RE, webpage)
        if result is None:
            self._downloader.trouble(u'ERROR: unable to extract video title')
            return
        video_title = self._as_unicode(result.group('title')).strip()
        self.report_title(video_title)

        # Get the embed page
        result = re.search(self.EMBED_PAGE_RE, webpage)
        if result is None:
            self._downloader.trouble(u'ERROR: unable to extract embed page')
            return
        embed_page_url = self._as_unicode(result.group(0)).strip()
        video_id = self._as_unicode(result.group('videoid'))
        self.report_embed_page(embed_page_url)

        webpage = self._fetch_page(embed_page_url, u'video embed page')
        if webpage is None:
            return

        # Get the video URL
        result = re.search(self.SOURCE_RE, webpage)
        if result is None:
            self._downloader.trouble(u'ERROR: unable to extract video url')
            return
        video_url = self._as_unicode(result.group('source'))
        self.report_extract_entry(video_url)

        info = {'id': video_id,
                'url': video_url,
                'uploader': None,
                'upload_date': None,
                'title': video_title,
                'ext': 'flv',
                'format': 'flv',
                'thumbnail': None,
                'description': None,
                'player_url': embed_page_url}

        return [info]

View file

@ -363,7 +363,7 @@ def gen_extractors():
GooglePlusIE(), GooglePlusIE(),
PornotubeIE(), PornotubeIE(),
YouPornIE(), YouPornIE(),
YouJizzIE(),
GenericIE() GenericIE()
] ]