Add support for Howcast.com - closes #835

This commit is contained in:
Filippo Valsorda 2013-05-18 19:17:19 +02:00
parent 340fa21198
commit d4f76f1674
2 changed files with 48 additions and 1 deletions

View file

@ -441,5 +441,15 @@
"title": "Obama Celebrates Iraq Victory" "title": "Obama Celebrates Iraq Victory"
}, },
"skip": "Requires rtmpdump" "skip": "Requires rtmpdump"
},
{
"name": "Howcast",
"url": "http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly",
"file": "390161.mp4",
"md5": "1d7ba54e2c9d7dc6935ef39e00529138",
"info_dict":{
"title":"How to Tie a Square Knot Properly",
"description":"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot."
}
} }
] ]

View file

@ -4041,7 +4041,7 @@ def _real_extract(self,url):
class InaIE(InfoExtractor): class InaIE(InfoExtractor):
"""Information Extractor for Ina.fr""" """Information Extractor for Ina.fr"""
_VALID_URL = r'(?:http://)?(?:www.)?ina\.fr/video/(?P<id>I[0-9]+)/.*' _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
def _real_extract(self,url): def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@ -4068,6 +4068,42 @@ def _real_extract(self,url):
'title': video_title, 'title': video_title,
}] }]
class HowcastIE(InfoExtractor):
    """Information Extractor for Howcast.com"""
    _VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>[\d]+)'

    def _real_extract(self, url):
        """Extract the media URL, title and description of a Howcast video.

        url -- a Howcast video page URL matching _VALID_URL.

        Returns a list holding a single info dictionary with the keys
        'id', 'url', 'ext', 'title' and 'description'. 'description' is
        None when it cannot be found on the page.

        Raises ExtractorError if the URL does not match _VALID_URL, or if
        the video URL or the title cannot be found on the page.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            # Fail with the same exception type as the other extraction
            # errors below instead of an AttributeError on None.
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        # The page is requested by numeric id only; anything following the
        # id in the original URL is dropped.
        webpage_url = 'http://www.howcast.com/videos/' + video_id
        webpage = self._download_webpage(webpage_url, video_id)

        # The media URL is taken from a 'file': "<url>" entry in the page.
        mobj = re.search(r'\'file\': "(http://mobile-media\.howcast\.com/\d+\.mp4)"', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video URL')
        video_url = mobj.group(1)

        # The content attribute may be delimited by either quote character,
        # so exactly one of the two groups is populated on a match.
        mobj = re.search(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract title')
        video_title = mobj.group(1) or mobj.group(2)

        # The description is optional: warn and carry on without it.
        mobj = re.search(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'', webpage)
        if mobj is None:
            self._downloader.report_warning(u'unable to extract description')
            video_description = None
        else:
            video_description = mobj.group(1) or mobj.group(2)

        return [{
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'description': video_description,
        }]
def gen_extractors(): def gen_extractors():
""" Return a list of an instance of every supported extractor. """ Return a list of an instance of every supported extractor.
The order does matter; the first extractor matched is the one handling the URL. The order does matter; the first extractor matched is the one handling the URL.
@ -4125,6 +4161,7 @@ def gen_extractors():
BandcampIE(), BandcampIE(),
RedTubeIE(), RedTubeIE(),
InaIE(), InaIE(),
HowcastIE(),
GenericIE() GenericIE()
] ]