From ceb3367320e06be2307adc9ff134718eb96ad38d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 26 Nov 2014 20:02:40 +0600 Subject: [PATCH] [gorillavid] Generalize extraction with countdown timeout and support fastvideo.in (Closes #4297) --- youtube_dl/extractor/gorillavid.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py index 65b153417..506521097 100644 --- a/youtube_dl/extractor/gorillavid.py +++ b/youtube_dl/extractor/gorillavid.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import re +import time from .common import InfoExtractor from ..utils import ( @@ -9,6 +10,7 @@ determine_ext, compat_urllib_parse, compat_urllib_request, + int_or_none, ) @@ -16,7 +18,7 @@ class GorillaVidIE(InfoExtractor): IE_DESC = 'GorillaVid.in, daclips.in and movpod.in' _VALID_URL = r'''(?x) https?://(?P(?:www\.)? - (?:daclips\.in|gorillavid\.in|movpod\.in))/ + (?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in))/ (?:embed-)?(?P[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)? 
''' @@ -49,6 +51,16 @@ class GorillaVidIE(InfoExtractor): 'title': 'Micro Pig piglets ready on 16th July 2009-bG0PdrCdxUc', 'thumbnail': 're:http://.*\.jpg', } + }, { + # video with countdown timeout + 'url': 'http://fastvideo.in/1qmdn1lmsmbw', + 'md5': '8b87ec3f6564a3108a0e8e66594842ba', + 'info_dict': { + 'id': '1qmdn1lmsmbw', + 'ext': 'mp4', + 'title': 'Man of Steel - Trailer', + 'thumbnail': 're:http://.*\.jpg', + }, }, { 'url': 'http://movpod.in/0wguyyxi1yca', 'only_matching': True, @@ -71,6 +83,12 @@ def _real_extract(self, url): ''', webpage)) if fields['op'] == 'download1': + countdown = int_or_none(self._search_regex( + r'(?:[Ww]ait)?\s*(\d+)\s*(?:seconds?)?', + webpage, 'countdown', default=None)) + if countdown: + self._sleep(countdown, video_id) + post = compat_urllib_parse.urlencode(fields) req = compat_urllib_request.Request(url, post) @@ -78,9 +96,13 @@ def _real_extract(self, url): webpage = self._download_webpage(req, video_id, 'Downloading video page') - title = self._search_regex(r'style="z-index: [0-9]+;">([^<]+)', webpage, 'title') - video_url = self._search_regex(r'file\s*:\s*\'(http[^\']+)\',', webpage, 'file url') - thumbnail = self._search_regex(r'image\s*:\s*\'(http[^\']+)\',', webpage, 'thumbnail', fatal=False) + title = self._search_regex( + r'style="z-index: [0-9]+;">([^<]+)', + webpage, 'title', default=None) or self._og_search_title(webpage) + video_url = self._search_regex( + r'file\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'file url') + thumbnail = self._search_regex( + r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', fatal=False) formats = [{ 'format_id': 'sd',