From 52afe996656987ad17999212d8a25e12151147a7 Mon Sep 17 00:00:00 2001
From: Pierre Rudloff
Date: Tue, 3 Sep 2013 01:51:17 +0200
Subject: [PATCH] Extractor for defense.gouv.fr

---
 youtube_dl/extractor/__init__.py |  1 +
 youtube_dl/extractor/defense.py  | 55 ++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)
 create mode 100644 youtube_dl/extractor/defense.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 9f56e427c..a96b62d37 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -21,6 +21,7 @@
 from .depositfiles import DepositFilesIE
 from .dotsub import DotsubIE
 from .dreisat import DreiSatIE
+from .defense import DefenseGouvFrIE
 from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .escapist import EscapistIE
diff --git a/youtube_dl/extractor/defense.py b/youtube_dl/extractor/defense.py
new file mode 100644
index 000000000..963fb897f
--- /dev/null
+++ b/youtube_dl/extractor/defense.py
@@ -0,0 +1,55 @@
+# coding: utf-8
+'''Extractor for defense.gouv.fr'''
+import re
+import json
+
+from .common import InfoExtractor
+
+
+class DefenseGouvFrIE(InfoExtractor):
+    '''Extractor for defense.gouv.fr WebTV lightbox pages.
+
+    The page is searched for a numeric flashvars.pvg_id value, which is
+    appended to a static.videos.gouv.fr URL to fetch a JSON document; the
+    'url' of the first entry in its 'renditions' list is the video URL.
+    '''
+    # NOTE(review): presumably the base class reads IE_NAME, not _IE_NAME
+    # -- confirm against InfoExtractor before relying on this name.
+    _IE_NAME = 'defense.gouv.fr'
+    # 'ligthboxvideo' (sic): same spelling as in the _TEST URL below.
+    _VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/'
+                  r'ligthboxvideo/base-de-medias/webtv/(.*)')
+
+    _TEST = {
+        # Adjacent literals are concatenated into a single URL string; a
+        # comma between them would turn the value into a tuple.
+        u'url': (u'http://www.defense.gouv.fr/layout/set/ligthboxvideo/'
+                 u'base-de-medias/webtv/'
+                 u'attaque-chimique-syrienne-du-21-aout-2013-1')
+    }
+
+    def _real_extract(self, url):
+        '''Return the info dict (id, ext, url, title) for url.
+
+        The part of url captured by _VALID_URL is used as the title and is
+        passed as the second argument to both _download_webpage calls.
+        '''
+        title = re.match(self._VALID_URL, url).group(1)
+        webpage = self._download_webpage(url, title)
+        # The dot is escaped so it matches only a literal '.'.
+        video_id = self._search_regex(
+            r'flashvars\.pvg_id="(\d+)";',
+            webpage, 'ID')
+
+        json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
+                    + video_id)
+        info = self._download_webpage(json_url, title,
+                                      'Downloading JSON config')
+        # Only the first rendition listed in the JSON is used.
+        video_url = json.loads(info)['renditions'][0]['url']
+
+        return {'id': video_id,
+                'ext': 'mp4',
+                'url': video_url,
+                'title': title,
+                }