From 3af1fac7b0f43778e44b3b86e0c74bf25fb6f489 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 18 Jul 2015 09:51:59 +0100 Subject: [PATCH 1/5] [dcn] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/dcn.py | 46 ++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 youtube_dl/extractor/dcn.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 06f21064b..cc0da81d1 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -110,6 +110,7 @@ ) from .daum import DaumIE from .dbtv import DBTVIE +from .dcn import DcnIE from .dctp import DctpTvIE from .deezer import DeezerPlaylistIE from .dfb import DFBIE diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py new file mode 100644 index 000000000..5263def4c --- /dev/null +++ b/youtube_dl/extractor/dcn.py @@ -0,0 +1,46 @@ +from .common import InfoExtractor + +class DcnIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/.+|show/\d+/.+?)/(?P<id>\d+)/?' 
+ _TEST = { + 'url': 'http://www.dcndigital.ae/#/show/199074/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375/6887', + 'info_dict': + { + 'id': '17375', + 'ext': 'm3u8', + 'title': 'رحلة العمر : الحلقة 1', + 'description': '"في هذه الحلقة من برنامج رحلة العمر يقدّم الدكتور عمر عبد الكافي تبسيطاً لمناسك الحج والعمرة ويجيب مباشرة على استفسارات حجاج بيت الله الحرام بخصوص مناسك الحج والعمرة1"', + 'thumbnail': 'http://admin.mangomolo.com/analytics/uploads/71/images/media/2/2cefc09d7bec80afa754682f40e49503.jpg', + 'duration': '2041' + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + json_data = self._download_json( + 'http://admin.mangomolo.com/analytics/index.php/plus/video?id='+video_id, + video_id + ) + title = json_data['title_ar']; + thumbnail = 'http://admin.mangomolo.com/analytics/'+json_data['img']; + duration = json_data['duration']; + description = json_data['description_ar']; + webpage = self._download_webpage( + 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?id='+json_data['id']+'&user_id='+json_data['user_id']+'&countries=Q0M=&w=100%&h=100%&filter=DENY&signature='+json_data['signature'], + video_id + ) + m3u8_url = self._html_search_regex( + r'file: "(?P<m3u8_url>.*?)"', + webpage, + 'm3u8_url', + group='m3u8_url' + ) + formats = self._extract_m3u8_formats(m3u8_url, video_id) + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'description': description, + 'formats': formats, + } From 9d681c2bb3b75a666b76d8e346ffab66b65f9132 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 18 Jul 2015 10:00:24 +0100 Subject: [PATCH 2/5] remove unnecessary group name --- youtube_dl/extractor/dcn.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py index 5263def4c..f76ebda9e 100644 --- a/youtube_dl/extractor/dcn.py +++ b/youtube_dl/extractor/dcn.py @@ -30,10 
+30,9 @@ def _real_extract(self, url): video_id ) m3u8_url = self._html_search_regex( - r'file: "(?P<m3u8_url>.*?)"', + r'file:\s*"([^"]+)', webpage, - 'm3u8_url', - group='m3u8_url' + 'm3u8_url' ) formats = self._extract_m3u8_formats(m3u8_url, video_id) return { From cd6b555e19c601d575679dd29da0080eda7f8890 Mon Sep 17 00:00:00 2001 From: remitamine Date: Thu, 6 Aug 2015 19:17:50 +0100 Subject: [PATCH 3/5] [dcn] add origin to api request and fix the test and check with flake8 --- youtube_dl/extractor/dcn.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py index f76ebda9e..d44e8cef0 100644 --- a/youtube_dl/extractor/dcn.py +++ b/youtube_dl/extractor/dcn.py @@ -1,4 +1,9 @@ +# coding: utf-8 +from __future__ import unicode_literals + from .common import InfoExtractor +from ..compat import compat_urllib_request + class DcnIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/.+|show/\d+/.+?)/(?P<id>\d+)/?' 
@@ -9,24 +14,29 @@ class DcnIE(InfoExtractor): 'id': '17375', 'ext': 'm3u8', 'title': 'رحلة العمر : الحلقة 1', - 'description': '"في هذه الحلقة من برنامج رحلة العمر يقدّم الدكتور عمر عبد الكافي تبسيطاً لمناسك الحج والعمرة ويجيب مباشرة على استفسارات حجاج بيت الله الحرام بخصوص مناسك الحج والعمرة1"', + 'description': 'في هذه الحلقة من برنامج رحلة العمر يقدّم الدكتور عمر عبد الكافي تبسيطاً لمناسك الحج والعمرة ويجيب مباشرة على استفسارات حجاج بيت الله الحرام بخصوص مناسك الحج والعمرة\n1', 'thumbnail': 'http://admin.mangomolo.com/analytics/uploads/71/images/media/2/2cefc09d7bec80afa754682f40e49503.jpg', 'duration': '2041' - } + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, } def _real_extract(self, url): video_id = self._match_id(url) - json_data = self._download_json( - 'http://admin.mangomolo.com/analytics/index.php/plus/video?id='+video_id, - video_id + request = compat_urllib_request.Request( + 'http://admin.mangomolo.com/analytics/index.php/plus/video?id=' + video_id, + headers={'Origin': 'http://www.dcndigital.ae'} ) - title = json_data['title_ar']; - thumbnail = 'http://admin.mangomolo.com/analytics/'+json_data['img']; - duration = json_data['duration']; - description = json_data['description_ar']; + json_data = self._download_json(request, video_id) + title = json_data['title_ar'] + thumbnail = 'http://admin.mangomolo.com/analytics/' + json_data['img'] + duration = json_data['duration'] + description = json_data['description_ar'] webpage = self._download_webpage( - 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?id='+json_data['id']+'&user_id='+json_data['user_id']+'&countries=Q0M=&w=100%&h=100%&filter=DENY&signature='+json_data['signature'], + 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?id=' + json_data['id'] + '&user_id=' + json_data['user_id'] + '&countries=Q0M=&w=100%&h=100%&filter=DENY&signature=' + json_data['signature'], video_id ) m3u8_url = self._html_search_regex( From 
6d30cf04db9c9662dbb30c2490e24eb5c6dca4c3 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 7 Aug 2015 10:01:18 +0100 Subject: [PATCH 4/5] [dcn] fix type and key errors --- youtube_dl/extractor/dcn.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py index d44e8cef0..22ff35b56 100644 --- a/youtube_dl/extractor/dcn.py +++ b/youtube_dl/extractor/dcn.py @@ -3,6 +3,7 @@ from .common import InfoExtractor from ..compat import compat_urllib_request +from ..utils import int_or_none class DcnIE(InfoExtractor): @@ -16,7 +17,7 @@ class DcnIE(InfoExtractor): 'title': 'رحلة العمر : الحلقة 1', 'description': 'في هذه الحلقة من برنامج رحلة العمر يقدّم الدكتور عمر عبد الكافي تبسيطاً لمناسك الحج والعمرة ويجيب مباشرة على استفسارات حجاج بيت الله الحرام بخصوص مناسك الحج والعمرة\n1', 'thumbnail': 'http://admin.mangomolo.com/analytics/uploads/71/images/media/2/2cefc09d7bec80afa754682f40e49503.jpg', - 'duration': '2041' + 'duration': 2041 }, 'params': { # m3u8 download @@ -32,9 +33,9 @@ def _real_extract(self, url): ) json_data = self._download_json(request, video_id) title = json_data['title_ar'] - thumbnail = 'http://admin.mangomolo.com/analytics/' + json_data['img'] - duration = json_data['duration'] - description = json_data['description_ar'] + thumbnail = 'http://admin.mangomolo.com/analytics/' + json_data.get('img') + duration = int_or_none(json_data.get('duration')) + description = json_data.get('description_ar') webpage = self._download_webpage( 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?id=' + json_data['id'] + '&user_id=' + json_data['user_id'] + '&countries=Q0M=&w=100%&h=100%&filter=DENY&signature=' + json_data['signature'], video_id From f94639fadf91312bf3365802981f506ecba698dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Aug 2015 00:06:03 +0600 Subject: [PATCH 5/5] [dcn] Improve --- youtube_dl/extractor/__init__.py | 2 +- 
youtube_dl/extractor/dcn.py | 78 ++++++++++++++++++++++---------- 2 files changed, 54 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index eb8ef1fe3..922d9b3d8 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -118,7 +118,7 @@ ) from .daum import DaumIE from .dbtv import DBTVIE -from .dcn import DcnIE +from .dcn import DCNIE from .dctp import DctpTvIE from .deezer import DeezerPlaylistIE from .dfb import DFBIE diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py index 22ff35b56..b98a6c032 100644 --- a/youtube_dl/extractor/dcn.py +++ b/youtube_dl/extractor/dcn.py @@ -2,22 +2,30 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_request -from ..utils import int_or_none +from ..compat import ( + compat_urllib_parse, + compat_urllib_request, +) +from ..utils import ( + int_or_none, + parse_iso8601, +) -class DcnIE(InfoExtractor): +class DCNIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/.+|show/\d+/.+?)/(?P<id>\d+)/?' 
_TEST = { 'url': 'http://www.dcndigital.ae/#/show/199074/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375/6887', 'info_dict': { 'id': '17375', - 'ext': 'm3u8', + 'ext': 'mp4', 'title': 'رحلة العمر : الحلقة 1', - 'description': 'في هذه الحلقة من برنامج رحلة العمر يقدّم الدكتور عمر عبد الكافي تبسيطاً لمناسك الحج والعمرة ويجيب مباشرة على استفسارات حجاج بيت الله الحرام بخصوص مناسك الحج والعمرة\n1', - 'thumbnail': 'http://admin.mangomolo.com/analytics/uploads/71/images/media/2/2cefc09d7bec80afa754682f40e49503.jpg', - 'duration': 2041 + 'description': 'md5:0156e935d870acb8ef0a66d24070c6d6', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 2041, + 'timestamp': 1227504126, + 'upload_date': '20081124', }, 'params': { # m3u8 download @@ -27,30 +35,50 @@ class DcnIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + request = compat_urllib_request.Request( - 'http://admin.mangomolo.com/analytics/index.php/plus/video?id=' + video_id, - headers={'Origin': 'http://www.dcndigital.ae'} - ) - json_data = self._download_json(request, video_id) - title = json_data['title_ar'] - thumbnail = 'http://admin.mangomolo.com/analytics/' + json_data.get('img') - duration = int_or_none(json_data.get('duration')) - description = json_data.get('description_ar') + 'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id, + headers={'Origin': 'http://www.dcndigital.ae'}) + + video = self._download_json(request, video_id) + title = video.get('title_en') or video['title_ar'] + webpage = self._download_webpage( - 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?id=' + json_data['id'] + '&user_id=' + json_data['user_id'] + '&countries=Q0M=&w=100%&h=100%&filter=DENY&signature=' + json_data['signature'], - video_id - ) - m3u8_url = self._html_search_regex( - r'file:\s*"([^"]+)', - webpage, - 'm3u8_url' - ) - formats = self._extract_m3u8_formats(m3u8_url, video_id) + 
'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + + compat_urllib_parse.urlencode({ + 'id': video['id'], + 'user_id': video['user_id'], + 'signature': video['signature'], + 'countries': 'Q0M=', + 'filter': 'DENY', + }), video_id) + + m3u8_url = self._html_search_regex(r'file:\s*"([^"]+)', webpage, 'm3u8 url') + formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') + + rtsp_url = self._search_regex( + r'<a[^>]+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False) + if rtsp_url: + formats.append({ + 'url': rtsp_url, + 'format_id': 'rtsp', + }) + + self._sort_formats(formats) + + img = video.get('img') + thumbnail = 'http://admin.mangomolo.com/analytics/%s' % img if img else None + duration = int_or_none(video.get('duration')) + description = video.get('description_en') or video.get('description_ar') + timestamp = parse_iso8601(video.get('create_time') or video.get('update_time'), ' ') + return { 'id': video_id, 'title': title, + 'description': description, 'thumbnail': thumbnail, 'duration': duration, - 'description': description, + 'timestamp': timestamp, 'formats': formats, }