[closertotruth] Update and improve (Closes #8680)

This commit is contained in:
Sergey M․ 2016-06-19 00:35:29 +07:00
parent 41c1023300
commit cb23192bc4
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D
2 changed files with 71 additions and 47 deletions

View file

@@ -1,69 +1,92 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
class CloserToTruthIE(InfoExtractor): class CloserToTruthIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(episodes/|(series|interviews)/(?:[^#]+#video-)?(?P<id>\d+))' _VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [ _TESTS = [{
{ 'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688',
'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688', 'info_dict': {
'md5': '5c548bde260a9247ddfdc07c7458ed29', 'id': '0_zof1ktre',
'info_dict': { 'display_id': 'solutions-the-mind-body-problem',
'id': '0_zof1ktre', 'ext': 'mov',
'ext': 'mov', 'title': 'Solutions to the Mind-Body Problem?',
'title': 'Solutions to the Mind-Body Problem?', 'upload_date': '20140221',
'upload_date': '20140221', 'timestamp': 1392956007,
'timestamp': 1392956007, 'uploader_id': 'CTTXML'
'uploader_id': 'CTTXML'
}
}, },
{ 'params': {
'url': 'http://closertotruth.com/interviews/1725', 'skip_download': True,
'md5': 'b00598fd6a38372edb976408f72c5792',
'info_dict': {
'id': '0_19qv5rn1',
'ext': 'mov',
'title': 'AyaFr-002 - Francisco J. Ayala',
'upload_date': '20140307',
'timestamp': 1394236431,
'uploader_id': 'CTTXML'
}
}, },
{ }, {
'url': 'http://closertotruth.com/episodes/how-do-brains-work', 'url': 'http://closertotruth.com/episodes/how-do-brains-work',
'md5': '4dd96aa0a5c296afa5c0bd24895c2f16', 'info_dict': {
'info_dict': { 'id': '0_iuxai6g6',
'id': '0_iuxai6g6', 'display_id': 'how-do-brains-work',
'ext': 'mov', 'ext': 'mov',
'title': 'How do Brains Work?', 'title': 'How do Brains Work?',
'upload_date': '20140221', 'upload_date': '20140221',
'timestamp': 1392956024, 'timestamp': 1392956024,
'uploader_id': 'CTTXML' 'uploader_id': 'CTTXML'
}
}, },
] 'params': {
'skip_download': True,
},
}, {
'url': 'http://closertotruth.com/interviews/1725',
'info_dict': {
'id': '1725',
'title': 'AyaFr-002',
},
'playlist_mincount': 2,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_title = self._search_regex(r'<title>(.+) \|.+</title>', webpage, 'video title') webpage = self._download_webpage(url, display_id)
entry_id = self._search_regex(r'<a[^>]+id="(?:video-%s|embed-kaltura)"[^>]+data-kaltura="([^"]+)' % video_id, webpage, "video entry_id") partner_id = self._search_regex(
r'<script[^>]+src=["\'].*?\b(?:partner_id|p)/(\d+)',
webpage, 'kaltura partner_id')
interviewee_name = self._search_regex(r'<div id="(?:node_interview_full_group_white_wrapper|node_interview_series_full_group_ajax_content)"(?:.|\n)*<h3>(.*)</h3>.+', webpage, "video interviewee_name", False) title = self._search_regex(
r'<title>(.+?)\s*\|\s*.+?</title>', webpage, 'video title')
if interviewee_name: select = self._search_regex(
video_title = video_title + ' - ' + interviewee_name r'(?s)<select[^>]+id="select-version"[^>]*>(.+?)</select>',
webpage, 'select version', default=None)
if select:
entry_ids = set()
entries = []
for mobj in re.finditer(
r'<option[^>]+value=(["\'])(?P<id>[0-9a-z_]+)(?:#.+?)?\1[^>]*>(?P<title>[^<]+)',
webpage):
entry_id = mobj.group('id')
if entry_id in entry_ids:
continue
entry_ids.add(entry_id)
entries.append({
'_type': 'url_transparent',
'url': 'kaltura:%s:%s' % (partner_id, entry_id),
'ie_key': 'Kaltura',
'title': mobj.group('title'),
})
if entries:
return self.playlist_result(entries, display_id, title)
p_id = self._search_regex(r'<script[^>]+src=["\'].+?partner_id/(\d+)', webpage, "kaltura partner_id") entry_id = self._search_regex(
r'<a[^>]+id=(["\'])embed-kaltura\1[^>]+data-kaltura=(["\'])(?P<id>[0-9a-z_]+)\2',
webpage, 'kaltura entry_id', group='id')
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'id': entry_id, 'display_id': display_id,
'url': 'kaltura:%s:%s' % (p_id, entry_id), 'url': 'kaltura:%s:%s' % (partner_id, entry_id),
'ie_key': 'Kaltura', 'ie_key': 'Kaltura',
'title': video_title 'title': title
} }

View file

@@ -140,6 +140,7 @@
from .clipfish import ClipfishIE from .clipfish import ClipfishIE
from .cliphunter import CliphunterIE from .cliphunter import CliphunterIE
from .clipsyndicate import ClipsyndicateIE from .clipsyndicate import ClipsyndicateIE
from .closertotruth import CloserToTruthIE
from .cloudy import CloudyIE from .cloudy import CloudyIE
from .clubic import ClubicIE from .clubic import ClubicIE
from .clyp import ClypIE from .clyp import ClypIE