Merge remote-tracking branch 'upstream/master'

This commit is contained in:
rupertbaxter2 2014-08-19 07:15:33 -07:00
commit ca7a9c1bf7
7 changed files with 237 additions and 14 deletions

View file

@ -147,6 +147,7 @@
from .izlesene import IzleseneIE from .izlesene import IzleseneIE
from .jadorecettepub import JadoreCettePubIE from .jadorecettepub import JadoreCettePubIE
from .jeuxvideo import JeuxVideoIE from .jeuxvideo import JeuxVideoIE
from .jove import JoveIE
from .jukebox import JukeboxIE from .jukebox import JukeboxIE
from .justintv import JustinTVIE from .justintv import JustinTVIE
from .jpopsukitv import JpopsukiIE from .jpopsukitv import JpopsukiIE
@ -178,6 +179,7 @@
from .metacafe import MetacafeIE from .metacafe import MetacafeIE
from .metacritic import MetacriticIE from .metacritic import MetacriticIE
from .mit import TechTVMITIE, MITIE, OCWMITIE from .mit import TechTVMITIE, MITIE, OCWMITIE
from .mitele import MiTeleIE
from .mixcloud import MixcloudIE from .mixcloud import MixcloudIE
from .mlb import MLBIE from .mlb import MLBIE
from .mpora import MporaIE from .mpora import MporaIE
@ -252,6 +254,7 @@
from .rottentomatoes import RottenTomatoesIE from .rottentomatoes import RottenTomatoesIE
from .roxwel import RoxwelIE from .roxwel import RoxwelIE
from .rtbf import RTBFIE from .rtbf import RTBFIE
from .rtlnl import RtlXlIE
from .rtlnow import RTLnowIE from .rtlnow import RTLnowIE
from .rts import RTSIE from .rts import RTSIE
from .rtve import RTVEALaCartaIE from .rtve import RTVEALaCartaIE

View file

@ -30,7 +30,7 @@ def _real_extract(self, url):
video_id) video_id)
video_info = player_info.find('video') video_info = player_info.find('video')
f4m_info = self._download_xml(video_info.find('url').text, video_id) f4m_info = self._download_xml(self._proto_relative_url(video_info.find('url').text.strip()), video_id)
token_el = f4m_info.find('token') token_el = f4m_info.find('token')
manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0' manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'

View file

@ -0,0 +1,80 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
unified_strdate
)
class JoveIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?jove\.com/video/(?P<id>[0-9]+)'
_CHAPTERS_URL = 'http://www.jove.com/video-chapters?videoid={video_id:}'
_TESTS = [
{
'url': 'http://www.jove.com/video/2744/electrode-positioning-montage-transcranial-direct-current',
'md5': '93723888d82dbd6ba8b3d7d0cd65dd2b',
'info_dict': {
'id': '2744',
'ext': 'mp4',
'title': 'Electrode Positioning and Montage in Transcranial Direct Current Stimulation',
'description': 'md5:015dd4509649c0908bc27f049e0262c6',
'thumbnail': 're:^https?://.*\.png$',
'upload_date': '20110523',
}
},
{
'url': 'http://www.jove.com/video/51796/culturing-caenorhabditis-elegans-axenic-liquid-media-creation',
'md5': '914aeb356f416811d911996434811beb',
'info_dict': {
'id': '51796',
'ext': 'mp4',
'title': 'Culturing Caenorhabditis elegans in Axenic Liquid Media and Creation of Transgenic Worms by Microparticle Bombardment',
'description': 'md5:35ff029261900583970c4023b70f1dc9',
'thumbnail': 're:^https?://.*\.png$',
'upload_date': '20140802',
}
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
chapters_id = self._html_search_regex(
r'/video-chapters\?videoid=([0-9]+)', webpage, 'chapters id')
chapters_xml = self._download_xml(
self._CHAPTERS_URL.format(video_id=chapters_id),
video_id, note='Downloading chapters XML',
errnote='Failed to download chapters XML')
video_url = chapters_xml.attrib.get('video')
if not video_url:
raise ExtractorError('Failed to get the video URL')
title = self._html_search_meta('citation_title', webpage, 'title')
thumbnail = self._og_search_thumbnail(webpage)
description = self._html_search_regex(
r'<div id="section_body_summary"><p class="jove_content">(.+?)</p>',
webpage, 'description', fatal=False)
publish_date = unified_strdate(self._html_search_meta(
'citation_publication_date', webpage, 'publish date', fatal=False))
comment_count = self._html_search_regex(
r'<meta name="num_comments" content="(\d+) Comments?"',
webpage, 'comment count', fatal=False)
return {
'id': video_id,
'title': title,
'url': video_url,
'thumbnail': thumbnail,
'description': description,
'upload_date': publish_date,
'comment_count': comment_count,
}

View file

@ -0,0 +1,60 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
get_element_by_attribute,
parse_duration,
strip_jsonp,
)
class MiTeleIE(InfoExtractor):
IE_NAME = 'mitele.es'
_VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<episode>[^/]+)/'
_TEST = {
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
'md5': '6a75fe9d0d3275bead0cb683c616fddb',
'info_dict': {
'id': '0fce117d',
'ext': 'mp4',
'title': 'Programa 144 - Tor, la web invisible',
'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
'display_id': 'programa-144',
'duration': 2913,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
episode = mobj.group('episode')
webpage = self._download_webpage(url, episode)
embed_data_json = self._search_regex(
r'MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data',
flags=re.DOTALL
).replace('\'', '"')
embed_data = json.loads(embed_data_json)
info_url = embed_data['flashvars']['host']
info_el = self._download_xml(info_url, episode).find('./video/info')
video_link = info_el.find('videoUrl/link').text
token_query = compat_urllib_parse.urlencode({'id': video_link})
token_info = self._download_json(
'http://token.mitele.es/?' + token_query, episode,
transform_source=strip_jsonp
)
return {
'id': embed_data['videoId'],
'display_id': episode,
'title': info_el.find('title').text,
'url': token_info['tokenizedUrl'],
'description': get_element_by_attribute('class', 'text', webpage),
'thumbnail': info_el.find('thumb').text,
'duration': parse_duration(info_el.find('duration').text),
}

View file

@ -20,17 +20,41 @@ class PBSIE(InfoExtractor):
) )
''' '''
_TEST = { _TESTS = [
'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/', {
'md5': 'ce1888486f0908d555a8093cac9a7362', 'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
'info_dict': { 'md5': 'ce1888486f0908d555a8093cac9a7362',
'id': '2365006249', 'info_dict': {
'ext': 'mp4', 'id': '2365006249',
'title': 'A More Perfect Union', 'ext': 'mp4',
'description': 'md5:ba0c207295339c8d6eced00b7c363c6a', 'title': 'A More Perfect Union',
'duration': 3190, 'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
'duration': 3190,
},
}, },
} {
'url': 'http://www.pbs.org/wgbh/pages/frontline/losing-iraq/',
'md5': '143c98aa54a346738a3d78f54c925321',
'info_dict': {
'id': '2365297690',
'ext': 'mp4',
'title': 'Losing Iraq',
'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
'duration': 5050,
},
},
{
'url': 'http://www.pbs.org/newshour/bb/education-jan-june12-cyberschools_02-23/',
'md5': 'b19856d7f5351b17a5ab1dc6a64be633',
'info_dict': {
'id': '2201174722',
'ext': 'mp4',
'title': 'Cyber Schools Gain Popularity, but Quality Questions Persist',
'description': 'md5:5871c15cba347c1b3d28ac47a73c7c28',
'duration': 801,
},
},
]
def _extract_ids(self, url): def _extract_ids(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@ -40,10 +64,13 @@ def _extract_ids(self, url):
if presumptive_id: if presumptive_id:
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
# frontline video embed MEDIA_ID_REGEXES = [
r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed
r'class="coveplayerid">([^<]+)<', # coveplayer
]
media_id = self._search_regex( media_id = self._search_regex(
r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", MEDIA_ID_REGEXES, webpage, 'media ID', fatal=False, default=None)
webpage, 'frontline video ID', fatal=False, default=None)
if media_id: if media_id:
return media_id, presumptive_id return media_id, presumptive_id

View file

@ -0,0 +1,52 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class RtlXlIE(InfoExtractor):
IE_NAME = 'rtlxl.nl'
_VALID_URL = r'https?://www\.rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)'
_TEST = {
'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
'info_dict': {
'id': '6e4203a6-0a5e-3596-8424-c599a59e0677',
'ext': 'flv',
'title': 'RTL Nieuws - Laat',
'description': 'Dagelijks het laatste nieuws uit binnen- en '
'buitenland. Voor nog meer nieuws kunt u ook gebruikmaken van '
'onze mobiele apps.',
'timestamp': 1408051800,
'upload_date': '20140814',
},
'params': {
# We download the first bytes of the first fragment, it can't be
# processed by the f4m downloader beacuse it isn't complete
'skip_download': True,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
uuid = mobj.group('uuid')
info = self._download_json(
'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
uuid)
meta = info['meta']
material = info['material'][0]
episode_info = info['episodes'][0]
f4m_url = 'http://manifest.us.rtl.nl' + material['videopath']
progname = info['abstracts'][0]['name']
subtitle = material['title'] or info['episodes'][0]['name']
return {
'id': uuid,
'title': '%s - %s' % (progname, subtitle),
'formats': self._extract_f4m_formats(f4m_url, uuid),
'timestamp': material['original_date'],
'description': episode_info['synopsis'],
}

View file

@ -827,6 +827,7 @@ def unified_strdate(date_str):
'%b %dnd %Y %I:%M%p', '%b %dnd %Y %I:%M%p',
'%b %dth %Y %I:%M%p', '%b %dth %Y %I:%M%p',
'%Y-%m-%d', '%Y-%m-%d',
'%Y/%m/%d',
'%d.%m.%Y', '%d.%m.%Y',
'%d/%m/%Y', '%d/%m/%Y',
'%Y/%m/%d %H:%M:%S', '%Y/%m/%d %H:%M:%S',