mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-05 23:54:24 +00:00
[limelight] Improve embeds extraction (closes #12761)
* Move extraction code to extractor * Add extraction for LimelightEmbeddedPlayerFlash embeds * Extract multiple video
This commit is contained in:
parent
751c89a27d
commit
e5d39886ec
|
@ -85,6 +85,7 @@
|
|||
from .openload import OpenloadIE
|
||||
from .videopress import VideoPressIE
|
||||
from .rutube import RutubeIE
|
||||
from .limelight import LimelightBaseIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
|
@ -2483,6 +2484,11 @@ def _real_extract(self, url):
|
|||
return self.url_result(piksel_url, PikselIE.ie_key())
|
||||
|
||||
# Look for Limelight embeds
|
||||
limelight_urls = LimelightBaseIE._extract_urls(webpage, url)
|
||||
if limelight_urls:
|
||||
return self.playlist_result(
|
||||
limelight_urls, video_id, video_title, video_description)
|
||||
|
||||
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
|
||||
if mobj:
|
||||
lm = {
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
ExtractorError,
|
||||
)
|
||||
|
@ -18,6 +19,42 @@ class LimelightBaseIE(InfoExtractor):
|
|||
_PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
|
||||
_API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'
|
||||
|
||||
@classmethod
|
||||
def _extract_urls(cls, webpage, source_url):
|
||||
lm = {
|
||||
'Media': 'media',
|
||||
'Channel': 'channel',
|
||||
'ChannelList': 'channel_list',
|
||||
}
|
||||
entries = []
|
||||
for kind, video_id in re.findall(
|
||||
r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})',
|
||||
webpage):
|
||||
print('video_id', video_id)
|
||||
entries.append(cls.url_result(
|
||||
smuggle_url(
|
||||
'limelight:%s:%s' % (lm[kind], video_id),
|
||||
{'source_url': source_url}),
|
||||
'Limelight%s' % kind, video_id))
|
||||
for mobj in re.finditer(
|
||||
# As per [1] class attribute should be exactly equal to
|
||||
# LimelightEmbeddedPlayerFlash but numerous examples seen
|
||||
# that don't exactly match it (e.g. [2]).
|
||||
# 1. http://support.3playmedia.com/hc/en-us/articles/227732408-Limelight-Embedding-the-Captions-Plugin-with-the-Limelight-Player-on-Your-Webpage
|
||||
# 2. http://www.sedona.com/FacilitatorTraining2017
|
||||
r'''(?sx)
|
||||
<object[^>]+class=(["\'])(?:(?!\1).)*\bLimelightEmbeddedPlayerFlash\b(?:(?!\1).)*\1[^>]*>.*?
|
||||
<param[^>]+
|
||||
name=(["\'])flashVars\2[^>]+
|
||||
value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
|
||||
''', webpage):
|
||||
entries.append(cls.url_result(
|
||||
smuggle_url(
|
||||
'limelight:media:%s' % mobj.group('id'),
|
||||
{'source_url': source_url}),
|
||||
'LimelightMedia', mobj.group('id')))
|
||||
return entries
|
||||
|
||||
def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
|
||||
headers = {}
|
||||
if referer:
|
||||
|
|
Loading…
Reference in a new issue