[nexx] Improve JS embed extraction

This commit is contained in:
Sergey M․ 2017-07-16 04:30:48 +07:00
parent decf86044d
commit 089b97cfee
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D

View file

@@ -72,13 +72,17 @@ def _extract_urls(webpage):
         entries = []
         # JavaScript Integration
-        for domain_id, video_id in re.findall(
-                r'''(?isx)
-                    <script\b[^>]+\bsrc=["\']https?://require\.nexx(?:\.cloud|cdn\.com)/(\d+).+?
-                    onPLAYReady.+?
-                    _play\.init\s*\(.+?\s*,\s*(\d+)\s*,\s*.+?\)
-                ''', webpage):
-            entries.append('https://api.nexx.cloud/v3/%s/videos/byid/%s' % (domain_id, video_id))
+        mobj = re.search(
+            r'<script\b[^>]+\bsrc=["\']https?://require\.nexx(?:\.cloud|cdn\.com)/(?P<id>\d+)',
+            webpage)
+        if mobj:
+            domain_id = mobj.group('id')
+            for video_id in re.findall(
+                    r'(?is)onPLAYReady.+?_play\.init\s*\(.+?\s*,\s*["\']?(\d+)',
+                    webpage):
+                entries.append(
+                    'https://api.nexx.cloud/v3/%s/videos/byid/%s'
+                    % (domain_id, video_id))
         # TODO: support more embed formats