Update to ytdl-commit-a726009

[blinkx] Remove extractor
a726009987
This commit is contained in:
pukkandan 2021-05-06 21:31:20 +05:30
parent 717297545b
commit 41d1cca328
No known key found for this signature in database
GPG key ID: 0F00D95A001F4698
17 changed files with 249 additions and 105 deletions

View file

@ -41,11 +41,18 @@ jobs:
- name: Install Jython - name: Install Jython
if: ${{ matrix.python-impl == 'jython' }} if: ${{ matrix.python-impl == 'jython' }}
run: | run: |
wget http://search.maven.org/remotecontent?filepath=org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar wget https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar
java -jar jython-installer.jar -s -d "$HOME/jython" java -jar jython-installer.jar -s -d "$HOME/jython"
echo "$HOME/jython/bin" >> $GITHUB_PATH echo "$HOME/jython/bin" >> $GITHUB_PATH
- name: Install nose - name: Install nose
if: ${{ matrix.python-impl != 'jython' }}
run: pip install nose run: pip install nose
- name: Install nose (Jython)
if: ${{ matrix.python-impl == 'jython' }}
# Working around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)
run: |
wget https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl
pip install nose-1.3.7-py2-none-any.whl
- name: Run tests - name: Run tests
continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }} continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }}
env: env:

View file

@ -41,11 +41,18 @@ jobs:
- name: Install Jython - name: Install Jython
if: ${{ matrix.python-impl == 'jython' }} if: ${{ matrix.python-impl == 'jython' }}
run: | run: |
wget http://search.maven.org/remotecontent?filepath=org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar wget https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar
java -jar jython-installer.jar -s -d "$HOME/jython" java -jar jython-installer.jar -s -d "$HOME/jython"
echo "$HOME/jython/bin" >> $GITHUB_PATH echo "$HOME/jython/bin" >> $GITHUB_PATH
- name: Install nose - name: Install nose
if: ${{ matrix.python-impl != 'jython' }}
run: pip install nose run: pip install nose
- name: Install nose (Jython)
if: ${{ matrix.python-impl == 'jython' }}
# Working around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)
run: |
wget https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl
pip install nose-1.3.7-py2-none-any.whl
- name: Run tests - name: Run tests
continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }} continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }}
env: env:

View file

@ -1056,11 +1056,20 @@ def add_extra_info(info_dict, extra_info):
def extract_info(self, url, download=True, ie_key=None, extra_info={}, def extract_info(self, url, download=True, ie_key=None, extra_info={},
process=True, force_generic_extractor=False): process=True, force_generic_extractor=False):
''' """
Returns a list with a dictionary for each video we find. Return a list with a dictionary for each video extracted.
If 'download', also downloads the videos.
extra_info is a dict containing the extra values to add to each result Arguments:
''' url -- URL to extract
Keyword arguments:
download -- whether to download videos during extraction
ie_key -- extractor key hint
extra_info -- dictionary containing the extra values to add to each result
process -- whether to resolve all unresolved references (URLs, playlist items),
must be True for download to work.
force_generic_extractor -- force using the generic extractor
"""
if not ie_key and force_generic_extractor: if not ie_key and force_generic_extractor:
ie_key = 'Generic' ie_key = 'Generic'

View file

@ -133,6 +133,8 @@ def _real_extract(self, url):
'age_limit': 18 if need_confirm_age else 0, 'age_limit': 18 if need_confirm_age else 0,
} }
info = self._search_json_ld(webpage, video_id, default={})
# Source: https://www.cda.pl/js/player.js?t=1606154898 # Source: https://www.cda.pl/js/player.js?t=1606154898
def decrypt_file(a): def decrypt_file(a):
for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'): for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
@ -197,7 +199,7 @@ def extract_format(page, version):
handler = self._download_webpage handler = self._download_webpage
webpage = handler( webpage = handler(
self._BASE_URL + href, video_id, urljoin(self._BASE_URL, href), video_id,
'Downloading %s version information' % resolution, fatal=False) 'Downloading %s version information' % resolution, fatal=False)
if not webpage: if not webpage:
# Manually report warning because empty page is returned when # Manually report warning because empty page is returned when
@ -209,6 +211,4 @@ def extract_format(page, version):
self._sort_formats(formats) self._sort_formats(formats)
info = self._search_json_ld(webpage, video_id, default={})
return merge_dicts(info_dict, info) return merge_dicts(info_dict, info)

View file

@ -32,6 +32,18 @@ class DigitallySpeakingIE(InfoExtractor):
# From http://www.gdcvault.com/play/1013700/Advanced-Material # From http://www.gdcvault.com/play/1013700/Advanced-Material
'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml', 'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml',
'only_matching': True, 'only_matching': True,
}, {
# From https://gdcvault.com/play/1016624, empty speakerVideo
'url': 'https://sevt.dispeak.com/ubm/gdc/online12/xml/201210-822101_1349794556671DDDD.xml',
'info_dict': {
'id': '201210-822101_1349794556671DDDD',
'ext': 'flv',
'title': 'Pre-launch - Preparing to Take the Plunge',
},
}, {
# From http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru, empty slideVideo
'url': 'http://events.digitallyspeaking.com/gdc/project25/xml/p25-miyamoto1999_1282467389849HSVB.xml',
'only_matching': True,
}] }]
def _parse_mp4(self, metadata): def _parse_mp4(self, metadata):
@ -85,25 +97,19 @@ def _parse_flv(self, metadata):
'quality': 1, 'quality': 1,
'format_id': audio.get('code'), 'format_id': audio.get('code'),
}) })
slide_video_path = xpath_text(metadata, './slideVideo', fatal=True) for video_key, format_id, preference in (
formats.append({ ('slide', 'slides', -2), ('speaker', 'speaker', -1)):
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, video_path = xpath_text(metadata, './%sVideo' % video_key)
'play_path': remove_end(slide_video_path, '.flv'), if not video_path:
'ext': 'flv', continue
'format_note': 'slide deck video', formats.append({
'quality': -2, 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
'format_id': 'slides', 'play_path': remove_end(video_path, '.flv'),
'acodec': 'none', 'ext': 'flv',
}) 'format_note': '%s video' % video_key,
speaker_video_path = xpath_text(metadata, './speakerVideo', fatal=True) 'quality': preference,
formats.append({ 'format_id': format_id,
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, })
'play_path': remove_end(speaker_video_path, '.flv'),
'ext': 'flv',
'format_note': 'speaker video',
'quality': -1,
'format_id': 'speaker',
})
return formats return formats
def _real_extract(self, url): def _real_extract(self, url):

View file

@ -151,7 +151,6 @@
BleacherReportIE, BleacherReportIE,
BleacherReportCMSIE, BleacherReportCMSIE,
) )
from .blinkx import BlinkxIE
from .bloomberg import BloombergIE from .bloomberg import BloombergIE
from .bokecc import BokeCCIE from .bokecc import BokeCCIE
from .bongacams import BongaCamsIE from .bongacams import BongaCamsIE

View file

@ -402,6 +402,10 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
}, { }, {
'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin', 'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin',
'only_matching': True, 'only_matching': True,
}, {
# "<figure id=" pattern (#28792)
'url': 'https://www.francetvinfo.fr/culture/patrimoine/incendie-de-notre-dame-de-paris/notre-dame-de-paris-de-l-incendie-de-la-cathedrale-a-sa-reconstruction_4372291.html',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -419,8 +423,7 @@ def _real_extract(self, url):
(r'player\.load[^;]+src:\s*["\']([^"\']+)', (r'player\.load[^;]+src:\s*["\']([^"\']+)',
r'id-video=([^@]+@[^"]+)', r'id-video=([^@]+@[^"]+)',
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"', r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
r'data-id=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})', r'(?:data-id|<figure[^<]+\bid)=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
r'<figure[^>]+id=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
webpage, 'video id') webpage, 'video id')
return self._make_url_result(video_id) return self._make_url_result(video_id)

View file

@ -16,7 +16,7 @@
class FunimationIE(InfoExtractor): class FunimationIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/shows/[^/]+/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?:[^/]+/)?shows/[^/]+/(?P<id>[^/?#&]+)'
_NETRC_MACHINE = 'funimation' _NETRC_MACHINE = 'funimation'
_TOKEN = None _TOKEN = None
@ -51,6 +51,10 @@ class FunimationIE(InfoExtractor):
}, { }, {
'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/', 'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
'only_matching': True, 'only_matching': True,
}, {
# with lang code
'url': 'https://www.funimation.com/en/shows/hacksign/role-play/',
'only_matching': True,
}] }]
def _login(self): def _login(self):

View file

@ -5,7 +5,10 @@
from .common import InfoExtractor from .common import InfoExtractor
from .kaltura import KalturaIE from .kaltura import KalturaIE
from ..utils import ( from ..utils import (
HEADRequest,
remove_start,
sanitized_Request, sanitized_Request,
smuggle_url,
urlencode_postdata, urlencode_postdata,
) )
@ -100,6 +103,26 @@ class GDCVaultIE(InfoExtractor):
'format': 'mp4-408', 'format': 'mp4-408',
}, },
}, },
{
# Kaltura embed, whitespace between quote and embedded URL in iframe's src
'url': 'https://www.gdcvault.com/play/1025699',
'info_dict': {
'id': '0_zagynv0a',
'ext': 'mp4',
'title': 'Tech Toolbox',
'upload_date': '20190408',
'uploader_id': 'joe@blazestreaming.com',
'timestamp': 1554764629,
},
'params': {
'skip_download': True,
},
},
{
# HTML5 video
'url': 'http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru',
'only_matching': True,
},
] ]
def _login(self, webpage_url, display_id): def _login(self, webpage_url, display_id):
@ -120,38 +143,78 @@ def _login(self, webpage_url, display_id):
request = sanitized_Request(login_url, urlencode_postdata(login_form)) request = sanitized_Request(login_url, urlencode_postdata(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('Content-Type', 'application/x-www-form-urlencoded')
self._download_webpage(request, display_id, 'Logging in') self._download_webpage(request, display_id, 'Logging in')
webpage = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page') start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page')
self._download_webpage(logout_url, display_id, 'Logging out') self._download_webpage(logout_url, display_id, 'Logging out')
return webpage return start_page
def _real_extract(self, url): def _real_extract(self, url):
video_id, name = re.match(self._VALID_URL, url).groups() video_id, name = re.match(self._VALID_URL, url).groups()
display_id = name or video_id display_id = name or video_id
webpage = self._download_webpage(url, display_id) webpage_url = 'http://www.gdcvault.com/play/' + video_id
start_page = self._download_webpage(webpage_url, display_id)
title = self._html_search_regex( direct_url = self._search_regex(
r'<td><strong>Session Name:?</strong></td>\s*<td>(.*?)</td>', r's1\.addVariable\("file",\s*encodeURIComponent\("(/[^"]+)"\)\);',
webpage, 'title') start_page, 'url', default=None)
if direct_url:
title = self._html_search_regex(
r'<td><strong>Session Name:?</strong></td>\s*<td>(.*?)</td>',
start_page, 'title')
video_url = 'http://www.gdcvault.com' + direct_url
# resolve the url so that we can detect the correct extension
video_url = self._request_webpage(
HEADRequest(video_url), video_id).geturl()
PLAYER_REGEX = r'<iframe src=\"(?P<manifest_url>.*?)\".*?</iframe>' return {
manifest_url = self._html_search_regex( 'id': video_id,
PLAYER_REGEX, webpage, 'manifest_url') 'display_id': display_id,
'url': video_url,
'title': title,
}
partner_id = self._search_regex( embed_url = KalturaIE._extract_url(start_page)
r'/p(?:artner_id)?/(\d+)', manifest_url, 'partner id', if embed_url:
default='1670711') embed_url = smuggle_url(embed_url, {'source_url': url})
ie_key = 'Kaltura'
else:
PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/(?:gdc-)?player.*?\.html.*?".*?</iframe>'
kaltura_id = self._search_regex( xml_root = self._html_search_regex(
r'entry_id=(?P<id>(?:[^&])+)', manifest_url, PLAYER_REGEX, start_page, 'xml root', default=None)
'kaltura id', group='id') if xml_root is None:
# Probably need to authenticate
login_res = self._login(webpage_url, display_id)
if login_res is None:
self.report_warning('Could not login.')
else:
start_page = login_res
# Grab the url from the authenticated page
xml_root = self._html_search_regex(
PLAYER_REGEX, start_page, 'xml root')
xml_name = self._html_search_regex(
r'<iframe src=".*?\?xml(?:=|URL=xml/)(.+?\.xml).*?".*?</iframe>',
start_page, 'xml filename', default=None)
if not xml_name:
info = self._parse_html5_media_entries(url, start_page, video_id)[0]
info.update({
'title': remove_start(self._search_regex(
r'>Session Name:\s*<.*?>\s*<td>(.+?)</td>', start_page,
'title', default=None) or self._og_search_title(
start_page, default=None), 'GDC Vault - '),
'id': video_id,
'display_id': display_id,
})
return info
embed_url = '%s/xml/%s' % (xml_root, xml_name)
ie_key = 'DigitallySpeaking'
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': 'kaltura:%s:%s' % (partner_id, kaltura_id),
'ie_key': KalturaIE.ie_key(),
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'title': title, 'url': embed_url,
'ie_key': ie_key,
} }

View file

@ -120,7 +120,7 @@ def _extract_url(webpage):
def _extract_urls(webpage): def _extract_urls(webpage):
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site # Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
finditer = ( finditer = (
re.finditer( list(re.finditer(
r"""(?xs) r"""(?xs)
kWidget\.(?:thumb)?[Ee]mbed\( kWidget\.(?:thumb)?[Ee]mbed\(
\{.*? \{.*?
@ -128,8 +128,8 @@ def _extract_urls(webpage):
(?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*? (?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
(?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s* (?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
(?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\}) (?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
""", webpage) """, webpage))
or re.finditer( or list(re.finditer(
r'''(?xs) r'''(?xs)
(?P<q1>["']) (?P<q1>["'])
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)* (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
@ -142,16 +142,16 @@ def _extract_urls(webpage):
\[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s* \[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s*
) )
(?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3) (?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
''', webpage) ''', webpage))
or re.finditer( or list(re.finditer(
r'''(?xs) r'''(?xs)
<(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["']) <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])\s*
(?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+) (?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
(?:(?!(?P=q1)).)* (?:(?!(?P=q1)).)*
[?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+) [?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
(?:(?!(?P=q1)).)* (?:(?!(?P=q1)).)*
(?P=q1) (?P=q1)
''', webpage) ''', webpage))
) )
urls = [] urls = []
for mobj in finditer: for mobj in finditer:

View file

@ -15,33 +15,39 @@
class MedalTVIE(InfoExtractor): class MedalTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://medal.tv/clips/34934644/3Is9zyGMoBMr', 'url': 'https://medal.tv/clips/2mA60jWAGQCBH',
'md5': '7b07b064331b1cf9e8e5c52a06ae68fa', 'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
'info_dict': { 'info_dict': {
'id': '34934644', 'id': '2mA60jWAGQCBH',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Quad Cold', 'title': 'Quad Cold',
'description': 'Medal,https://medal.tv/desktop/', 'description': 'Medal,https://medal.tv/desktop/',
'uploader': 'MowgliSB', 'uploader': 'MowgliSB',
'timestamp': 1603165266, 'timestamp': 1603165266,
'upload_date': '20201020', 'upload_date': '20201020',
'uploader_id': 10619174, 'uploader_id': '10619174',
} }
}, { }, {
'url': 'https://medal.tv/clips/36787208', 'url': 'https://medal.tv/clips/2um24TWdty0NA',
'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148', 'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
'info_dict': { 'info_dict': {
'id': '36787208', 'id': '2um24TWdty0NA',
'ext': 'mp4', 'ext': 'mp4',
'title': 'u tk me i tk u bigger', 'title': 'u tk me i tk u bigger',
'description': 'Medal,https://medal.tv/desktop/', 'description': 'Medal,https://medal.tv/desktop/',
'uploader': 'Mimicc', 'uploader': 'Mimicc',
'timestamp': 1605580939, 'timestamp': 1605580939,
'upload_date': '20201117', 'upload_date': '20201117',
'uploader_id': 5156321, 'uploader_id': '5156321',
} }
}, {
'url': 'https://medal.tv/clips/37rMeFpryCC-9',
'only_matching': True,
}, {
'url': 'https://medal.tv/clips/2WRj40tpY_EU9',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View file

@ -146,7 +146,7 @@ class SVTPlayIE(SVTPlayBaseIE):
) )
(?P<svt_id>[^/?#&]+)| (?P<svt_id>[^/?#&]+)|
https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+) https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
(?:.*?modalId=(?P<modal_id>[\da-zA-Z-]+))? (?:.*?(?:modalId|id)=(?P<modal_id>[\da-zA-Z-]+))?
) )
''' '''
_TESTS = [{ _TESTS = [{
@ -177,6 +177,9 @@ class SVTPlayIE(SVTPlayBaseIE):
}, { }, {
'url': 'https://www.svtplay.se/video/30479064/husdrommar/husdrommar-sasong-8-designdrommar-i-stenungsund?modalId=8zVbDPA', 'url': 'https://www.svtplay.se/video/30479064/husdrommar/husdrommar-sasong-8-designdrommar-i-stenungsund?modalId=8zVbDPA',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.svtplay.se/video/30684086/rapport/rapport-24-apr-18-00-7?id=e72gVpa',
'only_matching': True,
}, { }, {
# geo restricted to Sweden # geo restricted to Sweden
'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten', 'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
@ -259,7 +262,7 @@ def _real_extract(self, url):
if not svt_id: if not svt_id:
svt_id = self._search_regex( svt_id = self._search_regex(
(r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)', (r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
r'<[^>]+\bdata-rt=["\']top-area-play-button["\'][^>]+\bhref=["\'][^"\']*video/%s/[^"\']*\bmodalId=([\da-zA-Z-]+)' % re.escape(video_id), r'<[^>]+\bdata-rt=["\']top-area-play-button["\'][^>]+\bhref=["\'][^"\']*video/%s/[^"\']*\b(?:modalId|id)=([\da-zA-Z-]+)' % re.escape(video_id),
r'["\']videoSvtId["\']\s*:\s*["\']([\da-zA-Z-]+)', r'["\']videoSvtId["\']\s*:\s*["\']([\da-zA-Z-]+)',
r'["\']videoSvtId\\?["\']\s*:\s*\\?["\']([\da-zA-Z-]+)', r'["\']videoSvtId\\?["\']\s*:\s*\\?["\']([\da-zA-Z-]+)',
r'"content"\s*:\s*{.*?"id"\s*:\s*"([\da-zA-Z-]+)"', r'"content"\s*:\s*{.*?"id"\s*:\s*"([\da-zA-Z-]+)"',

View file

@ -74,6 +74,12 @@ def _real_extract(self, url):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
entries = [] entries = []
def add_entry(partner_id, kaltura_id):
entries.append(self.url_result(
'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura',
video_id=kaltura_id))
for video_el in re.findall(r'(?s)<[^>]+\bdata-entryid\s*=[^>]*>', webpage): for video_el in re.findall(r'(?s)<[^>]+\bdata-entryid\s*=[^>]*>', webpage):
video = extract_attributes(video_el) video = extract_attributes(video_el)
kaltura_id = video.get('data-entryid') kaltura_id = video.get('data-entryid')
@ -82,9 +88,14 @@ def _real_extract(self, url):
partner_id = video.get('data-partnerid') partner_id = video.get('data-partnerid')
if not partner_id: if not partner_id:
continue continue
entries.append(self.url_result( add_entry(partner_id, kaltura_id)
'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura', if not entries:
video_id=kaltura_id)) kaltura_id = self._search_regex(
r'entry_id\s*:\s*["\']([0-9a-z_]+)', webpage, 'kaltura id')
partner_id = self._search_regex(
(r'\\u002Fp\\u002F(\d+)\\u002F', r'/p/(\d+)/'), webpage,
'partner id')
add_entry(partner_id, kaltura_id)
return self.playlist_result(entries) return self.playlist_result(entries)

View file

@ -9,7 +9,6 @@
int_or_none, int_or_none,
remove_start, remove_start,
smuggle_url, smuggle_url,
strip_or_none,
try_get, try_get,
) )
@ -45,32 +44,18 @@ def _real_extract(self, url):
query={'token': self._TOKEN})['main'] query={'token': self._TOKEN})['main']
p_id = main['publisher_id'] p_id = main['publisher_id']
service = remove_start(main['service'], 'ts_') service = remove_start(main['service'], 'ts_')
info = {
r_id = main['reference_id']
if service not in ('tx', 'russia2018', 'sebare2018live', 'gorin'):
r_id = 'ref:' + r_id
bc_url = smuggle_url(
self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id),
{'geo_countries': ['JP']})
return {
'_type': 'url_transparent', '_type': 'url_transparent',
'description': try_get(main, lambda x: x['note'][0]['text'], compat_str), 'description': try_get(main, lambda x: x['note'][0]['text'], compat_str),
'episode_number': int_or_none(try_get(main, lambda x: x['ext']['episode_number'])), 'episode_number': int_or_none(try_get(main, lambda x: x['ext']['episode_number'])),
'url': bc_url,
'ie_key': 'BrightcoveNew',
} }
if service == 'cx':
title = main['title']
subtitle = strip_or_none(main.get('subtitle'))
if subtitle:
title += ' - ' + subtitle
info.update({
'title': title,
'url': 'https://i.fod.fujitv.co.jp/plus7/web/%s/%s.html' % (p_id[:4], p_id),
'ie_key': 'FujiTVFODPlus7',
})
else:
r_id = main['reference_id']
if service not in ('tx', 'russia2018', 'sebare2018live', 'gorin'):
r_id = 'ref:' + r_id
bc_url = smuggle_url(
self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id),
{'geo_countries': ['JP']})
info.update({
'url': bc_url,
'ie_key': 'BrightcoveNew',
})
return info

View file

@ -19,6 +19,7 @@
strip_or_none, strip_or_none,
unified_timestamp, unified_timestamp,
update_url_query, update_url_query,
url_or_none,
xpath_text, xpath_text,
) )
@ -52,6 +53,9 @@ def _extract_variant_formats(self, variant, video_id):
return [f], {} return [f], {}
def _extract_formats_from_vmap_url(self, vmap_url, video_id): def _extract_formats_from_vmap_url(self, vmap_url, video_id):
vmap_url = url_or_none(vmap_url)
if not vmap_url:
return []
vmap_data = self._download_xml(vmap_url, video_id) vmap_data = self._download_xml(vmap_url, video_id)
formats = [] formats = []
subtitles = {} subtitles = {}

View file

@ -58,6 +58,7 @@ class XFileShareIE(InfoExtractor):
(r'vidlocker\.xyz', 'VidLocker'), (r'vidlocker\.xyz', 'VidLocker'),
(r'vidshare\.tv', 'VidShare'), (r'vidshare\.tv', 'VidShare'),
(r'vup\.to', 'VUp'), (r'vup\.to', 'VUp'),
(r'wolfstream\.tv', 'WolfStream'),
(r'xvideosharing\.com', 'XVideoSharing'), (r'xvideosharing\.com', 'XVideoSharing'),
) )
@ -82,6 +83,9 @@ class XFileShareIE(InfoExtractor):
}, { }, {
'url': 'https://aparat.cam/n4d6dh0wvlpr', 'url': 'https://aparat.cam/n4d6dh0wvlpr',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://wolfstream.tv/nthme29v9u2x',
'only_matching': True,
}] }]
@staticmethod @staticmethod

View file

@ -11,6 +11,7 @@
parse_duration, parse_duration,
sanitized_Request, sanitized_Request,
str_to_int, str_to_int,
url_or_none,
) )
@ -71,10 +72,10 @@ def _real_extract(self, url):
'Cookie': 'age_verified=1; cookiesAccepted=1', 'Cookie': 'age_verified=1; cookiesAccepted=1',
}) })
title, thumbnail, duration = [None] * 3 title, thumbnail, duration, sources, media_definition = [None] * 5
config = self._parse_json(self._search_regex( config = self._parse_json(self._search_regex(
r'playerConf\s*=\s*({.+?})\s*,\s*(?:\n|loaderConf)', webpage, 'config', r'playerConf\s*=\s*({.+?})\s*,\s*(?:\n|loaderConf|playerWrapper)', webpage, 'config',
default='{}'), video_id, transform_source=js_to_json, fatal=False) default='{}'), video_id, transform_source=js_to_json, fatal=False)
if config: if config:
config = config.get('mainRoll') config = config.get('mainRoll')
@ -83,20 +84,52 @@ def _real_extract(self, url):
thumbnail = config.get('poster') thumbnail = config.get('poster')
duration = int_or_none(config.get('duration')) duration = int_or_none(config.get('duration'))
sources = config.get('sources') or config.get('format') sources = config.get('sources') or config.get('format')
media_definition = config.get('mediaDefinition')
if not isinstance(sources, dict): if not isinstance(sources, dict) and not media_definition:
sources = self._parse_json(self._search_regex( sources = self._parse_json(self._search_regex(
r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),', r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),',
webpage, 'sources', group='sources'), video_id, webpage, 'sources', group='sources'), video_id,
transform_source=js_to_json) transform_source=js_to_json)
formats = [] formats = []
for format_id, format_url in sources.items(): format_urls = set()
formats.append({
'url': format_url, if isinstance(sources, dict):
'format_id': format_id, for format_id, format_url in sources.items():
'height': int_or_none(format_id), format_url = url_or_none(format_url)
}) if not format_url:
continue
if format_url in format_urls:
continue
format_urls.add(format_url)
formats.append({
'url': format_url,
'format_id': format_id,
'height': int_or_none(format_id),
})
if isinstance(media_definition, list):
for media in media_definition:
video_url = url_or_none(media.get('videoUrl'))
if not video_url:
continue
if video_url in format_urls:
continue
format_urls.add(video_url)
format_id = media.get('format')
if format_id == 'hls':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
elif format_id == 'mp4':
height = int_or_none(media.get('quality'))
formats.append({
'url': video_url,
'format_id': '%s-%d' % (format_id, height) if height else format_id,
'height': height,
})
self._remove_duplicate_formats(formats) self._remove_duplicate_formats(formats)
self._sort_formats(formats) self._sort_formats(formats)