[mtvn] update mtv network related extractors

This commit is contained in:
Unknown 2020-10-09 07:50:22 +02:00
parent b6e0c7d2e3
commit cf7cb94287
7 changed files with 36 additions and 17 deletions

View file

@ -3,6 +3,8 @@
from .mtv import MTVServicesInfoExtractor from .mtv import MTVServicesInfoExtractor
from ..utils import unified_strdate from ..utils import unified_strdate
# TODO Remove - Reason: Outdated Site
class BetIE(MTVServicesInfoExtractor): class BetIE(MTVServicesInfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html' _VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html'

View file

@ -2,6 +2,8 @@
from .mtv import MTVIE from .mtv import MTVIE
# TODO Remove - Reason: Outdated Site
class CMTIE(MTVIE): class CMTIE(MTVIE):
IE_NAME = 'cmt.com' IE_NAME = 'cmt.com'
@ -39,7 +41,7 @@ class CMTIE(MTVIE):
'only_matching': True, 'only_matching': True,
}] }]
def _extract_mgid(self, webpage): def _extract_mgid(self, webpage, url):
mgid = self._search_regex( mgid = self._search_regex(
r'MTVN\.VIDEO\.contentUri\s*=\s*([\'"])(?P<mgid>.+?)\1', r'MTVN\.VIDEO\.contentUri\s*=\s*([\'"])(?P<mgid>.+?)\1',
webpage, 'mgid', group='mgid', default=None) webpage, 'mgid', group='mgid', default=None)
@ -50,5 +52,5 @@ def _extract_mgid(self, webpage):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
mgid = self._extract_mgid(webpage) mgid = self._extract_mgid(webpage, url)
return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid) return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid)

View file

@ -48,7 +48,7 @@ class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
playlist_id = self._match_id(url) playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id) webpage = self._download_webpage(url, playlist_id)
mgid = self._extract_triforce_mgid(webpage, data_zone='t2_lc_promo1') mgid = self._extract_mgid(webpage, url, data_zone='t2_lc_promo1')
videos_info = self._get_videos_info(mgid) videos_info = self._get_videos_info(mgid)
return videos_info return videos_info

View file

@ -255,8 +255,10 @@ def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None):
return try_get(feed, lambda x: x['result']['data']['id'], compat_str) return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
def _extract_new_triforce_mgid(self, webpage, url='', data_zone=None, video_id=None): def _extract_new_triforce_mgid(self, webpage, url='', video_id=None):
# print(compat_urlparse.urlparse(url).netloc) # print(compat_urlparse.urlparse(url).netloc)
if url == '':
return
domain = get_domain(url) domain = get_domain(url)
if domain is None: if domain is None:
raise ExtractorError( raise ExtractorError(
@ -268,13 +270,14 @@ def _extract_new_triforce_mgid(self, webpage, url='', data_zone=None, video_id=N
triforce_manifest_url = _TRIFORCE_V8_TEMPLATE % (domain, enc_url) triforce_manifest_url = _TRIFORCE_V8_TEMPLATE % (domain, enc_url)
manifest = self._download_json(triforce_manifest_url, video_id, fatal=False) manifest = self._download_json(triforce_manifest_url, video_id, fatal=False)
if manifest.get('manifest').get('type') == 'redirect': if manifest:
self.to_screen('Found a redirect. Downloading manifest from new location') if manifest.get('manifest').get('type') == 'redirect':
new_loc = manifest.get('manifest').get('newLocation') self.to_screen('Found a redirect. Downloading manifest from new location')
new_loc = new_loc.replace("https://", "http://") new_loc = manifest.get('manifest').get('newLocation')
enc_new_loc = compat_urlparse.quote(new_loc, safe='') new_loc = new_loc.replace("https://", "http://")
triforce_manifest_new_loc = _TRIFORCE_V8_TEMPLATE % (domain, enc_new_loc) enc_new_loc = compat_urlparse.quote(new_loc, safe='')
manifest = self._download_json(triforce_manifest_new_loc, video_id, fatal=False) triforce_manifest_new_loc = _TRIFORCE_V8_TEMPLATE % (domain, enc_new_loc)
manifest = self._download_json(triforce_manifest_new_loc, video_id, fatal=False)
item_id = try_get(manifest, lambda x: x['manifest']['reporting']['itemId'], compat_str) item_id = try_get(manifest, lambda x: x['manifest']['reporting']['itemId'], compat_str)
if not item_id: if not item_id:
@ -287,7 +290,7 @@ def _extract_new_triforce_mgid(self, webpage, url='', data_zone=None, video_id=N
return mgid return mgid
def _extract_mgid(self, webpage, url): def _extract_mgid(self, webpage, url, data_zone=None):
try: try:
# the url can be http://media.mtvnservices.com/fb/{mgid}.swf # the url can be http://media.mtvnservices.com/fb/{mgid}.swf
# or http://media.mtvnservices.com/{mgid} # or http://media.mtvnservices.com/{mgid}
@ -313,7 +316,7 @@ def _extract_mgid(self, webpage, url):
mgid = self._extract_new_triforce_mgid(webpage, url) mgid = self._extract_new_triforce_mgid(webpage, url)
if not mgid: if not mgid:
mgid = self._extract_triforce_mgid(webpage) mgid = self._extract_triforce_mgid(webpage, data_zone)
return mgid return mgid

View file

@ -245,5 +245,5 @@ class NickRuIE(MTVServicesInfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
mgid = self._extract_mgid(webpage) mgid = self._extract_mgid(webpage, url)
return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid) return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid)

View file

@ -20,8 +20,18 @@ class BellatorIE(MTVServicesInfoExtractor):
_FEED_URL = 'http://www.bellator.com/feeds/mrss/' _FEED_URL = 'http://www.bellator.com/feeds/mrss/'
_GEO_COUNTRIES = ['US'] _GEO_COUNTRIES = ['US']
def _extract_mgid(self, webpage): def _extract_mgid(self, webpage, url):
return self._extract_triforce_mgid(webpage) mgid = None
if not mgid:
mgid = self._extract_triforce_mgid(webpage)
if not mgid:
mgid = self._extract_new_triforce_mgid(webpage, url)
return mgid
# TODO Remove - Reason: Outdated Site
class ParamountNetworkIE(MTVServicesInfoExtractor): class ParamountNetworkIE(MTVServicesInfoExtractor):
@ -43,7 +53,7 @@ class ParamountNetworkIE(MTVServicesInfoExtractor):
_FEED_URL = 'http://www.paramountnetwork.com/feeds/mrss/' _FEED_URL = 'http://www.paramountnetwork.com/feeds/mrss/'
_GEO_COUNTRIES = ['US'] _GEO_COUNTRIES = ['US']
def _extract_mgid(self, webpage): def _extract_mgid(self, webpage, url):
root_data = self._parse_json(self._search_regex( root_data = self._parse_json(self._search_regex(
r'window\.__DATA__\s*=\s*({.+})', r'window\.__DATA__\s*=\s*({.+})',
webpage, 'data'), None) webpage, 'data'), None)

View file

@ -3,6 +3,8 @@
from .mtv import MTVServicesInfoExtractor from .mtv import MTVServicesInfoExtractor
# TODO Remove - Reason: Outdated Site
class VH1IE(MTVServicesInfoExtractor): class VH1IE(MTVServicesInfoExtractor):
IE_NAME = 'vh1.com' IE_NAME = 'vh1.com'