mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-07 08:31:17 +00:00
[mtv] Fix extractor by reverting changes made in youtube-dlc
youtube-dl has since fixed the extractor and the changes from the two sources are incompatible
This commit is contained in:
parent
ec5e77c558
commit
ee1e05581e
|
@ -7,7 +7,6 @@
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_xpath,
|
compat_xpath,
|
||||||
compat_urlparse,
|
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
@ -23,7 +22,6 @@
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
url_basename,
|
url_basename,
|
||||||
get_domain,
|
|
||||||
xpath_text,
|
xpath_text,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -45,7 +43,7 @@ def _remove_template_parameter(url):
|
||||||
# Remove the templates, like &device={device}
|
# Remove the templates, like &device={device}
|
||||||
return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url)
|
return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url)
|
||||||
|
|
||||||
def _get_feed_url(self, uri, url=None):
|
def _get_feed_url(self, uri):
|
||||||
return self._FEED_URL
|
return self._FEED_URL
|
||||||
|
|
||||||
def _get_thumbnail_url(self, uri, itemdoc):
|
def _get_thumbnail_url(self, uri, itemdoc):
|
||||||
|
@ -211,9 +209,9 @@ def _get_feed_query(self, uri):
|
||||||
data['lang'] = self._LANG
|
data['lang'] = self._LANG
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def _get_videos_info(self, uri, use_hls=True, url=None):
|
def _get_videos_info(self, uri, use_hls=True):
|
||||||
video_id = self._id_from_uri(uri)
|
video_id = self._id_from_uri(uri)
|
||||||
feed_url = self._get_feed_url(uri, url)
|
feed_url = self._get_feed_url(uri)
|
||||||
info_url = update_url_query(feed_url, self._get_feed_query(uri))
|
info_url = update_url_query(feed_url, self._get_feed_query(uri))
|
||||||
return self._get_videos_info_from_url(info_url, video_id, use_hls)
|
return self._get_videos_info_from_url(info_url, video_id, use_hls)
|
||||||
|
|
||||||
|
@ -259,41 +257,7 @@ def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None):
|
||||||
def _extract_child_with_type(parent, t):
|
def _extract_child_with_type(parent, t):
|
||||||
return next(c for c in parent['children'] if c.get('type') == t)
|
return next(c for c in parent['children'] if c.get('type') == t)
|
||||||
|
|
||||||
def _extract_new_triforce_mgid(self, webpage, url='', video_id=None):
|
def _extract_mgid(self, webpage):
|
||||||
if url == '':
|
|
||||||
return
|
|
||||||
domain = get_domain(url)
|
|
||||||
if domain is None:
|
|
||||||
raise ExtractorError(
|
|
||||||
'[%s] could not get domain' % self.IE_NAME,
|
|
||||||
expected=True)
|
|
||||||
url = url.replace("https://", "http://")
|
|
||||||
enc_url = compat_urlparse.quote(url, safe='')
|
|
||||||
_TRIFORCE_V8_TEMPLATE = 'https://%s/feeds/triforce/manifest/v8?url=%s'
|
|
||||||
triforce_manifest_url = _TRIFORCE_V8_TEMPLATE % (domain, enc_url)
|
|
||||||
|
|
||||||
manifest = self._download_json(triforce_manifest_url, video_id, fatal=False)
|
|
||||||
if manifest:
|
|
||||||
if manifest.get('manifest').get('type') == 'redirect':
|
|
||||||
self.to_screen('Found a redirect. Downloading manifest from new location')
|
|
||||||
new_loc = manifest.get('manifest').get('newLocation')
|
|
||||||
new_loc = new_loc.replace("https://", "http://")
|
|
||||||
enc_new_loc = compat_urlparse.quote(new_loc, safe='')
|
|
||||||
triforce_manifest_new_loc = _TRIFORCE_V8_TEMPLATE % (domain, enc_new_loc)
|
|
||||||
manifest = self._download_json(triforce_manifest_new_loc, video_id, fatal=False)
|
|
||||||
|
|
||||||
item_id = try_get(manifest, lambda x: x['manifest']['reporting']['itemId'], compat_str)
|
|
||||||
if not item_id:
|
|
||||||
self.to_screen('No id found!')
|
|
||||||
return
|
|
||||||
|
|
||||||
# 'episode' can be anything. 'content' is used often as well
|
|
||||||
_MGID_TEMPLATE = 'mgid:arc:episode:%s:%s'
|
|
||||||
mgid = _MGID_TEMPLATE % (domain, item_id)
|
|
||||||
|
|
||||||
return mgid
|
|
||||||
|
|
||||||
def _extract_mgid(self, webpage, url, title=None, data_zone=None):
|
|
||||||
try:
|
try:
|
||||||
# the url can be http://media.mtvnservices.com/fb/{mgid}.swf
|
# the url can be http://media.mtvnservices.com/fb/{mgid}.swf
|
||||||
# or http://media.mtvnservices.com/{mgid}
|
# or http://media.mtvnservices.com/{mgid}
|
||||||
|
@ -304,21 +268,6 @@ def _extract_mgid(self, webpage, url, title=None, data_zone=None):
|
||||||
except RegexNotFoundError:
|
except RegexNotFoundError:
|
||||||
mgid = None
|
mgid = None
|
||||||
|
|
||||||
if not title:
|
|
||||||
title = url_basename(url)
|
|
||||||
|
|
||||||
try:
|
|
||||||
window_data = self._parse_json(self._search_regex(
|
|
||||||
r'(?s)window.__DATA__ = (?P<json>{.+});', webpage,
|
|
||||||
'JSON Window Data', default=None, fatal=False, group='json'), title, fatal=False)
|
|
||||||
main_container = None
|
|
||||||
for i in range(len(window_data['children'])):
|
|
||||||
if window_data['children'][i]['type'] == 'MainContainer':
|
|
||||||
main_container = window_data['children'][i]
|
|
||||||
mgid = main_container['children'][0]['props']['media']['video']['config']['uri']
|
|
||||||
except (KeyError, IndexError, TypeError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
if mgid is None or ':' not in mgid:
|
if mgid is None or ':' not in mgid:
|
||||||
mgid = self._search_regex(
|
mgid = self._search_regex(
|
||||||
[r'data-mgid="(.*?)"', r'swfobject\.embedSWF\(".*?(mgid:.*?)"'],
|
[r'data-mgid="(.*?)"', r'swfobject\.embedSWF\(".*?(mgid:.*?)"'],
|
||||||
|
@ -331,10 +280,7 @@ def _extract_mgid(self, webpage, url, title=None, data_zone=None):
|
||||||
r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=None)
|
r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=None)
|
||||||
|
|
||||||
if not mgid:
|
if not mgid:
|
||||||
mgid = self._extract_new_triforce_mgid(webpage, url)
|
mgid = self._extract_triforce_mgid(webpage)
|
||||||
|
|
||||||
if not mgid:
|
|
||||||
mgid = self._extract_triforce_mgid(webpage, data_zone)
|
|
||||||
|
|
||||||
if not mgid:
|
if not mgid:
|
||||||
data = self._parse_json(self._search_regex(
|
data = self._parse_json(self._search_regex(
|
||||||
|
@ -348,8 +294,8 @@ def _extract_mgid(self, webpage, url, title=None, data_zone=None):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
title = url_basename(url)
|
title = url_basename(url)
|
||||||
webpage = self._download_webpage(url, title)
|
webpage = self._download_webpage(url, title)
|
||||||
mgid = self._extract_mgid(webpage, url, title=title)
|
mgid = self._extract_mgid(webpage)
|
||||||
videos_info = self._get_videos_info(mgid, url=url)
|
videos_info = self._get_videos_info(mgid)
|
||||||
return videos_info
|
return videos_info
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue