Merge pull request #188 from blackjack4494/SouthparkDE_MTV

[SouthparkDE/MTV] Add another mgid extraction path (MTV base extractor); update the SouthparkDE feed URL
This commit is contained in:
Tom-Oliver Heidel 2020-10-13 01:03:29 +02:00 committed by GitHub
commit d8f97cc1d3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 28 additions and 33 deletions

View file

@ -45,7 +45,7 @@ def _remove_template_parameter(url):
# Remove the templates, like &device={device} # Remove the templates, like &device={device}
return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url) return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url)
def _get_feed_url(self, uri): def _get_feed_url(self, uri, url=None):
return self._FEED_URL return self._FEED_URL
def _get_thumbnail_url(self, uri, itemdoc): def _get_thumbnail_url(self, uri, itemdoc):
@ -211,9 +211,9 @@ def _get_feed_query(self, uri):
data['lang'] = self._LANG data['lang'] = self._LANG
return data return data
def _get_videos_info(self, uri, use_hls=True): def _get_videos_info(self, uri, use_hls=True, url=None):
video_id = self._id_from_uri(uri) video_id = self._id_from_uri(uri)
feed_url = self._get_feed_url(uri) feed_url = self._get_feed_url(uri, url)
info_url = update_url_query(feed_url, self._get_feed_query(uri)) info_url = update_url_query(feed_url, self._get_feed_query(uri))
return self._get_videos_info_from_url(info_url, video_id, use_hls) return self._get_videos_info_from_url(info_url, video_id, use_hls)
@ -256,7 +256,6 @@ def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None):
return try_get(feed, lambda x: x['result']['data']['id'], compat_str) return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
def _extract_new_triforce_mgid(self, webpage, url='', video_id=None): def _extract_new_triforce_mgid(self, webpage, url='', video_id=None):
# print(compat_urlparse.urlparse(url).netloc)
if url == '': if url == '':
return return
domain = get_domain(url) domain = get_domain(url)
@ -281,7 +280,7 @@ def _extract_new_triforce_mgid(self, webpage, url='', video_id=None):
item_id = try_get(manifest, lambda x: x['manifest']['reporting']['itemId'], compat_str) item_id = try_get(manifest, lambda x: x['manifest']['reporting']['itemId'], compat_str)
if not item_id: if not item_id:
self.to_screen('Found no id!') self.to_screen('No id found!')
return return
# 'episode' can be anything. 'content' is used often as well # 'episode' can be anything. 'content' is used often as well
@ -301,6 +300,16 @@ def _extract_mgid(self, webpage, url, data_zone=None):
except RegexNotFoundError: except RegexNotFoundError:
mgid = None mgid = None
title = self._match_id(url)
try:
window_data = self._parse_json(self._search_regex(
r'(?s)window.__DATA__ = (?P<json>{.+});', webpage,
'JSON Window Data', default=None, fatal=False, group='json'), title, fatal=False)
mgid = window_data['children'][4]['children'][0]['props']['media']['video']['config']['uri']
except (KeyError, IndexError, TypeError):
pass
if mgid is None or ':' not in mgid: if mgid is None or ':' not in mgid:
mgid = self._search_regex( mgid = self._search_regex(
[r'data-mgid="(.*?)"', r'swfobject\.embedSWF\(".*?(mgid:.*?)"'], [r'data-mgid="(.*?)"', r'swfobject\.embedSWF\(".*?(mgid:.*?)"'],
@ -324,7 +333,7 @@ def _real_extract(self, url):
title = url_basename(url) title = url_basename(url)
webpage = self._download_webpage(url, title) webpage = self._download_webpage(url, title)
mgid = self._extract_mgid(webpage, url) mgid = self._extract_mgid(webpage, url)
videos_info = self._get_videos_info(mgid) videos_info = self._get_videos_info(mgid, url=url)
return videos_info return videos_info

View file

@ -44,40 +44,26 @@ class SouthParkEsIE(SouthParkIE):
class SouthParkDeIE(SouthParkIE): class SouthParkDeIE(SouthParkIE):
IE_NAME = 'southpark.de' IE_NAME = 'southpark.de'
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:clips|alle-episoden|collections)/(?P<id>.+?)(\?|#|$))' _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:videoclip|collections|folgen)/(?P<id>(?P<unique_id>.+?)/.+?)(?:\?|#|$))'
_FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/' # _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
_TESTS = [{ _TESTS = [{
'url': 'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured', 'url': 'https://www.southpark.de/videoclip/rsribv/south-park-rueckzug-zum-gummibonbon-wald',
'info_dict': { 'only_matching': True,
'id': '85487c96-b3b9-4e39-9127-ad88583d9bf2',
'ext': 'mp4',
'title': 'South Park|The Government Won\'t Respect My Privacy',
'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
'timestamp': 1380160800,
'upload_date': '20130926',
},
}, { }, {
# non-ASCII characters in initial URL 'url': 'https://www.southpark.de/folgen/jiru42/south-park-verkabelung-staffel-23-ep-9',
'url': 'http://www.southpark.de/alle-episoden/s18e09-hashtag-aufwärmen', 'only_matching': True,
'info_dict': {
'title': 'Hashtag „Aufwärmen“',
'description': 'Kyle will mit seinem kleinen Bruder Ike Videospiele spielen. Als der nicht mehr mit ihm spielen will, hat Kyle Angst, dass er die Kids von heute nicht mehr versteht.',
},
'playlist_count': 3,
}, { }, {
# non-ASCII characters in redirect URL 'url': 'https://www.southpark.de/collections/zzno5a/south-park-good-eats/7q26gp',
'url': 'http://www.southpark.de/alle-episoden/s18e09',
'info_dict': {
'title': 'Hashtag „Aufwärmen“',
'description': 'Kyle will mit seinem kleinen Bruder Ike Videospiele spielen. Als der nicht mehr mit ihm spielen will, hat Kyle Angst, dass er die Kids von heute nicht mehr versteht.',
},
'playlist_count': 3,
}, {
'url': 'http://www.southpark.de/collections/2476/superhero-showdown/1',
'only_matching': True, 'only_matching': True,
}] }]
def _get_feed_url(self, uri, url=None):
video_id = self._id_from_uri(uri)
config = self._download_json(
'http://media.mtvnservices.com/pmt/e1/access/index.html?uri=%s&configtype=edge&ref=%s' % (uri, url), video_id)
return self._remove_template_parameter(config['feedWithQueryParams'])
class SouthParkNlIE(SouthParkIE): class SouthParkNlIE(SouthParkIE):
IE_NAME = 'southpark.nl' IE_NAME = 'southpark.nl'