[gdcvault] Fix extraction (Closes #9107, closes #9114)

This commit is contained in:
Sergey M․ 2016-04-08 23:16:02 +06:00
parent fb38aa8b53
commit 3c6c7e7d7e

View file

@@ -159,9 +159,10 @@ def _real_extract(self, url):
'title': title,
}
PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/player.*?\.html.*?".*?</iframe>'
xml_root = self._html_search_regex(
r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>',
start_page, 'xml root', default=None)
PLAYER_REGEX, start_page, 'xml root', default=None)
if xml_root is None:
# Probably need to authenticate
login_res = self._login(webpage_url, display_id)
@@ -171,18 +172,19 @@ def _real_extract(self, url):
start_page = login_res
# Grab the url from the authenticated page
xml_root = self._html_search_regex(
r'<iframe src="(.*?)player.html.*?".*?</iframe>',
start_page, 'xml root')
PLAYER_REGEX, start_page, 'xml root')
xml_name = self._html_search_regex(
r'<iframe src=".*?\?xml=(.+?\.xml).*?".*?</iframe>',
start_page, 'xml filename', default=None)
if xml_name is None:
# Fallback to the older format
xml_name = self._html_search_regex(r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename')
xml_name = self._html_search_regex(
r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>',
start_page, 'xml filename')
xml_description_url = xml_root + 'xml/' + xml_name
xml_description = self._download_xml(xml_description_url, display_id)
xml_description = self._download_xml(
'%s/xml/%s' % (xml_root, xml_name), display_id)
video_title = xml_description.find('./metadata/title').text
video_formats = self._parse_mp4(xml_description)