[generic] Parse RSS enclosure URLs (Fixes #5091)

This commit is contained in:
Philipp Hagemeister 2015-03-02 15:21:11 +01:00
parent c75f0b361a
commit 76c73715fb

View file

@ -26,6 +26,7 @@
unsmuggle_url,
UnsupportedError,
url_basename,
xpath_text,
)
from .brightcove import BrightcoveIE
from .ooyala import OoyalaIE
@ -569,6 +570,16 @@ class GenericIE(InfoExtractor):
'title': 'John Carlson Postgame 2/25/15',
},
},
# RSS feed with enclosure
{
'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
'info_dict': {
'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
'ext': 'm4v',
'upload_date': '20150228',
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
}
}
]
def report_following_redirect(self, new_url):
@ -580,11 +591,24 @@ def _extract_rss(self, url, video_id, doc):
playlist_desc_el = doc.find('./channel/description')
playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
entries = [{
entries = []
for it in doc.findall('./channel/item'):
next_url = xpath_text(it, 'link', fatal=False)
if not next_url:
enclosure_nodes = it.findall('./enclosure')
for e in enclosure_nodes:
next_url = e.attrib.get('url')
if next_url:
break
if not next_url:
continue
entries.append({
'_type': 'url',
'url': e.find('link').text,
'title': e.find('title').text,
} for e in doc.findall('./channel/item')]
'url': next_url,
'title': it.find('title').text,
})
return {
'_type': 'playlist',