From 76c73715fb1e0eee61ace5ff7855d8237abdcd54 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 2 Mar 2015 15:21:11 +0100 Subject: [PATCH] [generic] Parse RSS enclosure URLs (Fixes #5091) --- youtube_dl/extractor/generic.py | 34 ++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 27e2bc3001..5dc53685cf 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -26,6 +26,7 @@ unsmuggle_url, UnsupportedError, url_basename, + xpath_text, ) from .brightcove import BrightcoveIE from .ooyala import OoyalaIE @@ -569,6 +570,16 @@ class GenericIE(InfoExtractor): 'title': 'John Carlson Postgame 2/25/15', }, }, + # RSS feed with enclosure + { + 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', + 'info_dict': { + 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624', + 'ext': 'm4v', + 'upload_date': '20150228', + 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', + } + } ] def report_following_redirect(self, new_url): @@ -580,11 +591,24 @@ def _extract_rss(self, url, video_id, doc): playlist_desc_el = doc.find('./channel/description') playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text - entries = [{ - '_type': 'url', - 'url': e.find('link').text, - 'title': e.find('title').text, - } for e in doc.findall('./channel/item')] + entries = [] + for it in doc.findall('./channel/item'): + next_url = xpath_text(it, 'link', fatal=False) + if not next_url: + enclosure_nodes = it.findall('./enclosure') + for e in enclosure_nodes: + next_url = e.attrib.get('url') + if next_url: + break + + if not next_url: + continue + + entries.append({ + '_type': 'url', + 'url': next_url, + 'title': it.find('title').text, + }) return { '_type': 'playlist',