[tlc] fix extraction and update extractor to use BrightcoveNewIE

2025-01-05 23:54:24 +00:00 · 2016-03-14 21:53:00 +01:00 · 2016-03-14 21:53:00 +01:00 · bf475e1990
parent 203f3d779a
commit bf475e1990
1 changed files with 13 additions and 22 deletions
--- a/youtube_dl/extractor/tlc.py
+++ b/youtube_dl/extractor/tlc.py
@@ -4,12 +4,12 @@

 from .common import InfoExtractor
 from .brightcove import BrightcoveLegacyIE
-from ..compat import compat_urlparse
+from ..compat import compat_parse_qs


 class TlcDeIE(InfoExtractor):
    IE_NAME = 'tlc.de'
-    _VALID_URL = r'http://www\.tlc\.de/sendungen/[^/]+/videos/(?P<title>[^/?]+)'
+    _VALID_URL = r'http://www\.tlc\.de/(?:[^/]+/)*videos/(?P<title>[^/?#]+)?(?:.*#(?P<id>\d+))?'

    _TEST = {
        'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
@@ -17,32 +17,23 @@ class TlcDeIE(InfoExtractor):
            'id': '3235167922001',
            'ext': 'mp4',
            'title': 'Breaking Amish: Die Welt da draußen',
-            'uploader': 'Discovery Networks - Germany',
            'description': (
                'Vier Amische und eine Mennonitin wagen in New York'
                '  den Sprung in ein komplett anderes Leben. Begleitet sie auf'
                ' ihrem spannenden Weg.'),
+            'timestamp': 1396598084,
+            'upload_date': '20140404',
+            'uploader_id': '1659832546',
        },
    }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1659832546/default_default/index.html?videoId=%s'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        title = mobj.group('title')
-        webpage = self._download_webpage(url, title)
-        iframe_url = self._search_regex(
-            '<iframe src="(http://www\.tlc\.de/wp-content/.+?)"', webpage,
-            'iframe url')
-        # Otherwise we don't get the correct 'BrightcoveExperience' element,
-        # example: http://www.tlc.de/sendungen/cake-boss/videos/cake-boss-cannoli-drama/
-        iframe_url = iframe_url.replace('.htm?', '.php?')
-        url_fragment = compat_urlparse.urlparse(url).fragment
-        if url_fragment:
-            # Since the fragment is not send to the server, we always get the same iframe
-            iframe_url = re.sub(r'playlist=(\d+)', 'playlist=%s' % url_fragment, iframe_url)
-        iframe = self._download_webpage(iframe_url, title)
-
-        return {
-            '_type': 'url',
-            'url': BrightcoveLegacyIE._extract_brightcove_url(iframe),
-            'ie': BrightcoveLegacyIE.ie_key(),
-        }
+        brightcove_id = mobj.group('id')
+        if not brightcove_id:
+            title = mobj.group('title')
+            webpage = self._download_webpage(url, title)
+            brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
+            brightcove_id = compat_parse_qs(brightcove_legacy_url)['@videoPlayer'][0]
+        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)