From eb6d4ad1caf04ddf8c4278866790a259fed09629 Mon Sep 17 00:00:00 2001
From: Aleri Kaisattera <73682764+alerikaisattera@users.noreply.github.com>
Date: Fri, 24 Sep 2021 06:53:51 +0600
Subject: [PATCH] [Theta] Add extractor (#1068)

Authored by: alerikaisattera
---
 yt_dlp/extractor/extractors.py |  1 +
 yt_dlp/extractor/theta.py      | 54 ++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+)
 create mode 100644 yt_dlp/extractor/theta.py

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 7be6eec1f..4b1559886 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1428,6 +1428,7 @@
 from .thescene import TheSceneIE
 from .thestar import TheStarIE
 from .thesun import TheSunIE
+from .theta import ThetaIE
 from .theweatherchannel import TheWeatherChannelIE
 from .thisamericanlife import ThisAmericanLifeIE
 from .thisav import ThisAVIE
diff --git a/yt_dlp/extractor/theta.py b/yt_dlp/extractor/theta.py
new file mode 100644
index 000000000..34c0da815
--- /dev/null
+++ b/yt_dlp/extractor/theta.py
@@ -0,0 +1,54 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import try_get
+
+
+class ThetaIE(InfoExtractor):
+    # The channel alias must be captured in a group named "id" for _match_id()
+    _VALID_URL = r'https?://(?:www\.)?theta\.tv/(?P<id>[a-z0-9]+)'
+    _TESTS = [{
+        'url': 'https://www.theta.tv/davirus',
+        'skip': 'The live may have ended',
+        'info_dict': {
+            'id': 'DaVirus',
+            'ext': 'mp4',
+            'title': 'I choose you - My Community is King -👀 - YO HABLO ESPANOL - CODE DAVIRUS',
+            'thumbnail': r're:https://live-thumbnails-prod-theta-tv\.imgix\.net/thumbnail/.+\.jpg',
+        }
+    }, {
+        'url': 'https://www.theta.tv/mst3k',
+        'note': 'This channel is live 24/7',
+        'info_dict': {
+            'id': 'MST3K',
+            'ext': 'mp4',
+            'title': 'Mystery Science Theatre 3000 24/7 Powered by the THETA Network.',
+            'thumbnail': r're:https://user-prod-theta-tv\.imgix\.net/.+\.jpg',
+        }
+    }]
+
+    def _real_extract(self, url):
+        channel_id = self._match_id(url)
+        info = self._download_json(f'https://api.theta.tv/v1/channel?alias={channel_id}', channel_id)['body']
+
+        # Use the first non-embed entry whose resolution is 'master' or 'source';
+        # next() has no default here, so StopIteration is raised if none matches
+        m3u8_playlist = next(
+            data['url'] for data in info['live_stream']['video_urls']
+            if data.get('type') != 'embed' and data.get('resolution') in ('master', 'source'))
+
+        formats = self._extract_m3u8_formats(m3u8_playlist, channel_id, 'mp4', m3u8_id='hls', live=True)
+        self._sort_formats(formats)
+
+        channel = try_get(info, lambda x: x['user']['username'])  # using this field instead of channel_id due to capitalization
+
+        return {
+            'id': channel,
+            'title': try_get(info, lambda x: x['live_stream']['title']),
+            'channel': channel,
+            'view_count': try_get(info, lambda x: x['live_stream']['view_count']),
+            'is_live': True,
+            'formats': formats,
+            'thumbnail': try_get(info, lambda x: x['live_stream']['thumbnail_url']),
+        }