[extractor/orf:radio] Rewrite extractors

Closes #4522
This commit is contained in:
pukkandan 2022-08-02 03:53:27 +05:30
parent 0647d9251f
commit 5770293d25
No known key found for this signature in database
GPG key ID: 7EEE9E1E817D0A39
2 changed files with 103 additions and 194 deletions

View file

@ -1210,19 +1210,8 @@
from .ora import OraTVIE from .ora import OraTVIE
from .orf import ( from .orf import (
ORFTVthekIE, ORFTVthekIE,
ORFFM4IE,
ORFFM4StoryIE, ORFFM4StoryIE,
ORFOE1IE, ORFRadioIE,
ORFOE3IE,
ORFNOEIE,
ORFWIEIE,
ORFBGLIE,
ORFOOEIE,
ORFSTMIE,
ORFKTNIE,
ORFSBGIE,
ORFTIRIE,
ORFVBGIE,
ORFIPTVIE, ORFIPTVIE,
) )
from .outsidetv import OutsideTVIE from .outsidetv import OutsideTVIE

View file

@ -12,9 +12,10 @@
join_nonempty, join_nonempty,
orderedSet, orderedSet,
remove_end, remove_end,
make_archive_id,
smuggle_url, smuggle_url,
str_or_none,
strip_jsonp, strip_jsonp,
try_call,
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
unsmuggle_url, unsmuggle_url,
@ -200,208 +201,99 @@ def _real_extract(self, url):
class ORFRadioIE(InfoExtractor): class ORFRadioIE(InfoExtractor):
def _real_extract(self, url): IE_NAME = 'orf:radio'
mobj = self._match_valid_url(url)
show_date = mobj.group('date')
show_id = mobj.group('show')
data = self._download_json( STATION_INFO = {
'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s' 'fm4': ('fm4', 'fm4', 'orffm4'),
% (self._API_STATION, show_id, show_date), show_id) 'noe': ('noe', 'oe2n', 'orfnoe'),
'wien': ('wie', 'oe2w', 'orfwie'),
entries = [] 'burgenland': ('bgl', 'oe2b', 'orfbgl'),
for info in data['streams']: 'ooe': ('ooe', 'oe2o', 'orfooe'),
loop_stream_id = str_or_none(info.get('loopStreamId')) 'steiermark': ('stm', 'oe2st', 'orfstm'),
if not loop_stream_id: 'kaernten': ('ktn', 'oe2k', 'orfktn'),
continue 'salzburg': ('sbg', 'oe2s', 'orfsbg'),
title = str_or_none(data.get('title')) 'tirol': ('tir', 'oe2t', 'orftir'),
if not title: 'vorarlberg': ('vbg', 'oe2v', 'orfvbg'),
continue 'oe3': ('oe3', 'oe3', 'orfoe3'),
start = int_or_none(info.get('start'), scale=1000) 'oe1': ('oe1', 'oe1', 'orfoe1'),
end = int_or_none(info.get('end'), scale=1000)
duration = end - start if end and start else None
entries.append({
'id': loop_stream_id.replace('.mp3', ''),
'url': 'https://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id),
'title': title,
'description': clean_html(data.get('subtitle')),
'duration': duration,
'timestamp': start,
'ext': 'mp3',
'series': data.get('programTitle'),
})
return {
'_type': 'playlist',
'id': show_id,
'title': data.get('title'),
'description': clean_html(data.get('subtitle')),
'entries': entries,
} }
_STATION_RE = '|'.join(map(re.escape, STATION_INFO.keys()))
_VALID_URL = rf'''(?x)
https?://(?:
(?P<station>{_STATION_RE})\.orf\.at/player|
radiothek\.orf\.at/(?P<station2>{_STATION_RE})
)/(?P<date>[0-9]+)/(?P<show>\w+)'''
class ORFFM4IE(ORFRadioIE): _TESTS = [{
IE_NAME = 'orf:fm4' 'url': 'https://radiothek.orf.at/ooe/20220801/OGMO',
IE_DESC = 'radio FM4'
_VALID_URL = r'https?://(?P<station>fm4)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>4\w+)'
_API_STATION = 'fm4'
_LOOP_STATION = 'fm4'
_TEST = {
'url': 'http://fm4.orf.at/player/20170107/4CC',
'md5': '2b0be47375432a7ef104453432a19212',
'info_dict': { 'info_dict': {
'id': '2017-01-07_2100_tl_54_7DaysSat18_31295', 'id': 'OGMO',
'ext': 'mp3', 'title': 'Guten Morgen OÖ',
'title': 'Solid Steel Radioshow', 'description': 'md5:a3f6083399ef92b8cbe2d421b180835a',
'description': 'Die Mixshow von Coldcut und Ninja Tune.',
'duration': 3599,
'timestamp': 1483819257,
'upload_date': '20170107',
}, },
'skip': 'Shows from ORF radios are only available for 7 days.', 'playlist': [{
'only_matching': True, 'md5': 'f33147d954a326e338ea52572c2810e8',
'info_dict': {
'id': '2022-08-01_0459_tl_66_7DaysMon1_319062',
'ext': 'mp3',
'title': 'Guten Morgen OÖ',
'upload_date': '20220801',
'duration': 18000,
'timestamp': 1659322789,
'description': 'md5:a3f6083399ef92b8cbe2d421b180835a',
} }
}]
}, {
class ORFNOEIE(ORFRadioIE): 'url': 'https://ooe.orf.at/player/20220801/OGMO',
IE_NAME = 'orf:noe' 'info_dict': {
IE_DESC = 'Radio Niederösterreich' 'id': 'OGMO',
_VALID_URL = r'https?://(?P<station>noe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' 'title': 'Guten Morgen OÖ',
_API_STATION = 'noe' 'description': 'md5:a3f6083399ef92b8cbe2d421b180835a',
_LOOP_STATION = 'oe2n' },
'playlist': [{
_TEST = { 'md5': 'f33147d954a326e338ea52572c2810e8',
'info_dict': {
'id': '2022-08-01_0459_tl_66_7DaysMon1_319062',
'ext': 'mp3',
'title': 'Guten Morgen OÖ',
'upload_date': '20220801',
'duration': 18000,
'timestamp': 1659322789,
'description': 'md5:a3f6083399ef92b8cbe2d421b180835a',
}
}]
}, {
'url': 'http://fm4.orf.at/player/20170107/4CC',
'only_matching': True,
}, {
'url': 'https://noe.orf.at/player/20200423/NGM', 'url': 'https://noe.orf.at/player/20200423/NGM',
'only_matching': True, 'only_matching': True,
} }, {
class ORFWIEIE(ORFRadioIE):
IE_NAME = 'orf:wien'
IE_DESC = 'Radio Wien'
_VALID_URL = r'https?://(?P<station>wien)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'wie'
_LOOP_STATION = 'oe2w'
_TEST = {
'url': 'https://wien.orf.at/player/20200423/WGUM', 'url': 'https://wien.orf.at/player/20200423/WGUM',
'only_matching': True, 'only_matching': True,
} }, {
class ORFBGLIE(ORFRadioIE):
IE_NAME = 'orf:burgenland'
IE_DESC = 'Radio Burgenland'
_VALID_URL = r'https?://(?P<station>burgenland)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'bgl'
_LOOP_STATION = 'oe2b'
_TEST = {
'url': 'https://burgenland.orf.at/player/20200423/BGM', 'url': 'https://burgenland.orf.at/player/20200423/BGM',
'only_matching': True, 'only_matching': True,
} }, {
class ORFOOEIE(ORFRadioIE):
IE_NAME = 'orf:oberoesterreich'
IE_DESC = 'Radio Oberösterreich'
_VALID_URL = r'https?://(?P<station>ooe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'ooe'
_LOOP_STATION = 'oe2o'
_TEST = {
'url': 'https://ooe.orf.at/player/20200423/OGMO',
'only_matching': True,
}
class ORFSTMIE(ORFRadioIE):
IE_NAME = 'orf:steiermark'
IE_DESC = 'Radio Steiermark'
_VALID_URL = r'https?://(?P<station>steiermark)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'stm'
_LOOP_STATION = 'oe2st'
_TEST = {
'url': 'https://steiermark.orf.at/player/20200423/STGMS', 'url': 'https://steiermark.orf.at/player/20200423/STGMS',
'only_matching': True, 'only_matching': True,
} }, {
class ORFKTNIE(ORFRadioIE):
IE_NAME = 'orf:kaernten'
IE_DESC = 'Radio Kärnten'
_VALID_URL = r'https?://(?P<station>kaernten)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'ktn'
_LOOP_STATION = 'oe2k'
_TEST = {
'url': 'https://kaernten.orf.at/player/20200423/KGUMO', 'url': 'https://kaernten.orf.at/player/20200423/KGUMO',
'only_matching': True, 'only_matching': True,
} }, {
class ORFSBGIE(ORFRadioIE):
IE_NAME = 'orf:salzburg'
IE_DESC = 'Radio Salzburg'
_VALID_URL = r'https?://(?P<station>salzburg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'sbg'
_LOOP_STATION = 'oe2s'
_TEST = {
'url': 'https://salzburg.orf.at/player/20200423/SGUM', 'url': 'https://salzburg.orf.at/player/20200423/SGUM',
'only_matching': True, 'only_matching': True,
} }, {
class ORFTIRIE(ORFRadioIE):
IE_NAME = 'orf:tirol'
IE_DESC = 'Radio Tirol'
_VALID_URL = r'https?://(?P<station>tirol)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'tir'
_LOOP_STATION = 'oe2t'
_TEST = {
'url': 'https://tirol.orf.at/player/20200423/TGUMO', 'url': 'https://tirol.orf.at/player/20200423/TGUMO',
'only_matching': True, 'only_matching': True,
} }, {
class ORFVBGIE(ORFRadioIE):
IE_NAME = 'orf:vorarlberg'
IE_DESC = 'Radio Vorarlberg'
_VALID_URL = r'https?://(?P<station>vorarlberg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'vbg'
_LOOP_STATION = 'oe2v'
_TEST = {
'url': 'https://vorarlberg.orf.at/player/20200423/VGUM', 'url': 'https://vorarlberg.orf.at/player/20200423/VGUM',
'only_matching': True, 'only_matching': True,
} }, {
class ORFOE3IE(ORFRadioIE):
IE_NAME = 'orf:oe3'
IE_DESC = 'Radio Österreich 3'
_VALID_URL = r'https?://(?P<station>oe3)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'oe3'
_LOOP_STATION = 'oe3'
_TEST = {
'url': 'https://oe3.orf.at/player/20200424/3WEK', 'url': 'https://oe3.orf.at/player/20200424/3WEK',
'only_matching': True, 'only_matching': True,
} }, {
class ORFOE1IE(ORFRadioIE):
IE_NAME = 'orf:oe1'
IE_DESC = 'Radio Österreich 1'
_VALID_URL = r'https?://(?P<station>oe1)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'oe1'
_LOOP_STATION = 'oe1'
_TEST = {
'url': 'http://oe1.orf.at/player/20170108/456544', 'url': 'http://oe1.orf.at/player/20170108/456544',
'md5': '34d8a6e67ea888293741c86a099b745b', 'md5': '34d8a6e67ea888293741c86a099b745b',
'info_dict': { 'info_dict': {
@ -413,8 +305,36 @@ class ORFOE1IE(ORFRadioIE):
'upload_date': '20170108', 'upload_date': '20170108',
}, },
'skip': 'Shows from ORF radios are only available for 7 days.' 'skip': 'Shows from ORF radios are only available for 7 days.'
}]
def _entries(self, data, station):
    """Yield an info dict for every stream of a broadcast that has a loop stream ID.

    @param data     Broadcast JSON; its 'streams' list is iterated, and its
                    'title', 'subtitle' and 'programTitle' are copied to each entry
    @param station  Key into STATION_INFO
    """
    # STATION_INFO values are 3-tuples; only the last two fields are needed here
    loop_station, old_ie = self.STATION_INFO[station][1:]

    for stream in data['streams']:
        loop_id = stream.get('loopStreamId')
        if loop_id:
            # The entry ID is the loop stream ID without its '.mp3' part
            video_id = loop_id.replace('.mp3', '')
            stream_url = f'https://loopstream01.apa.at/?channel={loop_station}&id={loop_id}'
            # Archive ID built from the third STATION_INFO field
            # NOTE(review): presumably the key of the former per-station extractor - confirm
            legacy_archive_id = make_archive_id(old_ie, video_id)
            yield {
                'id': video_id,
                'ext': 'mp3',
                'url': stream_url,
                '_old_archive_ids': [legacy_archive_id],
                'title': data.get('title'),
                'description': clean_html(data.get('subtitle')),
                # 'start' and 'end' are divided by 1000 for both duration and timestamp
                'duration': try_call(lambda: (stream['end'] - stream['start']) / 1000),
                'timestamp': int_or_none(stream.get('start'), scale=1000),
                'series': data.get('programTitle'),
            }
def _real_extract(self, url):
    """Download the broadcast JSON for the show addressed by `url` and return it as a playlist."""
    mobj = self._match_valid_url(url)
    # _VALID_URL captures the station in either `station` or `station2`,
    # depending on which URL alternative matched
    station = mobj.group('station') or mobj.group('station2')
    show_date, show_id = mobj.group('date', 'show')

    # First field of the STATION_INFO tuple is the station name used by the audio API
    api_station, _loop_station, _old_ie = self.STATION_INFO[station]
    api_url = f'http://audioapi.orf.at/{api_station}/api/json/current/broadcast/{show_id}/{show_date}'
    data = self._download_json(api_url, show_id)

    entries = self._entries(data, station)
    playlist_title = data.get('title')
    playlist_description = clean_html(data.get('subtitle'))
    return self.playlist_result(entries, show_id, playlist_title, playlist_description)
class ORFIPTVIE(InfoExtractor): class ORFIPTVIE(InfoExtractor):
IE_NAME = 'orf:iptv' IE_NAME = 'orf:iptv'