[ie/podbayfm] Fix extraction (#10195)

Authored by: bashonly, seproDev

Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
This commit is contained in:
bashonly 2024-06-16 19:05:46 -05:00 committed by GitHub
parent d6c2c2bc84
commit d4b52ce3fc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -1,28 +1,40 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import OnDemandPagedList, int_or_none, jwt_decode_hs256, try_call from ..utils import (
OnDemandPagedList,
clean_html,
int_or_none,
jwt_decode_hs256,
url_or_none,
)
from ..utils.traversal import traverse_obj
def result_from_props(props, episode_id=None):
    """Build the info dict for a single Podbay episode.

    Args:
        props: Mapping of episode properties (the caller passes
            ``pageProps['episode']`` from the page's Next.js data).
        episode_id: Accepted only so that older two-argument callers keep
            working. It is ignored: the ID is read from the ``_id`` property.

    Returns:
        A dict with the metadata fields that could be read from ``props``,
        plus the constant ``ext`` and ``vcodec`` entries.
    """
    return {
        # Every field is read via traverse_obj and passed through a
        # type/sanitising filter instead of being indexed directly.
        # NOTE(review): per traverse_obj's documented behaviour, fields that
        # are missing or fail their filter should be omitted rather than
        # raise -- confirm against the traversal helper.
        **traverse_obj(props, {
            'id': ('_id', {str}),
            'title': ('title', {str}),
            'url': ('mediaURL', {url_or_none}),
            'description': ('description', {clean_html}),
            # 'image' is passed through jwt_decode_hs256; the decoded payload's
            # 'url' entry is used as the thumbnail.
            'thumbnail': ('image', {jwt_decode_hs256}, 'url', {url_or_none}),
            'timestamp': ('timestamp', {int_or_none}),
            'duration': ('duration', {int_or_none}),
        }),
        'ext': 'mp3',
        'vcodec': 'none',
    }
class PodbayFMIE(InfoExtractor): class PodbayFMIE(InfoExtractor):
_VALID_URL = r'https?://podbay\.fm/p/[^/]*/e/(?P<id>[^/]*)/?(?:[\?#].*)?$' _VALID_URL = r'https?://podbay\.fm/p/[^/?#]+/e/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://podbay.fm/p/behind-the-bastards/e/1647338400', 'url': 'https://podbay.fm/p/behind-the-bastards/e/1647338400',
'md5': '98b41285dcf7989d105a4ed0404054cf', 'md5': '895ac8505de349515f5ee8a4a3195c93',
'info_dict': { 'info_dict': {
'id': '1647338400', 'id': '62306451f4a48e58d0c4d6a8',
'title': 'Part One: Kissinger', 'title': 'Part One: Kissinger',
'ext': 'mp3', 'ext': 'mp3',
'description': r're:^We begin our epic six part series on Henry Kissinger.+',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1647338400, 'timestamp': 1647338400,
'duration': 5001, 'duration': 5001,
@@ -34,24 +46,25 @@ def _real_extract(self, url):
episode_id = self._match_id(url) episode_id = self._match_id(url)
webpage = self._download_webpage(url, episode_id) webpage = self._download_webpage(url, episode_id)
data = self._search_nextjs_data(webpage, episode_id) data = self._search_nextjs_data(webpage, episode_id)
return result_from_props(data['props']['pageProps']['episode'], episode_id) return result_from_props(data['props']['pageProps']['episode'])
class PodbayFMChannelIE(InfoExtractor): class PodbayFMChannelIE(InfoExtractor):
_VALID_URL = r'https?://podbay\.fm/p/(?P<id>[^/]*)/?(?:[\?#].*)?$' _VALID_URL = r'https?://podbay\.fm/p/(?P<id>[^/?#]+)/?(?:$|[?#])'
_TESTS = [{ _TESTS = [{
'url': 'https://podbay.fm/p/behind-the-bastards', 'url': 'https://podbay.fm/p/behind-the-bastards',
'info_dict': { 'info_dict': {
'id': 'behind-the-bastards', 'id': 'behind-the-bastards',
'title': 'Behind the Bastards', 'title': 'Behind the Bastards',
}, },
'playlist_mincount': 21,
}] }]
_PAGE_SIZE = 10 _PAGE_SIZE = 10
def _fetch_page(self, channel_id, pagenum): def _fetch_page(self, channel_id, pagenum):
return self._download_json( return self._download_json(
f'https://podbay.fm/api/podcast?reverse=true&page={pagenum}&slug={channel_id}', f'https://podbay.fm/api/podcast?reverse=true&page={pagenum}&slug={channel_id}',
channel_id)['podcast'] f'Downloading channel JSON page {pagenum + 1}', channel_id)['podcast']
@staticmethod @staticmethod
def _results_from_page(channel_id, page): def _results_from_page(channel_id, page):