refactor and improve metadata extraction

Authored by: bashonly
This commit is contained in:
bashonly 2024-10-29 16:44:21 -05:00
parent bf0cf9995d
commit 5aa715915b
No known key found for this signature in database
GPG key ID: 783F096F253D15B0

View file

@ -1,6 +1,15 @@
import json
from .common import InfoExtractor
from ..utils import (
clean_html,
int_or_none,
join_nonempty,
parse_iso8601,
str_or_none,
url_or_none,
)
from ..utils.traversal import traverse_obj
class GameDevTVDashboardIE(InfoExtractor):
@ -37,34 +46,53 @@ def _real_initialize(self):
self.raise_login_required(
'This content is only available with purchase', method='password')
def _entries(self, data, course_id, course_info):
    """Yield one info dict per lecture of the course.

    data is the course object (the caller passes the API response's
    'data' member), course_id is the ID matched from the URL and
    course_info is the course-level metadata dict whose values are
    used as fallbacks for every lecture.
    """
    for section in traverse_obj(data, ('sections', ..., {dict})):
        # Section-level metadata is exposed through the season_* fields
        section_info = traverse_obj(section, {
            'season_id': ('id', {str_or_none}),
            'season': ('title', {str}),
            'season_number': ('order', {int_or_none}),
        })
        # Only keep lectures whose video.playListUrl is a valid URL
        for lecture in traverse_obj(section, ('lectures', lambda _, v: url_or_none(v['video']['playListUrl']))):
            display_id = join_nonempty(course_id, section_info.get('season_id'), lecture.get('id'))
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                lecture['video']['playListUrl'], display_id, 'mp4', m3u8_id='hls')
            # Later keys override earlier ones: course-level values come
            # first, then section-level values, then lecture-level values
            yield {
                **course_info,
                **section_info,
                'id': display_id,  # fallback
                'display_id': display_id,
                'formats': formats,
                'subtitles': subtitles,
                'series': course_info.get('title'),
                'series_id': course_id,
                **traverse_obj(lecture, {
                    'id': ('video', 'guid', {str}),
                    'title': ('title', {str}),
                    'alt_title': ('video', 'title', {str}),
                    'description': ('description', {clean_html}),
                    'episode': ('title', {str}),
                    'episode_number': ('order', {int_or_none}),
                    'duration': ('video', 'duration_in_sec', {int_or_none}),
                    'timestamp': ('video', 'created_at', {parse_iso8601}),
                    'modified_timestamp': ('video', 'updated_at', {parse_iso8601}),
                    'thumbnail': ('video', 'thumbnailUrl', {url_or_none}),
                }),
            }
def _real_extract(self, url):
    """Extract a purchased course as a playlist of its lectures.

    Downloads the course JSON from the API using self._API_HEADERS and
    returns a playlist result that carries the course-level metadata.
    """
    course_id = self._match_id(url)
    # Everything that is read below lives under the response's 'data' key
    data = self._download_json(
        f'https://api.gamedev.tv/api/courses/my/{course_id}', course_id,
        headers=self._API_HEADERS)['data']

    # Course-level metadata: used for the playlist itself and handed to
    # _entries() so that each lecture can fall back to it
    course_info = traverse_obj(data, {
        'title': ('title', {str}),
        'tags': ('tags', ..., 'name', {str}),
        'categories': ('categories', ..., 'title', {str}),
        'timestamp': ('created_at', {parse_iso8601}),
        'modified_timestamp': ('updated_at', {parse_iso8601}),
        'thumbnail': ('image', {url_or_none}),
    })

    return self.playlist_result(self._entries(data, course_id, course_info), course_id, **course_info)