[ie/pialive] Follow your steps (#1)

* [ie/pialive] Support detecting upcoming and ended live events

* Pack API arguments

* Fix UnboundLocalError for "chat_room_url"

* Extract video_id from the query string using "parse_qs()"

* Fix tests
This commit is contained in:
Mozi 2024-09-25 14:47:15 +08:00 committed by GitHub
parent d993580e6f
commit a75a02ad2a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 46 additions and 9 deletions

View file

@ -1,5 +1,14 @@
from .common import InfoExtractor
from ..utils import extract_attributes, multipart_encode, url_or_none
from ..utils import (
ExtractorError,
clean_html,
extract_attributes,
get_element_by_class,
get_element_html_by_class,
multipart_encode,
unified_timestamp,
url_or_none,
)
from ..utils.traversal import traverse_obj
@ -24,6 +33,7 @@ class PiaLiveIE(InfoExtractor):
'skip_download': True,
'ignore_no_formats_error': True,
},
'skip': 'The video is no longer available',
},
{
'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krJdu0GVBVbVy01IwpJ6J3qBEm3d9TCTt1d0eWpsZGj7DrOjVOmS7GAWGwyscMgiThopJvzgWC4H5b-7XQjAfRZ',
@ -39,6 +49,7 @@ class PiaLiveIE(InfoExtractor):
'skip_download': True,
'ignore_no_formats_error': True,
},
'skip': 'The video is no longer available',
},
]
@ -53,28 +64,51 @@ def _real_extract(self, url):
program_code = self._extract_vars('programCode', webpage)
article_code = self._extract_vars('articleCode', webpage)
title = self._html_extract_title(webpage)
if get_element_html_by_class('play-end', webpage):
raise ExtractorError('The video is no longer available', expected=True, video_id=program_code)
if start_info := clean_html(get_element_by_class('play-waiting__date', webpage)):
date, time = self._search_regex(
r'(?P<date>\d{4}/\d{1,2}/\d{1,2})\([月火水木金土日]\)(?P<time>\d{2}:\d{2})',
start_info, 'start_info', fatal=False, group=('date', 'time'))
if all((date, time)):
release_timestamp_str = f'{date} {time} +09:00'
release_timestamp = unified_timestamp(release_timestamp_str)
self.raise_no_formats(f'The video will be available after {release_timestamp_str}', expected=True)
return {
'id': program_code,
'title': title,
'live_status': 'is_upcoming',
'release_timestamp': release_timestamp,
}
payload, content_type = multipart_encode({
'play_url': video_key,
'api_key': self.API_KEY,
})
api_kwargs = {
'video_id': program_code,
'data': payload,
'headers': {'Content-Type': content_type, 'Referer': self.PLAYER_ROOT_URL},
}
player_tag_list = self._download_json(
f'{self.PIA_LIVE_API_URL}/perf/player-tag-list/{program_code}', program_code,
data=payload, headers={'Content-Type': content_type, 'Referer': self.PLAYER_ROOT_URL},
f'{self.PIA_LIVE_API_URL}/perf/player-tag-list/{program_code}', **api_kwargs,
note='Fetching player tag list', errnote='Unable to fetch player tag list')
if self.get_param('getcomments'):
chat_room_url = traverse_obj(self._download_json(
f'{self.PIA_LIVE_API_URL}/perf/chat-tag-list/{program_code}/{article_code}', program_code,
data=payload, headers={'Content-Type': content_type, 'Referer': self.PLAYER_ROOT_URL},
f'{self.PIA_LIVE_API_URL}/perf/chat-tag-list/{program_code}/{article_code}', **api_kwargs,
note='Fetching chat info', errnote='Unable to fetch chat info', fatal=False),
('data', 'chat_one_tag', {extract_attributes}, 'src', {url_or_none}))
else:
chat_room_url = None
return self.url_result(
extract_attributes(player_tag_list['data']['movie_one_tag'])['src'], url_transparent=True,
video_title=self._html_extract_title(webpage), display_id=program_code,
__post_extractor=self.extract_comments(program_code, chat_room_url))
video_title=title, display_id=program_code, __post_extractor=self.extract_comments(
program_code, chat_room_url))
def _get_comments(self, video_id, chat_room_url):
if not chat_room_url:

View file

@ -10,6 +10,7 @@ class PIAULIZAPortalAPIIE(InfoExtractor):
'url': 'https://player-api.p.uliza.jp/v1/players/timeshift-disabled/pia/admin?type=normal&playerobjectname=ulizaPlayer&name=livestream01_dvr&repeatable=true',
'info_dict': {
'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
'ext': 'mp4',
'title': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
'live_status': 'was_live',
},
@ -18,6 +19,7 @@ class PIAULIZAPortalAPIIE(InfoExtractor):
'url': 'https://player-api.p.uliza.jp/v1/players/uliza_jp_gallery_normal/promotion/admin?type=presentation&name=cookings&targetid=player1',
'info_dict': {
'id': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
'ext': 'mp4',
'title': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
'live_status': 'not_live',
},
@ -26,6 +28,7 @@ class PIAULIZAPortalAPIIE(InfoExtractor):
'url': 'https://player-api.p.uliza.jp/v1/players/default-player/pia/admin?type=normal&name=pia_movie_uliza_fix&targetid=ulizahtml5&repeatable=true',
'info_dict': {
'id': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
'ext': 'mp4',
'title': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
'live_status': 'not_live',
},
@ -42,7 +45,7 @@ def _real_extract(self, url):
m3u8_url = self._search_regex(
r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)', player_data,
'm3u8 url', default=None)
video_id = self._search_regex(r'&?ss=([\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})&?', m3u8_url, 'video id', default=display_id)
video_id = parse_qs(m3u8_url).get('ss', [display_id])[0]
formats = self._extract_m3u8_formats(m3u8_url, video_id)
m3u8_type = self._search_regex(