[ie/pialive] Follow your steps (#1)

* [ie/pialive] Support detecting upcoming and ended live events
* Pack API arguments
* Fix UnboundLocalError for "chat_room_url"
* Extract video_id from the query string with "parse_qs()"
* Fix tests
2024-11-02 14:37:21 +00:00 · 2024-09-25 14:47:15 +08:00 · 2024-09-25 14:47:15 +08:00 · a75a02ad2a
parent d993580e6f
commit a75a02ad2a
2 changed files with 46 additions and 9 deletions
--- a/yt_dlp/extractor/pialive.py
+++ b/yt_dlp/extractor/pialive.py
@ -1,5 +1,14 @@
 from .common import InfoExtractor
-from ..utils import extract_attributes, multipart_encode, url_or_none
+from ..utils import (
+    ExtractorError,
+    clean_html,
+    extract_attributes,
+    get_element_by_class,
+    get_element_html_by_class,
+    multipart_encode,
+    unified_timestamp,
+    url_or_none,
+)
 from ..utils.traversal import traverse_obj


@ -24,6 +33,7 @@ class PiaLiveIE(InfoExtractor):
                'skip_download': True,
                'ignore_no_formats_error': True,
            },
+            'skip': 'The video is no longer available',
        },
        {
            'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krJdu0GVBVbVy01IwpJ6J3qBEm3d9TCTt1d0eWpsZGj7DrOjVOmS7GAWGwyscMgiThopJvzgWC4H5b-7XQjAfRZ',
@ -39,6 +49,7 @@ class PiaLiveIE(InfoExtractor):
                'skip_download': True,
                'ignore_no_formats_error': True,
            },
+            'skip': 'The video is no longer available',
        },
    ]

@ -53,28 +64,51 @@ def _real_extract(self, url):

        program_code = self._extract_vars('programCode', webpage)
        article_code = self._extract_vars('articleCode', webpage)
+        title = self._html_extract_title(webpage)
+
+        if get_element_html_by_class('play-end', webpage):
+            raise ExtractorError('The video is no longer available', expected=True, video_id=program_code)
+
+        if start_info := clean_html(get_element_by_class('play-waiting__date', webpage)):
+            date, time = self._search_regex(
+                r'(?P<date>\d{4}/\d{1,2}/\d{1,2})\([月火水木金土日]\)(?P<time>\d{2}:\d{2})',
+                start_info, 'start_info', fatal=False, group=('date', 'time'))
+            if all((date, time)):
+                release_timestamp_str = f'{date} {time} +09:00'
+                release_timestamp = unified_timestamp(release_timestamp_str)
+                self.raise_no_formats(f'The video will be available after {release_timestamp_str}', expected=True)
+                return {
+                    'id': program_code,
+                    'title': title,
+                    'live_status': 'is_upcoming',
+                    'release_timestamp': release_timestamp,
+                }

        payload, content_type = multipart_encode({
            'play_url': video_key,
            'api_key': self.API_KEY,
        })
-
+        api_kwargs = {
+            'video_id': program_code,
+            'data': payload,
+            'headers': {'Content-Type': content_type, 'Referer': self.PLAYER_ROOT_URL},
+        }

        player_tag_list = self._download_json(
-            f'{self.PIA_LIVE_API_URL}/perf/player-tag-list/{program_code}', program_code,
-            data=payload, headers={'Content-Type': content_type, 'Referer': self.PLAYER_ROOT_URL},
+            f'{self.PIA_LIVE_API_URL}/perf/player-tag-list/{program_code}', **api_kwargs,
            note='Fetching player tag list', errnote='Unable to fetch player tag list')
        if self.get_param('getcomments'):
            chat_room_url = traverse_obj(self._download_json(
-                f'{self.PIA_LIVE_API_URL}/perf/chat-tag-list/{program_code}/{article_code}', program_code,
-                data=payload, headers={'Content-Type': content_type, 'Referer': self.PLAYER_ROOT_URL},
+                f'{self.PIA_LIVE_API_URL}/perf/chat-tag-list/{program_code}/{article_code}', **api_kwargs,
                note='Fetching chat info', errnote='Unable to fetch chat info', fatal=False),
                ('data', 'chat_one_tag', {extract_attributes}, 'src', {url_or_none}))
+        else:
+            chat_room_url = None

        return self.url_result(
            extract_attributes(player_tag_list['data']['movie_one_tag'])['src'], url_transparent=True,
-            video_title=self._html_extract_title(webpage), display_id=program_code,
-            __post_extractor=self.extract_comments(program_code, chat_room_url))
+            video_title=title, display_id=program_code, __post_extractor=self.extract_comments(
+                program_code, chat_room_url))

    def _get_comments(self, video_id, chat_room_url):
        if not chat_room_url:
--- a/yt_dlp/extractor/piaulizaportal.py
+++ b/yt_dlp/extractor/piaulizaportal.py
@ -10,6 +10,7 @@ class PIAULIZAPortalAPIIE(InfoExtractor):
            'url': 'https://player-api.p.uliza.jp/v1/players/timeshift-disabled/pia/admin?type=normal&playerobjectname=ulizaPlayer&name=livestream01_dvr&repeatable=true',
            'info_dict': {
                'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
+                'ext': 'mp4',
                'title': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
                'live_status': 'was_live',
            },
@ -18,6 +19,7 @@ class PIAULIZAPortalAPIIE(InfoExtractor):
            'url': 'https://player-api.p.uliza.jp/v1/players/uliza_jp_gallery_normal/promotion/admin?type=presentation&name=cookings&targetid=player1',
            'info_dict': {
                'id': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
+                'ext': 'mp4',
                'title': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
                'live_status': 'not_live',
            },
@ -26,6 +28,7 @@ class PIAULIZAPortalAPIIE(InfoExtractor):
            'url': 'https://player-api.p.uliza.jp/v1/players/default-player/pia/admin?type=normal&name=pia_movie_uliza_fix&targetid=ulizahtml5&repeatable=true',
            'info_dict': {
                'id': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
+                'ext': 'mp4',
                'title': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
                'live_status': 'not_live',
            },
@ -42,7 +45,7 @@ def _real_extract(self, url):
        m3u8_url = self._search_regex(
            r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)', player_data,
            'm3u8 url', default=None)
-        video_id = self._search_regex(r'&?ss=([\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})&?', m3u8_url, 'video id', default=display_id)
+        video_id = parse_qs(m3u8_url).get('ss', [display_id])[0]

        formats = self._extract_m3u8_formats(m3u8_url, video_id)
        m3u8_type = self._search_regex(