From b60419c51aa3eb9872e278e526cc5e62bf484462 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan@gmail.com>
Date: Tue, 2 Feb 2021 21:51:32 +0530
Subject: [PATCH] [youtube] More metadata extraction for channels/playlists

---
 youtube_dlc/extractor/common.py  |  8 ++--
 youtube_dlc/extractor/youtube.py | 69 +++++++++++++++++++++++---------
 2 files changed, 53 insertions(+), 24 deletions(-)

diff --git a/youtube_dlc/extractor/common.py b/youtube_dlc/extractor/common.py
index e13ba5a394..49d99bb557 100644
--- a/youtube_dlc/extractor/common.py
+++ b/youtube_dlc/extractor/common.py
@@ -336,9 +336,8 @@ class InfoExtractor(object):
     There must be a key "entries", which is a list, an iterable, or a PagedList
     object, each element of which is a valid dictionary by this specification.
 
-    Additionally, playlists can have "id", "title", "description", "uploader",
-    "uploader_id", "uploader_url", "duration" attributes with the same semantics
-    as videos (see above).
+    Additionally, playlists can have "id", "title", and any other relevant
+    attributes with the same semantics as videos (see above).
 
 
     _type "multi_video" indicates that there are multiple videos that
@@ -967,10 +966,11 @@ def playlist_from_matches(self, matches, playlist_id=None, playlist_title=None,
             urls, playlist_id=playlist_id, playlist_title=playlist_title)
 
     @staticmethod
-    def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
+    def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None, **kwargs):
         """Returns a playlist"""
         video_info = {'_type': 'playlist',
                       'entries': entries}
+        video_info.update(kwargs)
         if playlist_id:
             video_info['id'] = playlist_id
         if playlist_title:
diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py
index 0ba6a299e0..9b71776945 100644
--- a/youtube_dlc/extractor/youtube.py
+++ b/youtube_dlc/extractor/youtube.py
@@ -31,6 +31,7 @@
     clean_html,
     error_to_compat_str,
     ExtractorError,
+    format_field,
     float_or_none,
     get_element_by_id,
     int_or_none,
@@ -2675,6 +2676,7 @@ def decrypt_sig(mobj):
             'uploader': video_uploader,
             'uploader_id': video_uploader_id,
             'uploader_url': video_uploader_url,
+            'channel': video_uploader,
             'channel_id': channel_id,
             'channel_url': channel_url,
             'upload_date': upload_date,
@@ -3402,44 +3404,71 @@ def _extract_uploader(data):
                     uploader['uploader_url'] = urljoin(
                         'https://www.youtube.com/',
                         try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
-        return uploader
+        return {k:v for k, v in uploader.items() if v is not None}
 
     def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
+        playlist_id = title = description = channel_url = channel_name = channel_id = None
+        thumbnails_list = tags = []
+
         selected_tab = self._extract_selected_tab(tabs)
         renderer = try_get(
             data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
-        playlist_id = title = description = None
         if renderer:
-            channel_title = renderer.get('title') or item_id
-            tab_title = selected_tab.get('title')
-            title = channel_title or item_id
-            if tab_title:
-                title += ' - %s' % tab_title
-            description = renderer.get('description')
-            playlist_id = renderer.get('externalId')
+            channel_name = renderer.get('title')
+            channel_url = renderer.get('channelUrl')
+            channel_id = renderer.get('externalId')
 
-        # this has thumbnails, but there is currently no thumbnail field for playlists
-        # sidebar.playlistSidebarRenderer has even more data, but its stucture is more complec
-        renderer = try_get(
-            data, lambda x: x['microformat']['microformatDataRenderer'], dict)
         if not renderer:
             renderer = try_get(
                 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
         if renderer:
             title = renderer.get('title')
             description = renderer.get('description')
-            playlist_id = item_id
+            playlist_id = channel_id
+            tags = (renderer.get('keywords') or '').split()  # guard against a missing or null 'keywords'
+            thumbnails_list = (  # channel avatar first, else the playlist sidebar thumbnail; both are optional
+                try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
+                or try_get(data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'], list)
+                or [])
+
+        thumbnails = []
+        for t in thumbnails_list:
+            if not isinstance(t, dict):
+                continue
+            thumbnail_url = url_or_none(t.get('url'))
+            if not thumbnail_url:
+                continue
+            thumbnails.append({
+                'url': thumbnail_url,
+                'width': int_or_none(t.get('width')),
+                'height': int_or_none(t.get('height')),
+            })
 
         if playlist_id is None:
             playlist_id = item_id
         if title is None:
-            title = "Youtube " + playlist_id.title()
-        playlist = self.playlist_result(
+            title = playlist_id
+        title += format_field(selected_tab, 'title', ' - %s')
+
+        metadata = {
+            'playlist_id': playlist_id,
+            'playlist_title': title,
+            'playlist_description': description,
+            'uploader': channel_name,
+            'uploader_id': channel_id,
+            'uploader_url': channel_url,
+            'thumbnails': thumbnails,
+            'tags': tags,
+        }
+        if not channel_id:
+            metadata.update(self._extract_uploader(data))
+        metadata.update({
+            'channel': metadata['uploader'],
+            'channel_id': metadata['uploader_id'],
+            'channel_url': metadata['uploader_url']})
+        return self.playlist_result(
             self._entries(selected_tab, identity_token),
-            playlist_id=playlist_id, playlist_title=title,
-            playlist_description=description)
-        playlist.update(self._extract_uploader(data))
-        return playlist
+            **metadata)
 
     def _extract_from_playlist(self, item_id, url, data, playlist):
         title = playlist.get('title') or try_get(