Compare commits

...

4 commits

Author SHA1 Message Date
HobbyistDev 28b8f57b4b
[extractor/noice] Add NoicePodcast extractor (#5621)
Authored by: HobbyistDev
2022-12-08 19:28:36 +05:30
lkw123 dfc186d422
[extractor/xiami] Remove extractors (#5711)
Authored by: synthpop123
2022-12-08 18:13:29 +05:30
David Turner 42ec478fc4
[extractor/plutotv] Fix videos with non-zero start (#5745)
Authored by: digitall
2022-12-08 18:08:52 +05:30
pukkandan 7991ae57a8
[extractor/sibnet] Separate from VKIE
Fixes bfd973ece3 (commitcomment-91834251)
2022-12-08 17:20:02 +05:30
8 changed files with 150 additions and 217 deletions

View file

@ -1624,10 +1624,6 @@ # Supported sites
- **XHamster** - **XHamster**
- **XHamsterEmbed** - **XHamsterEmbed**
- **XHamsterUser** - **XHamsterUser**
- **xiami:album**: 虾米音乐 - 专辑
- **xiami:artist**: 虾米音乐 - 歌手
- **xiami:collection**: 虾米音乐 - 精选集
- **xiami:song**: 虾米音乐
- **ximalaya**: 喜马拉雅FM - **ximalaya**: 喜马拉雅FM
- **ximalaya:album**: 喜马拉雅FM 专辑 - **ximalaya:album**: 喜马拉雅FM 专辑
- **xinpianchang**: xinpianchang.com - **xinpianchang**: xinpianchang.com

View file

@ -1211,6 +1211,7 @@
from .nitter import NitterIE from .nitter import NitterIE
from .njpwworld import NJPWWorldIE from .njpwworld import NJPWWorldIE
from .nobelprize import NobelPrizeIE from .nobelprize import NobelPrizeIE
from .noice import NoicePodcastIE
from .nonktube import NonkTubeIE from .nonktube import NonkTubeIE
from .noodlemagazine import NoodleMagazineIE from .noodlemagazine import NoodleMagazineIE
from .noovo import NoovoIE from .noovo import NoovoIE
@ -1639,6 +1640,7 @@
VivoIE, VivoIE,
) )
from .sharevideos import ShareVideosEmbedIE from .sharevideos import ShareVideosEmbedIE
from .sibnet import SibnetEmbedIE
from .shemaroome import ShemarooMeIE from .shemaroome import ShemarooMeIE
from .showroomlive import ShowRoomLiveIE from .showroomlive import ShowRoomLiveIE
from .simplecast import ( from .simplecast import (
@ -2235,12 +2237,6 @@
XHamsterEmbedIE, XHamsterEmbedIE,
XHamsterUserIE, XHamsterUserIE,
) )
from .xiami import (
XiamiSongIE,
XiamiAlbumIE,
XiamiArtistIE,
XiamiCollectionIE
)
from .ximalaya import ( from .ximalaya import (
XimalayaIE, XimalayaIE,
XimalayaAlbumIE XimalayaAlbumIE

View file

@ -1864,11 +1864,6 @@ class GenericIE(InfoExtractor):
'title': 'I AM BIO Podcast | BIO', 'title': 'I AM BIO Podcast | BIO',
}, },
'playlist_mincount': 52, 'playlist_mincount': 52,
},
{
# Sibnet embed (https://help.sibnet.ru/?sibnet_video_embed)
'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html',
'only_matching': True,
}, { }, {
# WimTv embed player # WimTv embed player
'url': 'http://www.msmotor.tv/wearefmi-pt-2-2021/', 'url': 'http://www.msmotor.tv/wearefmi-pt-2-2021/',

116
yt_dlp/extractor/noice.py Normal file
View file

@ -0,0 +1,116 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
determine_ext,
int_or_none,
parse_iso8601,
traverse_obj,
variadic,
)
class NoicePodcastIE(InfoExtractor):
_VALID_URL = r'https?://open\.noice\.id/content/(?P<id>[a-fA-F0-9-]+)'
_TESTS = [{
'url': 'https://open.noice.id/content/7694bb04-ff0f-40fa-a60b-5b39f29584b2',
'info_dict': {
'id': '7694bb04-ff0f-40fa-a60b-5b39f29584b2',
'ext': 'm4a',
'season': 'Season 1',
'description': 'md5:58d1274e6857b6fbbecf47075885380d',
'release_date': '20221115',
'timestamp': 1668496642,
'season_number': 1,
'upload_date': '20221115',
'release_timestamp': 1668496642,
'title': 'Eps 1. Belajar dari Wishnutama: Kreatif Bukan Followers! (bersama Wishnutama)',
'modified_date': '20221121',
'categories': ['Bisnis dan Keuangan'],
'duration': 3567,
'modified_timestamp': 1669030647,
'thumbnail': 'https://images.noiceid.cc/catalog/content-1668496302560',
'channel_id': '9dab1024-5b92-4265-ae1c-63da87359832',
'like_count': int,
'channel': 'Noice Space Talks',
'comment_count': int,
'dislike_count': int,
'channel_follower_count': int,
}
}, {
'url': 'https://open.noice.id/content/222134e4-99f2-456f-b8a2-b8be404bf063',
'info_dict': {
'id': '222134e4-99f2-456f-b8a2-b8be404bf063',
'ext': 'm4a',
'release_timestamp': 1653488220,
'description': 'md5:35074f6190cef52b05dd133bb2ef460e',
'upload_date': '20220525',
'timestamp': 1653460637,
'release_date': '20220525',
'thumbnail': 'https://images.noiceid.cc/catalog/content-1653460337625',
'title': 'Eps 1: Dijodohin Sama Anak Pak RT',
'modified_timestamp': 1669030647,
'season_number': 1,
'modified_date': '20221121',
'categories': ['Cerita dan Drama'],
'duration': 1830,
'season': 'Season 1',
'channel_id': '60193f6b-d24d-4b23-913b-ceed5a731e74',
'dislike_count': int,
'like_count': int,
'comment_count': int,
'channel': 'Dear Jerome',
'channel_follower_count': int,
}
}]
def _get_formats_and_subtitles(self, media_url, video_id):
formats, subtitles = [], {}
for url in variadic(media_url):
ext = determine_ext(url)
if ext == 'm3u8':
fmts, subs = self._extract_m3u8_formats_and_subtitles(url, video_id)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
else:
formats.append({
'url': url,
'ext': 'mp3',
'vcodec': 'none',
'acodec': 'mp3',
})
return formats, subtitles
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
nextjs_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['contentDetails']
media_url_list = traverse_obj(nextjs_data, (('rawContentUrl', 'url'), ))
formats, subtitles = self._get_formats_and_subtitles(media_url_list, display_id)
return {
'id': nextjs_data.get('id') or display_id,
'title': nextjs_data.get('title') or self._html_search_meta('og:title', webpage),
'formats': formats,
'subtitles': subtitles,
'description': (nextjs_data.get('description') or clean_html(nextjs_data.get('htmlDescription'))
or self._html_search_meta(['description', 'og:description'], webpage)),
'thumbnail': nextjs_data.get('image') or self._html_search_meta('og:image', webpage),
'timestamp': parse_iso8601(nextjs_data.get('createdAt')),
'release_timestamp': parse_iso8601(nextjs_data.get('publishedAt')),
'modified_timestamp': parse_iso8601(
nextjs_data.get('updatedAt') or self._html_search_meta('og:updated_time', webpage)),
'duration': int_or_none(nextjs_data.get('duration')),
'categories': traverse_obj(nextjs_data, ('genres', ..., 'name')),
'season': nextjs_data.get('seasonName'),
'season_number': int_or_none(nextjs_data.get('seasonNumber')),
'channel': traverse_obj(nextjs_data, ('catalog', 'title')),
'channel_id': traverse_obj(nextjs_data, ('catalog', 'id'), 'catalogId'),
**traverse_obj(nextjs_data, ('meta', 'aggregations', {
'like_count': 'likes',
'dislike_count': 'dislikes',
'comment_count': 'comments',
'channel_follower_count': 'followers',
}))
}

View file

@ -84,6 +84,17 @@ class PlutoTVIE(InfoExtractor):
}, { }, {
'url': 'https://pluto.tv/it/on-demand/series/csi-vegas/episode/legacy-2021-1-1', 'url': 'https://pluto.tv/it/on-demand/series/csi-vegas/episode/legacy-2021-1-1',
'only_matching': True, 'only_matching': True,
},
{
'url': 'https://pluto.tv/en/on-demand/movies/attack-of-the-killer-tomatoes-1977-1-1-ptv1',
'md5': '7db56369c0da626a32d505ec6eb3f89f',
'info_dict': {
'id': '5b190c7bb0875c36c90c29c4',
'ext': 'mp4',
'title': 'Attack of the Killer Tomatoes',
'description': 'A group of scientists band together to save the world from mutated tomatoes that KILL! (1978)',
'duration': 5700,
}
} }
] ]
@ -103,7 +114,7 @@ def _to_ad_free_formats(self, video_id, formats, subtitles):
compat_urlparse.urljoin(first_segment_url.group(1), '0-end/master.m3u8')) compat_urlparse.urljoin(first_segment_url.group(1), '0-end/master.m3u8'))
continue continue
first_segment_url = re.search( first_segment_url = re.search(
r'^(https?://.*/).+\-0+\.ts$', res, r'^(https?://.*/).+\-0+[0-1]0\.ts$', res,
re.MULTILINE) re.MULTILINE)
if first_segment_url: if first_segment_url:
m3u8_urls.add( m3u8_urls.add(

View file

@ -0,0 +1,17 @@
from .common import InfoExtractor
class SibnetEmbedIE(InfoExtractor):
# Ref: https://help.sibnet.ru/?sibnet_video_embed
_VALID_URL = False
_EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.sibnet\.ru/shell\.php\?.*?\bvideoid=\d+.*?)\1']
_WEBPAGE_TESTS = [{
'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html',
'info_dict': {
'id': 'shell', # FIXME?
'ext': 'mp4',
'age_limit': 0,
'thumbnail': 'https://video.sibnet.ru/upload/cover/video_1887072_0.jpg',
'title': 'КВН Москва не сразу строилась - Девушка впервые играет в Mortal Kombat',
}
}]

View file

@ -6,6 +6,7 @@
from .dailymotion import DailymotionIE from .dailymotion import DailymotionIE
from .odnoklassniki import OdnoklassnikiIE from .odnoklassniki import OdnoklassnikiIE
from .pladform import PladformIE from .pladform import PladformIE
from .sibnet import SibnetEmbedIE
from .vimeo import VimeoIE from .vimeo import VimeoIE
from .youtube import YoutubeIE from .youtube import YoutubeIE
from ..compat import compat_urlparse from ..compat import compat_urlparse
@ -101,8 +102,7 @@ class VKIE(VKBaseIE):
(?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>([\da-f]+)|(ln-[\da-zA-Z]+)))? (?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>([\da-f]+)|(ln-[\da-zA-Z]+)))?
) )
''' '''
# https://help.sibnet.ru/?sibnet_video_embed
_EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.sibnet\.ru/shell\.php\?.*?\bvideoid=\d+.*?)\1']
_TESTS = [ _TESTS = [
{ {
'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521', 'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
@ -455,7 +455,7 @@ def _real_extract(self, url):
if odnoklassniki_url: if odnoklassniki_url:
return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key()) return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
sibnet_url = next(self._extract_embed_urls(url, info_page), None) sibnet_url = next(SibnetEmbedIE._extract_embed_urls(url, info_page), None)
if sibnet_url: if sibnet_url:
return self.url_result(sibnet_url) return self.url_result(sibnet_url)

View file

@ -1,198 +0,0 @@
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
from ..utils import int_or_none
class XiamiBaseIE(InfoExtractor):
_API_BASE_URL = 'https://emumo.xiami.com/song/playlist/cat/json/id'
def _download_webpage_handle(self, *args, **kwargs):
webpage = super(XiamiBaseIE, self)._download_webpage_handle(*args, **kwargs)
if '>Xiami is currently not available in your country.<' in webpage:
self.raise_geo_restricted('Xiami is currently not available in your country')
return webpage
def _extract_track(self, track, track_id=None):
track_name = track.get('songName') or track.get('name') or track['subName']
artist = track.get('artist') or track.get('artist_name') or track.get('singers')
title = '%s - %s' % (artist, track_name) if artist else track_name
track_url = self._decrypt(track['location'])
subtitles = {}
lyrics_url = track.get('lyric_url') or track.get('lyric')
if lyrics_url and lyrics_url.startswith('http'):
subtitles['origin'] = [{'url': lyrics_url}]
return {
'id': track.get('song_id') or track_id,
'url': track_url,
'title': title,
'thumbnail': track.get('pic') or track.get('album_pic'),
'duration': int_or_none(track.get('length')),
'creator': track.get('artist', '').split(';')[0],
'track': track_name,
'track_number': int_or_none(track.get('track')),
'album': track.get('album_name') or track.get('title'),
'artist': artist,
'subtitles': subtitles,
}
def _extract_tracks(self, item_id, referer, typ=None):
playlist = self._download_json(
'%s/%s%s' % (self._API_BASE_URL, item_id, '/type/%s' % typ if typ else ''),
item_id, headers={
'Referer': referer,
})
return [
self._extract_track(track, item_id)
for track in playlist['data']['trackList']]
@staticmethod
def _decrypt(origin):
n = int(origin[0])
origin = origin[1:]
short_length = len(origin) // n
long_num = len(origin) - short_length * n
l = tuple()
for i in range(0, n):
length = short_length
if i < long_num:
length += 1
l += (origin[0:length], )
origin = origin[length:]
ans = ''
for i in range(0, short_length + 1):
for j in range(0, n):
if len(l[j]) > i:
ans += l[j][i]
return compat_urllib_parse_unquote(ans).replace('^', '0')
class XiamiSongIE(XiamiBaseIE):
IE_NAME = 'xiami:song'
IE_DESC = '虾米音乐'
_VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.xiami.com/song/1775610518',
'md5': '521dd6bea40fd5c9c69f913c232cb57e',
'info_dict': {
'id': '1775610518',
'ext': 'mp3',
'title': 'HONNE - Woman',
'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
'duration': 265,
'creator': 'HONNE',
'track': 'Woman',
'album': 'Woman',
'artist': 'HONNE',
'subtitles': {
'origin': [{
'ext': 'lrc',
}],
},
},
'skip': 'Georestricted',
}, {
'url': 'http://www.xiami.com/song/1775256504',
'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc',
'info_dict': {
'id': '1775256504',
'ext': 'mp3',
'title': '戴荃 - 悟空',
'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
'duration': 200,
'creator': '戴荃',
'track': '悟空',
'album': '悟空',
'artist': '戴荃',
'subtitles': {
'origin': [{
'ext': 'lrc',
}],
},
},
'skip': 'Georestricted',
}, {
'url': 'http://www.xiami.com/song/1775953850',
'info_dict': {
'id': '1775953850',
'ext': 'mp3',
'title': 'До Скону - Чума Пожирает Землю',
'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
'duration': 683,
'creator': 'До Скону',
'track': 'Чума Пожирает Землю',
'track_number': 7,
'album': 'Ад',
'artist': 'До Скону',
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.xiami.com/song/xLHGwgd07a1',
'only_matching': True,
}]
def _real_extract(self, url):
return self._extract_tracks(self._match_id(url), url)[0]
class XiamiPlaylistBaseIE(XiamiBaseIE):
def _real_extract(self, url):
item_id = self._match_id(url)
return self.playlist_result(self._extract_tracks(item_id, url, self._TYPE), item_id)
class XiamiAlbumIE(XiamiPlaylistBaseIE):
IE_NAME = 'xiami:album'
IE_DESC = '虾米音乐 - 专辑'
_VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P<id>[^/?#&]+)'
_TYPE = '1'
_TESTS = [{
'url': 'http://www.xiami.com/album/2100300444',
'info_dict': {
'id': '2100300444',
},
'playlist_count': 10,
'skip': 'Georestricted',
}, {
'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9',
'only_matching': True,
}, {
'url': 'http://www.xiami.com/album/URVDji2a506',
'only_matching': True,
}]
class XiamiArtistIE(XiamiPlaylistBaseIE):
IE_NAME = 'xiami:artist'
IE_DESC = '虾米音乐 - 歌手'
_VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P<id>[^/?#&]+)'
_TYPE = '2'
_TESTS = [{
'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp',
'info_dict': {
'id': '2132',
},
'playlist_count': 20,
'skip': 'Georestricted',
}, {
'url': 'http://www.xiami.com/artist/bC5Tk2K6eb99',
'only_matching': True,
}]
class XiamiCollectionIE(XiamiPlaylistBaseIE):
IE_NAME = 'xiami:collection'
IE_DESC = '虾米音乐 - 精选集'
_VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P<id>[^/?#&]+)'
_TYPE = '3'
_TEST = {
'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr',
'info_dict': {
'id': '156527391',
},
'playlist_mincount': 29,
'skip': 'Georestricted',
}