Update to ytdl-2021.01.08

This commit is contained in:
pukkandan 2021-01-08 21:44:50 +05:30
parent 0c0ff18f7d
commit 00dd0cd573
24 changed files with 845 additions and 424 deletions

View file

@ -55,6 +55,7 @@ # Supported sites
- **Aparat** - **Aparat**
- **AppleConnect** - **AppleConnect**
- **AppleDaily**: 臺灣蘋果日報 - **AppleDaily**: 臺灣蘋果日報
- **ApplePodcasts**
- **appletrailers** - **appletrailers**
- **appletrailers:section** - **appletrailers:section**
- **archive.org**: archive.org videos - **archive.org**: archive.org videos
@ -99,6 +100,10 @@ # Supported sites
- **BellMedia** - **BellMedia**
- **Bet** - **Bet**
- **bfi:player** - **bfi:player**
- **bfmtv**
- **bfmtv:article**
- **bfmtv:live**
- **BibelTV**
- **Bigflix** - **Bigflix**
- **Bild**: Bild.de - **Bild**: Bild.de
- **BiliBili** - **BiliBili**
@ -346,6 +351,8 @@ # Supported sites
- **Go** - **Go**
- **GodTube** - **GodTube**
- **Golem** - **Golem**
- **google:podcasts**
- **google:podcasts:feed**
- **GoogleDrive** - **GoogleDrive**
- **Goshgay** - **Goshgay**
- **GPUTechConf** - **GPUTechConf**
@ -381,6 +388,8 @@ # Supported sites
- **HungamaSong** - **HungamaSong**
- **Hypem** - **Hypem**
- **ign.com** - **ign.com**
- **IHeartRadio**
- **iheartradio:podcast**
- **imdb**: Internet Movie Database trailers - **imdb**: Internet Movie Database trailers
- **imdb:list**: Internet Movie Database lists - **imdb:list**: Internet Movie Database lists
- **Imgur** - **Imgur**
@ -706,7 +715,6 @@ # Supported sites
- **Playwire** - **Playwire**
- **pluralsight** - **pluralsight**
- **pluralsight:course** - **pluralsight:course**
- **plus.google**: Google Plus
- **podomatic** - **podomatic**
- **Pokemon** - **Pokemon**
- **PokemonWatch** - **PokemonWatch**
@ -1146,7 +1154,7 @@ # Supported sites
- **WWE** - **WWE**
- **XBef** - **XBef**
- **XboxClips** - **XboxClips**
- **XFileShare**: XFileShare based sites: ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, XVideoSharing - **XFileShare**: XFileShare based sites: Aparat, ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, XVideoSharing
- **XHamster** - **XHamster**
- **XHamsterEmbed** - **XHamsterEmbed**
- **XHamsterUser** - **XHamsterUser**

View file

@ -264,16 +264,24 @@ def test_allsubtitles(self):
class TestRaiPlaySubtitles(BaseTestSubtitles): class TestRaiPlaySubtitles(BaseTestSubtitles):
url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
IE = RaiPlayIE IE = RaiPlayIE
def test_allsubtitles(self): def test_subtitles_key(self):
self.url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
self.DL.params['writesubtitles'] = True self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles() subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['it'])) self.assertEqual(set(subtitles.keys()), set(['it']))
self.assertEqual(md5(subtitles['it']), 'b1d90a98755126b61e667567a1f6680a') self.assertEqual(md5(subtitles['it']), 'b1d90a98755126b61e667567a1f6680a')
def test_subtitles_array_key(self):
self.url = 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html'
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['it']))
self.assertEqual(md5(subtitles['it']), '4b3264186fbb103508abe5311cfcb9cd')
class TestVikiSubtitles(BaseTestSubtitles): class TestVikiSubtitles(BaseTestSubtitles):
url = 'http://www.viki.com/videos/1060846v-punch-episode-18' url = 'http://www.viki.com/videos/1060846v-punch-episode-18'

View file

@ -21,6 +21,7 @@
encode_base_n, encode_base_n,
caesar, caesar,
clean_html, clean_html,
clean_podcast_url,
date_from_str, date_from_str,
DateRange, DateRange,
detect_exe_version, detect_exe_version,
@ -1497,6 +1498,10 @@ def test_iri_to_uri(self):
iri_to_uri('http://导航.中国/'), iri_to_uri('http://导航.中国/'),
'http://xn--fet810g.xn--fiqs8s/') 'http://xn--fet810g.xn--fiqs8s/')
def test_clean_podcast_url(self):
self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3')
self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View file

@ -172,8 +172,12 @@ def is_ad_fragment_end(s):
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence) iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen( decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read() self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
frag_content = AES.new( # Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content) # size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded,
# not what it decrypts to.
if not test:
frag_content = AES.new(
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
self._append_fragment(ctx, frag_content) self._append_fragment(ctx, frag_content)
# We only download the first fragment during the test # We only download the first fragment during the test
if test: if test:

View file

@ -6,6 +6,7 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html, clean_html,
clean_podcast_url,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
) )
@ -17,7 +18,7 @@ def _extract_episode(self, episode, show_info):
info = { info = {
'id': episode['id'], 'id': episode['id'],
'display_id': episode.get('episodeUrl'), 'display_id': episode.get('episodeUrl'),
'url': episode['url'], 'url': clean_podcast_url(episode['url']),
'title': title, 'title': title,
'description': clean_html(episode.get('description') or episode.get('summary')), 'description': clean_html(episode.get('description') or episode.get('summary')),
'thumbnail': episode.get('image'), 'thumbnail': episode.get('image'),

View file

@ -0,0 +1,61 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
clean_podcast_url,
int_or_none,
parse_iso8601,
try_get,
)
class ApplePodcastsIE(InfoExtractor):
_VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
_TESTS = [{
'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
'md5': 'df02e6acb11c10e844946a39e7222b08',
'info_dict': {
'id': '1000482637777',
'ext': 'mp3',
'title': '207 - Whitney Webb Returns',
'description': 'md5:13a73bade02d2e43737751e3987e1399',
'upload_date': '20200705',
'timestamp': 1593921600,
'duration': 6425,
'series': 'The Tim Dillon Show',
}
}, {
'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
'only_matching': True,
}, {
'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns?i=1000482637777',
'only_matching': True,
}, {
'url': 'https://podcasts.apple.com/podcast/id1135137367?i=1000482637777',
'only_matching': True,
}]
def _real_extract(self, url):
episode_id = self._match_id(url)
webpage = self._download_webpage(url, episode_id)
ember_data = self._parse_json(self._search_regex(
r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
webpage, 'ember data'), episode_id)
episode = ember_data['data']['attributes']
description = episode.get('description') or {}
series = None
for inc in (ember_data.get('included') or []):
if inc.get('type') == 'media/podcast':
series = try_get(inc, lambda x: x['attributes']['name'])
return {
'id': episode_id,
'title': episode['name'],
'url': clean_podcast_url(episode['assetUrl']),
'description': description.get('standard') or description.get('short'),
'timestamp': parse_iso8601(episode.get('releaseDateTime')),
'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
'series': series,
}

View file

@ -0,0 +1,103 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import extract_attributes
class BFMTVBaseIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:www\.)?bfmtv\.com/'
_VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P<id>\d{12})\.html'
_VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block"[^>]*>)'
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
def _brightcove_url_result(self, video_id, video_block):
account_id = video_block.get('accountid') or '876450612001'
player_id = video_block.get('playerid') or 'I2qBTln4u'
return self.url_result(
self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
'BrightcoveNew', video_id)
class BFMTVIE(BFMTVBaseIE):
IE_NAME = 'bfmtv'
_VALID_URL = BFMTVBaseIE._VALID_URL_TMPL % 'V'
_TESTS = [{
'url': 'https://www.bfmtv.com/politique/emmanuel-macron-l-islam-est-une-religion-qui-vit-une-crise-aujourd-hui-partout-dans-le-monde_VN-202010020146.html',
'info_dict': {
'id': '6196747868001',
'ext': 'mp4',
'title': 'Emmanuel Macron: "L\'Islam est une religion qui vit une crise aujourdhui, partout dans le monde"',
'description': 'Le Président s\'exprime sur la question du séparatisme depuis les Mureaux, dans les Yvelines.',
'uploader_id': '876450610001',
'upload_date': '20201002',
'timestamp': 1601629620,
},
}]
def _real_extract(self, url):
bfmtv_id = self._match_id(url)
webpage = self._download_webpage(url, bfmtv_id)
video_block = extract_attributes(self._search_regex(
self._VIDEO_BLOCK_REGEX, webpage, 'video block'))
return self._brightcove_url_result(video_block['videoid'], video_block)
class BFMTVLiveIE(BFMTVIE):
IE_NAME = 'bfmtv:live'
_VALID_URL = BFMTVBaseIE._VALID_URL_BASE + '(?P<id>(?:[^/]+/)?en-direct)'
_TESTS = [{
'url': 'https://www.bfmtv.com/en-direct/',
'info_dict': {
'id': '5615950982001',
'ext': 'mp4',
'title': r're:^le direct BFMTV WEB \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'uploader_id': '876450610001',
'upload_date': '20171018',
'timestamp': 1508329950,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.bfmtv.com/economie/en-direct/',
'only_matching': True,
}]
class BFMTVArticleIE(BFMTVBaseIE):
IE_NAME = 'bfmtv:article'
_VALID_URL = BFMTVBaseIE._VALID_URL_TMPL % 'A'
_TESTS = [{
'url': 'https://www.bfmtv.com/sante/covid-19-un-responsable-de-l-institut-pasteur-se-demande-quand-la-france-va-se-reconfiner_AV-202101060198.html',
'info_dict': {
'id': '202101060198',
'title': 'Covid-19: un responsable de l\'Institut Pasteur se demande "quand la France va se reconfiner"',
'description': 'md5:947974089c303d3ac6196670ae262843',
},
'playlist_count': 2,
}, {
'url': 'https://www.bfmtv.com/international/pour-bolsonaro-le-bresil-est-en-faillite-mais-il-ne-peut-rien-faire_AD-202101060232.html',
'only_matching': True,
}, {
'url': 'https://www.bfmtv.com/sante/covid-19-oui-le-vaccin-de-pfizer-distribue-en-france-a-bien-ete-teste-sur-des-personnes-agees_AN-202101060275.html',
'only_matching': True,
}]
def _real_extract(self, url):
bfmtv_id = self._match_id(url)
webpage = self._download_webpage(url, bfmtv_id)
entries = []
for video_block_el in re.findall(self._VIDEO_BLOCK_REGEX, webpage):
video_block = extract_attributes(video_block_el)
video_id = video_block.get('videoid')
if not video_id:
continue
entries.append(self._brightcove_url_result(video_id, video_block))
return self.playlist_result(
entries, bfmtv_id, self._og_search_title(webpage, fatal=False),
self._html_search_meta(['og:description', 'description'], webpage))

View file

@ -0,0 +1,30 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class BibelTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/videos/(?:crn/)?(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.bibeltv.de/mediathek/videos/329703-sprachkurs-in-malaiisch',
'md5': '252f908192d611de038b8504b08bf97f',
'info_dict': {
'id': 'ref:329703',
'ext': 'mp4',
'title': 'Sprachkurs in Malaiisch',
'description': 'md5:3e9f197d29ee164714e67351cf737dfe',
'timestamp': 1608316701,
'uploader_id': '5840105145001',
'upload_date': '20201218',
}
}, {
'url': 'https://www.bibeltv.de/mediathek/videos/crn/326374',
'only_matching': True,
}]
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5840105145001/default_default/index.html?videoId=ref:%s'
def _real_extract(self, url):
crn_id = self._match_id(url)
return self.url_result(
self.BRIGHTCOVE_URL_TEMPLATE % crn_id, 'BrightcoveNew')

View file

@ -7,12 +7,12 @@
from .gigya import GigyaBaseIE from .gigya import GigyaBaseIE
from ..compat import compat_HTTPError from ..compat import compat_HTTPError
from ..utils import ( from ..utils import (
extract_attributes,
ExtractorError, ExtractorError,
strip_or_none, strip_or_none,
float_or_none, float_or_none,
int_or_none, int_or_none,
merge_dicts, merge_dicts,
parse_iso8601,
str_or_none, str_or_none,
url_or_none, url_or_none,
) )
@ -37,6 +37,7 @@ class CanvasIE(InfoExtractor):
'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e', 'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
'only_matching': True, 'only_matching': True,
}] }]
_GEO_BYPASS = False
_HLS_ENTRY_PROTOCOLS_MAP = { _HLS_ENTRY_PROTOCOLS_MAP = {
'HLS': 'm3u8_native', 'HLS': 'm3u8_native',
'HLS_AES': 'm3u8', 'HLS_AES': 'm3u8',
@ -47,29 +48,34 @@ def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
site_id, video_id = mobj.group('site_id'), mobj.group('id') site_id, video_id = mobj.group('site_id'), mobj.group('id')
# Old API endpoint, serves more formats but may fail for some videos data = None
data = self._download_json( if site_id != 'vrtvideo':
'https://mediazone.vrt.be/api/v1/%s/assets/%s' # Old API endpoint, serves more formats but may fail for some videos
% (site_id, video_id), video_id, 'Downloading asset JSON', data = self._download_json(
'Unable to download asset JSON', fatal=False) 'https://mediazone.vrt.be/api/v1/%s/assets/%s'
% (site_id, video_id), video_id, 'Downloading asset JSON',
'Unable to download asset JSON', fatal=False)
# New API endpoint # New API endpoint
if not data: if not data:
headers = self.geo_verification_headers()
headers.update({'Content-Type': 'application/json'})
token = self._download_json( token = self._download_json(
'%s/tokens' % self._REST_API_BASE, video_id, '%s/tokens' % self._REST_API_BASE, video_id,
'Downloading token', data=b'', 'Downloading token', data=b'', headers=headers)['vrtPlayerToken']
headers={'Content-Type': 'application/json'})['vrtPlayerToken']
data = self._download_json( data = self._download_json(
'%s/videos/%s' % (self._REST_API_BASE, video_id), '%s/videos/%s' % (self._REST_API_BASE, video_id),
video_id, 'Downloading video JSON', fatal=False, query={ video_id, 'Downloading video JSON', query={
'vrtPlayerToken': token, 'vrtPlayerToken': token,
'client': '%s@PROD' % site_id, 'client': '%s@PROD' % site_id,
}, expected_status=400) }, expected_status=400)
message = data.get('message') if not data.get('title'):
if message and not data.get('title'): code = data.get('code')
if data.get('code') == 'AUTHENTICATION_REQUIRED': if code == 'AUTHENTICATION_REQUIRED':
self.raise_login_required(message) self.raise_login_required()
raise ExtractorError(message, expected=True) elif code == 'INVALID_LOCATION':
self.raise_geo_restricted(countries=['BE'])
raise ExtractorError(data.get('message') or code, expected=True)
title = data['title'] title = data['title']
description = data.get('description') description = data.get('description')
@ -205,20 +211,24 @@ def _real_extract(self, url):
class VrtNUIE(GigyaBaseIE): class VrtNUIE(GigyaBaseIE):
IE_DESC = 'VrtNU.be' IE_DESC = 'VrtNU.be'
_VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?vrt\.be/vrtnu/a-z/(?:[^/]+/){2}(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
# Available via old API endpoint # Available via old API endpoint
'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/', 'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1989/postbus-x-s1989a1/',
'info_dict': { 'info_dict': {
'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de', 'id': 'pbs-pub-e8713dac-899e-41de-9313-81269f4c04ac$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
'ext': 'mp4', 'ext': 'mp4',
'title': 'De zwarte weduwe', 'title': 'Postbus X - Aflevering 1 (Seizoen 1989)',
'description': 'md5:db1227b0f318c849ba5eab1fef895ee4', 'description': 'md5:b704f669eb9262da4c55b33d7c6ed4b7',
'duration': 1457.04, 'duration': 1457.04,
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'season': 'Season 1', 'series': 'Postbus X',
'season_number': 1, 'season': 'Seizoen 1989',
'season_number': 1989,
'episode': 'De zwarte weduwe',
'episode_number': 1, 'episode_number': 1,
'timestamp': 1595822400,
'upload_date': '20200727',
}, },
'skip': 'This video is only available for registered users', 'skip': 'This video is only available for registered users',
'params': { 'params': {
@ -300,69 +310,25 @@ def _login(self):
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage, urlh = self._download_webpage_handle(url, display_id) webpage = self._download_webpage(url, display_id)
attrs = extract_attributes(self._search_regex(
r'(<nui-media[^>]+>)', webpage, 'media element'))
video_id = attrs['videoid']
publication_id = attrs.get('publicationid')
if publication_id:
video_id = publication_id + '$' + video_id
page = (self._parse_json(self._search_regex(
r'digitalData\s*=\s*({.+?});', webpage, 'digial data',
default='{}'), video_id, fatal=False) or {}).get('page') or {}
info = self._search_json_ld(webpage, display_id, default={}) info = self._search_json_ld(webpage, display_id, default={})
# title is optional here since it may be extracted by extractor
# that is delegated from here
title = strip_or_none(self._html_search_regex(
r'(?ms)<h1 class="content__heading">(.+?)</h1>',
webpage, 'title', default=None))
description = self._html_search_regex(
r'(?ms)<div class="content__description">(.+?)</div>',
webpage, 'description', default=None)
season = self._html_search_regex(
[r'''(?xms)<div\ class="tabs__tab\ tabs__tab--active">\s*
<span>seizoen\ (.+?)</span>\s*
</div>''',
r'<option value="seizoen (\d{1,3})" data-href="[^"]+?" selected>'],
webpage, 'season', default=None)
season_number = int_or_none(season)
episode_number = int_or_none(self._html_search_regex(
r'''(?xms)<div\ class="content__episode">\s*
<abbr\ title="aflevering">afl</abbr>\s*<span>(\d+)</span>
</div>''',
webpage, 'episode_number', default=None))
release_date = parse_iso8601(self._html_search_regex(
r'(?ms)<div class="content__broadcastdate">\s*<time\ datetime="(.+?)"',
webpage, 'release_date', default=None))
# If there's a ? or a # in the URL, remove them and everything after
clean_url = urlh.geturl().split('?')[0].split('#')[0].strip('/')
securevideo_url = clean_url + '.mssecurevideo.json'
try:
video = self._download_json(securevideo_url, display_id)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
self.raise_login_required()
raise
# We are dealing with a '../<show>.relevant' URL
redirect_url = video.get('url')
if redirect_url:
return self.url_result(self._proto_relative_url(redirect_url, 'https:'))
# There is only one entry, but with an unknown key, so just get
# the first one
video_id = list(video.values())[0].get('videoid')
return merge_dicts(info, { return merge_dicts(info, {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id, 'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
'ie_key': CanvasIE.ie_key(), 'ie_key': CanvasIE.ie_key(),
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'title': title, 'season_number': int_or_none(page.get('episode_season')),
'description': description,
'season': season,
'season_number': season_number,
'episode_number': episode_number,
'release_date': release_date,
}) })

View file

@ -17,7 +17,12 @@
class DPlayIE(InfoExtractor): class DPlayIE(InfoExtractor):
_VALID_URL = r'''(?x)https?:// _VALID_URL = r'''(?x)https?://
(?P<domain> (?P<domain>
(?:www\.)?(?P<host>dplay\.(?P<country>dk|fi|jp|se|no))| (?:www\.)?(?P<host>d
(?:
play\.(?P<country>dk|fi|jp|se|no)|
iscoveryplus\.(?P<plus_country>dk|es|fi|it|se|no)
)
)|
(?P<subdomain_country>es|it)\.dplay\.com (?P<subdomain_country>es|it)\.dplay\.com
)/[^/]+/(?P<id>[^/]+/[^/?#]+)''' )/[^/]+/(?P<id>[^/]+/[^/?#]+)'''
@ -126,6 +131,24 @@ class DPlayIE(InfoExtractor):
}, { }, {
'url': 'https://www.dplay.jp/video/gold-rush/24086', 'url': 'https://www.dplay.jp/video/gold-rush/24086',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.discoveryplus.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
'only_matching': True,
}, {
'url': 'https://www.discoveryplus.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
'only_matching': True,
}, {
'url': 'https://www.discoveryplus.no/videoer/i-kongens-klr/sesong-1-episode-7',
'only_matching': True,
}, {
'url': 'https://www.discoveryplus.it/videos/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij',
'only_matching': True,
}, {
'url': 'https://www.discoveryplus.es/videos/la-fiebre-del-oro/temporada-8-episodio-1',
'only_matching': True,
}, {
'url': 'https://www.discoveryplus.fi/videot/shifting-gears-with-aaron-kaufman/episode-16',
'only_matching': True,
}] }]
def _get_disco_api_info(self, url, display_id, disco_host, realm, country): def _get_disco_api_info(self, url, display_id, disco_host, realm, country):
@ -241,7 +264,7 @@ def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('id') display_id = mobj.group('id')
domain = mobj.group('domain').lstrip('www.') domain = mobj.group('domain').lstrip('www.')
country = mobj.group('country') or mobj.group('subdomain_country') country = mobj.group('country') or mobj.group('subdomain_country') or mobj.group('plus_country')
host = 'disco-api.' + domain if domain.startswith('dplay.') else 'eu2-prod.disco-api.com' host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com'
return self._get_disco_api_info( return self._get_disco_api_info(
url, display_id, host, 'dplay' + country, country) url, display_id, host, 'dplay' + country, country)

View file

@ -59,6 +59,7 @@
AppleTrailersIE, AppleTrailersIE,
AppleTrailersSectionIE, AppleTrailersSectionIE,
) )
from .applepodcasts import ApplePodcastsIE
from .archiveorg import ArchiveOrgIE from .archiveorg import ArchiveOrgIE
from .arcpublishing import ArcPublishingIE from .arcpublishing import ArcPublishingIE
from .arkena import ArkenaIE from .arkena import ArkenaIE
@ -104,6 +105,12 @@
from .beatport import BeatportIE from .beatport import BeatportIE
from .bet import BetIE from .bet import BetIE
from .bfi import BFIPlayerIE from .bfi import BFIPlayerIE
from .bfmtv import (
BFMTVIE,
BFMTVLiveIE,
BFMTVArticleIE,
)
from .bibeltv import BibelTVIE
from .bigflix import BigflixIE from .bigflix import BigflixIE
from .bild import BildIE from .bild import BildIE
from .bilibili import ( from .bilibili import (
@ -442,7 +449,10 @@
from .godtube import GodTubeIE from .godtube import GodTubeIE
from .golem import GolemIE from .golem import GolemIE
from .googledrive import GoogleDriveIE from .googledrive import GoogleDriveIE
from .googleplus import GooglePlusIE from .googlepodcasts import (
GooglePodcastsIE,
GooglePodcastsFeedIE,
)
from .googlesearch import GoogleSearchIE from .googlesearch import GoogleSearchIE
from .goshgay import GoshgayIE from .goshgay import GoshgayIE
from .gputechconf import GPUTechConfIE from .gputechconf import GPUTechConfIE
@ -484,6 +494,10 @@
OneUPIE, OneUPIE,
PCMagIE, PCMagIE,
) )
from .iheart import (
IHeartRadioIE,
IHeartRadioPodcastIE,
)
from .imdb import ( from .imdb import (
ImdbIE, ImdbIE,
ImdbListIE ImdbListIE

View file

@ -1,73 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
import re
import codecs
from .common import InfoExtractor
from ..utils import unified_strdate
class GooglePlusIE(InfoExtractor):
IE_DESC = 'Google Plus'
_VALID_URL = r'https?://plus\.google\.com/(?:[^/]+/)*?posts/(?P<id>\w+)'
IE_NAME = 'plus.google'
_TEST = {
'url': 'https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH',
'info_dict': {
'id': 'ZButuJc6CtH',
'ext': 'flv',
'title': '嘆きの天使 降臨',
'upload_date': '20120613',
'uploader': '井上ヨシマサ',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
# Step 1, Retrieve post webpage to extract further information
webpage = self._download_webpage(url, video_id, 'Downloading entry webpage')
title = self._og_search_description(webpage).splitlines()[0]
upload_date = unified_strdate(self._html_search_regex(
r'''(?x)<a.+?class="o-U-s\s[^"]+"\s+style="display:\s*none"\s*>
([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''',
webpage, 'upload date', fatal=False, flags=re.VERBOSE))
uploader = self._html_search_regex(
r'rel="author".*?>(.*?)</a>', webpage, 'uploader', fatal=False)
# Step 2, Simulate clicking the image box to launch video
DOMAIN = 'https://plus.google.com/'
video_page = self._search_regex(
r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN),
webpage, 'video page URL')
if not video_page.startswith(DOMAIN):
video_page = DOMAIN + video_page
webpage = self._download_webpage(video_page, video_id, 'Downloading video page')
def unicode_escape(s):
decoder = codecs.getdecoder('unicode_escape')
return re.sub(
r'\\u[0-9a-fA-F]{4,}',
lambda m: decoder(m.group(0))[0],
s)
# Extract video links all sizes
formats = [{
'url': unicode_escape(video_url),
'ext': 'flv',
'width': int(width),
'height': int(height),
} for width, height, video_url in re.findall(
r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent\.com.*?)"', webpage)]
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'uploader': uploader,
'upload_date': upload_date,
'formats': formats,
}

View file

@ -0,0 +1,88 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
clean_podcast_url,
int_or_none,
try_get,
urlencode_postdata,
)
class GooglePodcastsBaseIE(InfoExtractor):
_VALID_URL_BASE = r'https?://podcasts\.google\.com/feed/'
def _batch_execute(self, func_id, video_id, params):
return json.loads(self._download_json(
'https://podcasts.google.com/_/PodcastsUi/data/batchexecute',
video_id, data=urlencode_postdata({
'f.req': json.dumps([[[func_id, json.dumps(params), None, '1']]]),
}), transform_source=lambda x: self._search_regex(r'(?s)(\[.+\])', x, 'data'))[0][2])
def _extract_episode(self, episode):
return {
'id': episode[4][3],
'title': episode[8],
'url': clean_podcast_url(episode[13]),
'thumbnail': episode[2],
'description': episode[9],
'creator': try_get(episode, lambda x: x[14]),
'timestamp': int_or_none(episode[11]),
'duration': int_or_none(episode[12]),
'series': episode[1],
}
class GooglePodcastsIE(GooglePodcastsBaseIE):
IE_NAME = 'google:podcasts'
_VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<feed_url>[^/]+)/episode/(?P<id>[^/?&#]+)'
_TEST = {
'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA/episode/MzBlNWRlN2UtOWE4Yy00ODcwLTk2M2MtM2JlMmUyNmViOTRh',
'md5': 'fa56b2ee8bd0703e27e42d4b104c4766',
'info_dict': {
'id': '30e5de7e-9a8c-4870-963c-3be2e26eb94a',
'ext': 'mp3',
'title': 'WWDTM New Year 2021',
'description': 'We say goodbye to 2020 with Christine Baranksi, Doug Jones, Jonna Mendez, and Kellee Edwards.',
'upload_date': '20210102',
'timestamp': 1609606800,
'duration': 2901,
'series': "Wait Wait... Don't Tell Me!",
}
}
def _real_extract(self, url):
b64_feed_url, b64_guid = re.match(self._VALID_URL, url).groups()
episode = self._batch_execute(
'oNjqVe', b64_guid, [b64_feed_url, b64_guid])[1]
return self._extract_episode(episode)
class GooglePodcastsFeedIE(GooglePodcastsBaseIE):
IE_NAME = 'google:podcasts:feed'
_VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<id>[^/?&#]+)/?(?:[?#&]|$)'
_TEST = {
'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA',
'info_dict': {
'title': "Wait Wait... Don't Tell Me!",
'description': "NPR's weekly current events quiz. Have a laugh and test your news knowledge while figuring out what's real and what we've made up.",
},
'playlist_mincount': 20,
}
def _real_extract(self, url):
b64_feed_url = self._match_id(url)
data = self._batch_execute('ncqJEe', b64_feed_url, [b64_feed_url])
entries = []
for episode in (try_get(data, lambda x: x[1][0]) or []):
entries.append(self._extract_episode(episode))
feed = try_get(data, lambda x: x[3]) or []
return self.playlist_result(
entries, playlist_title=try_get(feed, lambda x: x[0]),
playlist_description=try_get(feed, lambda x: x[2]))

View file

@ -0,0 +1,97 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
clean_html,
clean_podcast_url,
int_or_none,
str_or_none,
)
class IHeartRadioBaseIE(InfoExtractor):
def _call_api(self, path, video_id, fatal=True, query=None):
return self._download_json(
'https://api.iheart.com/api/v3/podcast/' + path,
video_id, fatal=fatal, query=query)
def _extract_episode(self, episode):
return {
'thumbnail': episode.get('imageUrl'),
'description': clean_html(episode.get('description')),
'timestamp': int_or_none(episode.get('startDate'), 1000),
'duration': int_or_none(episode.get('duration')),
}
class IHeartRadioIE(IHeartRadioBaseIE):
IENAME = 'iheartradio'
_VALID_URL = r'(?:https?://(?:www\.)?iheart\.com/podcast/[^/]+/episode/(?P<display_id>[^/?&#]+)-|iheartradio:)(?P<id>\d+)'
_TEST = {
'url': 'https://www.iheart.com/podcast/105-behind-the-bastards-29236323/episode/part-one-alexander-lukashenko-the-dictator-70346499/?embed=true',
'md5': 'c8609c92c8688dcb69d8541042b8abca',
'info_dict': {
'id': '70346499',
'ext': 'mp3',
'title': 'Part One: Alexander Lukashenko: The Dictator of Belarus',
'description': 'md5:96cc7297b3a5a9ebae28643801c96fae',
'timestamp': 1597741200,
'upload_date': '20200818',
}
}
def _real_extract(self, url):
episode_id = self._match_id(url)
episode = self._call_api(
'episodes/' + episode_id, episode_id)['episode']
info = self._extract_episode(episode)
info.update({
'id': episode_id,
'title': episode['title'],
'url': clean_podcast_url(episode['mediaUrl']),
})
return info
class IHeartRadioPodcastIE(IHeartRadioBaseIE):
IE_NAME = 'iheartradio:podcast'
_VALID_URL = r'https?://(?:www\.)?iheart(?:podcastnetwork)?\.com/podcast/[^/?&#]+-(?P<id>\d+)/?(?:[?#&]|$)'
_TESTS = [{
'url': 'https://www.iheart.com/podcast/1119-it-could-happen-here-30717896/',
'info_dict': {
'id': '30717896',
'title': 'It Could Happen Here',
'description': 'md5:5842117412a967eb0b01f8088eb663e2',
},
'playlist_mincount': 11,
}, {
'url': 'https://www.iheartpodcastnetwork.com/podcast/105-stuff-you-should-know-26940277',
'only_matching': True,
}]
def _real_extract(self, url):
podcast_id = self._match_id(url)
path = 'podcasts/' + podcast_id
episodes = self._call_api(
path + '/episodes', podcast_id, query={'limit': 1000000000})['data']
entries = []
for episode in episodes:
episode_id = str_or_none(episode.get('id'))
if not episode_id:
continue
info = self._extract_episode(episode)
info.update({
'_type': 'url',
'id': episode_id,
'title': episode.get('title'),
'url': 'iheartradio:' + episode_id,
'ie_key': IHeartRadioIE.ie_key(),
})
entries.append(info)
podcast = self._call_api(path, podcast_id, False) or {}
return self.playlist_result(
entries, podcast_id, podcast.get('title'), podcast.get('description'))

View file

@ -2,92 +2,71 @@
from .canvas import CanvasIE from .canvas import CanvasIE
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
from ..utils import (
int_or_none,
parse_iso8601,
)
class KetnetIE(InfoExtractor): class KetnetIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?P<id>(?:[^/]+/)*[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.ketnet.be/kijken/zomerse-filmpjes', 'url': 'https://www.ketnet.be/kijken/n/nachtwacht/3/nachtwacht-s3a1-de-greystook',
'md5': '6bdeb65998930251bbd1c510750edba9', 'md5': '37b2b7bb9b3dcaa05b67058dc3a714a9',
'info_dict': { 'info_dict': {
'id': 'zomerse-filmpjes', 'id': 'pbs-pub-aef8b526-115e-4006-aa24-e59ff6c6ef6f$vid-ddb815bf-c8e7-467b-8879-6bad7a32cebd',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Gluur mee op de filmset en op Pennenzakkenrock', 'title': 'Nachtwacht - Reeks 3: Aflevering 1',
'description': 'Gluur mee met Ghost Rockers op de filmset', 'description': 'De Nachtwacht krijgt te maken met een parasiet',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
} 'duration': 1468.02,
}, { 'timestamp': 1609225200,
# mzid in playerConfig instead of sources 'upload_date': '20201229',
'url': 'https://www.ketnet.be/kijken/nachtwacht/de-greystook', 'series': 'Nachtwacht',
'md5': '90139b746a0a9bd7bb631283f6e2a64e', 'season': 'Reeks 3',
'info_dict': { 'episode': 'De Greystook',
'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', 'episode_number': 1,
'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'ext': 'flv',
'title': 'Nachtwacht: De Greystook',
'description': 'md5:1db3f5dc4c7109c821261e7512975be7',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1468.03,
}, },
'expected_warnings': ['is not a supported codec', 'Unknown MIME type'], 'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
}, { }, {
'url': 'https://www.ketnet.be/kijken/karrewiet/uitzending-8-september-2016', 'url': 'https://www.ketnet.be/themas/karrewiet/jaaroverzicht-20200/karrewiet-het-jaar-van-black-mamba',
'only_matching': True,
}, {
'url': 'https://www.ketnet.be/achter-de-schermen/sien-repeteert-voor-stars-for-life',
'only_matching': True,
}, {
# mzsource, geo restricted to Belgium
'url': 'https://www.ketnet.be/kijken/nachtwacht/de-bermadoe',
'only_matching': True, 'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) video = self._download_json(
'https://senior-bff.ketnet.be/graphql', display_id, query={
'query': '''{
video(id: "content/ketnet/nl/%s.model.json") {
description
episodeNr
imageUrl
mediaReference
programTitle
publicationDate
seasonTitle
subtitleVideodetail
titleVideodetail
}
}''' % display_id,
})['data']['video']
config = self._parse_json( mz_id = compat_urllib_parse_unquote(video['mediaReference'])
self._search_regex(
r'(?s)playerConfig\s*=\s*({.+?})\s*;', webpage,
'player config'),
video_id)
mzid = config.get('mzid')
if mzid:
return self.url_result(
'https://mediazone.vrt.be/api/v1/ketnet/assets/%s' % mzid,
CanvasIE.ie_key(), video_id=mzid)
title = config['title']
formats = []
for source_key in ('', 'mz'):
source = config.get('%ssource' % source_key)
if not isinstance(source, dict):
continue
for format_id, format_url in source.items():
if format_id == 'hls':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id=format_id,
fatal=False))
elif format_id == 'hds':
formats.extend(self._extract_f4m_formats(
format_url, video_id, f4m_id=format_id, fatal=False))
else:
formats.append({
'url': format_url,
'format_id': format_id,
})
self._sort_formats(formats)
return { return {
'id': video_id, '_type': 'url_transparent',
'title': title, 'id': mz_id,
'description': config.get('description'), 'title': video['titleVideodetail'],
'thumbnail': config.get('image'), 'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/' + mz_id,
'series': config.get('program'), 'thumbnail': video.get('imageUrl'),
'episode': config.get('episode'), 'description': video.get('description'),
'formats': formats, 'timestamp': parse_iso8601(video.get('publicationDate')),
'series': video.get('programTitle'),
'season': video.get('seasonTitle'),
'episode': video.get('subtitleVideodetail'),
'episode_number': int_or_none(video.get('episodeNr')),
'ie_key': CanvasIE.ie_key(),
} }

View file

@ -61,6 +61,23 @@ class MotherlessIE(InfoExtractor):
# no keywords # no keywords
'url': 'http://motherless.com/8B4BBC1', 'url': 'http://motherless.com/8B4BBC1',
'only_matching': True, 'only_matching': True,
}, {
# see https://motherless.com/videos/recent for recent videos with
# uploaded date in "ago" format
'url': 'https://motherless.com/3C3E2CF',
'info_dict': {
'id': '3C3E2CF',
'ext': 'mp4',
'title': 'a/ Hot Teens',
'categories': list,
'upload_date': '20210104',
'uploader_id': 'yonbiw',
'thumbnail': r're:https?://.*\.jpg',
'age_limit': 18,
},
'params': {
'skip_download': True,
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -85,20 +102,28 @@ def _real_extract(self, url):
or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id) or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
age_limit = self._rta_search(webpage) age_limit = self._rta_search(webpage)
view_count = str_to_int(self._html_search_regex( view_count = str_to_int(self._html_search_regex(
(r'>(\d+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'), (r'>([\d,.]+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'),
webpage, 'view count', fatal=False)) webpage, 'view count', fatal=False))
like_count = str_to_int(self._html_search_regex( like_count = str_to_int(self._html_search_regex(
(r'>(\d+)\s+Favorites<', r'<strong>Favorited</strong>\s+([^<]+)<'), (r'>([\d,.]+)\s+Favorites<',
r'<strong>Favorited</strong>\s+([^<]+)<'),
webpage, 'like count', fatal=False)) webpage, 'like count', fatal=False))
upload_date = self._html_search_regex( upload_date = unified_strdate(self._search_regex(
(r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<', r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<', webpage,
r'<strong>Uploaded</strong>\s+([^<]+)<'), webpage, 'upload date') 'upload date', default=None))
if 'Ago' in upload_date: if not upload_date:
days = int(re.search(r'([0-9]+)', upload_date).group(1)) uploaded_ago = self._search_regex(
upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d') r'>\s*(\d+[hd])\s+[aA]go\b', webpage, 'uploaded ago',
else: default=None)
upload_date = unified_strdate(upload_date) if uploaded_ago:
delta = int(uploaded_ago[:-1])
_AGO_UNITS = {
'h': 'hours',
'd': 'days',
}
kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
comment_count = webpage.count('class="media-comment-contents"') comment_count = webpage.count('class="media-comment-contents"')
uploader_id = self._html_search_regex( uploader_id = self._html_search_regex(

View file

@ -223,12 +223,12 @@ def call_playback_api(item, query=None):
legal_age = try_get( legal_age = try_get(
data, lambda x: x['legalAge']['body']['rating']['code'], compat_str) data, lambda x: x['legalAge']['body']['rating']['code'], compat_str)
# https://en.wikipedia.org/wiki/Norwegian_Media_Authority # https://en.wikipedia.org/wiki/Norwegian_Media_Authority
if legal_age == 'A': age_limit = None
age_limit = 0 if legal_age:
elif legal_age.isdigit(): if legal_age == 'A':
age_limit = int_or_none(legal_age) age_limit = 0
else: elif legal_age.isdigit():
age_limit = None age_limit = int_or_none(legal_age)
is_series = try_get(data, lambda x: x['_links']['series']['name']) == 'series' is_series = try_get(data, lambda x: x['_links']['series']['name']) == 'series'
@ -298,6 +298,14 @@ class NRKTVIE(InfoExtractor):
'description': 'md5:46923a6e6510eefcce23d5ef2a58f2ce', 'description': 'md5:46923a6e6510eefcce23d5ef2a58f2ce',
'duration': 2223.44, 'duration': 2223.44,
'age_limit': 6, 'age_limit': 6,
'subtitles': {
'nb-nor': [{
'ext': 'vtt',
}],
'nb-ttv': [{
'ext': 'vtt',
}]
},
}, },
}, { }, {
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', 'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',

View file

@ -103,22 +103,28 @@ def _extract_relinker_info(self, relinker_url, video_id):
}.items() if v is not None) }.items() if v is not None)
@staticmethod @staticmethod
def _extract_subtitles(url, subtitle_url): def _extract_subtitles(url, video_data):
STL_EXT = 'stl'
SRT_EXT = 'srt'
subtitles = {} subtitles = {}
if subtitle_url and isinstance(subtitle_url, compat_str): subtitles_array = video_data.get('subtitlesArray') or []
subtitle_url = urljoin(url, subtitle_url) for k in ('subtitles', 'subtitlesUrl'):
STL_EXT = '.stl' subtitles_array.append({'url': video_data.get(k)})
SRT_EXT = '.srt' for subtitle in subtitles_array:
subtitles['it'] = [{ sub_url = subtitle.get('url')
'ext': 'stl', if sub_url and isinstance(sub_url, compat_str):
'url': subtitle_url, sub_lang = subtitle.get('language') or 'it'
}] sub_url = urljoin(url, sub_url)
if subtitle_url.endswith(STL_EXT): sub_ext = determine_ext(sub_url, SRT_EXT)
srt_url = subtitle_url[:-len(STL_EXT)] + SRT_EXT subtitles.setdefault(sub_lang, []).append({
subtitles['it'].append({ 'ext': sub_ext,
'ext': 'srt', 'url': sub_url,
'url': srt_url,
}) })
if STL_EXT == sub_ext:
subtitles[sub_lang].append({
'ext': SRT_EXT,
'url': sub_url[:-len(STL_EXT)] + SRT_EXT,
})
return subtitles return subtitles
@ -138,6 +144,9 @@ class RaiPlayIE(RaiBaseIE):
'duration': 6160, 'duration': 6160,
'series': 'Report', 'series': 'Report',
'season': '2013/14', 'season': '2013/14',
'subtitles': {
'it': 'count:2',
},
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -145,6 +154,10 @@ class RaiPlayIE(RaiBaseIE):
}, { }, {
'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?', 'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
'only_matching': True, 'only_matching': True,
}, {
# subtitles at 'subtitlesArray' key (see #27698)
'url': 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -171,7 +184,7 @@ def _real_extract(self, url):
if date_published and time_published: if date_published and time_published:
date_published += ' ' + time_published date_published += ' ' + time_published
subtitles = self._extract_subtitles(url, video.get('subtitles')) subtitles = self._extract_subtitles(url, video)
program_info = media.get('program_info') or {} program_info = media.get('program_info') or {}
season = media.get('season') season = media.get('season')
@ -325,6 +338,22 @@ class RaiIE(RaiBaseIE):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, {
# ContentItem in iframe (see #12652) and subtitle at 'subtitlesUrl' key
'url': 'http://www.presadiretta.rai.it/dl/portali/site/puntata/ContentItem-3ed19d13-26c2-46ff-a551-b10828262f1b.html',
'info_dict': {
'id': '1ad6dc64-444a-42a4-9bea-e5419ad2f5fd',
'ext': 'mp4',
'title': 'Partiti acchiappavoti - Presa diretta del 13/09/2015',
'description': 'md5:d291b03407ec505f95f27970c0b025f4',
'upload_date': '20150913',
'subtitles': {
'it': 'count:2',
},
},
'params': {
'skip_download': True,
},
}, { }, {
# Direct MMS URL # Direct MMS URL
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html', 'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
@ -365,7 +394,7 @@ def _extract_from_content_id(self, content_id, url):
'url': compat_urlparse.urljoin(url, thumbnail_url), 'url': compat_urlparse.urljoin(url, thumbnail_url),
}) })
subtitles = self._extract_subtitles(url, media.get('subtitlesUrl')) subtitles = self._extract_subtitles(url, media)
info = { info = {
'id': content_id, 'id': content_id,
@ -402,7 +431,8 @@ def _real_extract(self, url):
r'''(?x) r'''(?x)
(?: (?:
(?:initEdizione|drawMediaRaiTV)\(| (?:initEdizione|drawMediaRaiTV)\(|
<(?:[^>]+\bdata-id|var\s+uniquename)= <(?:[^>]+\bdata-id|var\s+uniquename)=|
<iframe[^>]+\bsrc=
) )
(["\']) (["\'])
(?:(?!\1).)*\bContentItem-(?P<id>%s) (?:(?!\1).)*\bContentItem-(?P<id>%s)

View file

@ -10,7 +10,7 @@
class SBSIE(InfoExtractor): class SBSIE(InfoExtractor):
IE_DESC = 'sbs.com.au' IE_DESC = 'sbs.com.au'
_VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand|news)/video/(?:single/)?(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand(?:/video/(?:single/)?|.*?\bplay=)|news/(?:embeds/)?video/)(?P<id>[0-9]+)'
_TESTS = [{ _TESTS = [{
# Original URL is handled by the generic IE which finds the iframe: # Original URL is handled by the generic IE which finds the iframe:
@ -18,7 +18,7 @@ class SBSIE(InfoExtractor):
'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed', 'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed',
'md5': '3150cf278965eeabb5b4cea1c963fe0a', 'md5': '3150cf278965eeabb5b4cea1c963fe0a',
'info_dict': { 'info_dict': {
'id': '320403011771', 'id': '_rFBPRPO4pMR',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Dingo Conservation (The Feed)', 'title': 'Dingo Conservation (The Feed)',
'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5', 'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5',
@ -34,6 +34,15 @@ class SBSIE(InfoExtractor):
}, { }, {
'url': 'http://www.sbs.com.au/news/video/471395907773/The-Feed-July-9', 'url': 'http://www.sbs.com.au/news/video/471395907773/The-Feed-July-9',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.sbs.com.au/ondemand/?play=1836638787723',
'only_matching': True,
}, {
'url': 'https://www.sbs.com.au/ondemand/program/inside-windsor-castle?play=1283505731842',
'only_matching': True,
}, {
'url': 'https://www.sbs.com.au/news/embeds/video/1840778819866',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View file

@ -8,13 +8,17 @@
compat_str, compat_str,
float_or_none, float_or_none,
int_or_none, int_or_none,
smuggle_url,
str_or_none,
try_get,
) )
class STVPlayerIE(InfoExtractor): class STVPlayerIE(InfoExtractor):
IE_NAME = 'stv:player' IE_NAME = 'stv:player'
_VALID_URL = r'https?://player\.stv\.tv/(?P<type>episode|video)/(?P<id>[a-z0-9]{4})' _VALID_URL = r'https?://player\.stv\.tv/(?P<type>episode|video)/(?P<id>[a-z0-9]{4})'
_TEST = { _TESTS = [{
# shortform
'url': 'https://player.stv.tv/video/4gwd/emmerdale/60-seconds-on-set-with-laura-norton/', 'url': 'https://player.stv.tv/video/4gwd/emmerdale/60-seconds-on-set-with-laura-norton/',
'md5': '5adf9439c31d554f8be0707c7abe7e0a', 'md5': '5adf9439c31d554f8be0707c7abe7e0a',
'info_dict': { 'info_dict': {
@ -27,7 +31,11 @@ class STVPlayerIE(InfoExtractor):
'uploader_id': '1486976045', 'uploader_id': '1486976045',
}, },
'skip': 'this resource is unavailable outside of the UK', 'skip': 'this resource is unavailable outside of the UK',
} }, {
# episodes
'url': 'https://player.stv.tv/episode/4125/jennifer-saunders-memory-lane',
'only_matching': True,
}]
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1486976045/default_default/index.html?videoId=%s' BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1486976045/default_default/index.html?videoId=%s'
_PTYPE_MAP = { _PTYPE_MAP = {
'episode': 'episodes', 'episode': 'episodes',
@ -36,11 +44,31 @@ class STVPlayerIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
ptype, video_id = re.match(self._VALID_URL, url).groups() ptype, video_id = re.match(self._VALID_URL, url).groups()
resp = self._download_json(
'https://player.api.stv.tv/v1/%s/%s' % (self._PTYPE_MAP[ptype], video_id),
video_id)
result = resp['results'] webpage = self._download_webpage(url, video_id, fatal=False) or ''
props = (self._parse_json(self._search_regex(
r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>',
webpage, 'next data', default='{}'), video_id,
fatal=False) or {}).get('props') or {}
player_api_cache = try_get(
props, lambda x: x['initialReduxState']['playerApiCache']) or {}
api_path, resp = None, {}
for k, v in player_api_cache.items():
if k.startswith('/episodes/') or k.startswith('/shortform/'):
api_path, resp = k, v
break
else:
episode_id = str_or_none(try_get(
props, lambda x: x['pageProps']['episodeId']))
api_path = '/%s/%s' % (self._PTYPE_MAP[ptype], episode_id or video_id)
result = resp.get('results')
if not result:
resp = self._download_json(
'https://player.api.stv.tv/v1' + api_path, video_id)
result = resp['results']
video = result['video'] video = result['video']
video_id = compat_str(video['id']) video_id = compat_str(video['id'])
@ -57,7 +85,7 @@ def _real_extract(self, url):
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'id': video_id, 'id': video_id,
'url': self.BRIGHTCOVE_URL_TEMPLATE % video_id, 'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['GB']}),
'description': result.get('summary'), 'description': result.get('summary'),
'duration': float_or_none(video.get('length'), 1000), 'duration': float_or_none(video.get('length'), 1000),
'subtitles': subtitles, 'subtitles': subtitles,

View file

@ -9,7 +9,6 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_kwargs,
compat_parse_qs, compat_parse_qs,
compat_str, compat_str,
compat_urlparse, compat_urlparse,
@ -42,30 +41,16 @@ class TwitchBaseIE(InfoExtractor):
_CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko' _CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko'
_NETRC_MACHINE = 'twitch' _NETRC_MACHINE = 'twitch'
def _handle_error(self, response): _OPERATION_HASHES = {
if not isinstance(response, dict): 'CollectionSideBar': '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14',
return 'FilterableVideoTower_Videos': 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb',
error = response.get('error') 'ClipsCards__User': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777',
if error: 'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84',
raise ExtractorError( 'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e',
'%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')), 'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01',
expected=True) 'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c',
'VideoMetadata': '226edb3e692509f727fd56821f5653c05740242c82b0388883e0c0e75dcbf687',
def _call_api(self, path, item_id, *args, **kwargs): }
headers = kwargs.get('headers', {}).copy()
headers.update({
'Accept': 'application/vnd.twitchtv.v5+json; charset=UTF-8',
'Client-ID': self._CLIENT_ID,
})
kwargs.update({
'headers': headers,
'expected_status': (400, 410),
})
response = self._download_json(
'%s/%s' % (self._API_BASE, path), item_id,
*args, **compat_kwargs(kwargs))
self._handle_error(response)
return response
def _real_initialize(self): def _real_initialize(self):
self._login() self._login()
@ -151,13 +136,46 @@ def _prefer_source(self, formats):
}) })
self._sort_formats(formats) self._sort_formats(formats)
def _download_access_token(self, channel_name): def _download_base_gql(self, video_id, ops, note, fatal=True):
return self._call_api( return self._download_json(
'api/channels/%s/access_token' % channel_name, channel_name, 'https://gql.twitch.tv/gql', video_id, note,
'Downloading access token JSON') data=json.dumps(ops).encode(),
headers={
'Content-Type': 'text/plain;charset=UTF-8',
'Client-ID': self._CLIENT_ID,
}, fatal=fatal)
def _extract_channel_id(self, token, channel_name): def _download_gql(self, video_id, ops, note, fatal=True):
return compat_str(self._parse_json(token, channel_name)['channel_id']) for op in ops:
op['extensions'] = {
'persistedQuery': {
'version': 1,
'sha256Hash': self._OPERATION_HASHES[op['operationName']],
}
}
return self._download_base_gql(video_id, ops, note)
def _download_access_token(self, video_id, token_kind, param_name):
method = '%sPlaybackAccessToken' % token_kind
ops = {
'query': '''{
%s(
%s: "%s",
params: {
platform: "web",
playerBackend: "mediaplayer",
playerType: "site"
}
)
{
value
signature
}
}''' % (method, param_name, video_id),
}
return self._download_base_gql(
video_id, ops,
'Downloading %s access token GraphQL' % token_kind)['data'][method]
class TwitchVodIE(TwitchBaseIE): class TwitchVodIE(TwitchBaseIE):
@ -170,8 +188,6 @@ class TwitchVodIE(TwitchBaseIE):
) )
(?P<id>\d+) (?P<id>\d+)
''' '''
_ITEM_TYPE = 'vod'
_ITEM_SHORTCUT = 'v'
_TESTS = [{ _TESTS = [{
'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s', 'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
@ -181,7 +197,7 @@ class TwitchVodIE(TwitchBaseIE):
'title': 'LCK Summer Split - Week 6 Day 1', 'title': 'LCK Summer Split - Week 6 Day 1',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 17208, 'duration': 17208,
'timestamp': 1435131709, 'timestamp': 1435131734,
'upload_date': '20150624', 'upload_date': '20150624',
'uploader': 'Riot Games', 'uploader': 'Riot Games',
'uploader_id': 'riotgames', 'uploader_id': 'riotgames',
@ -230,10 +246,20 @@ class TwitchVodIE(TwitchBaseIE):
}] }]
def _download_info(self, item_id): def _download_info(self, item_id):
return self._extract_info( data = self._download_gql(
self._call_api( item_id, [{
'kraken/videos/%s' % item_id, item_id, 'operationName': 'VideoMetadata',
'Downloading video info JSON')) 'variables': {
'channelLogin': '',
'videoID': item_id,
},
}],
'Downloading stream metadata GraphQL')[0]['data']
video = data.get('video')
if video is None:
raise ExtractorError(
'Video %s does not exist' % item_id, expected=True)
return self._extract_info_gql(video, item_id)
@staticmethod @staticmethod
def _extract_info(info): def _extract_info(info):
@ -272,13 +298,33 @@ def _extract_info(info):
'is_live': is_live, 'is_live': is_live,
} }
@staticmethod
def _extract_info_gql(info, item_id):
vod_id = info.get('id') or item_id
# id backward compatibility for download archives
if vod_id[0] != 'v':
vod_id = 'v%s' % vod_id
thumbnail = url_or_none(info.get('previewThumbnailURL'))
if thumbnail:
for p in ('width', 'height'):
thumbnail = thumbnail.replace('{%s}' % p, '0')
return {
'id': vod_id,
'title': info.get('title') or 'Untitled Broadcast',
'description': info.get('description'),
'duration': int_or_none(info.get('lengthSeconds')),
'thumbnail': thumbnail,
'uploader': try_get(info, lambda x: x['owner']['displayName'], compat_str),
'uploader_id': try_get(info, lambda x: x['owner']['login'], compat_str),
'timestamp': unified_timestamp(info.get('publishedAt')),
'view_count': int_or_none(info.get('viewCount')),
}
def _real_extract(self, url): def _real_extract(self, url):
vod_id = self._match_id(url) vod_id = self._match_id(url)
info = self._download_info(vod_id) info = self._download_info(vod_id)
access_token = self._call_api( access_token = self._download_access_token(vod_id, 'video', 'id')
'api/vods/%s/access_token' % vod_id, vod_id,
'Downloading %s access token' % self._ITEM_TYPE)
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
'%s/vod/%s.m3u8?%s' % ( '%s/vod/%s.m3u8?%s' % (
@ -289,8 +335,8 @@ def _real_extract(self, url):
'allow_spectre': 'true', 'allow_spectre': 'true',
'player': 'twitchweb', 'player': 'twitchweb',
'playlist_include_framerate': 'true', 'playlist_include_framerate': 'true',
'nauth': access_token['token'], 'nauth': access_token['value'],
'nauthsig': access_token['sig'], 'nauthsig': access_token['signature'],
})), })),
vod_id, 'mp4', entry_protocol='m3u8_native') vod_id, 'mp4', entry_protocol='m3u8_native')
@ -333,37 +379,7 @@ def _make_video_result(node):
} }
class TwitchGraphQLBaseIE(TwitchBaseIE): class TwitchCollectionIE(TwitchBaseIE):
_PAGE_LIMIT = 100
_OPERATION_HASHES = {
'CollectionSideBar': '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14',
'FilterableVideoTower_Videos': 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb',
'ClipsCards__User': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777',
'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84',
'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e',
'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01',
'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c',
}
def _download_gql(self, video_id, ops, note, fatal=True):
for op in ops:
op['extensions'] = {
'persistedQuery': {
'version': 1,
'sha256Hash': self._OPERATION_HASHES[op['operationName']],
}
}
return self._download_json(
'https://gql.twitch.tv/gql', video_id, note,
data=json.dumps(ops).encode(),
headers={
'Content-Type': 'text/plain;charset=UTF-8',
'Client-ID': self._CLIENT_ID,
}, fatal=fatal)
class TwitchCollectionIE(TwitchGraphQLBaseIE):
_VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/collections/(?P<id>[^/]+)' _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/collections/(?P<id>[^/]+)'
_TESTS = [{ _TESTS = [{
@ -400,7 +416,9 @@ def _real_extract(self, url):
entries, playlist_id=collection_id, playlist_title=title) entries, playlist_id=collection_id, playlist_title=title)
class TwitchPlaylistBaseIE(TwitchGraphQLBaseIE): class TwitchPlaylistBaseIE(TwitchBaseIE):
_PAGE_LIMIT = 100
def _entries(self, channel_name, *args): def _entries(self, channel_name, *args):
cursor = None cursor = None
variables_common = self._make_variables(channel_name, *args) variables_common = self._make_variables(channel_name, *args)
@ -440,49 +458,6 @@ def _entries(self, channel_name, *args):
if not cursor or not isinstance(cursor, compat_str): if not cursor or not isinstance(cursor, compat_str):
break break
# Deprecated kraken v5 API
def _entries_kraken(self, channel_name, broadcast_type, sort):
access_token = self._download_access_token(channel_name)
channel_id = self._extract_channel_id(access_token['token'], channel_name)
offset = 0
counter_override = None
for counter in itertools.count(1):
response = self._call_api(
'kraken/channels/%s/videos/' % channel_id,
channel_id,
'Downloading video JSON page %s' % (counter_override or counter),
query={
'offset': offset,
'limit': self._PAGE_LIMIT,
'broadcast_type': broadcast_type,
'sort': sort,
})
videos = response.get('videos')
if not isinstance(videos, list):
break
for video in videos:
if not isinstance(video, dict):
continue
video_url = url_or_none(video.get('url'))
if not video_url:
continue
yield {
'_type': 'url_transparent',
'ie_key': TwitchVodIE.ie_key(),
'id': video.get('_id'),
'url': video_url,
'title': video.get('title'),
'description': video.get('description'),
'timestamp': unified_timestamp(video.get('published_at')),
'duration': float_or_none(video.get('length')),
'view_count': int_or_none(video.get('views')),
'language': video.get('language'),
}
offset += self._PAGE_LIMIT
total = int_or_none(response.get('_total'))
if total and offset >= total:
break
class TwitchVideosIE(TwitchPlaylistBaseIE): class TwitchVideosIE(TwitchPlaylistBaseIE):
_VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:videos|profile)' _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:videos|profile)'
@ -724,7 +699,7 @@ def _real_extract(self, url):
playlist_title='%s - Collections' % channel_name) playlist_title='%s - Collections' % channel_name)
class TwitchStreamIE(TwitchGraphQLBaseIE): class TwitchStreamIE(TwitchBaseIE):
IE_NAME = 'twitch:stream' IE_NAME = 'twitch:stream'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
@ -814,8 +789,9 @@ def _real_extract(self, url):
if not stream: if not stream:
raise ExtractorError('%s is offline' % channel_name, expected=True) raise ExtractorError('%s is offline' % channel_name, expected=True)
access_token = self._download_access_token(channel_name) access_token = self._download_access_token(
token = access_token['token'] channel_name, 'stream', 'channelName')
token = access_token['value']
stream_id = stream.get('id') or channel_name stream_id = stream.get('id') or channel_name
query = { query = {
@ -826,7 +802,7 @@ def _real_extract(self, url):
'player': 'twitchweb', 'player': 'twitchweb',
'playlist_include_framerate': 'true', 'playlist_include_framerate': 'true',
'segment_preference': '4', 'segment_preference': '4',
'sig': access_token['sig'].encode('utf-8'), 'sig': access_token['signature'].encode('utf-8'),
'token': token.encode('utf-8'), 'token': token.encode('utf-8'),
} }
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
@ -912,8 +888,8 @@ class TwitchClipsIE(TwitchBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
clip = self._download_json( clip = self._download_base_gql(
'https://gql.twitch.tv/gql', video_id, data=json.dumps({ video_id, {
'query': '''{ 'query': '''{
clip(slug: "%s") { clip(slug: "%s") {
broadcaster { broadcaster {
@ -937,10 +913,7 @@ def _real_extract(self, url):
} }
viewCount viewCount
} }
}''' % video_id, }''' % video_id}, 'Downloading clip GraphQL')['data']['clip']
}).encode(), headers={
'Client-ID': self._CLIENT_ID,
})['data']['clip']
if not clip: if not clip:
raise ExtractorError( raise ExtractorError(

View file

@ -251,10 +251,10 @@ class TwitterIE(TwitterBaseIE):
'info_dict': { 'info_dict': {
'id': '700207533655363584', 'id': '700207533655363584',
'ext': 'mp4', 'ext': 'mp4',
'title': 'simon vetugo - BEAT PROD: @suhmeduh #Damndaniel', 'title': 'simon vertugo - BEAT PROD: @suhmeduh #Damndaniel',
'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ', 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'uploader': 'simon vetugo', 'uploader': 'simon vertugo',
'uploader_id': 'simonvertugo', 'uploader_id': 'simonvertugo',
'duration': 30.0, 'duration': 30.0,
'timestamp': 1455777459, 'timestamp': 1455777459,
@ -312,6 +312,7 @@ class TwitterIE(TwitterBaseIE):
'timestamp': 1492000653, 'timestamp': 1492000653,
'upload_date': '20170412', 'upload_date': '20170412',
}, },
'skip': 'Account suspended',
}, { }, {
'url': 'https://twitter.com/i/web/status/910031516746514432', 'url': 'https://twitter.com/i/web/status/910031516746514432',
'info_dict': { 'info_dict': {
@ -380,6 +381,14 @@ class TwitterIE(TwitterBaseIE):
# promo_video_website card # promo_video_website card
'url': 'https://twitter.com/GunB1g/status/1163218564784017422', 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
'only_matching': True, 'only_matching': True,
}, {
# promo_video_convo card
'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
'only_matching': True,
}, {
# appplayer card
'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -462,7 +471,30 @@ def get_binding_value(k):
return try_get(o, lambda x: x[x['type'].lower() + '_value']) return try_get(o, lambda x: x[x['type'].lower() + '_value'])
card_name = card['name'].split(':')[-1] card_name = card['name'].split(':')[-1]
if card_name in ('amplify', 'promo_video_website'): if card_name == 'player':
info.update({
'_type': 'url',
'url': get_binding_value('player_url'),
})
elif card_name == 'periscope_broadcast':
info.update({
'_type': 'url',
'url': get_binding_value('url') or get_binding_value('player_url'),
'ie_key': PeriscopeIE.ie_key(),
})
elif card_name == 'broadcast':
info.update({
'_type': 'url',
'url': get_binding_value('broadcast_url'),
'ie_key': TwitterBroadcastIE.ie_key(),
})
elif card_name == 'summary':
info.update({
'_type': 'url',
'url': get_binding_value('card_url'),
})
# amplify, promo_video_website, promo_video_convo, appplayer, ...
else:
is_amplify = card_name == 'amplify' is_amplify = card_name == 'amplify'
vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url') vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player')) content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
@ -488,25 +520,6 @@ def get_binding_value(k):
'duration': int_or_none(get_binding_value( 'duration': int_or_none(get_binding_value(
'content_duration_seconds')), 'content_duration_seconds')),
}) })
elif card_name == 'player':
info.update({
'_type': 'url',
'url': get_binding_value('player_url'),
})
elif card_name == 'periscope_broadcast':
info.update({
'_type': 'url',
'url': get_binding_value('url') or get_binding_value('player_url'),
'ie_key': PeriscopeIE.ie_key(),
})
elif card_name == 'broadcast':
info.update({
'_type': 'url',
'url': get_binding_value('broadcast_url'),
'ie_key': TwitterBroadcastIE.ie_key(),
})
else:
raise ExtractorError('Unsupported Twitter Card.')
else: else:
expanded_url = try_get(status, lambda x: x['entities']['urls'][0]['expanded_url']) expanded_url = try_get(status, lambda x: x['entities']['urls'][0]['expanded_url'])
if not expanded_url: if not expanded_url:

View file

@ -45,6 +45,7 @@ def aa_decode(aa_code):
class XFileShareIE(InfoExtractor): class XFileShareIE(InfoExtractor):
_SITES = ( _SITES = (
(r'aparat\.cam', 'Aparat'),
(r'clipwatching\.com', 'ClipWatching'), (r'clipwatching\.com', 'ClipWatching'),
(r'gounlimited\.to', 'GoUnlimited'), (r'gounlimited\.to', 'GoUnlimited'),
(r'govid\.me', 'GoVid'), (r'govid\.me', 'GoVid'),
@ -78,6 +79,9 @@ class XFileShareIE(InfoExtractor):
'title': 'sample', 'title': 'sample',
'thumbnail': r're:http://.*\.jpg', 'thumbnail': r're:http://.*\.jpg',
}, },
}, {
'url': 'https://aparat.cam/n4d6dh0wvlpr',
'only_matching': True,
}] }]
@staticmethod @staticmethod

View file

@ -5819,3 +5819,20 @@ def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=
if func and val not in ignore: if func and val not in ignore:
val = func(val) val = func(val)
return template % val if val not in ignore else default return template % val if val not in ignore else default
def clean_podcast_url(url):
return re.sub(r'''(?x)
(?:
(?:
chtbl\.com/track|
media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
play\.podtrac\.com
)/[^/]+|
(?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
flex\.acast\.com|
pd(?:
cn\.co| # https://podcorn.com/analytics-prefix/
st\.fm # https://podsights.com/docs/
)/e
)/''', '', url)