Merge branch 'master' of https://github.com/blackjack4494/yt-dlc into fix-tmz

This commit is contained in:
Diego Fernando Rodríguez Varón 2020-11-14 09:40:51 -05:00
commit a2044d57ca
17 changed files with 607 additions and 364 deletions

View file

@ -82,7 +82,7 @@ jobs:
- name: Set up Python - name: Set up Python
uses: actions/setup-python@v2 uses: actions/setup-python@v2
with: with:
python-version: '3.x' python-version: '3.8'
- name: Install Requirements - name: Install Requirements
run: pip install pyinstaller run: pip install pyinstaller
- name: Bump version - name: Bump version
@ -109,14 +109,14 @@ jobs:
runs-on: windows-latest runs-on: windows-latest
needs: build_unix needs: [build_unix, build_windows]
steps: steps:
- uses: actions/checkout@v2 - uses: actions/checkout@v2
- name: Set up Python 3.5.4 32-Bit - name: Set up Python 3.4.4 32-Bit
uses: actions/setup-python@v2 uses: actions/setup-python@v2
with: with:
python-version: '3.5.4' python-version: '3.4.4'
architecture: 'x86' architecture: 'x86'
- name: Install Requirements for 32 Bit - name: Install Requirements for 32 Bit
run: pip install pyinstaller==3.5 run: pip install pyinstaller==3.5
@ -146,10 +146,10 @@ jobs:
SHA2_UNIX: ${{ needs.build_unix.outputs.sha2_unix }} SHA2_UNIX: ${{ needs.build_unix.outputs.sha2_unix }}
YTDLC_VERSION: ${{ needs.build_unix.outputs.ytdlc_version }} YTDLC_VERSION: ${{ needs.build_unix.outputs.ytdlc_version }}
run: | run: |
echo "version:$YTDLC_VERSION" >> SHA2-256SUMS echo "version:${env:YTDLC_VERSION}" >> SHA2-256SUMS
echo "youtube-dlc.exe:$SHA2_WINDOWS" >> SHA2-256SUMS echo "youtube-dlc.exe:${env:SHA2_WINDOWS}" >> SHA2-256SUMS
echo "youtube-dlc32.exe:$SHA2_WINDOWS32" >> SHA2-256SUMS echo "youtube-dlc_x86.exe:${env:SHA2_WINDOWS32}" >> SHA2-256SUMS
echo "youtube-dlc:$SHA2_UNIX" >> SHA2-256SUMS echo "youtube-dlc:${env:SHA2_UNIX}" >> SHA2-256SUMS
- name: Upload 256SUMS file - name: Upload 256SUMS file
id: upload-sums id: upload-sums

View file

@ -1,15 +1,15 @@
[![Build Status](https://travis-ci.com/blackjack4494/yt-dlc.svg?branch=master)](https://travis-ci.com/blackjack4494/yt-dlc) [![Build Status](https://travis-ci.com/blackjack4494/yt-dlc.svg?branch=master)](https://travis-ci.com/blackjack4494/yt-dlc)
[![PyPi](https://img.shields.io/pypi/v/youtube-dlc.svg)](https://pypi.org/project/youtube-dlc) [![PyPi](https://img.shields.io/pypi/v/youtube-dlc.svg)](https://pypi.org/project/youtube-dlc)
[![Downloads](https://pepy.tech/badge/youtube-dlc)](https://pepy.tech/project/youtube-dlc)
[![Gitter chat](https://img.shields.io/gitter/room/youtube-dlc/community)](https://gitter.im/youtube-dlc) [![Gitter chat](https://img.shields.io/gitter/room/youtube-dlc/community)](https://gitter.im/youtube-dlc)
[![License: Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](https://github.com/blackjack4494/youtube-dlc/blob/master/LICENSE) [![License: Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](https://github.com/blackjack4494/yt-dlc/blob/master/LICENSE)
youtube-dlc - download videos from youtube.com or other video platforms. youtube-dlc - download videos from youtube.com or other video platforms.
youtube-dlc is a fork of youtube-dl with the intention of getting features tested by the community merged in the tool faster, since youtube-dl's development seems to be slowing down. (https://web.archive.org/web/20201014194602/https://github.com/ytdl-org/youtube-dl/issues/26462) youtube-dlc is a fork of youtube-dl with the intention of getting features tested by the community merged in the tool faster, since youtube-dl's development seems to be slowing down. (https://web.archive.org/web/20201014194602/https://github.com/ytdl-org/youtube-dl/issues/26462)
- [INSTALLATION](#installation) - [INSTALLATION](#installation)
- [UPDATE](#update)
- [DESCRIPTION](#description) - [DESCRIPTION](#description)
- [OPTIONS](#options) - [OPTIONS](#options)
- [Network Options:](#network-options) - [Network Options:](#network-options)
@ -44,6 +44,10 @@ # INSTALLATION
python -m pip install --upgrade youtube-dlc python -m pip install --upgrade youtube-dlc
If you want to install the current master branch
python -m pip install git+https://github.com/blackjack4494/yt-dlc
**UNIX** (Linux, macOS, etc.) **UNIX** (Linux, macOS, etc.)
Using wget: Using wget:

View file

@ -66,7 +66,7 @@ def run(self):
description=DESCRIPTION, description=DESCRIPTION,
long_description=LONG_DESCRIPTION, long_description=LONG_DESCRIPTION,
# long_description_content_type="text/markdown", # long_description_content_type="text/markdown",
url="https://github.com/blackjack4494/youtube-dlc", url="https://github.com/blackjack4494/yt-dlc",
packages=find_packages(exclude=("youtube_dl","test",)), packages=find_packages(exclude=("youtube_dl","test",)),
#packages=[ #packages=[
# 'youtube_dlc', # 'youtube_dlc',

View file

@ -364,8 +364,10 @@ def download(self, filename, info_dict, subtitle=False):
else '%.2f' % sleep_interval)) else '%.2f' % sleep_interval))
time.sleep(sleep_interval) time.sleep(sleep_interval)
else: else:
if self.params.get('sleep_interval_subtitles') > 0: sleep_interval_sub = 0
if type(self.params.get('sleep_interval_subtitles')) is int:
sleep_interval_sub = self.params.get('sleep_interval_subtitles') sleep_interval_sub = self.params.get('sleep_interval_subtitles')
if sleep_interval_sub > 0:
self.to_screen( self.to_screen(
'[download] Sleeping %s seconds...' % ( '[download] Sleeping %s seconds...' % (
sleep_interval_sub)) sleep_interval_sub))

View file

@ -115,8 +115,10 @@ class CurlFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict): def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '--location', '-o', tmpfilename] cmd = [self.exe, '--location', '-o', tmpfilename]
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items(): for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)] cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._bool_option('--continue-at', 'continuedl', '-', '0') cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
cmd += self._valueless_option('--silent', 'noprogress') cmd += self._valueless_option('--silent', 'noprogress')
cmd += self._valueless_option('--verbose', 'verbose') cmd += self._valueless_option('--verbose', 'verbose')
@ -150,6 +152,7 @@ class AxelFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict): def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-o', tmpfilename] cmd = [self.exe, '-o', tmpfilename]
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items(): for key, val in info_dict['http_headers'].items():
cmd += ['-H', '%s: %s' % (key, val)] cmd += ['-H', '%s: %s' % (key, val)]
cmd += self._configuration_args() cmd += self._configuration_args()
@ -162,6 +165,7 @@ class WgetFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict): def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items(): for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)] cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._option('--limit-rate', 'ratelimit') cmd += self._option('--limit-rate', 'ratelimit')
@ -189,6 +193,7 @@ def _make_cmd(self, tmpfilename, info_dict):
if dn: if dn:
cmd += ['--dir', dn] cmd += ['--dir', dn]
cmd += ['--out', os.path.basename(tmpfilename)] cmd += ['--out', os.path.basename(tmpfilename)]
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items(): for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)] cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._option('--interface', 'source_address') cmd += self._option('--interface', 'source_address')
@ -206,6 +211,8 @@ def available(cls):
def _make_cmd(self, tmpfilename, info_dict): def _make_cmd(self, tmpfilename, info_dict):
cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']] cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items(): for key, val in info_dict['http_headers'].items():
cmd += ['%s:%s' % (key, val)] cmd += ['%s:%s' % (key, val)]
return cmd return cmd
@ -253,7 +260,7 @@ def _call_downloader(self, tmpfilename, info_dict):
# if end_time: # if end_time:
# args += ['-t', compat_str(end_time - start_time)] # args += ['-t', compat_str(end_time - start_time)]
if info_dict['http_headers'] and re.match(r'^https?://', url): if info_dict.get('http_headers') is not None and re.match(r'^https?://', url):
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
headers = handle_youtubedl_headers(info_dict['http_headers']) headers = handle_youtubedl_headers(info_dict['http_headers'])

View file

@ -82,7 +82,10 @@ def parse_yt_initial_data(data):
offset = int(replay_chat_item_action['videoOffsetTimeMsec']) offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
processed_fragment.extend( processed_fragment.extend(
json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n') json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
try:
continuation_id = live_chat_continuation['continuations'][0]['liveChatReplayContinuationData']['continuation'] continuation_id = live_chat_continuation['continuations'][0]['liveChatReplayContinuationData']['continuation']
except KeyError:
continuation_id = None
self._append_fragment(ctx, processed_fragment) self._append_fragment(ctx, processed_fragment)

View file

@ -1175,6 +1175,7 @@
from .thisamericanlife import ThisAmericanLifeIE from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE from .thisav import ThisAVIE
from .thisoldhouse import ThisOldHouseIE from .thisoldhouse import ThisOldHouseIE
from .thisvid import ThisVidIE
from .threeqsdn import ThreeQSDNIE from .threeqsdn import ThreeQSDNIE
from .tiktok import TikTokIE from .tiktok import TikTokIE
from .tinypic import TinyPicIE from .tinypic import TinyPicIE
@ -1541,4 +1542,5 @@
) )
from .zdf import ZDFIE, ZDFChannelIE from .zdf import ZDFIE, ZDFChannelIE
from .zingmp3 import ZingMp3IE from .zingmp3 import ZingMp3IE
from .zoom import ZoomIE
from .zype import ZypeIE from .zype import ZypeIE

View file

@ -36,6 +36,9 @@ class LA7IE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
if not url.startswith('http'):
url = '%s//%s' % (self.http_scheme(), url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
player_data = self._search_regex( player_data = self._search_regex(

View file

@ -12,6 +12,7 @@
parse_duration, parse_duration,
remove_end, remove_end,
try_get, try_get,
urljoin,
) )
@ -93,6 +94,14 @@ class MailRuIE(InfoExtractor):
{ {
'url': 'https://my.mail.ru//list//sinyutin10/video/_myvideo/4.html', 'url': 'https://my.mail.ru//list//sinyutin10/video/_myvideo/4.html',
'only_matching': True, 'only_matching': True,
},
{
'url': 'https://my.mail.ru/mail/cloud-strife/video/embed/Games/2009',
'only_matching': True,
},
{
'url': 'https://videoapi.my.mail.ru/videos/embed/mail/cloud-strife/Games/2009.html',
'only_matching': True,
} }
] ]
@ -110,7 +119,7 @@ def _real_extract(self, url):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
page_config = self._parse_json(self._search_regex([ page_config = self._parse_json(self._search_regex([
r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>', r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>',
r'(?s)"video":\s*(\{.+?\}),'], r'(?s)"video":\s*({.+?}),'],
webpage, 'page config', default='{}'), video_id, fatal=False) webpage, 'page config', default='{}'), video_id, fatal=False)
if page_config: if page_config:
meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl') or page_config.get('metadataUrl') meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl') or page_config.get('metadataUrl')
@ -121,7 +130,7 @@ def _real_extract(self, url):
# fix meta_url if missing the host address # fix meta_url if missing the host address
if re.match(r'^\/\+\/', meta_url): if re.match(r'^\/\+\/', meta_url):
meta_url = 'https://my.mail.ru' + meta_url meta_url = urljoin('https://my.mail.ru', meta_url)
if meta_url: if meta_url:
video_data = self._download_json( video_data = self._download_json(

View file

@ -13,6 +13,7 @@ class SkyItaliaBaseIE(InfoExtractor):
'high': [854, 480], 'high': [854, 480],
'hd': [1280, 720] 'hd': [1280, 720]
} }
_GEO_BYPASS = False
def _extract_video_id(self, url): def _extract_video_id(self, url):
webpage = self._download_webpage(url, 'skyitalia') webpage = self._download_webpage(url, 'skyitalia')
@ -43,6 +44,9 @@ def _get_formats(self, video_id, token):
'height': r[1] 'height': r[1]
}) })
if not formats and video_data.get('geob') == 1:
self.raise_geo_restricted(countries=['IT'])
self._sort_formats(formats) self._sort_formats(formats)
title = video_data.get('title') title = video_data.get('title')
thumb = video_data.get('thumb') thumb = video_data.get('thumb')

View file

@ -0,0 +1,97 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class ThisVidIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?thisvid\.com/(?P<type>videos|embed)/(?P<id>[A-Za-z0-9-]+/?)'
_TESTS = [{
'url': 'https://thisvid.com/videos/french-boy-pantsed/',
'md5': '3397979512c682f6b85b3b04989df224',
'info_dict': {
'id': '2400174',
'ext': 'mp4',
'title': 'French Boy Pantsed',
'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
'age_limit': 18,
}
}, {
'url': 'https://thisvid.com/embed/2400174/',
'md5': '3397979512c682f6b85b3b04989df224',
'info_dict': {
'id': '2400174',
'ext': 'mp4',
'title': 'French Boy Pantsed',
'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
'age_limit': 18,
}
}]
def _real_extract(self, url):
main_id = self._match_id(url)
webpage = self._download_webpage(url, main_id)
# URL decryptor was reversed from version 4.0.4, later verified working with 5.2.0 and may change in the future.
kvs_version = self._html_search_regex(r'<script [^>]+?src="https://thisvid\.com/player/kt_player\.js\?v=(\d+(\.\d+)+)">', webpage, 'kvs_version', fatal=False)
if not kvs_version.startswith("5."):
self.report_warning("Major version change (" + kvs_version + ") in player engine--Download may fail.")
title = self._html_search_regex(r'<title>(?:Video: )?(.+?)(?: - (?:\w+ porn at )?ThisVid(?:.com| tube))?</title>', webpage, 'title')
# video_id, video_url and license_code from the 'flashvars' JSON object:
video_id = self._html_search_regex(r"video_id: '([0-9]+)',", webpage, 'video_id')
video_url = self._html_search_regex(r"video_url: '(function/0/.+?)',", webpage, 'video_url')
license_code = self._html_search_regex(r"license_code: '([0-9$]{16})',", webpage, 'license_code')
thumbnail = self._html_search_regex(r"preview_url: '((?:https?:)?//media.thisvid.com/.+?.jpg)',", webpage, 'thumbnail', fatal=False)
if thumbnail.startswith("//"):
thumbnail = "https:" + thumbnail
if (re.match(self._VALID_URL, url).group('type') == "videos"):
display_id = main_id
else:
display_id = self._search_regex(r'<link rel="canonical" href="' + self._VALID_URL + r'">', webpage, 'display_id', fatal=False),
return {
'id': video_id,
'display_id': display_id,
'title': title,
'url': getrealurl(video_url, license_code),
'thumbnail': thumbnail,
'age_limit': 18,
}
def getrealurl(video_url, license_code):
urlparts = video_url.split('/')[2:]
license = getlicensetoken(license_code)
newmagic = urlparts[5][:32]
for o in range(len(newmagic) - 1, -1, -1):
new = ""
l = (o + sum([int(n) for n in license[o:]])) % 32
for i in range(0, len(newmagic)):
if i == o:
new += newmagic[l]
elif i == l:
new += newmagic[o]
else:
new += newmagic[i]
newmagic = new
urlparts[5] = newmagic + urlparts[5][32:]
return "/".join(urlparts)
def getlicensetoken(license):
modlicense = license.replace("$", "").replace("0", "1")
center = int(len(modlicense) / 2)
fronthalf = int(modlicense[:center + 1])
backhalf = int(modlicense[center:])
modlicense = str(4 * abs(fronthalf - backhalf))
retval = ""
for o in range(0, center + 1):
for i in range(1, 5):
retval += str((int(license[o + i]) + int(modlicense[o])) % 10)
return retval

View file

@ -308,17 +308,26 @@ def _real_extract(self, url):
'url': thumbnail.get('url'), 'url': thumbnail.get('url'),
}) })
subtitles = {}
try:
# New way to fetch subtitles
new_video = self._download_json( new_video = self._download_json(
'https://www.viki.com/api/videos/%s' % video_id, video_id, 'https://www.viki.com/api/videos/%s' % video_id, video_id,
'Downloading new video JSON to get subtitles', headers={'x-viki-app-ver': '2.2.5.1428709186'}, expected_status=[200, 400, 404]) 'Downloading new video JSON to get subtitles', headers={'x-viki-app-ver': '2.2.5.1428709186'}, expected_status=[200, 400, 404])
subtitles = {}
for sub in new_video.get('streamSubtitles').get('dash'): for sub in new_video.get('streamSubtitles').get('dash'):
subtitles[sub.get('srclang')] = [{ subtitles[sub.get('srclang')] = [{
'ext': 'vtt', 'ext': 'vtt',
'url': sub.get('src'), 'url': sub.get('src'),
'completion': sub.get('percentage'), 'completion': sub.get('percentage'),
}] }]
except AttributeError:
# fall-back to the old way if there isn't a streamSubtitles attribute
for subtitle_lang, _ in video.get('subtitle_completions', {}).items():
subtitles[subtitle_lang] = [{
'ext': subtitles_format,
'url': self._prepare_call(
'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)),
} for subtitles_format in ('srt', 'vtt')]
result = { result = {
'id': video_id, 'id': video_id,

View file

@ -11,7 +11,6 @@
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
merge_dicts, merge_dicts,
remove_start,
try_get, try_get,
urlencode_postdata, urlencode_postdata,
) )
@ -19,10 +18,10 @@
class VLiveIE(NaverBaseIE): class VLiveIE(NaverBaseIE):
IE_NAME = 'vlive' IE_NAME = 'vlive'
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|post)/(?P<id>(?:\d-)?[0-9]+)'
_NETRC_MACHINE = 'vlive' _NETRC_MACHINE = 'vlive'
_TESTS = [{ _TESTS = [{
'url': 'http://www.vlive.tv/video/1326', 'url': 'https://www.vlive.tv/video/1326',
'md5': 'cc7314812855ce56de70a06a27314983', 'md5': 'cc7314812855ce56de70a06a27314983',
'info_dict': { 'info_dict': {
'id': '1326', 'id': '1326',
@ -32,8 +31,21 @@ class VLiveIE(NaverBaseIE):
'view_count': int, 'view_count': int,
'uploader_id': 'muploader_a', 'uploader_id': 'muploader_a',
}, },
}, { },
'url': 'http://www.vlive.tv/video/16937', {
'url': 'https://vlive.tv/post/1-18244258',
'md5': 'cc7314812855ce56de70a06a27314983',
'info_dict': {
'id': '1326',
'ext': 'mp4',
'title': "[V LIVE] Girl's Day's Broadcast",
'creator': "Girl's Day",
'view_count': int,
'uploader_id': 'muploader_a',
},
},
{
'url': 'https://www.vlive.tv/video/16937',
'info_dict': { 'info_dict': {
'id': '16937', 'id': '16937',
'ext': 'mp4', 'ext': 'mp4',
@ -96,50 +108,69 @@ def is_logged_in():
raise ExtractorError('Unable to log in', expected=True) raise ExtractorError('Unable to log in', expected=True)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) # url may match on a post or a video url with a post_id potentially matching a video_id
working_id = self._match_id(url)
webpage = self._download_webpage(url, working_id)
webpage = self._download_webpage( PARAMS_RE = r'window\.__PRELOADED_STATE__\s*=\s*({.*});?\s*</script>'
'https://www.vlive.tv/video/%s' % video_id, video_id) PARAMS_FIELD = 'params'
VIDEO_PARAMS_RE = r'\bvlive\.video\.init\(([^)]+)'
VIDEO_PARAMS_FIELD = 'video params'
params = self._parse_json(self._search_regex(
VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD, default=''), video_id,
transform_source=lambda s: '[' + s + ']', fatal=False)
if not params or len(params) < 7:
params = self._search_regex( params = self._search_regex(
VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD) PARAMS_RE, webpage, PARAMS_FIELD, default='', flags=re.DOTALL)
params = [p.strip(r'"') for p in re.split(r'\s*,\s*', params)] params = self._parse_json(params, working_id, fatal=False)
status, long_video_id, key = params[2], params[5], params[6] video_params = try_get(params, lambda x: x["postDetail"]["post"]["officialVideo"], dict)
status = remove_start(status, 'PRODUCT_')
if status in ('LIVE_ON_AIR', 'BIG_EVENT_ON_AIR'): if video_params is None:
return self._live(video_id, webpage) error = try_get(params, lambda x: x["postDetail"]["error"], dict)
elif status in ('VOD_ON_AIR', 'BIG_EVENT_INTRO'): error_data = try_get(error, lambda x: x["data"], dict)
return self._replay(video_id, webpage, long_video_id, key) error_video = try_get(error_data, lambda x: x["officialVideo"], dict)
error_msg = try_get(error, lambda x: x["message"], compat_str)
product_type = try_get(error_data,
[lambda x: x["officialVideo"]["productType"],
lambda x: x["board"]["boardType"]],
compat_str)
if status == 'LIVE_END': if error_video is not None:
raise ExtractorError('Uploading for replay. Please wait...', if product_type in ('VLIVE_PLUS', 'VLIVE+'):
expected=True) self.raise_login_required('This video is only available with V LIVE+.')
elif status == 'COMING_SOON': elif error_msg is not None:
raise ExtractorError('Coming soon!', expected=True) raise ExtractorError('V LIVE reported the following error: %s' % error_msg)
elif status == 'CANCELED':
raise ExtractorError('We are sorry, '
'but the live broadcast has been canceled.',
expected=True)
elif status == 'ONLY_APP':
raise ExtractorError('Unsupported video type', expected=True)
else: else:
raise ExtractorError('Unknown status %s' % status) raise ExtractorError('Failed to extract video parameters.')
elif 'post' in url:
raise ExtractorError('Url does not appear to be a video post.', expected=True)
else:
raise ExtractorError('Failed to extract video parameters.')
def _get_common_fields(self, webpage): video_id = working_id if 'video' in url else str(video_params["videoSeq"])
video_type = video_params["type"]
if video_type in ('VOD'):
encoding_status = video_params["encodingStatus"]
if encoding_status == 'COMPLETE':
return self._replay(video_id, webpage, params, video_params)
else:
raise ExtractorError('VOD encoding not yet complete. Please try again later.',
expected=True)
elif video_type in ('LIVE'):
video_status = video_params["status"]
if video_status in ('RESERVED'):
raise ExtractorError('Coming soon!', expected=True)
elif video_status in ('ENDED', 'END'):
raise ExtractorError('Uploading for replay. Please wait...', expected=True)
else:
return self._live(video_id, webpage, params)
else:
raise ExtractorError('Unknown video type %s' % video_type)
def _get_common_fields(self, webpage, params):
title = self._og_search_title(webpage) title = self._og_search_title(webpage)
creator = self._html_search_regex( description = self._html_search_meta(
r'<div[^>]+class="info_area"[^>]*>\s*(?:<em[^>]*>.*?</em\s*>\s*)?<a\s+[^>]*>([^<]+)', ['og:description', 'description', 'twitter:description'],
webpage, 'creator', fatal=False) webpage, 'description', default=None)
creator = (try_get(params, lambda x: x["channel"]["channel"]["channelName"], compat_str)
or self._search_regex(r'on (.*) channel', description or '', 'creator', fatal=False))
thumbnail = self._og_search_thumbnail(webpage) thumbnail = self._og_search_thumbnail(webpage)
return { return {
'title': title, 'title': title,
@ -147,24 +178,21 @@ def _get_common_fields(self, webpage):
'thumbnail': thumbnail, 'thumbnail': thumbnail,
} }
def _live(self, video_id, webpage): def _live(self, video_id, webpage, params):
init_page = self._download_init_page(video_id) LIVE_INFO_ENDPOINT = 'https://www.vlive.tv/globalv-web/vam-web/old/v3/live/%s/playInfo' % video_id
play_info = self._download_json(LIVE_INFO_ENDPOINT, video_id,
headers={"referer": "https://www.vlive.tv"})
live_params = self._search_regex( streams = try_get(play_info, lambda x: x["result"]["streamList"], list) or []
r'"liveStreamInfo"\s*:\s*(".*"),',
init_page, 'live stream info')
live_params = self._parse_json(live_params, video_id)
live_params = self._parse_json(live_params, video_id)
formats = [] formats = []
for vid in live_params.get('resolutions', []): for stream in streams:
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
vid['cdnUrl'], video_id, 'mp4', stream['serviceUrl'], video_id, 'mp4',
m3u8_id=vid.get('name'),
fatal=False, live=True)) fatal=False, live=True))
self._sort_formats(formats) self._sort_formats(formats)
info = self._get_common_fields(webpage) info = self._get_common_fields(webpage, params)
info.update({ info.update({
'title': self._live_title(info['title']), 'title': self._live_title(info['title']),
'id': video_id, 'id': video_id,
@ -173,44 +201,37 @@ def _live(self, video_id, webpage):
}) })
return info return info
def _replay(self, video_id, webpage, long_video_id, key): def _replay(self, video_id, webpage, params, video_params):
if '' in (long_video_id, key): long_video_id = video_params["vodId"]
init_page = self._download_init_page(video_id)
video_info = self._parse_json(self._search_regex( VOD_KEY_ENDPOINT = 'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/%s/inkey' % video_id
(r'(?s)oVideoStatus\s*=\s*({.+?})\s*</script', key_json = self._download_json(VOD_KEY_ENDPOINT, video_id,
r'(?s)oVideoStatus\s*=\s*({.+})'), init_page, 'video info'), headers={"referer": "https://www.vlive.tv"})
video_id) key = key_json["inkey"]
if video_info.get('status') == 'NEED_CHANNEL_PLUS':
self.raise_login_required(
'This video is only available for CH+ subscribers')
long_video_id, key = video_info['vid'], video_info['inkey']
return merge_dicts( return merge_dicts(
self._get_common_fields(webpage), self._get_common_fields(webpage, params),
self._extract_video_info(video_id, long_video_id, key)) self._extract_video_info(video_id, long_video_id, key))
def _download_init_page(self, video_id):
return self._download_webpage(
'https://www.vlive.tv/video/init/view',
video_id, note='Downloading live webpage',
data=urlencode_postdata({'videoSeq': video_id}),
headers={
'Referer': 'https://www.vlive.tv/video/%s' % video_id,
'Content-Type': 'application/x-www-form-urlencoded'
})
class VLiveChannelIE(InfoExtractor): class VLiveChannelIE(InfoExtractor):
IE_NAME = 'vlive:channel' IE_NAME = 'vlive:channel'
_VALID_URL = r'https?://channels\.vlive\.tv/(?P<id>[0-9A-Z]+)' _VALID_URL = r'https?://(?:(?:www|m)\.)?(?:channels\.vlive\.tv/|vlive\.tv/channels?/)(?P<id>[0-9A-Z]+)'
_TEST = { _TESTS = [{
'url': 'http://channels.vlive.tv/FCD4B', 'url': 'https://channels.vlive.tv/FCD4B',
'info_dict': { 'info_dict': {
'id': 'FCD4B', 'id': 'FCD4B',
'title': 'MAMAMOO', 'title': 'MAMAMOO',
}, },
'playlist_mincount': 110 'playlist_mincount': 110
} }, {
'url': 'https://www.vlive.tv/channel/FCD4B',
'info_dict': {
'id': 'FCD4B',
'title': 'MAMAMOO',
},
'playlist_mincount': 110
}]
_APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b' _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
def _real_extract(self, url): def _real_extract(self, url):

View file

@ -36,6 +36,7 @@
get_element_by_attribute, get_element_by_attribute,
get_element_by_id, get_element_by_id,
int_or_none, int_or_none,
js_to_json,
mimetype2ext, mimetype2ext,
orderedSet, orderedSet,
parse_codecs, parse_codecs,
@ -70,6 +71,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
_LOGIN_REQUIRED = False _LOGIN_REQUIRED = False
_PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}' _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
_INITIAL_DATA_RE = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
_YTCFG_DATA_RE = r"ytcfg.set\(({.*?})\)"
_YOUTUBE_CLIENT_HEADERS = { _YOUTUBE_CLIENT_HEADERS = {
'x-youtube-client-name': '1', 'x-youtube-client-name': '1',
@ -274,11 +277,19 @@ def warn(message):
def _download_webpage_handle(self, *args, **kwargs): def _download_webpage_handle(self, *args, **kwargs):
query = kwargs.get('query', {}).copy() query = kwargs.get('query', {}).copy()
query['disable_polymer'] = 'true'
kwargs['query'] = query kwargs['query'] = query
return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle( return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
*args, **compat_kwargs(kwargs)) *args, **compat_kwargs(kwargs))
def _get_yt_initial_data(self, video_id, webpage):
config = self._search_regex(
(r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
webpage, 'ytInitialData', default=None)
if config:
return self._parse_json(
uppercase_escape(config), video_id, fatal=False)
def _real_initialize(self): def _real_initialize(self):
if self._downloader is None: if self._downloader is None:
return return
@ -288,15 +299,61 @@ def _real_initialize(self):
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor): class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
# Extract entries from page with "Load more" button
def _entries(self, page, playlist_id): def _find_entries_in_json(self, extracted):
more_widget_html = content_html = page entries = []
for page_num in itertools.count(1): c = {}
for entry in self._process_page(content_html):
def _real_find(obj):
if obj is None or isinstance(obj, str):
return
if type(obj) is list:
for elem in obj:
_real_find(elem)
if type(obj) is dict:
if self._is_entry(obj):
entries.append(obj)
return
if 'continuationCommand' in obj:
c['continuation'] = obj
return
for _, o in obj.items():
_real_find(o)
_real_find(extracted)
return entries, try_get(c, lambda x: x["continuation"])
def _entries(self, page, playlist_id, max_pages=None):
seen = []
yt_conf = {}
for m in re.finditer(self._YTCFG_DATA_RE, page):
parsed = self._parse_json(m.group(1), playlist_id,
transform_source=js_to_json, fatal=False)
if parsed:
yt_conf.update(parsed)
data_json = self._parse_json(self._search_regex(self._INITIAL_DATA_RE, page, 'ytInitialData'), None)
for page_num in range(1, max_pages + 1) if max_pages is not None else itertools.count(1):
entries, continuation = self._find_entries_in_json(data_json)
processed = self._process_entries(entries, seen)
if not processed:
break
for entry in processed:
yield entry yield entry
mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html) if not continuation or not yt_conf:
if not mobj: break
continuation_token = try_get(continuation, lambda x: x['continuationCommand']['token'])
continuation_url = try_get(continuation, lambda x: x['commandMetadata']['webCommandMetadata']['apiUrl'])
if not continuation_token or not continuation_url:
break break
count = 0 count = 0
@ -305,12 +362,23 @@ def _entries(self, page, playlist_id):
try: try:
# Downloading page may result in intermittent 5xx HTTP error # Downloading page may result in intermittent 5xx HTTP error
# that is usually worked around with a retry # that is usually worked around with a retry
more = self._download_json( data_json = self._download_json(
'https://www.youtube.com/%s' % mobj.group('more'), playlist_id, 'https://www.youtube.com%s' % continuation_url,
'Downloading page #%s%s' playlist_id,
% (page_num, ' (retry #%d)' % count if count else ''), 'Downloading continuation page #%s%s' % (page_num, ' (retry #%d)' % count if count else ''),
transform_source=uppercase_escape, transform_source=uppercase_escape,
headers=self._YOUTUBE_CLIENT_HEADERS) query={
'key': try_get(yt_conf, lambda x: x['INNERTUBE_API_KEY'])
},
data=str(json.dumps({
'context': try_get(yt_conf, lambda x: x['INNERTUBE_CONTEXT']),
'continuation': continuation_token
})).encode(encoding='UTF-8', errors='strict'),
headers={
'Content-Type': 'application/json'
}
)
break break
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503): if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
@ -319,31 +387,30 @@ def _entries(self, page, playlist_id):
continue continue
raise raise
content_html = more['content_html'] def _extract_title(self, renderer):
if not content_html.strip(): title = try_get(renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
# Some webpages show a "Load more" button but they don't if title:
# have more videos return title
break return try_get(renderer, lambda x: x['title']['simpleText'], compat_str)
more_widget_html = more['load_more_widget_html']
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
def _process_page(self, content): def _is_entry(self, obj):
for video_id, video_title in self.extract_videos_from_page(content): return 'videoId' in obj
yield self.url_result(video_id, 'Youtube', video_id, video_title)
def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page): def _process_entries(self, entries, seen):
for mobj in re.finditer(video_re, page): ids_in_page = []
# The link with index 0 is not the first video of the playlist (not sure if still actual) titles_in_page = []
if 'index' in mobj.groupdict() and mobj.group('id') == '0': for renderer in entries:
video_id = try_get(renderer, lambda x: x['videoId'])
video_title = self._extract_title(renderer)
if video_id is None or video_title is None:
# we do not have a videoRenderer or title extraction broke
continue continue
video_id = mobj.group('id')
video_title = unescapeHTML(
mobj.group('title')) if 'title' in mobj.groupdict() else None
if video_title:
video_title = video_title.strip() video_title = video_title.strip()
if video_title == '► Play all':
video_title = None
try: try:
idx = ids_in_page.index(video_id) idx = ids_in_page.index(video_id)
if video_title and not titles_in_page[idx]: if video_title and not titles_in_page[idx]:
@ -352,19 +419,17 @@ def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_p
ids_in_page.append(video_id) ids_in_page.append(video_id)
titles_in_page.append(video_title) titles_in_page.append(video_title)
def extract_videos_from_page(self, page): for video_id, video_title in zip(ids_in_page, titles_in_page):
ids_in_page = [] yield self.url_result(video_id, 'Youtube', video_id, video_title)
titles_in_page = []
self.extract_videos_from_page_impl(
self._VIDEO_RE, page, ids_in_page, titles_in_page)
return zip(ids_in_page, titles_in_page)
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
def _process_page(self, content): def _is_entry(self, obj):
for playlist_id in orderedSet(re.findall( return 'playlistId' in obj
r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
content)): def _process_entries(self, entries, seen):
for playlist_id in orderedSet(try_get(r, lambda x: x['playlistId']) for r in entries):
yield self.url_result( yield self.url_result(
'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist') 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
@ -1390,6 +1455,7 @@ def _get_ytplayer_config(self, video_id, webpage):
# https://github.com/ytdl-org/youtube-dl/pull/7599) # https://github.com/ytdl-org/youtube-dl/pull/7599)
r';ytplayer\.config\s*=\s*({.+?});ytplayer', r';ytplayer\.config\s*=\s*({.+?});ytplayer',
r';ytplayer\.config\s*=\s*({.+?});', r';ytplayer\.config\s*=\s*({.+?});',
r'ytInitialPlayerResponse\s*=\s*({.+?});var meta'
) )
config = self._search_regex( config = self._search_regex(
patterns, webpage, 'ytplayer.config', default=None) patterns, webpage, 'ytplayer.config', default=None)
@ -1397,15 +1463,6 @@ def _get_ytplayer_config(self, video_id, webpage):
return self._parse_json( return self._parse_json(
uppercase_escape(config), video_id, fatal=False) uppercase_escape(config), video_id, fatal=False)
def _get_yt_initial_data(self, video_id, webpage):
config = self._search_regex(
(r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
webpage, 'ytInitialData', default=None)
if config:
return self._parse_json(
uppercase_escape(config), video_id, fatal=False)
def _get_music_metadata_from_yt_initial(self, yt_initial): def _get_music_metadata_from_yt_initial(self, yt_initial):
music_metadata = [] music_metadata = []
key_map = { key_map = {
@ -1454,10 +1511,11 @@ def _get_automatic_captions(self, video_id, webpage):
self._downloader.report_warning(err_msg) self._downloader.report_warning(err_msg)
return {} return {}
try: try:
if "args" in player_config and "ttsurl" in player_config["args"]:
args = player_config['args'] args = player_config['args']
caption_url = args.get('ttsurl') caption_url = args['ttsurl']
if caption_url:
timestamp = args['timestamp'] timestamp = args['timestamp']
# We get the available subtitles # We get the available subtitles
list_params = compat_urllib_parse_urlencode({ list_params = compat_urllib_parse_urlencode({
'type': 'list', 'type': 'list',
@ -1513,11 +1571,17 @@ def make_captions(sub_url, sub_langs):
return captions return captions
# New captions format as of 22.06.2017 # New captions format as of 22.06.2017
player_response = args.get('player_response') if "args" in player_config:
if player_response and isinstance(player_response, compat_str): player_response = player_config["args"].get('player_response')
else:
# New player system (ytInitialPlayerResponse) as of October 2020
player_response = player_config
if player_response:
if isinstance(player_response, compat_str):
player_response = self._parse_json( player_response = self._parse_json(
player_response, video_id, fatal=False) player_response, video_id, fatal=False)
if player_response:
renderer = player_response['captions']['playerCaptionsTracklistRenderer'] renderer = player_response['captions']['playerCaptionsTracklistRenderer']
caption_tracks = renderer['captionTracks'] caption_tracks = renderer['captionTracks']
for caption_track in caption_tracks: for caption_track in caption_tracks:
@ -1534,6 +1598,10 @@ def make_captions(sub_url, sub_langs):
self._downloader.report_warning("Couldn't find automatic captions for %s" % video_id) self._downloader.report_warning("Couldn't find automatic captions for %s" % video_id)
return {} return {}
if "args" in player_config:
args = player_config["args"]
# Some videos don't provide ttsurl but rather caption_tracks and # Some videos don't provide ttsurl but rather caption_tracks and
# caption_translation_languages (e.g. 20LmZk1hakA) # caption_translation_languages (e.g. 20LmZk1hakA)
# Does not used anymore as of 22.06.2017 # Does not used anymore as of 22.06.2017
@ -1822,7 +1890,8 @@ def extract_embedded_config(embed_webpage, video_id):
# Try looking directly into the video webpage # Try looking directly into the video webpage
ytplayer_config = self._get_ytplayer_config(video_id, video_webpage) ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
if ytplayer_config: if ytplayer_config:
args = ytplayer_config['args'] args = ytplayer_config.get("args")
if args is not None:
if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'): if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
# Convert to the same format returned by compat_parse_qs # Convert to the same format returned by compat_parse_qs
video_info = dict((k, [v]) for k, v in args.items()) video_info = dict((k, [v]) for k, v in args.items())
@ -1837,6 +1906,8 @@ def extract_embedded_config(embed_webpage, video_id):
is_live = True is_live = True
if not player_response: if not player_response:
player_response = extract_player_response(args.get('player_response'), video_id) player_response = extract_player_response(args.get('player_response'), video_id)
elif not player_response:
player_response = ytplayer_config
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
add_dash_mpd_pr(player_response) add_dash_mpd_pr(player_response)
else: else:
@ -1866,8 +1937,8 @@ def extract_embedded_config(embed_webpage, video_id):
age_gate = False age_gate = False
# Try looking directly into the video webpage # Try looking directly into the video webpage
ytplayer_config = self._get_ytplayer_config(video_id, video_webpage) ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
if ytplayer_config: args = ytplayer_config.get("args")
args = ytplayer_config['args'] if args is not None:
if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'): if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
# Convert to the same format returned by compat_parse_qs # Convert to the same format returned by compat_parse_qs
video_info = dict((k, [v]) for k, v in args.items()) video_info = dict((k, [v]) for k, v in args.items())
@ -1882,6 +1953,8 @@ def extract_embedded_config(embed_webpage, video_id):
is_live = True is_live = True
if not player_response: if not player_response:
player_response = extract_player_response(args.get('player_response'), video_id) player_response = extract_player_response(args.get('player_response'), video_id)
elif not player_response:
player_response = ytplayer_config
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
add_dash_mpd_pr(player_response) add_dash_mpd_pr(player_response)
@ -2614,6 +2687,12 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
_VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?' _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
_VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})' _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
IE_NAME = 'youtube:playlist' IE_NAME = 'youtube:playlist'
_YTM_PLAYLIST_PREFIX = 'RDCLAK5uy_'
_YTM_CHANNEL_INFO = {
'uploader': 'Youtube Music',
'uploader_id': 'music', # or "UC-9-kyTW8ZkZNDHQJ6FgpwQ"
'uploader_url': 'https://www.youtube.com/music'
}
_TESTS = [{ _TESTS = [{
'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
'info_dict': { 'info_dict': {
@ -2811,10 +2890,21 @@ def extract_videos_from_page(self, page):
return zip(ids_in_page, titles_in_page) return zip(ids_in_page, titles_in_page)
def _extract_mix_ids_from_yt_initial(self, yt_initial):
ids = []
playlist_contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist']['contents'], list)
if playlist_contents:
for item in playlist_contents:
videoId = try_get(item, lambda x: x['playlistPanelVideoRenderer']['videoId'], compat_str)
if videoId:
ids.append(videoId)
return ids
def _extract_mix(self, playlist_id): def _extract_mix(self, playlist_id):
# The mixes are generated from a single video # The mixes are generated from a single video
# the id of the playlist is just 'RD' + video_id # the id of the playlist is just 'RD' + video_id
ids = [] ids = []
yt_initial = None
last_id = playlist_id[-11:] last_id = playlist_id[-11:]
for n in itertools.count(1): for n in itertools.count(1):
url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id) url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
@ -2824,6 +2914,13 @@ def _extract_mix(self, playlist_id):
r'''(?xs)data-video-username=".*?".*? r'''(?xs)data-video-username=".*?".*?
href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id), href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
webpage)) webpage))
# if no ids in html of page, try using embedded json
if (len(new_ids) == 0):
yt_initial = self._get_yt_initial_data(playlist_id, webpage)
if yt_initial:
new_ids = self._extract_mix_ids_from_yt_initial(yt_initial)
# Fetch new pages until all the videos are repeated, it seems that # Fetch new pages until all the videos are repeated, it seems that
# there are always 51 unique videos. # there are always 51 unique videos.
new_ids = [_id for _id in new_ids if _id not in ids] new_ids = [_id for _id in new_ids if _id not in ids]
@ -2841,6 +2938,9 @@ def _extract_mix(self, playlist_id):
or search_title('title')) or search_title('title'))
title = clean_html(title_span) title = clean_html(title_span)
if not title:
title = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist']['title'], compat_str)
return self.playlist_result(url_results, playlist_id, title) return self.playlist_result(url_results, playlist_id, title)
def _extract_playlist(self, playlist_id): def _extract_playlist(self, playlist_id):
@ -2902,6 +3002,8 @@ def _extract_playlist(self, playlist_id):
'uploader_id': uploader_id, 'uploader_id': uploader_id,
'uploader_url': uploader_url, 'uploader_url': uploader_url,
}) })
if playlist_id.startswith(self._YTM_PLAYLIST_PREFIX):
playlist.update(self._YTM_CHANNEL_INFO)
return has_videos, playlist return has_videos, playlist
@ -2932,7 +3034,9 @@ def _real_extract(self, url):
return video return video
if playlist_id.startswith(('RD', 'UL', 'PU')): if playlist_id.startswith(('RD', 'UL', 'PU')):
# Mixes require a custom extraction process if not playlist_id.startswith(self._YTM_PLAYLIST_PREFIX):
# Mixes require a custom extraction process,
# Youtube Music playlists act like normal playlists (with randomized order)
return self._extract_mix(playlist_id) return self._extract_mix(playlist_id)
has_videos, playlist = self._extract_playlist(playlist_id) has_videos, playlist = self._extract_playlist(playlist_id)
@ -3192,11 +3296,7 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
}] }]
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor): class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistBaseInfoExtractor):
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
IE_DESC = 'YouTube.com searches' IE_DESC = 'YouTube.com searches'
# there doesn't appear to be a real limit, for example if you search for # there doesn't appear to be a real limit, for example if you search for
# 'python' you get more than 8.000.000 results # 'python' you get more than 8.000.000 results
@ -3293,11 +3393,10 @@ class YoutubeSearchDateIE(YoutubeSearchIE):
_SEARCH_PARAMS = 'CAI%3D' _SEARCH_PARAMS = 'CAI%3D'
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor): class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor):
IE_DESC = 'YouTube.com search URLs' IE_DESC = 'YouTube.com search URLs'
IE_NAME = 'youtube:search_url' IE_NAME = 'youtube:search_url'
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)' _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
_SEARCH_DATA = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
_TESTS = [{ _TESTS = [{
'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
'playlist_mincount': 5, 'playlist_mincount': 5,
@ -3309,63 +3408,20 @@ class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
def _find_videos_in_json(self, extracted): def _process_json_dict(self, obj, videos, c):
videos = []
def _real_find(obj):
if obj is None or isinstance(obj, str):
return
if type(obj) is list:
for elem in obj:
_real_find(elem)
if type(obj) is dict:
if "videoId" in obj: if "videoId" in obj:
videos.append(obj) videos.append(obj)
return return
for _, o in obj.items(): if "nextContinuationData" in obj:
_real_find(o) c["continuation"] = obj["nextContinuationData"]
return
_real_find(extracted)
return videos
def extract_videos_from_page_impl(self, page, ids_in_page, titles_in_page):
search_response = self._parse_json(self._search_regex(self._SEARCH_DATA, page, 'ytInitialData'), None)
result_items = self._find_videos_in_json(search_response)
for renderer in result_items:
video_id = try_get(renderer, lambda x: x['videoId'])
video_title = try_get(renderer, lambda x: x['title']['runs'][0]['text']) or try_get(renderer, lambda x: x['title']['simpleText'])
if video_id is None or video_title is None:
# we do not have a videoRenderer or title extraction broke
continue
video_title = video_title.strip()
try:
idx = ids_in_page.index(video_id)
if video_title and not titles_in_page[idx]:
titles_in_page[idx] = video_title
except ValueError:
ids_in_page.append(video_id)
titles_in_page.append(video_title)
def extract_videos_from_page(self, page):
ids_in_page = []
titles_in_page = []
self.extract_videos_from_page_impl(page, ids_in_page, titles_in_page)
return zip(ids_in_page, titles_in_page)
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
query = compat_urllib_parse_unquote_plus(mobj.group('query')) query = compat_urllib_parse_unquote_plus(mobj.group('query'))
webpage = self._download_webpage(url, query) webpage = self._download_webpage(url, query)
return self.playlist_result(self._process_page(webpage), playlist_title=query) return self.playlist_result(self._entries(webpage, query, max_pages=5), playlist_title=query)
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor): class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
@ -3387,14 +3443,12 @@ def _real_extract(self, url):
'https://www.youtube.com/show/%s/playlists' % playlist_id) 'https://www.youtube.com/show/%s/playlists' % playlist_id)
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): class YoutubeFeedsInfoExtractor(YoutubePlaylistBaseInfoExtractor):
""" """
Base class for feed extractors Base class for feed extractors
Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties. Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
""" """
_LOGIN_REQUIRED = True _LOGIN_REQUIRED = True
_FEED_DATA = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
_YTCFG_DATA = r"ytcfg.set\(({.*?})\)"
@property @property
def IE_NAME(self): def IE_NAME(self):
@ -3403,53 +3457,15 @@ def IE_NAME(self):
def _real_initialize(self): def _real_initialize(self):
self._login() self._login()
def _find_videos_in_json(self, extracted): def _process_entries(self, entries, seen):
videos = []
c = {}
def _real_find(obj):
if obj is None or isinstance(obj, str):
return
if type(obj) is list:
for elem in obj:
_real_find(elem)
if type(obj) is dict:
if "videoId" in obj:
videos.append(obj)
return
if "nextContinuationData" in obj:
c["continuation"] = obj["nextContinuationData"]
return
for _, o in obj.items():
_real_find(o)
_real_find(extracted)
return videos, try_get(c, lambda x: x["continuation"])
def _entries(self, page):
info = []
yt_conf = self._parse_json(self._search_regex(self._YTCFG_DATA, page, 'ytcfg.set', default="null"), None, fatal=False)
search_response = self._parse_json(self._search_regex(self._FEED_DATA, page, 'ytInitialData'), None)
for page_num in itertools.count(1):
video_info, continuation = self._find_videos_in_json(search_response)
new_info = [] new_info = []
for v in entries:
for v in video_info:
v_id = try_get(v, lambda x: x['videoId']) v_id = try_get(v, lambda x: x['videoId'])
if not v_id: if not v_id:
continue continue
have_video = False have_video = False
for old in info: for old in seen:
if old['videoId'] == v_id: if old['videoId'] == v_id:
have_video = True have_video = True
break break
@ -3458,41 +3474,18 @@ def _entries(self, page):
new_info.append(v) new_info.append(v)
if not new_info: if not new_info:
break return
info.extend(new_info)
seen.extend(new_info)
for video in new_info: for video in new_info:
yield self.url_result(try_get(video, lambda x: x['videoId']), YoutubeIE.ie_key(), video_title=try_get(video, lambda x: x['title']['runs'][0]['text']) or try_get(video, lambda x: x['title']['simpleText'])) yield self.url_result(try_get(video, lambda x: x['videoId']), YoutubeIE.ie_key(), video_title=self._extract_title(video))
if not continuation or not yt_conf:
break
search_response = self._download_json(
'https://www.youtube.com/browse_ajax', self._PLAYLIST_TITLE,
'Downloading page #%s' % page_num,
transform_source=uppercase_escape,
query={
"ctoken": try_get(continuation, lambda x: x["continuation"]),
"continuation": try_get(continuation, lambda x: x["continuation"]),
"itct": try_get(continuation, lambda x: x["clickTrackingParams"])
},
headers={
"X-YouTube-Client-Name": try_get(yt_conf, lambda x: x["INNERTUBE_CONTEXT_CLIENT_NAME"]),
"X-YouTube-Client-Version": try_get(yt_conf, lambda x: x["INNERTUBE_CONTEXT_CLIENT_VERSION"]),
"X-Youtube-Identity-Token": try_get(yt_conf, lambda x: x["ID_TOKEN"]),
"X-YouTube-Device": try_get(yt_conf, lambda x: x["DEVICE"]),
"X-YouTube-Page-CL": try_get(yt_conf, lambda x: x["PAGE_CL"]),
"X-YouTube-Page-Label": try_get(yt_conf, lambda x: x["PAGE_BUILD_LABEL"]),
"X-YouTube-Variants-Checksum": try_get(yt_conf, lambda x: x["VARIANTS_CHECKSUM"]),
})
def _real_extract(self, url): def _real_extract(self, url):
page = self._download_webpage( page = self._download_webpage(
'https://www.youtube.com/feed/%s' % self._FEED_NAME, 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
self._PLAYLIST_TITLE) self._PLAYLIST_TITLE)
return self.playlist_result( return self.playlist_result(self._entries(page, self._PLAYLIST_TITLE),
self._entries(page), playlist_title=self._PLAYLIST_TITLE) playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE): class YoutubeWatchLaterIE(YoutubePlaylistIE):

View file

@ -0,0 +1,82 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
url_or_none,
parse_filesize,
urlencode_postdata
)
class ZoomIE(InfoExtractor):
    """Extract recorded Zoom meetings from zoom.us play/share recording pages.

    Supports password-protected recordings via the --video-password option.
    """
    IE_NAME = 'zoom'
    # Host must actually be zoom.us (optionally with a subdomain). The previous
    # pattern left the dots unescaped and used an unanchored `(?:.*).?` prefix,
    # which let unrelated hosts match.
    _VALID_URL = r'https?://(?:[^/?#]*\.)?zoom\.us/rec(?:ording)?/(?:play|share)/(?P<id>[A-Za-z0-9\-_.]+)'
    _TEST = {
        'url': 'https://zoom.us/recording/play/SILVuCL4bFtRwWTtOCFQQxAsBQsJljFtm9e4Z_bvo-A8B-nzUSYZRNuPl3qW5IGK',
        # 'md5' belongs at the top level of the test dict (not inside
        # info_dict) for the checksum to actually be verified by the harness.
        'md5': '031a5b379f1547a8b29c5c4c837dccf2',
        'info_dict': {
            'id': 'SILVuCL4bFtRwWTtOCFQQxAsBQsJljFtm9e4Z_bvo-A8B-nzUSYZRNuPl3qW5IGK',
            'ext': 'mp4',
            'title': 'GAZ Transformational Tuesdays W/ Landon & Stapes',
        }
    }

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Password-protected recordings render a form with id="password_form";
        # validate the password, then re-fetch the page with the new session.
        password_protected = self._search_regex(
            r'<form[^>]+?id="(password_form)"', webpage, 'password field',
            fatal=False, default=None)
        if password_protected is not None:
            self._verify_video_password(url, display_id, webpage)
            webpage = self._download_webpage(url, display_id)

        video_url = self._search_regex(r"viewMp4Url: \'(.*)\'", webpage, 'video url')
        title = self._html_search_regex(
            [r"topic: \"(.*)\",", r"<title>(.*) - Zoom</title>"], webpage, 'title')
        # NOTE: "viewResolvtions*" reproduces the (misspelled) keys in Zoom's
        # own player JavaScript — do not "fix" the spelling.
        width = self._search_regex(
            r"viewResolvtionsWidth: (\d*)", webpage, 'res width', fatal=False)
        height = self._search_regex(
            r"viewResolvtionsHeight: (\d*)", webpage, 'res height', fatal=False)
        filesize = parse_filesize(self._search_regex(
            r"fileSize: \'(.+)\'", webpage, 'fileSize', fatal=False))

        # Keep whatever scheme/subdomain the user supplied for the Referer.
        url_prefix = url.split("zoom.us")[0] + "zoom.us/"

        formats = [{
            'url': url_or_none(video_url),
            'width': int_or_none(width),
            'height': int_or_none(height),
            # Zoom rejects media requests without a matching Referer.
            'http_headers': {
                'Accept': 'video/webm,video/ogg,video/*;q=0.9,application/ogg;q=0.7,audio/*;q=0.6,*/*;q=0.5',
                'Referer': url_prefix,
            },
            'ext': "mp4",
            'filesize_approx': int_or_none(filesize),
        }]
        self._sort_formats(formats)

        return {
            'id': display_id,
            'title': title,
            'formats': formats,
        }

    def _verify_video_password(self, url, video_id, webpage):
        """Submit the --video-password to Zoom's validation endpoint.

        Raises ExtractorError if no password was supplied or validation fails.
        """
        password = self._downloader.params.get('videopassword')
        if password is None:
            raise ExtractorError(
                'This video is protected by a password, use the --video-password option',
                expected=True)
        meet_id = self._search_regex(
            r'<input[^>]+?id="meetId" value="([^\"]+)"', webpage, 'meetId')
        data = urlencode_postdata({
            'id': meet_id,
            'passwd': password,
            'action': "viewdetailedpage",
            'recaptcha': "",
        })
        validation_url = url.split("zoom.us")[0] + "zoom.us/rec/validate_meet_passwd"
        validation_response = self._download_json(
            validation_url, video_id,
            note='Validating Password...',
            errnote='Wrong password?',
            data=data)
        # errorCode 0 means the password was accepted.
        if validation_response['errorCode'] != 0:
            raise ExtractorError(
                'Login failed, %s said: %r'
                % (self.IE_NAME, validation_response['errorMessage']))

View file

@ -412,7 +412,9 @@ def run(self, information):
for lang, sub_info in subtitles.items(): for lang, sub_info in subtitles.items():
sub_ext = sub_info['ext'] sub_ext = sub_info['ext']
if ext != 'webm' or ext == 'webm' and sub_ext == 'vtt': if sub_ext == 'json':
self._downloader.to_screen('[ffmpeg] JSON subtitles cannot be embedded')
elif ext != 'webm' or ext == 'webm' and sub_ext == 'vtt':
sub_langs.append(lang) sub_langs.append(lang)
sub_filenames.append(subtitles_filename(filename, lang, sub_ext, ext)) sub_filenames.append(subtitles_filename(filename, lang, sub_ext, ext))
else: else:
@ -643,13 +645,18 @@ def run(self, info):
self._downloader.to_screen( self._downloader.to_screen(
'[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext) '[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext)
continue continue
elif ext == 'json':
self._downloader.to_screen(
'[ffmpeg] You have requested to convert json subtitles into another format, '
'which is currently not possible')
continue
old_file = subtitles_filename(filename, lang, ext, info.get('ext')) old_file = subtitles_filename(filename, lang, ext, info.get('ext'))
sub_filenames.append(old_file) sub_filenames.append(old_file)
new_file = subtitles_filename(filename, lang, new_ext, info.get('ext')) new_file = subtitles_filename(filename, lang, new_ext, info.get('ext'))
if ext in ('dfxp', 'ttml', 'tt'): if ext in ('dfxp', 'ttml', 'tt'):
self._downloader.report_warning( self._downloader.report_warning(
'You have requested to convert dfxp (TTML) subtitles into another format, ' '[ffmpeg] You have requested to convert dfxp (TTML) subtitles into another format, '
'which results in style information loss') 'which results in style information loss')
dfxp_file = old_file dfxp_file = old_file

View file

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2020.10.25' __version__ = '2020.11.11-2'