mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-13 20:03:17 +00:00
[skip travis] adding automerge support
basically copying content of youtube_dl folder to youtube_dlc and excluding the youtube_dl folder when compiling
This commit is contained in:
parent
4cd6add62b
commit
3ca3f77f9c
|
@ -35,4 +35,7 @@ jobs:
|
|||
- env: JYTHON=true; YTDL_TEST_SET=core
|
||||
before_install:
|
||||
- if [ "$JYTHON" == "true" ]; then ./devscripts/install_jython.sh; export PATH="$HOME/jython/bin:$PATH"; fi
|
||||
before_script:
|
||||
- rm -rf /youtube_dlc/*
|
||||
- cp /youtube_dl/* /youtube_dlc
|
||||
script: ./devscripts/run_tests.sh
|
||||
|
|
2
setup.py
2
setup.py
|
@ -67,7 +67,7 @@ def run(self):
|
|||
long_description=LONG_DESCRIPTION,
|
||||
# long_description_content_type="text/markdown",
|
||||
url="https://github.com/blackjack4494/youtube-dlc",
|
||||
packages=find_packages(),
|
||||
packages=find_packages(exclude=("youtube_dl",)),
|
||||
#packages=[
|
||||
# 'youtube_dlc',
|
||||
# 'youtube_dlc.extractor', 'youtube_dlc.downloader',
|
||||
|
|
33
youtube-dlc.spec
Normal file
33
youtube-dlc.spec
Normal file
|
@ -0,0 +1,33 @@
|
|||
# -*- mode: python ; coding: utf-8 -*-
|
||||
|
||||
block_cipher = None
|
||||
|
||||
|
||||
a = Analysis(['youtube_dlc\\__main__.py'],
|
||||
pathex=['D:\\gitkraken\\youtube-dl'],
|
||||
binaries=[],
|
||||
datas=[],
|
||||
hiddenimports=[],
|
||||
hookspath=[],
|
||||
runtime_hooks=[],
|
||||
excludes=[],
|
||||
win_no_prefer_redirects=False,
|
||||
win_private_assemblies=False,
|
||||
cipher=block_cipher,
|
||||
noarchive=False)
|
||||
pyz = PYZ(a.pure, a.zipped_data,
|
||||
cipher=block_cipher)
|
||||
exe = EXE(pyz,
|
||||
a.scripts,
|
||||
a.binaries,
|
||||
a.zipfiles,
|
||||
a.datas,
|
||||
[],
|
||||
name='youtube-dlc',
|
||||
debug=False,
|
||||
bootloader_ignore_signals=False,
|
||||
strip=False,
|
||||
upx=True,
|
||||
upx_exclude=[],
|
||||
runtime_tmpdir=None,
|
||||
console=True )
|
2417
youtube_dl/YoutubeDL.py
Normal file
2417
youtube_dl/YoutubeDL.py
Normal file
File diff suppressed because it is too large
Load diff
483
youtube_dl/__init__.py
Normal file
483
youtube_dl/__init__.py
Normal file
|
@ -0,0 +1,483 @@
|
|||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
|
||||
import codecs
|
||||
import io
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
|
||||
|
||||
from .options import (
|
||||
parseOpts,
|
||||
)
|
||||
from .compat import (
|
||||
compat_getpass,
|
||||
compat_shlex_split,
|
||||
workaround_optparse_bug9161,
|
||||
)
|
||||
from .utils import (
|
||||
DateRange,
|
||||
decodeOption,
|
||||
DEFAULT_OUTTMPL,
|
||||
DownloadError,
|
||||
expand_path,
|
||||
match_filter_func,
|
||||
MaxDownloadsReached,
|
||||
preferredencoding,
|
||||
read_batch_urls,
|
||||
SameFileError,
|
||||
setproctitle,
|
||||
std_headers,
|
||||
write_string,
|
||||
render_table,
|
||||
)
|
||||
from .update import update_self
|
||||
from .downloader import (
|
||||
FileDownloader,
|
||||
)
|
||||
from .extractor import gen_extractors, list_extractors
|
||||
from .extractor.adobepass import MSO_INFO
|
||||
from .YoutubeDL import YoutubeDL
|
||||
|
||||
|
||||
def _real_main(argv=None):
|
||||
# Compatibility fixes for Windows
|
||||
if sys.platform == 'win32':
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/820
|
||||
codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
|
||||
|
||||
workaround_optparse_bug9161()
|
||||
|
||||
setproctitle('youtube-dlc')
|
||||
|
||||
parser, opts, args = parseOpts(argv)
|
||||
|
||||
# Set user agent
|
||||
if opts.user_agent is not None:
|
||||
std_headers['User-Agent'] = opts.user_agent
|
||||
|
||||
# Set referer
|
||||
if opts.referer is not None:
|
||||
std_headers['Referer'] = opts.referer
|
||||
|
||||
# Custom HTTP headers
|
||||
if opts.headers is not None:
|
||||
for h in opts.headers:
|
||||
if ':' not in h:
|
||||
parser.error('wrong header formatting, it should be key:value, not "%s"' % h)
|
||||
key, value = h.split(':', 1)
|
||||
if opts.verbose:
|
||||
write_string('[debug] Adding header from command line option %s:%s\n' % (key, value))
|
||||
std_headers[key] = value
|
||||
|
||||
# Dump user agent
|
||||
if opts.dump_user_agent:
|
||||
write_string(std_headers['User-Agent'] + '\n', out=sys.stdout)
|
||||
sys.exit(0)
|
||||
|
||||
# Batch file verification
|
||||
batch_urls = []
|
||||
if opts.batchfile is not None:
|
||||
try:
|
||||
if opts.batchfile == '-':
|
||||
batchfd = sys.stdin
|
||||
else:
|
||||
batchfd = io.open(
|
||||
expand_path(opts.batchfile),
|
||||
'r', encoding='utf-8', errors='ignore')
|
||||
batch_urls = read_batch_urls(batchfd)
|
||||
if opts.verbose:
|
||||
write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
|
||||
except IOError:
|
||||
sys.exit('ERROR: batch file %s could not be read' % opts.batchfile)
|
||||
all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already striped in read_batch_urls
|
||||
_enc = preferredencoding()
|
||||
all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
|
||||
|
||||
if opts.list_extractors:
|
||||
for ie in list_extractors(opts.age_limit):
|
||||
write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '') + '\n', out=sys.stdout)
|
||||
matchedUrls = [url for url in all_urls if ie.suitable(url)]
|
||||
for mu in matchedUrls:
|
||||
write_string(' ' + mu + '\n', out=sys.stdout)
|
||||
sys.exit(0)
|
||||
if opts.list_extractor_descriptions:
|
||||
for ie in list_extractors(opts.age_limit):
|
||||
if not ie._WORKING:
|
||||
continue
|
||||
desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
|
||||
if desc is False:
|
||||
continue
|
||||
if hasattr(ie, 'SEARCH_KEY'):
|
||||
_SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
|
||||
_COUNTS = ('', '5', '10', 'all')
|
||||
desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
|
||||
write_string(desc + '\n', out=sys.stdout)
|
||||
sys.exit(0)
|
||||
if opts.ap_list_mso:
|
||||
table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]
|
||||
write_string('Supported TV Providers:\n' + render_table(['mso', 'mso name'], table) + '\n', out=sys.stdout)
|
||||
sys.exit(0)
|
||||
|
||||
# Conflicting, missing and erroneous options
|
||||
if opts.usenetrc and (opts.username is not None or opts.password is not None):
|
||||
parser.error('using .netrc conflicts with giving username/password')
|
||||
if opts.password is not None and opts.username is None:
|
||||
parser.error('account username missing\n')
|
||||
if opts.ap_password is not None and opts.ap_username is None:
|
||||
parser.error('TV Provider account username missing\n')
|
||||
if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
|
||||
parser.error('using output template conflicts with using title, video ID or auto number')
|
||||
if opts.autonumber_size is not None:
|
||||
if opts.autonumber_size <= 0:
|
||||
parser.error('auto number size must be positive')
|
||||
if opts.autonumber_start is not None:
|
||||
if opts.autonumber_start < 0:
|
||||
parser.error('auto number start must be positive or 0')
|
||||
if opts.usetitle and opts.useid:
|
||||
parser.error('using title conflicts with using video ID')
|
||||
if opts.username is not None and opts.password is None:
|
||||
opts.password = compat_getpass('Type account password and press [Return]: ')
|
||||
if opts.ap_username is not None and opts.ap_password is None:
|
||||
opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ')
|
||||
if opts.ratelimit is not None:
|
||||
numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
|
||||
if numeric_limit is None:
|
||||
parser.error('invalid rate limit specified')
|
||||
opts.ratelimit = numeric_limit
|
||||
if opts.min_filesize is not None:
|
||||
numeric_limit = FileDownloader.parse_bytes(opts.min_filesize)
|
||||
if numeric_limit is None:
|
||||
parser.error('invalid min_filesize specified')
|
||||
opts.min_filesize = numeric_limit
|
||||
if opts.max_filesize is not None:
|
||||
numeric_limit = FileDownloader.parse_bytes(opts.max_filesize)
|
||||
if numeric_limit is None:
|
||||
parser.error('invalid max_filesize specified')
|
||||
opts.max_filesize = numeric_limit
|
||||
if opts.sleep_interval is not None:
|
||||
if opts.sleep_interval < 0:
|
||||
parser.error('sleep interval must be positive or 0')
|
||||
if opts.max_sleep_interval is not None:
|
||||
if opts.max_sleep_interval < 0:
|
||||
parser.error('max sleep interval must be positive or 0')
|
||||
if opts.sleep_interval is None:
|
||||
parser.error('min sleep interval must be specified, use --min-sleep-interval')
|
||||
if opts.max_sleep_interval < opts.sleep_interval:
|
||||
parser.error('max sleep interval must be greater than or equal to min sleep interval')
|
||||
else:
|
||||
opts.max_sleep_interval = opts.sleep_interval
|
||||
if opts.ap_mso and opts.ap_mso not in MSO_INFO:
|
||||
parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers')
|
||||
|
||||
def parse_retries(retries):
|
||||
if retries in ('inf', 'infinite'):
|
||||
parsed_retries = float('inf')
|
||||
else:
|
||||
try:
|
||||
parsed_retries = int(retries)
|
||||
except (TypeError, ValueError):
|
||||
parser.error('invalid retry count specified')
|
||||
return parsed_retries
|
||||
if opts.retries is not None:
|
||||
opts.retries = parse_retries(opts.retries)
|
||||
if opts.fragment_retries is not None:
|
||||
opts.fragment_retries = parse_retries(opts.fragment_retries)
|
||||
if opts.buffersize is not None:
|
||||
numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
|
||||
if numeric_buffersize is None:
|
||||
parser.error('invalid buffer size specified')
|
||||
opts.buffersize = numeric_buffersize
|
||||
if opts.http_chunk_size is not None:
|
||||
numeric_chunksize = FileDownloader.parse_bytes(opts.http_chunk_size)
|
||||
if not numeric_chunksize:
|
||||
parser.error('invalid http chunk size specified')
|
||||
opts.http_chunk_size = numeric_chunksize
|
||||
if opts.playliststart <= 0:
|
||||
raise ValueError('Playlist start must be positive')
|
||||
if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
|
||||
raise ValueError('Playlist end must be greater than playlist start')
|
||||
if opts.extractaudio:
|
||||
if opts.audioformat not in ['best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
|
||||
parser.error('invalid audio format specified')
|
||||
if opts.audioquality:
|
||||
opts.audioquality = opts.audioquality.strip('k').strip('K')
|
||||
if not opts.audioquality.isdigit():
|
||||
parser.error('invalid audio quality specified')
|
||||
if opts.recodevideo is not None:
|
||||
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']:
|
||||
parser.error('invalid video recode format specified')
|
||||
if opts.convertsubtitles is not None:
|
||||
if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']:
|
||||
parser.error('invalid subtitle format specified')
|
||||
|
||||
if opts.date is not None:
|
||||
date = DateRange.day(opts.date)
|
||||
else:
|
||||
date = DateRange(opts.dateafter, opts.datebefore)
|
||||
|
||||
# Do not download videos when there are audio-only formats
|
||||
if opts.extractaudio and not opts.keepvideo and opts.format is None:
|
||||
opts.format = 'bestaudio/best'
|
||||
|
||||
# --all-sub automatically sets --write-sub if --write-auto-sub is not given
|
||||
# this was the old behaviour if only --all-sub was given.
|
||||
if opts.allsubtitles and not opts.writeautomaticsub:
|
||||
opts.writesubtitles = True
|
||||
|
||||
outtmpl = ((opts.outtmpl is not None and opts.outtmpl)
|
||||
or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s')
|
||||
or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s')
|
||||
or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s')
|
||||
or (opts.usetitle and '%(title)s-%(id)s.%(ext)s')
|
||||
or (opts.useid and '%(id)s.%(ext)s')
|
||||
or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s')
|
||||
or DEFAULT_OUTTMPL)
|
||||
if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
|
||||
parser.error('Cannot download a video and extract audio into the same'
|
||||
' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
|
||||
' template'.format(outtmpl))
|
||||
|
||||
any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
|
||||
any_printing = opts.print_json
|
||||
download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive
|
||||
|
||||
# PostProcessors
|
||||
postprocessors = []
|
||||
if opts.metafromtitle:
|
||||
postprocessors.append({
|
||||
'key': 'MetadataFromTitle',
|
||||
'titleformat': opts.metafromtitle
|
||||
})
|
||||
if opts.extractaudio:
|
||||
postprocessors.append({
|
||||
'key': 'FFmpegExtractAudio',
|
||||
'preferredcodec': opts.audioformat,
|
||||
'preferredquality': opts.audioquality,
|
||||
'nopostoverwrites': opts.nopostoverwrites,
|
||||
})
|
||||
if opts.recodevideo:
|
||||
postprocessors.append({
|
||||
'key': 'FFmpegVideoConvertor',
|
||||
'preferedformat': opts.recodevideo,
|
||||
})
|
||||
# FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and
|
||||
# FFmpegExtractAudioPP as containers before conversion may not support
|
||||
# metadata (3gp, webm, etc.)
|
||||
# And this post-processor should be placed before other metadata
|
||||
# manipulating post-processors (FFmpegEmbedSubtitle) to prevent loss of
|
||||
# extra metadata. By default ffmpeg preserves metadata applicable for both
|
||||
# source and target containers. From this point the container won't change,
|
||||
# so metadata can be added here.
|
||||
if opts.addmetadata:
|
||||
postprocessors.append({'key': 'FFmpegMetadata'})
|
||||
if opts.convertsubtitles:
|
||||
postprocessors.append({
|
||||
'key': 'FFmpegSubtitlesConvertor',
|
||||
'format': opts.convertsubtitles,
|
||||
})
|
||||
if opts.embedsubtitles:
|
||||
postprocessors.append({
|
||||
'key': 'FFmpegEmbedSubtitle',
|
||||
})
|
||||
if opts.embedthumbnail:
|
||||
already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
|
||||
postprocessors.append({
|
||||
'key': 'EmbedThumbnail',
|
||||
'already_have_thumbnail': already_have_thumbnail
|
||||
})
|
||||
if not already_have_thumbnail:
|
||||
opts.writethumbnail = True
|
||||
# XAttrMetadataPP should be run after post-processors that may change file
|
||||
# contents
|
||||
if opts.xattrs:
|
||||
postprocessors.append({'key': 'XAttrMetadata'})
|
||||
# Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
|
||||
# So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
|
||||
if opts.exec_cmd:
|
||||
postprocessors.append({
|
||||
'key': 'ExecAfterDownload',
|
||||
'exec_cmd': opts.exec_cmd,
|
||||
})
|
||||
external_downloader_args = None
|
||||
if opts.external_downloader_args:
|
||||
external_downloader_args = compat_shlex_split(opts.external_downloader_args)
|
||||
postprocessor_args = None
|
||||
if opts.postprocessor_args:
|
||||
postprocessor_args = compat_shlex_split(opts.postprocessor_args)
|
||||
match_filter = (
|
||||
None if opts.match_filter is None
|
||||
else match_filter_func(opts.match_filter))
|
||||
|
||||
ydl_opts = {
|
||||
'usenetrc': opts.usenetrc,
|
||||
'username': opts.username,
|
||||
'password': opts.password,
|
||||
'twofactor': opts.twofactor,
|
||||
'videopassword': opts.videopassword,
|
||||
'ap_mso': opts.ap_mso,
|
||||
'ap_username': opts.ap_username,
|
||||
'ap_password': opts.ap_password,
|
||||
'quiet': (opts.quiet or any_getting or any_printing),
|
||||
'no_warnings': opts.no_warnings,
|
||||
'forceurl': opts.geturl,
|
||||
'forcetitle': opts.gettitle,
|
||||
'forceid': opts.getid,
|
||||
'forcethumbnail': opts.getthumbnail,
|
||||
'forcedescription': opts.getdescription,
|
||||
'forceduration': opts.getduration,
|
||||
'forcefilename': opts.getfilename,
|
||||
'forceformat': opts.getformat,
|
||||
'forcejson': opts.dumpjson or opts.print_json,
|
||||
'dump_single_json': opts.dump_single_json,
|
||||
'simulate': opts.simulate or any_getting,
|
||||
'skip_download': opts.skip_download,
|
||||
'format': opts.format,
|
||||
'listformats': opts.listformats,
|
||||
'outtmpl': outtmpl,
|
||||
'autonumber_size': opts.autonumber_size,
|
||||
'autonumber_start': opts.autonumber_start,
|
||||
'restrictfilenames': opts.restrictfilenames,
|
||||
'ignoreerrors': opts.ignoreerrors,
|
||||
'force_generic_extractor': opts.force_generic_extractor,
|
||||
'ratelimit': opts.ratelimit,
|
||||
'nooverwrites': opts.nooverwrites,
|
||||
'retries': opts.retries,
|
||||
'fragment_retries': opts.fragment_retries,
|
||||
'skip_unavailable_fragments': opts.skip_unavailable_fragments,
|
||||
'keep_fragments': opts.keep_fragments,
|
||||
'buffersize': opts.buffersize,
|
||||
'noresizebuffer': opts.noresizebuffer,
|
||||
'http_chunk_size': opts.http_chunk_size,
|
||||
'continuedl': opts.continue_dl,
|
||||
'noprogress': opts.noprogress,
|
||||
'progress_with_newline': opts.progress_with_newline,
|
||||
'playliststart': opts.playliststart,
|
||||
'playlistend': opts.playlistend,
|
||||
'playlistreverse': opts.playlist_reverse,
|
||||
'playlistrandom': opts.playlist_random,
|
||||
'noplaylist': opts.noplaylist,
|
||||
'logtostderr': opts.outtmpl == '-',
|
||||
'consoletitle': opts.consoletitle,
|
||||
'nopart': opts.nopart,
|
||||
'updatetime': opts.updatetime,
|
||||
'writedescription': opts.writedescription,
|
||||
'writeannotations': opts.writeannotations,
|
||||
'writeinfojson': opts.writeinfojson,
|
||||
'writethumbnail': opts.writethumbnail,
|
||||
'write_all_thumbnails': opts.write_all_thumbnails,
|
||||
'writesubtitles': opts.writesubtitles,
|
||||
'writeautomaticsub': opts.writeautomaticsub,
|
||||
'allsubtitles': opts.allsubtitles,
|
||||
'listsubtitles': opts.listsubtitles,
|
||||
'subtitlesformat': opts.subtitlesformat,
|
||||
'subtitleslangs': opts.subtitleslangs,
|
||||
'matchtitle': decodeOption(opts.matchtitle),
|
||||
'rejecttitle': decodeOption(opts.rejecttitle),
|
||||
'max_downloads': opts.max_downloads,
|
||||
'prefer_free_formats': opts.prefer_free_formats,
|
||||
'verbose': opts.verbose,
|
||||
'dump_intermediate_pages': opts.dump_intermediate_pages,
|
||||
'write_pages': opts.write_pages,
|
||||
'test': opts.test,
|
||||
'keepvideo': opts.keepvideo,
|
||||
'min_filesize': opts.min_filesize,
|
||||
'max_filesize': opts.max_filesize,
|
||||
'min_views': opts.min_views,
|
||||
'max_views': opts.max_views,
|
||||
'daterange': date,
|
||||
'cachedir': opts.cachedir,
|
||||
'youtube_print_sig_code': opts.youtube_print_sig_code,
|
||||
'age_limit': opts.age_limit,
|
||||
'download_archive': download_archive_fn,
|
||||
'cookiefile': opts.cookiefile,
|
||||
'nocheckcertificate': opts.no_check_certificate,
|
||||
'prefer_insecure': opts.prefer_insecure,
|
||||
'proxy': opts.proxy,
|
||||
'socket_timeout': opts.socket_timeout,
|
||||
'bidi_workaround': opts.bidi_workaround,
|
||||
'debug_printtraffic': opts.debug_printtraffic,
|
||||
'prefer_ffmpeg': opts.prefer_ffmpeg,
|
||||
'include_ads': opts.include_ads,
|
||||
'default_search': opts.default_search,
|
||||
'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
|
||||
'encoding': opts.encoding,
|
||||
'extract_flat': opts.extract_flat,
|
||||
'mark_watched': opts.mark_watched,
|
||||
'merge_output_format': opts.merge_output_format,
|
||||
'postprocessors': postprocessors,
|
||||
'fixup': opts.fixup,
|
||||
'source_address': opts.source_address,
|
||||
'call_home': opts.call_home,
|
||||
'sleep_interval': opts.sleep_interval,
|
||||
'max_sleep_interval': opts.max_sleep_interval,
|
||||
'external_downloader': opts.external_downloader,
|
||||
'list_thumbnails': opts.list_thumbnails,
|
||||
'playlist_items': opts.playlist_items,
|
||||
'xattr_set_filesize': opts.xattr_set_filesize,
|
||||
'match_filter': match_filter,
|
||||
'no_color': opts.no_color,
|
||||
'ffmpeg_location': opts.ffmpeg_location,
|
||||
'hls_prefer_native': opts.hls_prefer_native,
|
||||
'hls_use_mpegts': opts.hls_use_mpegts,
|
||||
'external_downloader_args': external_downloader_args,
|
||||
'postprocessor_args': postprocessor_args,
|
||||
'cn_verification_proxy': opts.cn_verification_proxy,
|
||||
'geo_verification_proxy': opts.geo_verification_proxy,
|
||||
'config_location': opts.config_location,
|
||||
'geo_bypass': opts.geo_bypass,
|
||||
'geo_bypass_country': opts.geo_bypass_country,
|
||||
'geo_bypass_ip_block': opts.geo_bypass_ip_block,
|
||||
# just for deprecation check
|
||||
'autonumber': opts.autonumber if opts.autonumber is True else None,
|
||||
'usetitle': opts.usetitle if opts.usetitle is True else None,
|
||||
}
|
||||
|
||||
with YoutubeDL(ydl_opts) as ydl:
|
||||
# Update version
|
||||
if opts.update_self:
|
||||
update_self(ydl.to_screen, opts.verbose, ydl._opener)
|
||||
|
||||
# Remove cache dir
|
||||
if opts.rm_cachedir:
|
||||
ydl.cache.remove()
|
||||
|
||||
# Maybe do nothing
|
||||
if (len(all_urls) < 1) and (opts.load_info_filename is None):
|
||||
if opts.update_self or opts.rm_cachedir:
|
||||
sys.exit()
|
||||
|
||||
ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
|
||||
parser.error(
|
||||
'You must provide at least one URL.\n'
|
||||
'Type youtube-dlc --help to see a list of all options.')
|
||||
|
||||
try:
|
||||
if opts.load_info_filename is not None:
|
||||
retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename))
|
||||
else:
|
||||
retcode = ydl.download(all_urls)
|
||||
except MaxDownloadsReached:
|
||||
ydl.to_screen('--max-download limit reached, aborting.')
|
||||
retcode = 101
|
||||
|
||||
sys.exit(retcode)
|
||||
|
||||
|
||||
def main(argv=None):
|
||||
try:
|
||||
_real_main(argv)
|
||||
except DownloadError:
|
||||
sys.exit(1)
|
||||
except SameFileError:
|
||||
sys.exit('ERROR: fixed output name but more than one file to download')
|
||||
except KeyboardInterrupt:
|
||||
sys.exit('\nERROR: Interrupted by user')
|
||||
|
||||
|
||||
__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors']
|
19
youtube_dl/__main__.py
Normal file
19
youtube_dl/__main__.py
Normal file
|
@ -0,0 +1,19 @@
|
|||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Execute with
|
||||
# $ python youtube_dlc/__main__.py (2.6+)
|
||||
# $ python -m youtube_dlc (2.7+)
|
||||
|
||||
import sys
|
||||
|
||||
if __package__ is None and not hasattr(sys, 'frozen'):
|
||||
# direct call of __main__.py
|
||||
import os.path
|
||||
path = os.path.realpath(os.path.abspath(__file__))
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(path)))
|
||||
|
||||
import youtube_dlc
|
||||
|
||||
if __name__ == '__main__':
|
||||
youtube_dlc.main()
|
361
youtube_dl/aes.py
Normal file
361
youtube_dl/aes.py
Normal file
|
@ -0,0 +1,361 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from math import ceil
|
||||
|
||||
from .compat import compat_b64decode
|
||||
from .utils import bytes_to_intlist, intlist_to_bytes
|
||||
|
||||
BLOCK_SIZE_BYTES = 16
|
||||
|
||||
|
||||
def aes_ctr_decrypt(data, key, counter):
|
||||
"""
|
||||
Decrypt with aes in counter mode
|
||||
|
||||
@param {int[]} data cipher
|
||||
@param {int[]} key 16/24/32-Byte cipher key
|
||||
@param {instance} counter Instance whose next_value function (@returns {int[]} 16-Byte block)
|
||||
returns the next counter block
|
||||
@returns {int[]} decrypted data
|
||||
"""
|
||||
expanded_key = key_expansion(key)
|
||||
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
|
||||
|
||||
decrypted_data = []
|
||||
for i in range(block_count):
|
||||
counter_block = counter.next_value()
|
||||
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
|
||||
block += [0] * (BLOCK_SIZE_BYTES - len(block))
|
||||
|
||||
cipher_counter_block = aes_encrypt(counter_block, expanded_key)
|
||||
decrypted_data += xor(block, cipher_counter_block)
|
||||
decrypted_data = decrypted_data[:len(data)]
|
||||
|
||||
return decrypted_data
|
||||
|
||||
|
||||
def aes_cbc_decrypt(data, key, iv):
|
||||
"""
|
||||
Decrypt with aes in CBC mode
|
||||
|
||||
@param {int[]} data cipher
|
||||
@param {int[]} key 16/24/32-Byte cipher key
|
||||
@param {int[]} iv 16-Byte IV
|
||||
@returns {int[]} decrypted data
|
||||
"""
|
||||
expanded_key = key_expansion(key)
|
||||
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
|
||||
|
||||
decrypted_data = []
|
||||
previous_cipher_block = iv
|
||||
for i in range(block_count):
|
||||
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
|
||||
block += [0] * (BLOCK_SIZE_BYTES - len(block))
|
||||
|
||||
decrypted_block = aes_decrypt(block, expanded_key)
|
||||
decrypted_data += xor(decrypted_block, previous_cipher_block)
|
||||
previous_cipher_block = block
|
||||
decrypted_data = decrypted_data[:len(data)]
|
||||
|
||||
return decrypted_data
|
||||
|
||||
|
||||
def aes_cbc_encrypt(data, key, iv):
|
||||
"""
|
||||
Encrypt with aes in CBC mode. Using PKCS#7 padding
|
||||
|
||||
@param {int[]} data cleartext
|
||||
@param {int[]} key 16/24/32-Byte cipher key
|
||||
@param {int[]} iv 16-Byte IV
|
||||
@returns {int[]} encrypted data
|
||||
"""
|
||||
expanded_key = key_expansion(key)
|
||||
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
|
||||
|
||||
encrypted_data = []
|
||||
previous_cipher_block = iv
|
||||
for i in range(block_count):
|
||||
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
|
||||
remaining_length = BLOCK_SIZE_BYTES - len(block)
|
||||
block += [remaining_length] * remaining_length
|
||||
mixed_block = xor(block, previous_cipher_block)
|
||||
|
||||
encrypted_block = aes_encrypt(mixed_block, expanded_key)
|
||||
encrypted_data += encrypted_block
|
||||
|
||||
previous_cipher_block = encrypted_block
|
||||
|
||||
return encrypted_data
|
||||
|
||||
|
||||
def key_expansion(data):
|
||||
"""
|
||||
Generate key schedule
|
||||
|
||||
@param {int[]} data 16/24/32-Byte cipher key
|
||||
@returns {int[]} 176/208/240-Byte expanded key
|
||||
"""
|
||||
data = data[:] # copy
|
||||
rcon_iteration = 1
|
||||
key_size_bytes = len(data)
|
||||
expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES
|
||||
|
||||
while len(data) < expanded_key_size_bytes:
|
||||
temp = data[-4:]
|
||||
temp = key_schedule_core(temp, rcon_iteration)
|
||||
rcon_iteration += 1
|
||||
data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
|
||||
|
||||
for _ in range(3):
|
||||
temp = data[-4:]
|
||||
data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
|
||||
|
||||
if key_size_bytes == 32:
|
||||
temp = data[-4:]
|
||||
temp = sub_bytes(temp)
|
||||
data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
|
||||
|
||||
for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0):
|
||||
temp = data[-4:]
|
||||
data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
|
||||
data = data[:expanded_key_size_bytes]
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def aes_encrypt(data, expanded_key):
|
||||
"""
|
||||
Encrypt one block with aes
|
||||
|
||||
@param {int[]} data 16-Byte state
|
||||
@param {int[]} expanded_key 176/208/240-Byte expanded key
|
||||
@returns {int[]} 16-Byte cipher
|
||||
"""
|
||||
rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
|
||||
|
||||
data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
|
||||
for i in range(1, rounds + 1):
|
||||
data = sub_bytes(data)
|
||||
data = shift_rows(data)
|
||||
if i != rounds:
|
||||
data = mix_columns(data)
|
||||
data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES])
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def aes_decrypt(data, expanded_key):
|
||||
"""
|
||||
Decrypt one block with aes
|
||||
|
||||
@param {int[]} data 16-Byte cipher
|
||||
@param {int[]} expanded_key 176/208/240-Byte expanded key
|
||||
@returns {int[]} 16-Byte state
|
||||
"""
|
||||
rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
|
||||
|
||||
for i in range(rounds, 0, -1):
|
||||
data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES])
|
||||
if i != rounds:
|
||||
data = mix_columns_inv(data)
|
||||
data = shift_rows_inv(data)
|
||||
data = sub_bytes_inv(data)
|
||||
data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def aes_decrypt_text(data, password, key_size_bytes):
|
||||
"""
|
||||
Decrypt text
|
||||
- The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter
|
||||
- The cipher key is retrieved by encrypting the first 16 Byte of 'password'
|
||||
with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's)
|
||||
- Mode of operation is 'counter'
|
||||
|
||||
@param {str} data Base64 encoded string
|
||||
@param {str,unicode} password Password (will be encoded with utf-8)
|
||||
@param {int} key_size_bytes Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit
|
||||
@returns {str} Decrypted data
|
||||
"""
|
||||
NONCE_LENGTH_BYTES = 8
|
||||
|
||||
data = bytes_to_intlist(compat_b64decode(data))
|
||||
password = bytes_to_intlist(password.encode('utf-8'))
|
||||
|
||||
key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
|
||||
key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)
|
||||
|
||||
nonce = data[:NONCE_LENGTH_BYTES]
|
||||
cipher = data[NONCE_LENGTH_BYTES:]
|
||||
|
||||
class Counter(object):
|
||||
__value = nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)
|
||||
|
||||
def next_value(self):
|
||||
temp = self.__value
|
||||
self.__value = inc(self.__value)
|
||||
return temp
|
||||
|
||||
decrypted_data = aes_ctr_decrypt(cipher, key, Counter())
|
||||
plaintext = intlist_to_bytes(decrypted_data)
|
||||
|
||||
return plaintext
|
||||
|
||||
|
||||
RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
|
||||
SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
|
||||
0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
|
||||
0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
|
||||
0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
|
||||
0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
|
||||
0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
|
||||
0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
|
||||
0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
|
||||
0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
|
||||
0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
|
||||
0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
|
||||
0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
|
||||
0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
|
||||
0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
|
||||
0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
|
||||
0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16)
|
||||
SBOX_INV = (0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
|
||||
0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
|
||||
0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
|
||||
0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
|
||||
0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
|
||||
0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
|
||||
0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
|
||||
0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
|
||||
0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
|
||||
0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
|
||||
0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
|
||||
0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
|
||||
0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
|
||||
0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
|
||||
0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
|
||||
0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d)
|
||||
MIX_COLUMN_MATRIX = ((0x2, 0x3, 0x1, 0x1),
|
||||
(0x1, 0x2, 0x3, 0x1),
|
||||
(0x1, 0x1, 0x2, 0x3),
|
||||
(0x3, 0x1, 0x1, 0x2))
|
||||
MIX_COLUMN_MATRIX_INV = ((0xE, 0xB, 0xD, 0x9),
|
||||
(0x9, 0xE, 0xB, 0xD),
|
||||
(0xD, 0x9, 0xE, 0xB),
|
||||
(0xB, 0xD, 0x9, 0xE))
|
||||
RIJNDAEL_EXP_TABLE = (0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF, 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35,
|
||||
0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4, 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA,
|
||||
0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26, 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31,
|
||||
0x53, 0xF5, 0x04, 0x0C, 0x14, 0x3C, 0x44, 0xCC, 0x4F, 0xD1, 0x68, 0xB8, 0xD3, 0x6E, 0xB2, 0xCD,
|
||||
0x4C, 0xD4, 0x67, 0xA9, 0xE0, 0x3B, 0x4D, 0xD7, 0x62, 0xA6, 0xF1, 0x08, 0x18, 0x28, 0x78, 0x88,
|
||||
0x83, 0x9E, 0xB9, 0xD0, 0x6B, 0xBD, 0xDC, 0x7F, 0x81, 0x98, 0xB3, 0xCE, 0x49, 0xDB, 0x76, 0x9A,
|
||||
0xB5, 0xC4, 0x57, 0xF9, 0x10, 0x30, 0x50, 0xF0, 0x0B, 0x1D, 0x27, 0x69, 0xBB, 0xD6, 0x61, 0xA3,
|
||||
0xFE, 0x19, 0x2B, 0x7D, 0x87, 0x92, 0xAD, 0xEC, 0x2F, 0x71, 0x93, 0xAE, 0xE9, 0x20, 0x60, 0xA0,
|
||||
0xFB, 0x16, 0x3A, 0x4E, 0xD2, 0x6D, 0xB7, 0xC2, 0x5D, 0xE7, 0x32, 0x56, 0xFA, 0x15, 0x3F, 0x41,
|
||||
0xC3, 0x5E, 0xE2, 0x3D, 0x47, 0xC9, 0x40, 0xC0, 0x5B, 0xED, 0x2C, 0x74, 0x9C, 0xBF, 0xDA, 0x75,
|
||||
0x9F, 0xBA, 0xD5, 0x64, 0xAC, 0xEF, 0x2A, 0x7E, 0x82, 0x9D, 0xBC, 0xDF, 0x7A, 0x8E, 0x89, 0x80,
|
||||
0x9B, 0xB6, 0xC1, 0x58, 0xE8, 0x23, 0x65, 0xAF, 0xEA, 0x25, 0x6F, 0xB1, 0xC8, 0x43, 0xC5, 0x54,
|
||||
0xFC, 0x1F, 0x21, 0x63, 0xA5, 0xF4, 0x07, 0x09, 0x1B, 0x2D, 0x77, 0x99, 0xB0, 0xCB, 0x46, 0xCA,
|
||||
0x45, 0xCF, 0x4A, 0xDE, 0x79, 0x8B, 0x86, 0x91, 0xA8, 0xE3, 0x3E, 0x42, 0xC6, 0x51, 0xF3, 0x0E,
|
||||
0x12, 0x36, 0x5A, 0xEE, 0x29, 0x7B, 0x8D, 0x8C, 0x8F, 0x8A, 0x85, 0x94, 0xA7, 0xF2, 0x0D, 0x17,
|
||||
0x39, 0x4B, 0xDD, 0x7C, 0x84, 0x97, 0xA2, 0xFD, 0x1C, 0x24, 0x6C, 0xB4, 0xC7, 0x52, 0xF6, 0x01)
|
||||
RIJNDAEL_LOG_TABLE = (0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
|
||||
0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
|
||||
0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
|
||||
0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
|
||||
0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
|
||||
0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
|
||||
0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
|
||||
0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
|
||||
0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
|
||||
0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
|
||||
0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
|
||||
0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
|
||||
0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
|
||||
0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
|
||||
0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
|
||||
0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07)
|
||||
|
||||
|
||||
def sub_bytes(data):
|
||||
return [SBOX[x] for x in data]
|
||||
|
||||
|
||||
def sub_bytes_inv(data):
|
||||
return [SBOX_INV[x] for x in data]
|
||||
|
||||
|
||||
def rotate(data):
|
||||
return data[1:] + [data[0]]
|
||||
|
||||
|
||||
def key_schedule_core(data, rcon_iteration):
|
||||
data = rotate(data)
|
||||
data = sub_bytes(data)
|
||||
data[0] = data[0] ^ RCON[rcon_iteration]
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def xor(data1, data2):
|
||||
return [x ^ y for x, y in zip(data1, data2)]
|
||||
|
||||
|
||||
def rijndael_mul(a, b):
|
||||
if(a == 0 or b == 0):
|
||||
return 0
|
||||
return RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF]
|
||||
|
||||
|
||||
def mix_column(data, matrix):
|
||||
data_mixed = []
|
||||
for row in range(4):
|
||||
mixed = 0
|
||||
for column in range(4):
|
||||
# xor is (+) and (-)
|
||||
mixed ^= rijndael_mul(data[column], matrix[row][column])
|
||||
data_mixed.append(mixed)
|
||||
return data_mixed
|
||||
|
||||
|
||||
def mix_columns(data, matrix=MIX_COLUMN_MATRIX):
|
||||
data_mixed = []
|
||||
for i in range(4):
|
||||
column = data[i * 4: (i + 1) * 4]
|
||||
data_mixed += mix_column(column, matrix)
|
||||
return data_mixed
|
||||
|
||||
|
||||
def mix_columns_inv(data):
|
||||
return mix_columns(data, MIX_COLUMN_MATRIX_INV)
|
||||
|
||||
|
||||
def shift_rows(data):
|
||||
data_shifted = []
|
||||
for column in range(4):
|
||||
for row in range(4):
|
||||
data_shifted.append(data[((column + row) & 0b11) * 4 + row])
|
||||
return data_shifted
|
||||
|
||||
|
||||
def shift_rows_inv(data):
|
||||
data_shifted = []
|
||||
for column in range(4):
|
||||
for row in range(4):
|
||||
data_shifted.append(data[((column - row) & 0b11) * 4 + row])
|
||||
return data_shifted
|
||||
|
||||
|
||||
def inc(data):
|
||||
data = data[:] # copy
|
||||
for i in range(len(data) - 1, -1, -1):
|
||||
if data[i] == 255:
|
||||
data[i] = 0
|
||||
else:
|
||||
data[i] = data[i] + 1
|
||||
break
|
||||
return data
|
||||
|
||||
|
||||
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
|
96
youtube_dl/cache.py
Normal file
96
youtube_dl/cache.py
Normal file
|
@ -0,0 +1,96 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import errno
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import traceback
|
||||
|
||||
from .compat import compat_getenv
|
||||
from .utils import (
|
||||
expand_path,
|
||||
write_json_file,
|
||||
)
|
||||
|
||||
|
||||
class Cache(object):
|
||||
def __init__(self, ydl):
|
||||
self._ydl = ydl
|
||||
|
||||
def _get_root_dir(self):
|
||||
res = self._ydl.params.get('cachedir')
|
||||
if res is None:
|
||||
cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
|
||||
res = os.path.join(cache_root, 'youtube-dlc')
|
||||
return expand_path(res)
|
||||
|
||||
def _get_cache_fn(self, section, key, dtype):
|
||||
assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \
|
||||
'invalid section %r' % section
|
||||
assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key
|
||||
return os.path.join(
|
||||
self._get_root_dir(), section, '%s.%s' % (key, dtype))
|
||||
|
||||
@property
|
||||
def enabled(self):
|
||||
return self._ydl.params.get('cachedir') is not False
|
||||
|
||||
def store(self, section, key, data, dtype='json'):
|
||||
assert dtype in ('json',)
|
||||
|
||||
if not self.enabled:
|
||||
return
|
||||
|
||||
fn = self._get_cache_fn(section, key, dtype)
|
||||
try:
|
||||
try:
|
||||
os.makedirs(os.path.dirname(fn))
|
||||
except OSError as ose:
|
||||
if ose.errno != errno.EEXIST:
|
||||
raise
|
||||
write_json_file(data, fn)
|
||||
except Exception:
|
||||
tb = traceback.format_exc()
|
||||
self._ydl.report_warning(
|
||||
'Writing cache to %r failed: %s' % (fn, tb))
|
||||
|
||||
def load(self, section, key, dtype='json', default=None):
|
||||
assert dtype in ('json',)
|
||||
|
||||
if not self.enabled:
|
||||
return default
|
||||
|
||||
cache_fn = self._get_cache_fn(section, key, dtype)
|
||||
try:
|
||||
try:
|
||||
with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
|
||||
return json.load(cachef)
|
||||
except ValueError:
|
||||
try:
|
||||
file_size = os.path.getsize(cache_fn)
|
||||
except (OSError, IOError) as oe:
|
||||
file_size = str(oe)
|
||||
self._ydl.report_warning(
|
||||
'Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
|
||||
except IOError:
|
||||
pass # No cache available
|
||||
|
||||
return default
|
||||
|
||||
def remove(self):
|
||||
if not self.enabled:
|
||||
self._ydl.to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)')
|
||||
return
|
||||
|
||||
cachedir = self._get_root_dir()
|
||||
if not any((term in cachedir) for term in ('cache', 'tmp')):
|
||||
raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir)
|
||||
|
||||
self._ydl.to_screen(
|
||||
'Removing cache dir %s .' % cachedir, skip_eol=True)
|
||||
if os.path.exists(cachedir):
|
||||
self._ydl.to_screen('.', skip_eol=True)
|
||||
shutil.rmtree(cachedir)
|
||||
self._ydl.to_screen('.')
|
3050
youtube_dl/compat.py
Normal file
3050
youtube_dl/compat.py
Normal file
File diff suppressed because it is too large
Load diff
63
youtube_dl/downloader/__init__.py
Normal file
63
youtube_dl/downloader/__init__.py
Normal file
|
@ -0,0 +1,63 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from .common import FileDownloader
|
||||
from .f4m import F4mFD
|
||||
from .hls import HlsFD
|
||||
from .http import HttpFD
|
||||
from .rtmp import RtmpFD
|
||||
from .dash import DashSegmentsFD
|
||||
from .rtsp import RtspFD
|
||||
from .ism import IsmFD
|
||||
from .youtube_live_chat import YoutubeLiveChatReplayFD
|
||||
from .external import (
|
||||
get_external_downloader,
|
||||
FFmpegFD,
|
||||
)
|
||||
|
||||
from ..utils import (
|
||||
determine_protocol,
|
||||
)
|
||||
|
||||
PROTOCOL_MAP = {
|
||||
'rtmp': RtmpFD,
|
||||
'm3u8_native': HlsFD,
|
||||
'm3u8': FFmpegFD,
|
||||
'mms': RtspFD,
|
||||
'rtsp': RtspFD,
|
||||
'f4m': F4mFD,
|
||||
'http_dash_segments': DashSegmentsFD,
|
||||
'ism': IsmFD,
|
||||
'youtube_live_chat_replay': YoutubeLiveChatReplayFD,
|
||||
}
|
||||
|
||||
|
||||
def get_suitable_downloader(info_dict, params={}):
|
||||
"""Get the downloader class that can handle the info dict."""
|
||||
protocol = determine_protocol(info_dict)
|
||||
info_dict['protocol'] = protocol
|
||||
|
||||
# if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
|
||||
# return FFmpegFD
|
||||
|
||||
external_downloader = params.get('external_downloader')
|
||||
if external_downloader is not None:
|
||||
ed = get_external_downloader(external_downloader)
|
||||
if ed.can_download(info_dict):
|
||||
return ed
|
||||
|
||||
if protocol.startswith('m3u8') and info_dict.get('is_live'):
|
||||
return FFmpegFD
|
||||
|
||||
if protocol == 'm3u8' and params.get('hls_prefer_native') is True:
|
||||
return HlsFD
|
||||
|
||||
if protocol == 'm3u8_native' and params.get('hls_prefer_native') is False:
|
||||
return FFmpegFD
|
||||
|
||||
return PROTOCOL_MAP.get(protocol, HttpFD)
|
||||
|
||||
|
||||
__all__ = [
|
||||
'get_suitable_downloader',
|
||||
'FileDownloader',
|
||||
]
|
391
youtube_dl/downloader/common.py
Normal file
391
youtube_dl/downloader/common.py
Normal file
|
@ -0,0 +1,391 @@
|
|||
from __future__ import division, unicode_literals
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import random
|
||||
|
||||
from ..compat import compat_os_name
|
||||
from ..utils import (
|
||||
decodeArgument,
|
||||
encodeFilename,
|
||||
error_to_compat_str,
|
||||
format_bytes,
|
||||
shell_quote,
|
||||
timeconvert,
|
||||
)
|
||||
|
||||
|
||||
class FileDownloader(object):
|
||||
"""File Downloader class.
|
||||
|
||||
File downloader objects are the ones responsible of downloading the
|
||||
actual video file and writing it to disk.
|
||||
|
||||
File downloaders accept a lot of parameters. In order not to saturate
|
||||
the object constructor with arguments, it receives a dictionary of
|
||||
options instead.
|
||||
|
||||
Available options:
|
||||
|
||||
verbose: Print additional info to stdout.
|
||||
quiet: Do not print messages to stdout.
|
||||
ratelimit: Download speed limit, in bytes/sec.
|
||||
retries: Number of times to retry for HTTP error 5xx
|
||||
buffersize: Size of download buffer in bytes.
|
||||
noresizebuffer: Do not automatically resize the download buffer.
|
||||
continuedl: Try to continue downloads if possible.
|
||||
noprogress: Do not print the progress bar.
|
||||
logtostderr: Log messages to stderr instead of stdout.
|
||||
consoletitle: Display progress in console window's titlebar.
|
||||
nopart: Do not use temporary .part files.
|
||||
updatetime: Use the Last-modified header to set output file timestamps.
|
||||
test: Download only first bytes to test the downloader.
|
||||
min_filesize: Skip files smaller than this size
|
||||
max_filesize: Skip files larger than this size
|
||||
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
|
||||
external_downloader_args: A list of additional command-line arguments for the
|
||||
external downloader.
|
||||
hls_use_mpegts: Use the mpegts container for HLS videos.
|
||||
http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be
|
||||
useful for bypassing bandwidth throttling imposed by
|
||||
a webserver (experimental)
|
||||
|
||||
Subclasses of this one must re-define the real_download method.
|
||||
"""
|
||||
|
||||
_TEST_FILE_SIZE = 10241
|
||||
params = None
|
||||
|
||||
def __init__(self, ydl, params):
|
||||
"""Create a FileDownloader object with the given options."""
|
||||
self.ydl = ydl
|
||||
self._progress_hooks = []
|
||||
self.params = params
|
||||
self.add_progress_hook(self.report_progress)
|
||||
|
||||
@staticmethod
|
||||
def format_seconds(seconds):
|
||||
(mins, secs) = divmod(seconds, 60)
|
||||
(hours, mins) = divmod(mins, 60)
|
||||
if hours > 99:
|
||||
return '--:--:--'
|
||||
if hours == 0:
|
||||
return '%02d:%02d' % (mins, secs)
|
||||
else:
|
||||
return '%02d:%02d:%02d' % (hours, mins, secs)
|
||||
|
||||
@staticmethod
|
||||
def calc_percent(byte_counter, data_len):
|
||||
if data_len is None:
|
||||
return None
|
||||
return float(byte_counter) / float(data_len) * 100.0
|
||||
|
||||
@staticmethod
|
||||
def format_percent(percent):
|
||||
if percent is None:
|
||||
return '---.-%'
|
||||
return '%6s' % ('%3.1f%%' % percent)
|
||||
|
||||
@staticmethod
|
||||
def calc_eta(start, now, total, current):
|
||||
if total is None:
|
||||
return None
|
||||
if now is None:
|
||||
now = time.time()
|
||||
dif = now - start
|
||||
if current == 0 or dif < 0.001: # One millisecond
|
||||
return None
|
||||
rate = float(current) / dif
|
||||
return int((float(total) - float(current)) / rate)
|
||||
|
||||
@staticmethod
|
||||
def format_eta(eta):
|
||||
if eta is None:
|
||||
return '--:--'
|
||||
return FileDownloader.format_seconds(eta)
|
||||
|
||||
@staticmethod
|
||||
def calc_speed(start, now, bytes):
|
||||
dif = now - start
|
||||
if bytes == 0 or dif < 0.001: # One millisecond
|
||||
return None
|
||||
return float(bytes) / dif
|
||||
|
||||
@staticmethod
|
||||
def format_speed(speed):
|
||||
if speed is None:
|
||||
return '%10s' % '---b/s'
|
||||
return '%10s' % ('%s/s' % format_bytes(speed))
|
||||
|
||||
@staticmethod
|
||||
def format_retries(retries):
|
||||
return 'inf' if retries == float('inf') else '%.0f' % retries
|
||||
|
||||
@staticmethod
|
||||
def best_block_size(elapsed_time, bytes):
|
||||
new_min = max(bytes / 2.0, 1.0)
|
||||
new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
|
||||
if elapsed_time < 0.001:
|
||||
return int(new_max)
|
||||
rate = bytes / elapsed_time
|
||||
if rate > new_max:
|
||||
return int(new_max)
|
||||
if rate < new_min:
|
||||
return int(new_min)
|
||||
return int(rate)
|
||||
|
||||
@staticmethod
|
||||
def parse_bytes(bytestr):
|
||||
"""Parse a string indicating a byte quantity into an integer."""
|
||||
matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
|
||||
if matchobj is None:
|
||||
return None
|
||||
number = float(matchobj.group(1))
|
||||
multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
|
||||
return int(round(number * multiplier))
|
||||
|
||||
def to_screen(self, *args, **kargs):
|
||||
self.ydl.to_screen(*args, **kargs)
|
||||
|
||||
def to_stderr(self, message):
|
||||
self.ydl.to_screen(message)
|
||||
|
||||
def to_console_title(self, message):
|
||||
self.ydl.to_console_title(message)
|
||||
|
||||
def trouble(self, *args, **kargs):
|
||||
self.ydl.trouble(*args, **kargs)
|
||||
|
||||
def report_warning(self, *args, **kargs):
|
||||
self.ydl.report_warning(*args, **kargs)
|
||||
|
||||
def report_error(self, *args, **kargs):
|
||||
self.ydl.report_error(*args, **kargs)
|
||||
|
||||
def slow_down(self, start_time, now, byte_counter):
|
||||
"""Sleep if the download speed is over the rate limit."""
|
||||
rate_limit = self.params.get('ratelimit')
|
||||
if rate_limit is None or byte_counter == 0:
|
||||
return
|
||||
if now is None:
|
||||
now = time.time()
|
||||
elapsed = now - start_time
|
||||
if elapsed <= 0.0:
|
||||
return
|
||||
speed = float(byte_counter) / elapsed
|
||||
if speed > rate_limit:
|
||||
sleep_time = float(byte_counter) / rate_limit - elapsed
|
||||
if sleep_time > 0:
|
||||
time.sleep(sleep_time)
|
||||
|
||||
def temp_name(self, filename):
|
||||
"""Returns a temporary filename for the given filename."""
|
||||
if self.params.get('nopart', False) or filename == '-' or \
|
||||
(os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
|
||||
return filename
|
||||
return filename + '.part'
|
||||
|
||||
def undo_temp_name(self, filename):
|
||||
if filename.endswith('.part'):
|
||||
return filename[:-len('.part')]
|
||||
return filename
|
||||
|
||||
def ytdl_filename(self, filename):
|
||||
return filename + '.ytdl'
|
||||
|
||||
def try_rename(self, old_filename, new_filename):
|
||||
try:
|
||||
if old_filename == new_filename:
|
||||
return
|
||||
os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
|
||||
except (IOError, OSError) as err:
|
||||
self.report_error('unable to rename file: %s' % error_to_compat_str(err))
|
||||
|
||||
def try_utime(self, filename, last_modified_hdr):
|
||||
"""Try to set the last-modified time of the given file."""
|
||||
if last_modified_hdr is None:
|
||||
return
|
||||
if not os.path.isfile(encodeFilename(filename)):
|
||||
return
|
||||
timestr = last_modified_hdr
|
||||
if timestr is None:
|
||||
return
|
||||
filetime = timeconvert(timestr)
|
||||
if filetime is None:
|
||||
return filetime
|
||||
# Ignore obviously invalid dates
|
||||
if filetime == 0:
|
||||
return
|
||||
try:
|
||||
os.utime(filename, (time.time(), filetime))
|
||||
except Exception:
|
||||
pass
|
||||
return filetime
|
||||
|
||||
def report_destination(self, filename):
|
||||
"""Report destination filename."""
|
||||
self.to_screen('[download] Destination: ' + filename)
|
||||
|
||||
def _report_progress_status(self, msg, is_last_line=False):
|
||||
fullmsg = '[download] ' + msg
|
||||
if self.params.get('progress_with_newline', False):
|
||||
self.to_screen(fullmsg)
|
||||
else:
|
||||
if compat_os_name == 'nt':
|
||||
prev_len = getattr(self, '_report_progress_prev_line_length',
|
||||
0)
|
||||
if prev_len > len(fullmsg):
|
||||
fullmsg += ' ' * (prev_len - len(fullmsg))
|
||||
self._report_progress_prev_line_length = len(fullmsg)
|
||||
clear_line = '\r'
|
||||
else:
|
||||
clear_line = ('\r\x1b[K' if sys.stderr.isatty() else '\r')
|
||||
self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
|
||||
self.to_console_title('youtube-dlc ' + msg)
|
||||
|
||||
def report_progress(self, s):
|
||||
if s['status'] == 'finished':
|
||||
if self.params.get('noprogress', False):
|
||||
self.to_screen('[download] Download completed')
|
||||
else:
|
||||
msg_template = '100%%'
|
||||
if s.get('total_bytes') is not None:
|
||||
s['_total_bytes_str'] = format_bytes(s['total_bytes'])
|
||||
msg_template += ' of %(_total_bytes_str)s'
|
||||
if s.get('elapsed') is not None:
|
||||
s['_elapsed_str'] = self.format_seconds(s['elapsed'])
|
||||
msg_template += ' in %(_elapsed_str)s'
|
||||
self._report_progress_status(
|
||||
msg_template % s, is_last_line=True)
|
||||
|
||||
if self.params.get('noprogress'):
|
||||
return
|
||||
|
||||
if s['status'] != 'downloading':
|
||||
return
|
||||
|
||||
if s.get('eta') is not None:
|
||||
s['_eta_str'] = self.format_eta(s['eta'])
|
||||
else:
|
||||
s['_eta_str'] = 'Unknown ETA'
|
||||
|
||||
if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
|
||||
s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
|
||||
elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
|
||||
s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
|
||||
else:
|
||||
if s.get('downloaded_bytes') == 0:
|
||||
s['_percent_str'] = self.format_percent(0)
|
||||
else:
|
||||
s['_percent_str'] = 'Unknown %'
|
||||
|
||||
if s.get('speed') is not None:
|
||||
s['_speed_str'] = self.format_speed(s['speed'])
|
||||
else:
|
||||
s['_speed_str'] = 'Unknown speed'
|
||||
|
||||
if s.get('total_bytes') is not None:
|
||||
s['_total_bytes_str'] = format_bytes(s['total_bytes'])
|
||||
msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
|
||||
elif s.get('total_bytes_estimate') is not None:
|
||||
s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
|
||||
msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
|
||||
else:
|
||||
if s.get('downloaded_bytes') is not None:
|
||||
s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
|
||||
if s.get('elapsed'):
|
||||
s['_elapsed_str'] = self.format_seconds(s['elapsed'])
|
||||
msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
|
||||
else:
|
||||
msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
|
||||
else:
|
||||
msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s'
|
||||
|
||||
self._report_progress_status(msg_template % s)
|
||||
|
||||
def report_resuming_byte(self, resume_len):
|
||||
"""Report attempt to resume at given byte."""
|
||||
self.to_screen('[download] Resuming download at byte %s' % resume_len)
|
||||
|
||||
def report_retry(self, err, count, retries):
|
||||
"""Report retry in case of HTTP error 5xx"""
|
||||
self.to_screen(
|
||||
'[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...'
|
||||
% (error_to_compat_str(err), count, self.format_retries(retries)))
|
||||
|
||||
def report_file_already_downloaded(self, file_name):
|
||||
"""Report file has already been fully downloaded."""
|
||||
try:
|
||||
self.to_screen('[download] %s has already been downloaded' % file_name)
|
||||
except UnicodeEncodeError:
|
||||
self.to_screen('[download] The file has already been downloaded')
|
||||
|
||||
def report_unable_to_resume(self):
|
||||
"""Report it was impossible to resume download."""
|
||||
self.to_screen('[download] Unable to resume')
|
||||
|
||||
def download(self, filename, info_dict):
|
||||
"""Download to a filename using the info from info_dict
|
||||
Return True on success and False otherwise
|
||||
"""
|
||||
|
||||
nooverwrites_and_exists = (
|
||||
self.params.get('nooverwrites', False)
|
||||
and os.path.exists(encodeFilename(filename))
|
||||
)
|
||||
|
||||
if not hasattr(filename, 'write'):
|
||||
continuedl_and_exists = (
|
||||
self.params.get('continuedl', True)
|
||||
and os.path.isfile(encodeFilename(filename))
|
||||
and not self.params.get('nopart', False)
|
||||
)
|
||||
|
||||
# Check file already present
|
||||
if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
|
||||
self.report_file_already_downloaded(filename)
|
||||
self._hook_progress({
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
'total_bytes': os.path.getsize(encodeFilename(filename)),
|
||||
})
|
||||
return True
|
||||
|
||||
min_sleep_interval = self.params.get('sleep_interval')
|
||||
if min_sleep_interval:
|
||||
max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
|
||||
sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
|
||||
self.to_screen(
|
||||
'[download] Sleeping %s seconds...' % (
|
||||
int(sleep_interval) if sleep_interval.is_integer()
|
||||
else '%.2f' % sleep_interval))
|
||||
time.sleep(sleep_interval)
|
||||
|
||||
return self.real_download(filename, info_dict)
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
"""Real download process. Redefine in subclasses."""
|
||||
raise NotImplementedError('This method must be implemented by subclasses')
|
||||
|
||||
def _hook_progress(self, status):
|
||||
for ph in self._progress_hooks:
|
||||
ph(status)
|
||||
|
||||
def add_progress_hook(self, ph):
|
||||
# See YoutubeDl.py (search for progress_hooks) for a description of
|
||||
# this interface
|
||||
self._progress_hooks.append(ph)
|
||||
|
||||
def _debug_cmd(self, args, exe=None):
|
||||
if not self.params.get('verbose', False):
|
||||
return
|
||||
|
||||
str_args = [decodeArgument(a) for a in args]
|
||||
|
||||
if exe is None:
|
||||
exe = os.path.basename(str_args[0])
|
||||
|
||||
self.to_screen('[debug] %s command line: %s' % (
|
||||
exe, shell_quote(str_args)))
|
80
youtube_dl/downloader/dash.py
Normal file
80
youtube_dl/downloader/dash.py
Normal file
|
@ -0,0 +1,80 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import compat_urllib_error
|
||||
from ..utils import (
|
||||
DownloadError,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class DashSegmentsFD(FragmentFD):
|
||||
"""
|
||||
Download segments in a DASH manifest
|
||||
"""
|
||||
|
||||
FD_NAME = 'dashsegments'
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
fragment_base_url = info_dict.get('fragment_base_url')
|
||||
fragments = info_dict['fragments'][:1] if self.params.get(
|
||||
'test', False) else info_dict['fragments']
|
||||
|
||||
ctx = {
|
||||
'filename': filename,
|
||||
'total_frags': len(fragments),
|
||||
}
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
|
||||
fragment_retries = self.params.get('fragment_retries', 0)
|
||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||
|
||||
frag_index = 0
|
||||
for i, fragment in enumerate(fragments):
|
||||
frag_index += 1
|
||||
if frag_index <= ctx['fragment_index']:
|
||||
continue
|
||||
# In DASH, the first segment contains necessary headers to
|
||||
# generate a valid MP4 file, so always abort for the first segment
|
||||
fatal = i == 0 or not skip_unavailable_fragments
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
fragment_url = fragment.get('url')
|
||||
if not fragment_url:
|
||||
assert fragment_base_url
|
||||
fragment_url = urljoin(fragment_base_url, fragment['path'])
|
||||
success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
|
||||
if not success:
|
||||
return False
|
||||
self._append_fragment(ctx, frag_content)
|
||||
break
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
# YouTube may often return 404 HTTP error for a fragment causing the
|
||||
# whole download to fail. However if the same fragment is immediately
|
||||
# retried with the same request data this usually succeeds (1-2 attempts
|
||||
# is usually enough) thus allowing to download the whole file successfully.
|
||||
# To be future-proof we will retry all fragments that fail with any
|
||||
# HTTP error.
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(err, frag_index, count, fragment_retries)
|
||||
except DownloadError:
|
||||
# Don't retry fragment if error occurred during HTTP downloading
|
||||
# itself since it has own retry settings
|
||||
if not fatal:
|
||||
self.report_skip_fragment(frag_index)
|
||||
break
|
||||
raise
|
||||
|
||||
if count > fragment_retries:
|
||||
if not fatal:
|
||||
self.report_skip_fragment(frag_index)
|
||||
continue
|
||||
self.report_error('giving up after %s fragment retries' % fragment_retries)
|
||||
return False
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
return True
|
371
youtube_dl/downloader/external.py
Normal file
371
youtube_dl/downloader/external.py
Normal file
|
@ -0,0 +1,371 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import os.path
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..compat import (
|
||||
compat_setenv,
|
||||
compat_str,
|
||||
)
|
||||
from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
|
||||
from ..utils import (
|
||||
cli_option,
|
||||
cli_valueless_option,
|
||||
cli_bool_option,
|
||||
cli_configuration_args,
|
||||
encodeFilename,
|
||||
encodeArgument,
|
||||
handle_youtubedl_headers,
|
||||
check_executable,
|
||||
is_outdated_version,
|
||||
)
|
||||
|
||||
|
||||
class ExternalFD(FileDownloader):
|
||||
def real_download(self, filename, info_dict):
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
try:
|
||||
started = time.time()
|
||||
retval = self._call_downloader(tmpfilename, info_dict)
|
||||
except KeyboardInterrupt:
|
||||
if not info_dict.get('is_live'):
|
||||
raise
|
||||
# Live stream downloading cancellation should be considered as
|
||||
# correct and expected termination thus all postprocessing
|
||||
# should take place
|
||||
retval = 0
|
||||
self.to_screen('[%s] Interrupted by user' % self.get_basename())
|
||||
|
||||
if retval == 0:
|
||||
status = {
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
'elapsed': time.time() - started,
|
||||
}
|
||||
if filename != '-':
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
status.update({
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
})
|
||||
self._hook_progress(status)
|
||||
return True
|
||||
else:
|
||||
self.to_stderr('\n')
|
||||
self.report_error('%s exited with code %d' % (
|
||||
self.get_basename(), retval))
|
||||
return False
|
||||
|
||||
@classmethod
|
||||
def get_basename(cls):
|
||||
return cls.__name__[:-2].lower()
|
||||
|
||||
@property
|
||||
def exe(self):
|
||||
return self.params.get('external_downloader')
|
||||
|
||||
@classmethod
|
||||
def available(cls):
|
||||
return check_executable(cls.get_basename(), [cls.AVAILABLE_OPT])
|
||||
|
||||
@classmethod
|
||||
def supports(cls, info_dict):
|
||||
return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
|
||||
|
||||
@classmethod
|
||||
def can_download(cls, info_dict):
|
||||
return cls.available() and cls.supports(info_dict)
|
||||
|
||||
def _option(self, command_option, param):
|
||||
return cli_option(self.params, command_option, param)
|
||||
|
||||
def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None):
|
||||
return cli_bool_option(self.params, command_option, param, true_value, false_value, separator)
|
||||
|
||||
def _valueless_option(self, command_option, param, expected_value=True):
|
||||
return cli_valueless_option(self.params, command_option, param, expected_value)
|
||||
|
||||
def _configuration_args(self, default=[]):
|
||||
return cli_configuration_args(self.params, 'external_downloader_args', default)
|
||||
|
||||
def _call_downloader(self, tmpfilename, info_dict):
|
||||
""" Either overwrite this or implement _make_cmd """
|
||||
cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
|
||||
|
||||
self._debug_cmd(cmd)
|
||||
|
||||
p = subprocess.Popen(
|
||||
cmd, stderr=subprocess.PIPE)
|
||||
_, stderr = p.communicate()
|
||||
if p.returncode != 0:
|
||||
self.to_stderr(stderr.decode('utf-8', 'replace'))
|
||||
return p.returncode
|
||||
|
||||
|
||||
class CurlFD(ExternalFD):
|
||||
AVAILABLE_OPT = '-V'
|
||||
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = [self.exe, '--location', '-o', tmpfilename]
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
|
||||
cmd += self._valueless_option('--silent', 'noprogress')
|
||||
cmd += self._valueless_option('--verbose', 'verbose')
|
||||
cmd += self._option('--limit-rate', 'ratelimit')
|
||||
retry = self._option('--retry', 'retries')
|
||||
if len(retry) == 2:
|
||||
if retry[1] in ('inf', 'infinite'):
|
||||
retry[1] = '2147483647'
|
||||
cmd += retry
|
||||
cmd += self._option('--max-filesize', 'max_filesize')
|
||||
cmd += self._option('--interface', 'source_address')
|
||||
cmd += self._option('--proxy', 'proxy')
|
||||
cmd += self._valueless_option('--insecure', 'nocheckcertificate')
|
||||
cmd += self._configuration_args()
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
|
||||
def _call_downloader(self, tmpfilename, info_dict):
|
||||
cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
|
||||
|
||||
self._debug_cmd(cmd)
|
||||
|
||||
# curl writes the progress to stderr so don't capture it.
|
||||
p = subprocess.Popen(cmd)
|
||||
p.communicate()
|
||||
return p.returncode
|
||||
|
||||
|
||||
class AxelFD(ExternalFD):
|
||||
AVAILABLE_OPT = '-V'
|
||||
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = [self.exe, '-o', tmpfilename]
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['-H', '%s: %s' % (key, val)]
|
||||
cmd += self._configuration_args()
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
|
||||
|
||||
class WgetFD(ExternalFD):
|
||||
AVAILABLE_OPT = '--version'
|
||||
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
cmd += self._option('--limit-rate', 'ratelimit')
|
||||
retry = self._option('--tries', 'retries')
|
||||
if len(retry) == 2:
|
||||
if retry[1] in ('inf', 'infinite'):
|
||||
retry[1] = '0'
|
||||
cmd += retry
|
||||
cmd += self._option('--bind-address', 'source_address')
|
||||
cmd += self._option('--proxy', 'proxy')
|
||||
cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
|
||||
cmd += self._configuration_args()
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
|
||||
|
||||
class Aria2cFD(ExternalFD):
|
||||
AVAILABLE_OPT = '-v'
|
||||
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = [self.exe, '-c']
|
||||
cmd += self._configuration_args([
|
||||
'--min-split-size', '1M', '--max-connection-per-server', '4'])
|
||||
dn = os.path.dirname(tmpfilename)
|
||||
if dn:
|
||||
cmd += ['--dir', dn]
|
||||
cmd += ['--out', os.path.basename(tmpfilename)]
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
cmd += self._option('--interface', 'source_address')
|
||||
cmd += self._option('--all-proxy', 'proxy')
|
||||
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
|
||||
cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
|
||||
|
||||
class HttpieFD(ExternalFD):
|
||||
@classmethod
|
||||
def available(cls):
|
||||
return check_executable('http', ['--version'])
|
||||
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['%s:%s' % (key, val)]
|
||||
return cmd
|
||||
|
||||
|
||||
class FFmpegFD(ExternalFD):
|
||||
@classmethod
|
||||
def supports(cls, info_dict):
|
||||
return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')
|
||||
|
||||
@classmethod
|
||||
def available(cls):
|
||||
return FFmpegPostProcessor().available
|
||||
|
||||
def _call_downloader(self, tmpfilename, info_dict):
|
||||
url = info_dict['url']
|
||||
ffpp = FFmpegPostProcessor(downloader=self)
|
||||
if not ffpp.available:
|
||||
self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
|
||||
return False
|
||||
ffpp.check_version()
|
||||
|
||||
args = [ffpp.executable, '-y']
|
||||
|
||||
for log_level in ('quiet', 'verbose'):
|
||||
if self.params.get(log_level, False):
|
||||
args += ['-loglevel', log_level]
|
||||
break
|
||||
|
||||
seekable = info_dict.get('_seekable')
|
||||
if seekable is not None:
|
||||
# setting -seekable prevents ffmpeg from guessing if the server
|
||||
# supports seeking(by adding the header `Range: bytes=0-`), which
|
||||
# can cause problems in some cases
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127
|
||||
# http://trac.ffmpeg.org/ticket/6125#comment:10
|
||||
args += ['-seekable', '1' if seekable else '0']
|
||||
|
||||
args += self._configuration_args()
|
||||
|
||||
# start_time = info_dict.get('start_time') or 0
|
||||
# if start_time:
|
||||
# args += ['-ss', compat_str(start_time)]
|
||||
# end_time = info_dict.get('end_time')
|
||||
# if end_time:
|
||||
# args += ['-t', compat_str(end_time - start_time)]
|
||||
|
||||
if info_dict['http_headers'] and re.match(r'^https?://', url):
|
||||
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
|
||||
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
|
||||
headers = handle_youtubedl_headers(info_dict['http_headers'])
|
||||
args += [
|
||||
'-headers',
|
||||
''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
|
||||
|
||||
env = None
|
||||
proxy = self.params.get('proxy')
|
||||
if proxy:
|
||||
if not re.match(r'^[\da-zA-Z]+://', proxy):
|
||||
proxy = 'http://%s' % proxy
|
||||
|
||||
if proxy.startswith('socks'):
|
||||
self.report_warning(
|
||||
'%s does not support SOCKS proxies. Downloading is likely to fail. '
|
||||
'Consider adding --hls-prefer-native to your command.' % self.get_basename())
|
||||
|
||||
# Since December 2015 ffmpeg supports -http_proxy option (see
|
||||
# http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
|
||||
# We could switch to the following code if we are able to detect version properly
|
||||
# args += ['-http_proxy', proxy]
|
||||
env = os.environ.copy()
|
||||
compat_setenv('HTTP_PROXY', proxy, env=env)
|
||||
compat_setenv('http_proxy', proxy, env=env)
|
||||
|
||||
protocol = info_dict.get('protocol')
|
||||
|
||||
if protocol == 'rtmp':
|
||||
player_url = info_dict.get('player_url')
|
||||
page_url = info_dict.get('page_url')
|
||||
app = info_dict.get('app')
|
||||
play_path = info_dict.get('play_path')
|
||||
tc_url = info_dict.get('tc_url')
|
||||
flash_version = info_dict.get('flash_version')
|
||||
live = info_dict.get('rtmp_live', False)
|
||||
conn = info_dict.get('rtmp_conn')
|
||||
if player_url is not None:
|
||||
args += ['-rtmp_swfverify', player_url]
|
||||
if page_url is not None:
|
||||
args += ['-rtmp_pageurl', page_url]
|
||||
if app is not None:
|
||||
args += ['-rtmp_app', app]
|
||||
if play_path is not None:
|
||||
args += ['-rtmp_playpath', play_path]
|
||||
if tc_url is not None:
|
||||
args += ['-rtmp_tcurl', tc_url]
|
||||
if flash_version is not None:
|
||||
args += ['-rtmp_flashver', flash_version]
|
||||
if live:
|
||||
args += ['-rtmp_live', 'live']
|
||||
if isinstance(conn, list):
|
||||
for entry in conn:
|
||||
args += ['-rtmp_conn', entry]
|
||||
elif isinstance(conn, compat_str):
|
||||
args += ['-rtmp_conn', conn]
|
||||
|
||||
args += ['-i', url, '-c', 'copy']
|
||||
|
||||
if self.params.get('test', False):
|
||||
args += ['-fs', compat_str(self._TEST_FILE_SIZE)]
|
||||
|
||||
if protocol in ('m3u8', 'm3u8_native'):
|
||||
if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
|
||||
args += ['-f', 'mpegts']
|
||||
else:
|
||||
args += ['-f', 'mp4']
|
||||
if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2', False)) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
|
||||
args += ['-bsf:a', 'aac_adtstoasc']
|
||||
elif protocol == 'rtmp':
|
||||
args += ['-f', 'flv']
|
||||
else:
|
||||
args += ['-f', EXT_TO_OUT_FORMATS.get(info_dict['ext'], info_dict['ext'])]
|
||||
|
||||
args = [encodeArgument(opt) for opt in args]
|
||||
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
|
||||
|
||||
self._debug_cmd(args)
|
||||
|
||||
proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env)
|
||||
try:
|
||||
retval = proc.wait()
|
||||
except KeyboardInterrupt:
|
||||
# subprocces.run would send the SIGKILL signal to ffmpeg and the
|
||||
# mp4 file couldn't be played, but if we ask ffmpeg to quit it
|
||||
# produces a file that is playable (this is mostly useful for live
|
||||
# streams). Note that Windows is not affected and produces playable
|
||||
# files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
|
||||
if sys.platform != 'win32':
|
||||
proc.communicate(b'q')
|
||||
raise
|
||||
return retval
|
||||
|
||||
|
||||
class AVconvFD(FFmpegFD):
|
||||
pass
|
||||
|
||||
|
||||
_BY_NAME = dict(
|
||||
(klass.get_basename(), klass)
|
||||
for name, klass in globals().items()
|
||||
if name.endswith('FD') and name != 'ExternalFD'
|
||||
)
|
||||
|
||||
|
||||
def list_external_downloaders():
|
||||
return sorted(_BY_NAME.keys())
|
||||
|
||||
|
||||
def get_external_downloader(external_downloader):
|
||||
""" Given the name of the executable, see whether we support the given
|
||||
downloader . """
|
||||
# Drop .exe extension on Windows
|
||||
bn = os.path.splitext(os.path.basename(external_downloader))[0]
|
||||
return _BY_NAME[bn]
|
438
youtube_dl/downloader/f4m.py
Normal file
438
youtube_dl/downloader/f4m.py
Normal file
|
@ -0,0 +1,438 @@
|
|||
from __future__ import division, unicode_literals
|
||||
|
||||
import io
|
||||
import itertools
|
||||
import time
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_etree_fromstring,
|
||||
compat_urlparse,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_struct_pack,
|
||||
compat_struct_unpack,
|
||||
)
|
||||
from ..utils import (
|
||||
fix_xml_ampersands,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class DataTruncatedError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class FlvReader(io.BytesIO):
|
||||
"""
|
||||
Reader for Flv files
|
||||
The file format is documented in https://www.adobe.com/devnet/f4v.html
|
||||
"""
|
||||
|
||||
def read_bytes(self, n):
|
||||
data = self.read(n)
|
||||
if len(data) < n:
|
||||
raise DataTruncatedError(
|
||||
'FlvReader error: need %d bytes while only %d bytes got' % (
|
||||
n, len(data)))
|
||||
return data
|
||||
|
||||
# Utility functions for reading numbers and strings
|
||||
def read_unsigned_long_long(self):
|
||||
return compat_struct_unpack('!Q', self.read_bytes(8))[0]
|
||||
|
||||
def read_unsigned_int(self):
|
||||
return compat_struct_unpack('!I', self.read_bytes(4))[0]
|
||||
|
||||
def read_unsigned_char(self):
|
||||
return compat_struct_unpack('!B', self.read_bytes(1))[0]
|
||||
|
||||
def read_string(self):
|
||||
res = b''
|
||||
while True:
|
||||
char = self.read_bytes(1)
|
||||
if char == b'\x00':
|
||||
break
|
||||
res += char
|
||||
return res
|
||||
|
||||
def read_box_info(self):
|
||||
"""
|
||||
Read a box and return the info as a tuple: (box_size, box_type, box_data)
|
||||
"""
|
||||
real_size = size = self.read_unsigned_int()
|
||||
box_type = self.read_bytes(4)
|
||||
header_end = 8
|
||||
if size == 1:
|
||||
real_size = self.read_unsigned_long_long()
|
||||
header_end = 16
|
||||
return real_size, box_type, self.read_bytes(real_size - header_end)
|
||||
|
||||
def read_asrt(self):
|
||||
# version
|
||||
self.read_unsigned_char()
|
||||
# flags
|
||||
self.read_bytes(3)
|
||||
quality_entry_count = self.read_unsigned_char()
|
||||
# QualityEntryCount
|
||||
for i in range(quality_entry_count):
|
||||
self.read_string()
|
||||
|
||||
segment_run_count = self.read_unsigned_int()
|
||||
segments = []
|
||||
for i in range(segment_run_count):
|
||||
first_segment = self.read_unsigned_int()
|
||||
fragments_per_segment = self.read_unsigned_int()
|
||||
segments.append((first_segment, fragments_per_segment))
|
||||
|
||||
return {
|
||||
'segment_run': segments,
|
||||
}
|
||||
|
||||
def read_afrt(self):
|
||||
# version
|
||||
self.read_unsigned_char()
|
||||
# flags
|
||||
self.read_bytes(3)
|
||||
# time scale
|
||||
self.read_unsigned_int()
|
||||
|
||||
quality_entry_count = self.read_unsigned_char()
|
||||
# QualitySegmentUrlModifiers
|
||||
for i in range(quality_entry_count):
|
||||
self.read_string()
|
||||
|
||||
fragments_count = self.read_unsigned_int()
|
||||
fragments = []
|
||||
for i in range(fragments_count):
|
||||
first = self.read_unsigned_int()
|
||||
first_ts = self.read_unsigned_long_long()
|
||||
duration = self.read_unsigned_int()
|
||||
if duration == 0:
|
||||
discontinuity_indicator = self.read_unsigned_char()
|
||||
else:
|
||||
discontinuity_indicator = None
|
||||
fragments.append({
|
||||
'first': first,
|
||||
'ts': first_ts,
|
||||
'duration': duration,
|
||||
'discontinuity_indicator': discontinuity_indicator,
|
||||
})
|
||||
|
||||
return {
|
||||
'fragments': fragments,
|
||||
}
|
||||
|
||||
def read_abst(self):
|
||||
# version
|
||||
self.read_unsigned_char()
|
||||
# flags
|
||||
self.read_bytes(3)
|
||||
|
||||
self.read_unsigned_int() # BootstrapinfoVersion
|
||||
# Profile,Live,Update,Reserved
|
||||
flags = self.read_unsigned_char()
|
||||
live = flags & 0x20 != 0
|
||||
# time scale
|
||||
self.read_unsigned_int()
|
||||
# CurrentMediaTime
|
||||
self.read_unsigned_long_long()
|
||||
# SmpteTimeCodeOffset
|
||||
self.read_unsigned_long_long()
|
||||
|
||||
self.read_string() # MovieIdentifier
|
||||
server_count = self.read_unsigned_char()
|
||||
# ServerEntryTable
|
||||
for i in range(server_count):
|
||||
self.read_string()
|
||||
quality_count = self.read_unsigned_char()
|
||||
# QualityEntryTable
|
||||
for i in range(quality_count):
|
||||
self.read_string()
|
||||
# DrmData
|
||||
self.read_string()
|
||||
# MetaData
|
||||
self.read_string()
|
||||
|
||||
segments_count = self.read_unsigned_char()
|
||||
segments = []
|
||||
for i in range(segments_count):
|
||||
box_size, box_type, box_data = self.read_box_info()
|
||||
assert box_type == b'asrt'
|
||||
segment = FlvReader(box_data).read_asrt()
|
||||
segments.append(segment)
|
||||
fragments_run_count = self.read_unsigned_char()
|
||||
fragments = []
|
||||
for i in range(fragments_run_count):
|
||||
box_size, box_type, box_data = self.read_box_info()
|
||||
assert box_type == b'afrt'
|
||||
fragments.append(FlvReader(box_data).read_afrt())
|
||||
|
||||
return {
|
||||
'segments': segments,
|
||||
'fragments': fragments,
|
||||
'live': live,
|
||||
}
|
||||
|
||||
def read_bootstrap_info(self):
|
||||
total_size, box_type, box_data = self.read_box_info()
|
||||
assert box_type == b'abst'
|
||||
return FlvReader(box_data).read_abst()
|
||||
|
||||
|
||||
def read_bootstrap_info(bootstrap_bytes):
|
||||
return FlvReader(bootstrap_bytes).read_bootstrap_info()
|
||||
|
||||
|
||||
def build_fragments_list(boot_info):
|
||||
""" Return a list of (segment, fragment) for each fragment in the video """
|
||||
res = []
|
||||
segment_run_table = boot_info['segments'][0]
|
||||
fragment_run_entry_table = boot_info['fragments'][0]['fragments']
|
||||
first_frag_number = fragment_run_entry_table[0]['first']
|
||||
fragments_counter = itertools.count(first_frag_number)
|
||||
for segment, fragments_count in segment_run_table['segment_run']:
|
||||
# In some live HDS streams (for example Rai), `fragments_count` is
|
||||
# abnormal and causing out-of-memory errors. It's OK to change the
|
||||
# number of fragments for live streams as they are updated periodically
|
||||
if fragments_count == 4294967295 and boot_info['live']:
|
||||
fragments_count = 2
|
||||
for _ in range(fragments_count):
|
||||
res.append((segment, next(fragments_counter)))
|
||||
|
||||
if boot_info['live']:
|
||||
res = res[-2:]
|
||||
|
||||
return res
|
||||
|
||||
|
||||
def write_unsigned_int(stream, val):
|
||||
stream.write(compat_struct_pack('!I', val))
|
||||
|
||||
|
||||
def write_unsigned_int_24(stream, val):
|
||||
stream.write(compat_struct_pack('!I', val)[1:])
|
||||
|
||||
|
||||
def write_flv_header(stream):
|
||||
"""Writes the FLV header to stream"""
|
||||
# FLV header
|
||||
stream.write(b'FLV\x01')
|
||||
stream.write(b'\x05')
|
||||
stream.write(b'\x00\x00\x00\x09')
|
||||
stream.write(b'\x00\x00\x00\x00')
|
||||
|
||||
|
||||
def write_metadata_tag(stream, metadata):
|
||||
"""Writes optional metadata tag to stream"""
|
||||
SCRIPT_TAG = b'\x12'
|
||||
FLV_TAG_HEADER_LEN = 11
|
||||
|
||||
if metadata:
|
||||
stream.write(SCRIPT_TAG)
|
||||
write_unsigned_int_24(stream, len(metadata))
|
||||
stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
|
||||
stream.write(metadata)
|
||||
write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata))
|
||||
|
||||
|
||||
def remove_encrypted_media(media):
|
||||
return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib
|
||||
and 'drmAdditionalHeaderSetId' not in e.attrib,
|
||||
media))
|
||||
|
||||
|
||||
def _add_ns(prop, ver=1):
|
||||
return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop)
|
||||
|
||||
|
||||
def get_base_url(manifest):
|
||||
base_url = xpath_text(
|
||||
manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)],
|
||||
'base URL', default=None)
|
||||
if base_url:
|
||||
base_url = base_url.strip()
|
||||
return base_url
|
||||
|
||||
|
||||
class F4mFD(FragmentFD):
|
||||
"""
|
||||
A downloader for f4m manifests or AdobeHDS.
|
||||
"""
|
||||
|
||||
FD_NAME = 'f4m'
|
||||
|
||||
def _get_unencrypted_media(self, doc):
|
||||
media = doc.findall(_add_ns('media'))
|
||||
if not media:
|
||||
self.report_error('No media found')
|
||||
for e in (doc.findall(_add_ns('drmAdditionalHeader'))
|
||||
+ doc.findall(_add_ns('drmAdditionalHeaderSet'))):
|
||||
# If id attribute is missing it's valid for all media nodes
|
||||
# without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
|
||||
if 'id' not in e.attrib:
|
||||
self.report_error('Missing ID in f4m DRM')
|
||||
media = remove_encrypted_media(media)
|
||||
if not media:
|
||||
self.report_error('Unsupported DRM')
|
||||
return media
|
||||
|
||||
def _get_bootstrap_from_url(self, bootstrap_url):
|
||||
bootstrap = self.ydl.urlopen(bootstrap_url).read()
|
||||
return read_bootstrap_info(bootstrap)
|
||||
|
||||
def _update_live_fragments(self, bootstrap_url, latest_fragment):
|
||||
fragments_list = []
|
||||
retries = 30
|
||||
while (not fragments_list) and (retries > 0):
|
||||
boot_info = self._get_bootstrap_from_url(bootstrap_url)
|
||||
fragments_list = build_fragments_list(boot_info)
|
||||
fragments_list = [f for f in fragments_list if f[1] > latest_fragment]
|
||||
if not fragments_list:
|
||||
# Retry after a while
|
||||
time.sleep(5.0)
|
||||
retries -= 1
|
||||
|
||||
if not fragments_list:
|
||||
self.report_error('Failed to update fragments')
|
||||
|
||||
return fragments_list
|
||||
|
||||
def _parse_bootstrap_node(self, node, base_url):
|
||||
# Sometimes non empty inline bootstrap info can be specified along
|
||||
# with bootstrap url attribute (e.g. dummy inline bootstrap info
|
||||
# contains whitespace characters in [1]). We will prefer bootstrap
|
||||
# url over inline bootstrap info when present.
|
||||
# 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m
|
||||
bootstrap_url = node.get('url')
|
||||
if bootstrap_url:
|
||||
bootstrap_url = compat_urlparse.urljoin(
|
||||
base_url, bootstrap_url)
|
||||
boot_info = self._get_bootstrap_from_url(bootstrap_url)
|
||||
else:
|
||||
bootstrap_url = None
|
||||
bootstrap = compat_b64decode(node.text)
|
||||
boot_info = read_bootstrap_info(bootstrap)
|
||||
return boot_info, bootstrap_url
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
man_url = info_dict['url']
|
||||
requested_bitrate = info_dict.get('tbr')
|
||||
self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
|
||||
|
||||
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
|
||||
man_url = urlh.geturl()
|
||||
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
|
||||
# (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244
|
||||
# and https://github.com/ytdl-org/youtube-dl/issues/7823)
|
||||
manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip()
|
||||
|
||||
doc = compat_etree_fromstring(manifest)
|
||||
formats = [(int(f.attrib.get('bitrate', -1)), f)
|
||||
for f in self._get_unencrypted_media(doc)]
|
||||
if requested_bitrate is None or len(formats) == 1:
|
||||
# get the best format
|
||||
formats = sorted(formats, key=lambda f: f[0])
|
||||
rate, media = formats[-1]
|
||||
else:
|
||||
rate, media = list(filter(
|
||||
lambda f: int(f[0]) == requested_bitrate, formats))[0]
|
||||
|
||||
# Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
|
||||
man_base_url = get_base_url(doc) or man_url
|
||||
|
||||
base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
|
||||
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
|
||||
boot_info, bootstrap_url = self._parse_bootstrap_node(
|
||||
bootstrap_node, man_base_url)
|
||||
live = boot_info['live']
|
||||
metadata_node = media.find(_add_ns('metadata'))
|
||||
if metadata_node is not None:
|
||||
metadata = compat_b64decode(metadata_node.text)
|
||||
else:
|
||||
metadata = None
|
||||
|
||||
fragments_list = build_fragments_list(boot_info)
|
||||
test = self.params.get('test', False)
|
||||
if test:
|
||||
# We only download the first fragment
|
||||
fragments_list = fragments_list[:1]
|
||||
total_frags = len(fragments_list)
|
||||
# For some akamai manifests we'll need to add a query to the fragment url
|
||||
akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
|
||||
|
||||
ctx = {
|
||||
'filename': filename,
|
||||
'total_frags': total_frags,
|
||||
'live': live,
|
||||
}
|
||||
|
||||
self._prepare_frag_download(ctx)
|
||||
|
||||
dest_stream = ctx['dest_stream']
|
||||
|
||||
if ctx['complete_frags_downloaded_bytes'] == 0:
|
||||
write_flv_header(dest_stream)
|
||||
if not live:
|
||||
write_metadata_tag(dest_stream, metadata)
|
||||
|
||||
base_url_parsed = compat_urllib_parse_urlparse(base_url)
|
||||
|
||||
self._start_frag_download(ctx)
|
||||
|
||||
frag_index = 0
|
||||
while fragments_list:
|
||||
seg_i, frag_i = fragments_list.pop(0)
|
||||
frag_index += 1
|
||||
if frag_index <= ctx['fragment_index']:
|
||||
continue
|
||||
name = 'Seg%d-Frag%d' % (seg_i, frag_i)
|
||||
query = []
|
||||
if base_url_parsed.query:
|
||||
query.append(base_url_parsed.query)
|
||||
if akamai_pv:
|
||||
query.append(akamai_pv.strip(';'))
|
||||
if info_dict.get('extra_param_to_segment_url'):
|
||||
query.append(info_dict['extra_param_to_segment_url'])
|
||||
url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
|
||||
try:
|
||||
success, down_data = self._download_fragment(ctx, url_parsed.geturl(), info_dict)
|
||||
if not success:
|
||||
return False
|
||||
reader = FlvReader(down_data)
|
||||
while True:
|
||||
try:
|
||||
_, box_type, box_data = reader.read_box_info()
|
||||
except DataTruncatedError:
|
||||
if test:
|
||||
# In tests, segments may be truncated, and thus
|
||||
# FlvReader may not be able to parse the whole
|
||||
# chunk. If so, write the segment as is
|
||||
# See https://github.com/ytdl-org/youtube-dl/issues/9214
|
||||
dest_stream.write(down_data)
|
||||
break
|
||||
raise
|
||||
if box_type == b'mdat':
|
||||
self._append_fragment(ctx, box_data)
|
||||
break
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if live and (err.code == 404 or err.code == 410):
|
||||
# We didn't keep up with the live window. Continue
|
||||
# with the next available fragment.
|
||||
msg = 'Fragment %d unavailable' % frag_i
|
||||
self.report_warning(msg)
|
||||
fragments_list = []
|
||||
else:
|
||||
raise
|
||||
|
||||
if not fragments_list and not test and live and bootstrap_url:
|
||||
fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
|
||||
total_frags += len(fragments_list)
|
||||
if fragments_list and (fragments_list[0][1] > frag_i + 1):
|
||||
msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
|
||||
self.report_warning(msg)
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
return True
|
269
youtube_dl/downloader/fragment.py
Normal file
269
youtube_dl/downloader/fragment.py
Normal file
|
@ -0,0 +1,269 @@
|
|||
from __future__ import division, unicode_literals
|
||||
|
||||
import os
|
||||
import time
|
||||
import json
|
||||
|
||||
from .common import FileDownloader
|
||||
from .http import HttpFD
|
||||
from ..utils import (
|
||||
error_to_compat_str,
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class HttpQuietDownloader(HttpFD):
|
||||
def to_screen(self, *args, **kargs):
|
||||
pass
|
||||
|
||||
|
||||
class FragmentFD(FileDownloader):
|
||||
"""
|
||||
A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests).
|
||||
|
||||
Available options:
|
||||
|
||||
fragment_retries: Number of times to retry a fragment for HTTP error (DASH
|
||||
and hlsnative only)
|
||||
skip_unavailable_fragments:
|
||||
Skip unavailable fragments (DASH and hlsnative only)
|
||||
keep_fragments: Keep downloaded fragments on disk after downloading is
|
||||
finished
|
||||
|
||||
For each incomplete fragment download youtube-dlc keeps on disk a special
|
||||
bookkeeping file with download state and metadata (in future such files will
|
||||
be used for any incomplete download handled by youtube-dlc). This file is
|
||||
used to properly handle resuming, check download file consistency and detect
|
||||
potential errors. The file has a .ytdl extension and represents a standard
|
||||
JSON file of the following format:
|
||||
|
||||
extractor:
|
||||
Dictionary of extractor related data. TBD.
|
||||
|
||||
downloader:
|
||||
Dictionary of downloader related data. May contain following data:
|
||||
current_fragment:
|
||||
Dictionary with current (being downloaded) fragment data:
|
||||
index: 0-based index of current fragment among all fragments
|
||||
fragment_count:
|
||||
Total count of fragments
|
||||
|
||||
This feature is experimental and file format may change in future.
|
||||
"""
|
||||
|
||||
def report_retry_fragment(self, err, frag_index, count, retries):
|
||||
self.to_screen(
|
||||
'[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s)...'
|
||||
% (error_to_compat_str(err), frag_index, count, self.format_retries(retries)))
|
||||
|
||||
def report_skip_fragment(self, frag_index):
|
||||
self.to_screen('[download] Skipping fragment %d...' % frag_index)
|
||||
|
||||
def _prepare_url(self, info_dict, url):
|
||||
headers = info_dict.get('http_headers')
|
||||
return sanitized_Request(url, None, headers) if headers else url
|
||||
|
||||
def _prepare_and_start_frag_download(self, ctx):
|
||||
self._prepare_frag_download(ctx)
|
||||
self._start_frag_download(ctx)
|
||||
|
||||
@staticmethod
|
||||
def __do_ytdl_file(ctx):
|
||||
return not ctx['live'] and not ctx['tmpfilename'] == '-'
|
||||
|
||||
def _read_ytdl_file(self, ctx):
|
||||
assert 'ytdl_corrupt' not in ctx
|
||||
stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
|
||||
try:
|
||||
ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index']
|
||||
except Exception:
|
||||
ctx['ytdl_corrupt'] = True
|
||||
finally:
|
||||
stream.close()
|
||||
|
||||
def _write_ytdl_file(self, ctx):
|
||||
frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
|
||||
downloader = {
|
||||
'current_fragment': {
|
||||
'index': ctx['fragment_index'],
|
||||
},
|
||||
}
|
||||
if ctx.get('fragment_count') is not None:
|
||||
downloader['fragment_count'] = ctx['fragment_count']
|
||||
frag_index_stream.write(json.dumps({'downloader': downloader}))
|
||||
frag_index_stream.close()
|
||||
|
||||
def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
|
||||
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
|
||||
success = ctx['dl'].download(fragment_filename, {
|
||||
'url': frag_url,
|
||||
'http_headers': headers or info_dict.get('http_headers'),
|
||||
})
|
||||
if not success:
|
||||
return False, None
|
||||
down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
|
||||
ctx['fragment_filename_sanitized'] = frag_sanitized
|
||||
frag_content = down.read()
|
||||
down.close()
|
||||
return True, frag_content
|
||||
|
||||
def _append_fragment(self, ctx, frag_content):
|
||||
try:
|
||||
ctx['dest_stream'].write(frag_content)
|
||||
ctx['dest_stream'].flush()
|
||||
finally:
|
||||
if self.__do_ytdl_file(ctx):
|
||||
self._write_ytdl_file(ctx)
|
||||
if not self.params.get('keep_fragments', False):
|
||||
os.remove(encodeFilename(ctx['fragment_filename_sanitized']))
|
||||
del ctx['fragment_filename_sanitized']
|
||||
|
||||
def _prepare_frag_download(self, ctx):
|
||||
if 'live' not in ctx:
|
||||
ctx['live'] = False
|
||||
if not ctx['live']:
|
||||
total_frags_str = '%d' % ctx['total_frags']
|
||||
ad_frags = ctx.get('ad_frags', 0)
|
||||
if ad_frags:
|
||||
total_frags_str += ' (not including %d ad)' % ad_frags
|
||||
else:
|
||||
total_frags_str = 'unknown (live)'
|
||||
self.to_screen(
|
||||
'[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
|
||||
self.report_destination(ctx['filename'])
|
||||
dl = HttpQuietDownloader(
|
||||
self.ydl,
|
||||
{
|
||||
'continuedl': True,
|
||||
'quiet': True,
|
||||
'noprogress': True,
|
||||
'ratelimit': self.params.get('ratelimit'),
|
||||
'retries': self.params.get('retries', 0),
|
||||
'nopart': self.params.get('nopart', False),
|
||||
'test': self.params.get('test', False),
|
||||
}
|
||||
)
|
||||
tmpfilename = self.temp_name(ctx['filename'])
|
||||
open_mode = 'wb'
|
||||
resume_len = 0
|
||||
|
||||
# Establish possible resume length
|
||||
if os.path.isfile(encodeFilename(tmpfilename)):
|
||||
open_mode = 'ab'
|
||||
resume_len = os.path.getsize(encodeFilename(tmpfilename))
|
||||
|
||||
# Should be initialized before ytdl file check
|
||||
ctx.update({
|
||||
'tmpfilename': tmpfilename,
|
||||
'fragment_index': 0,
|
||||
})
|
||||
|
||||
if self.__do_ytdl_file(ctx):
|
||||
if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
|
||||
self._read_ytdl_file(ctx)
|
||||
is_corrupt = ctx.get('ytdl_corrupt') is True
|
||||
is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
|
||||
if is_corrupt or is_inconsistent:
|
||||
message = (
|
||||
'.ytdl file is corrupt' if is_corrupt else
|
||||
'Inconsistent state of incomplete fragment download')
|
||||
self.report_warning(
|
||||
'%s. Restarting from the beginning...' % message)
|
||||
ctx['fragment_index'] = resume_len = 0
|
||||
if 'ytdl_corrupt' in ctx:
|
||||
del ctx['ytdl_corrupt']
|
||||
self._write_ytdl_file(ctx)
|
||||
else:
|
||||
self._write_ytdl_file(ctx)
|
||||
assert ctx['fragment_index'] == 0
|
||||
|
||||
dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)
|
||||
|
||||
ctx.update({
|
||||
'dl': dl,
|
||||
'dest_stream': dest_stream,
|
||||
'tmpfilename': tmpfilename,
|
||||
# Total complete fragments downloaded so far in bytes
|
||||
'complete_frags_downloaded_bytes': resume_len,
|
||||
})
|
||||
|
||||
def _start_frag_download(self, ctx):
|
||||
resume_len = ctx['complete_frags_downloaded_bytes']
|
||||
total_frags = ctx['total_frags']
|
||||
# This dict stores the download progress, it's updated by the progress
|
||||
# hook
|
||||
state = {
|
||||
'status': 'downloading',
|
||||
'downloaded_bytes': resume_len,
|
||||
'fragment_index': ctx['fragment_index'],
|
||||
'fragment_count': total_frags,
|
||||
'filename': ctx['filename'],
|
||||
'tmpfilename': ctx['tmpfilename'],
|
||||
}
|
||||
|
||||
start = time.time()
|
||||
ctx.update({
|
||||
'started': start,
|
||||
# Amount of fragment's bytes downloaded by the time of the previous
|
||||
# frag progress hook invocation
|
||||
'prev_frag_downloaded_bytes': 0,
|
||||
})
|
||||
|
||||
def frag_progress_hook(s):
|
||||
if s['status'] not in ('downloading', 'finished'):
|
||||
return
|
||||
|
||||
time_now = time.time()
|
||||
state['elapsed'] = time_now - start
|
||||
frag_total_bytes = s.get('total_bytes') or 0
|
||||
if not ctx['live']:
|
||||
estimated_size = (
|
||||
(ctx['complete_frags_downloaded_bytes'] + frag_total_bytes)
|
||||
/ (state['fragment_index'] + 1) * total_frags)
|
||||
state['total_bytes_estimate'] = estimated_size
|
||||
|
||||
if s['status'] == 'finished':
|
||||
state['fragment_index'] += 1
|
||||
ctx['fragment_index'] = state['fragment_index']
|
||||
state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
|
||||
ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
|
||||
ctx['prev_frag_downloaded_bytes'] = 0
|
||||
else:
|
||||
frag_downloaded_bytes = s['downloaded_bytes']
|
||||
state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
|
||||
if not ctx['live']:
|
||||
state['eta'] = self.calc_eta(
|
||||
start, time_now, estimated_size - resume_len,
|
||||
state['downloaded_bytes'] - resume_len)
|
||||
state['speed'] = s.get('speed') or ctx.get('speed')
|
||||
ctx['speed'] = state['speed']
|
||||
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
|
||||
self._hook_progress(state)
|
||||
|
||||
ctx['dl'].add_progress_hook(frag_progress_hook)
|
||||
|
||||
return start
|
||||
|
||||
def _finish_frag_download(self, ctx):
|
||||
ctx['dest_stream'].close()
|
||||
if self.__do_ytdl_file(ctx):
|
||||
ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
|
||||
if os.path.isfile(ytdl_filename):
|
||||
os.remove(ytdl_filename)
|
||||
elapsed = time.time() - ctx['started']
|
||||
|
||||
if ctx['tmpfilename'] == '-':
|
||||
downloaded_bytes = ctx['complete_frags_downloaded_bytes']
|
||||
else:
|
||||
self.try_rename(ctx['tmpfilename'], ctx['filename'])
|
||||
downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
|
||||
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': downloaded_bytes,
|
||||
'total_bytes': downloaded_bytes,
|
||||
'filename': ctx['filename'],
|
||||
'status': 'finished',
|
||||
'elapsed': elapsed,
|
||||
})
|
210
youtube_dl/downloader/hls.py
Normal file
210
youtube_dl/downloader/hls.py
Normal file
|
@ -0,0 +1,210 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import binascii
|
||||
try:
|
||||
from Crypto.Cipher import AES
|
||||
can_decrypt_frag = True
|
||||
except ImportError:
|
||||
can_decrypt_frag = False
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from .external import FFmpegFD
|
||||
|
||||
from ..compat import (
|
||||
compat_urllib_error,
|
||||
compat_urlparse,
|
||||
compat_struct_pack,
|
||||
)
|
||||
from ..utils import (
|
||||
parse_m3u8_attributes,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class HlsFD(FragmentFD):
|
||||
""" A limited implementation that does not require ffmpeg """
|
||||
|
||||
FD_NAME = 'hlsnative'
|
||||
|
||||
@staticmethod
|
||||
def can_download(manifest, info_dict):
|
||||
UNSUPPORTED_FEATURES = (
|
||||
r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
|
||||
# r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
|
||||
|
||||
# Live streams heuristic does not always work (e.g. geo restricted to Germany
|
||||
# http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
|
||||
# r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3]
|
||||
|
||||
# This heuristic also is not correct since segments may not be appended as well.
|
||||
# Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite
|
||||
# no segments will definitely be appended to the end of the playlist.
|
||||
# r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of
|
||||
# # event media playlists [4]
|
||||
|
||||
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
|
||||
# 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
|
||||
# 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
|
||||
# 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
|
||||
)
|
||||
check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
|
||||
is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
|
||||
check_results.append(can_decrypt_frag or not is_aes128_enc)
|
||||
check_results.append(not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest))
|
||||
check_results.append(not info_dict.get('is_live'))
|
||||
return all(check_results)
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
man_url = info_dict['url']
|
||||
self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
|
||||
|
||||
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
|
||||
man_url = urlh.geturl()
|
||||
s = urlh.read().decode('utf-8', 'ignore')
|
||||
|
||||
if not self.can_download(s, info_dict):
|
||||
if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'):
|
||||
self.report_error('pycrypto not found. Please install it.')
|
||||
return False
|
||||
self.report_warning(
|
||||
'hlsnative has detected features it does not support, '
|
||||
'extraction will be delegated to ffmpeg')
|
||||
fd = FFmpegFD(self.ydl, self.params)
|
||||
for ph in self._progress_hooks:
|
||||
fd.add_progress_hook(ph)
|
||||
return fd.real_download(filename, info_dict)
|
||||
|
||||
def is_ad_fragment_start(s):
|
||||
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
|
||||
or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
|
||||
|
||||
def is_ad_fragment_end(s):
|
||||
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
|
||||
or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))
|
||||
|
||||
media_frags = 0
|
||||
ad_frags = 0
|
||||
ad_frag_next = False
|
||||
for line in s.splitlines():
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
if line.startswith('#'):
|
||||
if is_ad_fragment_start(line):
|
||||
ad_frag_next = True
|
||||
elif is_ad_fragment_end(line):
|
||||
ad_frag_next = False
|
||||
continue
|
||||
if ad_frag_next:
|
||||
ad_frags += 1
|
||||
continue
|
||||
media_frags += 1
|
||||
|
||||
ctx = {
|
||||
'filename': filename,
|
||||
'total_frags': media_frags,
|
||||
'ad_frags': ad_frags,
|
||||
}
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
|
||||
fragment_retries = self.params.get('fragment_retries', 0)
|
||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||
test = self.params.get('test', False)
|
||||
|
||||
extra_query = None
|
||||
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
|
||||
if extra_param_to_segment_url:
|
||||
extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
|
||||
i = 0
|
||||
media_sequence = 0
|
||||
decrypt_info = {'METHOD': 'NONE'}
|
||||
byte_range = {}
|
||||
frag_index = 0
|
||||
ad_frag_next = False
|
||||
for line in s.splitlines():
|
||||
line = line.strip()
|
||||
if line:
|
||||
if not line.startswith('#'):
|
||||
if ad_frag_next:
|
||||
continue
|
||||
frag_index += 1
|
||||
if frag_index <= ctx['fragment_index']:
|
||||
continue
|
||||
frag_url = (
|
||||
line
|
||||
if re.match(r'^https?://', line)
|
||||
else compat_urlparse.urljoin(man_url, line))
|
||||
if extra_query:
|
||||
frag_url = update_url_query(frag_url, extra_query)
|
||||
count = 0
|
||||
headers = info_dict.get('http_headers', {})
|
||||
if byte_range:
|
||||
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'])
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success, frag_content = self._download_fragment(
|
||||
ctx, frag_url, info_dict, headers)
|
||||
if not success:
|
||||
return False
|
||||
break
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
# Unavailable (possibly temporary) fragments may be served.
|
||||
# First we try to retry then either skip or abort.
|
||||
# See https://github.com/ytdl-org/youtube-dl/issues/10165,
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/10448).
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(err, frag_index, count, fragment_retries)
|
||||
if count > fragment_retries:
|
||||
if skip_unavailable_fragments:
|
||||
i += 1
|
||||
media_sequence += 1
|
||||
self.report_skip_fragment(frag_index)
|
||||
continue
|
||||
self.report_error(
|
||||
'giving up after %s fragment retries' % fragment_retries)
|
||||
return False
|
||||
if decrypt_info['METHOD'] == 'AES-128':
|
||||
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
|
||||
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
|
||||
self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
|
||||
frag_content = AES.new(
|
||||
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
|
||||
self._append_fragment(ctx, frag_content)
|
||||
# We only download the first fragment during the test
|
||||
if test:
|
||||
break
|
||||
i += 1
|
||||
media_sequence += 1
|
||||
elif line.startswith('#EXT-X-KEY'):
|
||||
decrypt_url = decrypt_info.get('URI')
|
||||
decrypt_info = parse_m3u8_attributes(line[11:])
|
||||
if decrypt_info['METHOD'] == 'AES-128':
|
||||
if 'IV' in decrypt_info:
|
||||
decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
|
||||
if not re.match(r'^https?://', decrypt_info['URI']):
|
||||
decrypt_info['URI'] = compat_urlparse.urljoin(
|
||||
man_url, decrypt_info['URI'])
|
||||
if extra_query:
|
||||
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
|
||||
if decrypt_url != decrypt_info['URI']:
|
||||
decrypt_info['KEY'] = None
|
||||
elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
|
||||
media_sequence = int(line[22:])
|
||||
elif line.startswith('#EXT-X-BYTERANGE'):
|
||||
splitted_byte_range = line[17:].split('@')
|
||||
sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
|
||||
byte_range = {
|
||||
'start': sub_range_start,
|
||||
'end': sub_range_start + int(splitted_byte_range[0]),
|
||||
}
|
||||
elif is_ad_fragment_start(line):
|
||||
ad_frag_next = True
|
||||
elif is_ad_fragment_end(line):
|
||||
ad_frag_next = False
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
return True
|
354
youtube_dl/downloader/http.py
Normal file
354
youtube_dl/downloader/http.py
Normal file
|
@ -0,0 +1,354 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import errno
|
||||
import os
|
||||
import socket
|
||||
import time
|
||||
import random
|
||||
import re
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
)
|
||||
from ..utils import (
|
||||
ContentTooShortError,
|
||||
encodeFilename,
|
||||
int_or_none,
|
||||
sanitize_open,
|
||||
sanitized_Request,
|
||||
write_xattr,
|
||||
XAttrMetadataError,
|
||||
XAttrUnavailableError,
|
||||
)
|
||||
|
||||
|
||||
class HttpFD(FileDownloader):
|
||||
def real_download(self, filename, info_dict):
|
||||
url = info_dict['url']
|
||||
|
||||
class DownloadContext(dict):
|
||||
__getattr__ = dict.get
|
||||
__setattr__ = dict.__setitem__
|
||||
__delattr__ = dict.__delitem__
|
||||
|
||||
ctx = DownloadContext()
|
||||
ctx.filename = filename
|
||||
ctx.tmpfilename = self.temp_name(filename)
|
||||
ctx.stream = None
|
||||
|
||||
# Do not include the Accept-Encoding header
|
||||
headers = {'Youtubedl-no-compression': 'True'}
|
||||
add_headers = info_dict.get('http_headers')
|
||||
if add_headers:
|
||||
headers.update(add_headers)
|
||||
|
||||
is_test = self.params.get('test', False)
|
||||
chunk_size = self._TEST_FILE_SIZE if is_test else (
|
||||
info_dict.get('downloader_options', {}).get('http_chunk_size')
|
||||
or self.params.get('http_chunk_size') or 0)
|
||||
|
||||
ctx.open_mode = 'wb'
|
||||
ctx.resume_len = 0
|
||||
ctx.data_len = None
|
||||
ctx.block_size = self.params.get('buffersize', 1024)
|
||||
ctx.start_time = time.time()
|
||||
ctx.chunk_size = None
|
||||
|
||||
if self.params.get('continuedl', True):
|
||||
# Establish possible resume length
|
||||
if os.path.isfile(encodeFilename(ctx.tmpfilename)):
|
||||
ctx.resume_len = os.path.getsize(
|
||||
encodeFilename(ctx.tmpfilename))
|
||||
|
||||
ctx.is_resume = ctx.resume_len > 0
|
||||
|
||||
count = 0
|
||||
retries = self.params.get('retries', 0)
|
||||
|
||||
class SucceedDownload(Exception):
|
||||
pass
|
||||
|
||||
class RetryDownload(Exception):
|
||||
def __init__(self, source_error):
|
||||
self.source_error = source_error
|
||||
|
||||
class NextFragment(Exception):
|
||||
pass
|
||||
|
||||
def set_range(req, start, end):
|
||||
range_header = 'bytes=%d-' % start
|
||||
if end:
|
||||
range_header += compat_str(end)
|
||||
req.add_header('Range', range_header)
|
||||
|
||||
def establish_connection():
|
||||
ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size)
|
||||
if not is_test and chunk_size else chunk_size)
|
||||
if ctx.resume_len > 0:
|
||||
range_start = ctx.resume_len
|
||||
if ctx.is_resume:
|
||||
self.report_resuming_byte(ctx.resume_len)
|
||||
ctx.open_mode = 'ab'
|
||||
elif ctx.chunk_size > 0:
|
||||
range_start = 0
|
||||
else:
|
||||
range_start = None
|
||||
ctx.is_resume = False
|
||||
range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None
|
||||
if range_end and ctx.data_len is not None and range_end >= ctx.data_len:
|
||||
range_end = ctx.data_len - 1
|
||||
has_range = range_start is not None
|
||||
ctx.has_range = has_range
|
||||
request = sanitized_Request(url, None, headers)
|
||||
if has_range:
|
||||
set_range(request, range_start, range_end)
|
||||
# Establish connection
|
||||
try:
|
||||
ctx.data = self.ydl.urlopen(request)
|
||||
# When trying to resume, Content-Range HTTP header of response has to be checked
|
||||
# to match the value of requested Range HTTP header. This is due to a webservers
|
||||
# that don't support resuming and serve a whole file with no Content-Range
|
||||
# set in response despite of requested Range (see
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799)
|
||||
if has_range:
|
||||
content_range = ctx.data.headers.get('Content-Range')
|
||||
if content_range:
|
||||
content_range_m = re.search(r'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range)
|
||||
# Content-Range is present and matches requested Range, resume is possible
|
||||
if content_range_m:
|
||||
if range_start == int(content_range_m.group(1)):
|
||||
content_range_end = int_or_none(content_range_m.group(2))
|
||||
content_len = int_or_none(content_range_m.group(3))
|
||||
accept_content_len = (
|
||||
# Non-chunked download
|
||||
not ctx.chunk_size
|
||||
# Chunked download and requested piece or
|
||||
# its part is promised to be served
|
||||
or content_range_end == range_end
|
||||
or content_len < range_end)
|
||||
if accept_content_len:
|
||||
ctx.data_len = content_len
|
||||
return
|
||||
# Content-Range is either not present or invalid. Assuming remote webserver is
|
||||
# trying to send the whole file, resume is not possible, so wiping the local file
|
||||
# and performing entire redownload
|
||||
self.report_unable_to_resume()
|
||||
ctx.resume_len = 0
|
||||
ctx.open_mode = 'wb'
|
||||
ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None))
|
||||
return
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if err.code == 416:
|
||||
# Unable to resume (requested range not satisfiable)
|
||||
try:
|
||||
# Open the connection again without the range header
|
||||
ctx.data = self.ydl.urlopen(
|
||||
sanitized_Request(url, None, headers))
|
||||
content_length = ctx.data.info()['Content-Length']
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if err.code < 500 or err.code >= 600:
|
||||
raise
|
||||
else:
|
||||
# Examine the reported length
|
||||
if (content_length is not None
|
||||
and (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)):
|
||||
# The file had already been fully downloaded.
|
||||
# Explanation to the above condition: in issue #175 it was revealed that
|
||||
# YouTube sometimes adds or removes a few bytes from the end of the file,
|
||||
# changing the file size slightly and causing problems for some users. So
|
||||
# I decided to implement a suggested change and consider the file
|
||||
# completely downloaded if the file size differs less than 100 bytes from
|
||||
# the one in the hard drive.
|
||||
self.report_file_already_downloaded(ctx.filename)
|
||||
self.try_rename(ctx.tmpfilename, ctx.filename)
|
||||
self._hook_progress({
|
||||
'filename': ctx.filename,
|
||||
'status': 'finished',
|
||||
'downloaded_bytes': ctx.resume_len,
|
||||
'total_bytes': ctx.resume_len,
|
||||
})
|
||||
raise SucceedDownload()
|
||||
else:
|
||||
# The length does not match, we start the download over
|
||||
self.report_unable_to_resume()
|
||||
ctx.resume_len = 0
|
||||
ctx.open_mode = 'wb'
|
||||
return
|
||||
elif err.code < 500 or err.code >= 600:
|
||||
# Unexpected HTTP error
|
||||
raise
|
||||
raise RetryDownload(err)
|
||||
except socket.error as err:
|
||||
if err.errno != errno.ECONNRESET:
|
||||
# Connection reset is no problem, just retry
|
||||
raise
|
||||
raise RetryDownload(err)
|
||||
|
||||
def download():
|
||||
data_len = ctx.data.info().get('Content-length', None)
|
||||
|
||||
# Range HTTP header may be ignored/unsupported by a webserver
|
||||
# (e.g. extractor/scivee.py, extractor/bambuser.py).
|
||||
# However, for a test we still would like to download just a piece of a file.
|
||||
# To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
|
||||
# block size when downloading a file.
|
||||
if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
|
||||
data_len = self._TEST_FILE_SIZE
|
||||
|
||||
if data_len is not None:
|
||||
data_len = int(data_len) + ctx.resume_len
|
||||
min_data_len = self.params.get('min_filesize')
|
||||
max_data_len = self.params.get('max_filesize')
|
||||
if min_data_len is not None and data_len < min_data_len:
|
||||
self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
|
||||
return False
|
||||
if max_data_len is not None and data_len > max_data_len:
|
||||
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
||||
return False
|
||||
|
||||
byte_counter = 0 + ctx.resume_len
|
||||
block_size = ctx.block_size
|
||||
start = time.time()
|
||||
|
||||
# measure time over whole while-loop, so slow_down() and best_block_size() work together properly
|
||||
now = None # needed for slow_down() in the first loop run
|
||||
before = start # start measuring
|
||||
|
||||
def retry(e):
|
||||
to_stdout = ctx.tmpfilename == '-'
|
||||
if not to_stdout:
|
||||
ctx.stream.close()
|
||||
ctx.stream = None
|
||||
ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
|
||||
raise RetryDownload(e)
|
||||
|
||||
while True:
|
||||
try:
|
||||
# Download and write
|
||||
data_block = ctx.data.read(block_size if data_len is None else min(block_size, data_len - byte_counter))
|
||||
# socket.timeout is a subclass of socket.error but may not have
|
||||
# errno set
|
||||
except socket.timeout as e:
|
||||
retry(e)
|
||||
except socket.error as e:
|
||||
if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT):
|
||||
raise
|
||||
retry(e)
|
||||
|
||||
byte_counter += len(data_block)
|
||||
|
||||
# exit loop when download is finished
|
||||
if len(data_block) == 0:
|
||||
break
|
||||
|
||||
# Open destination file just in time
|
||||
if ctx.stream is None:
|
||||
try:
|
||||
ctx.stream, ctx.tmpfilename = sanitize_open(
|
||||
ctx.tmpfilename, ctx.open_mode)
|
||||
assert ctx.stream is not None
|
||||
ctx.filename = self.undo_temp_name(ctx.tmpfilename)
|
||||
self.report_destination(ctx.filename)
|
||||
except (OSError, IOError) as err:
|
||||
self.report_error('unable to open for writing: %s' % str(err))
|
||||
return False
|
||||
|
||||
if self.params.get('xattr_set_filesize', False) and data_len is not None:
|
||||
try:
|
||||
write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
|
||||
except (XAttrUnavailableError, XAttrMetadataError) as err:
|
||||
self.report_error('unable to set filesize xattr: %s' % str(err))
|
||||
|
||||
try:
|
||||
ctx.stream.write(data_block)
|
||||
except (IOError, OSError) as err:
|
||||
self.to_stderr('\n')
|
||||
self.report_error('unable to write data: %s' % str(err))
|
||||
return False
|
||||
|
||||
# Apply rate limit
|
||||
self.slow_down(start, now, byte_counter - ctx.resume_len)
|
||||
|
||||
# end measuring of one loop run
|
||||
now = time.time()
|
||||
after = now
|
||||
|
||||
# Adjust block size
|
||||
if not self.params.get('noresizebuffer', False):
|
||||
block_size = self.best_block_size(after - before, len(data_block))
|
||||
|
||||
before = after
|
||||
|
||||
# Progress message
|
||||
speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
|
||||
if ctx.data_len is None:
|
||||
eta = None
|
||||
else:
|
||||
eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len)
|
||||
|
||||
self._hook_progress({
|
||||
'status': 'downloading',
|
||||
'downloaded_bytes': byte_counter,
|
||||
'total_bytes': ctx.data_len,
|
||||
'tmpfilename': ctx.tmpfilename,
|
||||
'filename': ctx.filename,
|
||||
'eta': eta,
|
||||
'speed': speed,
|
||||
'elapsed': now - ctx.start_time,
|
||||
})
|
||||
|
||||
if data_len is not None and byte_counter == data_len:
|
||||
break
|
||||
|
||||
if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
|
||||
ctx.resume_len = byte_counter
|
||||
# ctx.block_size = block_size
|
||||
raise NextFragment()
|
||||
|
||||
if ctx.stream is None:
|
||||
self.to_stderr('\n')
|
||||
self.report_error('Did not get any data blocks')
|
||||
return False
|
||||
if ctx.tmpfilename != '-':
|
||||
ctx.stream.close()
|
||||
|
||||
if data_len is not None and byte_counter != data_len:
|
||||
err = ContentTooShortError(byte_counter, int(data_len))
|
||||
if count <= retries:
|
||||
retry(err)
|
||||
raise err
|
||||
|
||||
self.try_rename(ctx.tmpfilename, ctx.filename)
|
||||
|
||||
# Update file modification time
|
||||
if self.params.get('updatetime', True):
|
||||
info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None))
|
||||
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': byte_counter,
|
||||
'total_bytes': byte_counter,
|
||||
'filename': ctx.filename,
|
||||
'status': 'finished',
|
||||
'elapsed': time.time() - ctx.start_time,
|
||||
})
|
||||
|
||||
return True
|
||||
|
||||
while count <= retries:
|
||||
try:
|
||||
establish_connection()
|
||||
return download()
|
||||
except RetryDownload as e:
|
||||
count += 1
|
||||
if count <= retries:
|
||||
self.report_retry(e.source_error, count, retries)
|
||||
continue
|
||||
except NextFragment:
|
||||
continue
|
||||
except SucceedDownload:
|
||||
return True
|
||||
|
||||
self.report_error('giving up after %s retries' % retries)
|
||||
return False
|
259
youtube_dl/downloader/ism.py
Normal file
259
youtube_dl/downloader/ism.py
Normal file
|
@ -0,0 +1,259 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import time
|
||||
import binascii
|
||||
import io
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import (
|
||||
compat_Struct,
|
||||
compat_urllib_error,
|
||||
)
|
||||
|
||||
|
||||
u8 = compat_Struct('>B')
|
||||
u88 = compat_Struct('>Bx')
|
||||
u16 = compat_Struct('>H')
|
||||
u1616 = compat_Struct('>Hxx')
|
||||
u32 = compat_Struct('>I')
|
||||
u64 = compat_Struct('>Q')
|
||||
|
||||
s88 = compat_Struct('>bx')
|
||||
s16 = compat_Struct('>h')
|
||||
s1616 = compat_Struct('>hxx')
|
||||
s32 = compat_Struct('>i')
|
||||
|
||||
unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000)
|
||||
|
||||
TRACK_ENABLED = 0x1
|
||||
TRACK_IN_MOVIE = 0x2
|
||||
TRACK_IN_PREVIEW = 0x4
|
||||
|
||||
SELF_CONTAINED = 0x1
|
||||
|
||||
|
||||
def box(box_type, payload):
|
||||
return u32.pack(8 + len(payload)) + box_type + payload
|
||||
|
||||
|
||||
def full_box(box_type, version, flags, payload):
|
||||
return box(box_type, u8.pack(version) + u32.pack(flags)[1:] + payload)
|
||||
|
||||
|
||||
def write_piff_header(stream, params):
|
||||
track_id = params['track_id']
|
||||
fourcc = params['fourcc']
|
||||
duration = params['duration']
|
||||
timescale = params.get('timescale', 10000000)
|
||||
language = params.get('language', 'und')
|
||||
height = params.get('height', 0)
|
||||
width = params.get('width', 0)
|
||||
is_audio = width == 0 and height == 0
|
||||
creation_time = modification_time = int(time.time())
|
||||
|
||||
ftyp_payload = b'isml' # major brand
|
||||
ftyp_payload += u32.pack(1) # minor version
|
||||
ftyp_payload += b'piff' + b'iso2' # compatible brands
|
||||
stream.write(box(b'ftyp', ftyp_payload)) # File Type Box
|
||||
|
||||
mvhd_payload = u64.pack(creation_time)
|
||||
mvhd_payload += u64.pack(modification_time)
|
||||
mvhd_payload += u32.pack(timescale)
|
||||
mvhd_payload += u64.pack(duration)
|
||||
mvhd_payload += s1616.pack(1) # rate
|
||||
mvhd_payload += s88.pack(1) # volume
|
||||
mvhd_payload += u16.pack(0) # reserved
|
||||
mvhd_payload += u32.pack(0) * 2 # reserved
|
||||
mvhd_payload += unity_matrix
|
||||
mvhd_payload += u32.pack(0) * 6 # pre defined
|
||||
mvhd_payload += u32.pack(0xffffffff) # next track id
|
||||
moov_payload = full_box(b'mvhd', 1, 0, mvhd_payload) # Movie Header Box
|
||||
|
||||
tkhd_payload = u64.pack(creation_time)
|
||||
tkhd_payload += u64.pack(modification_time)
|
||||
tkhd_payload += u32.pack(track_id) # track id
|
||||
tkhd_payload += u32.pack(0) # reserved
|
||||
tkhd_payload += u64.pack(duration)
|
||||
tkhd_payload += u32.pack(0) * 2 # reserved
|
||||
tkhd_payload += s16.pack(0) # layer
|
||||
tkhd_payload += s16.pack(0) # alternate group
|
||||
tkhd_payload += s88.pack(1 if is_audio else 0) # volume
|
||||
tkhd_payload += u16.pack(0) # reserved
|
||||
tkhd_payload += unity_matrix
|
||||
tkhd_payload += u1616.pack(width)
|
||||
tkhd_payload += u1616.pack(height)
|
||||
trak_payload = full_box(b'tkhd', 1, TRACK_ENABLED | TRACK_IN_MOVIE | TRACK_IN_PREVIEW, tkhd_payload) # Track Header Box
|
||||
|
||||
mdhd_payload = u64.pack(creation_time)
|
||||
mdhd_payload += u64.pack(modification_time)
|
||||
mdhd_payload += u32.pack(timescale)
|
||||
mdhd_payload += u64.pack(duration)
|
||||
mdhd_payload += u16.pack(((ord(language[0]) - 0x60) << 10) | ((ord(language[1]) - 0x60) << 5) | (ord(language[2]) - 0x60))
|
||||
mdhd_payload += u16.pack(0) # pre defined
|
||||
mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload) # Media Header Box
|
||||
|
||||
hdlr_payload = u32.pack(0) # pre defined
|
||||
hdlr_payload += b'soun' if is_audio else b'vide' # handler type
|
||||
hdlr_payload += u32.pack(0) * 3 # reserved
|
||||
hdlr_payload += (b'Sound' if is_audio else b'Video') + b'Handler\0' # name
|
||||
mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload) # Handler Reference Box
|
||||
|
||||
if is_audio:
|
||||
smhd_payload = s88.pack(0) # balance
|
||||
smhd_payload += u16.pack(0) # reserved
|
||||
media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header
|
||||
else:
|
||||
vmhd_payload = u16.pack(0) # graphics mode
|
||||
vmhd_payload += u16.pack(0) * 3 # opcolor
|
||||
media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload) # Video Media Header
|
||||
minf_payload = media_header_box
|
||||
|
||||
dref_payload = u32.pack(1) # entry count
|
||||
dref_payload += full_box(b'url ', 0, SELF_CONTAINED, b'') # Data Entry URL Box
|
||||
dinf_payload = full_box(b'dref', 0, 0, dref_payload) # Data Reference Box
|
||||
minf_payload += box(b'dinf', dinf_payload) # Data Information Box
|
||||
|
||||
stsd_payload = u32.pack(1) # entry count
|
||||
|
||||
sample_entry_payload = u8.pack(0) * 6 # reserved
|
||||
sample_entry_payload += u16.pack(1) # data reference index
|
||||
if is_audio:
|
||||
sample_entry_payload += u32.pack(0) * 2 # reserved
|
||||
sample_entry_payload += u16.pack(params.get('channels', 2))
|
||||
sample_entry_payload += u16.pack(params.get('bits_per_sample', 16))
|
||||
sample_entry_payload += u16.pack(0) # pre defined
|
||||
sample_entry_payload += u16.pack(0) # reserved
|
||||
sample_entry_payload += u1616.pack(params['sampling_rate'])
|
||||
|
||||
if fourcc == 'AACL':
|
||||
sample_entry_box = box(b'mp4a', sample_entry_payload)
|
||||
else:
|
||||
sample_entry_payload += u16.pack(0) # pre defined
|
||||
sample_entry_payload += u16.pack(0) # reserved
|
||||
sample_entry_payload += u32.pack(0) * 3 # pre defined
|
||||
sample_entry_payload += u16.pack(width)
|
||||
sample_entry_payload += u16.pack(height)
|
||||
sample_entry_payload += u1616.pack(0x48) # horiz resolution 72 dpi
|
||||
sample_entry_payload += u1616.pack(0x48) # vert resolution 72 dpi
|
||||
sample_entry_payload += u32.pack(0) # reserved
|
||||
sample_entry_payload += u16.pack(1) # frame count
|
||||
sample_entry_payload += u8.pack(0) * 32 # compressor name
|
||||
sample_entry_payload += u16.pack(0x18) # depth
|
||||
sample_entry_payload += s16.pack(-1) # pre defined
|
||||
|
||||
codec_private_data = binascii.unhexlify(params['codec_private_data'].encode('utf-8'))
|
||||
if fourcc in ('H264', 'AVC1'):
|
||||
sps, pps = codec_private_data.split(u32.pack(1))[1:]
|
||||
avcc_payload = u8.pack(1) # configuration version
|
||||
avcc_payload += sps[1:4] # avc profile indication + profile compatibility + avc level indication
|
||||
avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete representation (1) + reserved (11111) + length size minus one
|
||||
avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001)
|
||||
avcc_payload += u16.pack(len(sps))
|
||||
avcc_payload += sps
|
||||
avcc_payload += u8.pack(1) # number of pps
|
||||
avcc_payload += u16.pack(len(pps))
|
||||
avcc_payload += pps
|
||||
sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record
|
||||
sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry
|
||||
stsd_payload += sample_entry_box
|
||||
|
||||
stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box
|
||||
|
||||
stts_payload = u32.pack(0) # entry count
|
||||
stbl_payload += full_box(b'stts', 0, 0, stts_payload) # Decoding Time to Sample Box
|
||||
|
||||
stsc_payload = u32.pack(0) # entry count
|
||||
stbl_payload += full_box(b'stsc', 0, 0, stsc_payload) # Sample To Chunk Box
|
||||
|
||||
stco_payload = u32.pack(0) # entry count
|
||||
stbl_payload += full_box(b'stco', 0, 0, stco_payload) # Chunk Offset Box
|
||||
|
||||
minf_payload += box(b'stbl', stbl_payload) # Sample Table Box
|
||||
|
||||
mdia_payload += box(b'minf', minf_payload) # Media Information Box
|
||||
|
||||
trak_payload += box(b'mdia', mdia_payload) # Media Box
|
||||
|
||||
moov_payload += box(b'trak', trak_payload) # Track Box
|
||||
|
||||
mehd_payload = u64.pack(duration)
|
||||
mvex_payload = full_box(b'mehd', 1, 0, mehd_payload) # Movie Extends Header Box
|
||||
|
||||
trex_payload = u32.pack(track_id) # track id
|
||||
trex_payload += u32.pack(1) # default sample description index
|
||||
trex_payload += u32.pack(0) # default sample duration
|
||||
trex_payload += u32.pack(0) # default sample size
|
||||
trex_payload += u32.pack(0) # default sample flags
|
||||
mvex_payload += full_box(b'trex', 0, 0, trex_payload) # Track Extends Box
|
||||
|
||||
moov_payload += box(b'mvex', mvex_payload) # Movie Extends Box
|
||||
stream.write(box(b'moov', moov_payload)) # Movie Box
|
||||
|
||||
|
||||
def extract_box_data(data, box_sequence):
|
||||
data_reader = io.BytesIO(data)
|
||||
while True:
|
||||
box_size = u32.unpack(data_reader.read(4))[0]
|
||||
box_type = data_reader.read(4)
|
||||
if box_type == box_sequence[0]:
|
||||
box_data = data_reader.read(box_size - 8)
|
||||
if len(box_sequence) == 1:
|
||||
return box_data
|
||||
return extract_box_data(box_data, box_sequence[1:])
|
||||
data_reader.seek(box_size - 8, 1)
|
||||
|
||||
|
||||
class IsmFD(FragmentFD):
|
||||
"""
|
||||
Download segments in a ISM manifest
|
||||
"""
|
||||
|
||||
FD_NAME = 'ism'
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
segments = info_dict['fragments'][:1] if self.params.get(
|
||||
'test', False) else info_dict['fragments']
|
||||
|
||||
ctx = {
|
||||
'filename': filename,
|
||||
'total_frags': len(segments),
|
||||
}
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
|
||||
fragment_retries = self.params.get('fragment_retries', 0)
|
||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||
|
||||
track_written = False
|
||||
frag_index = 0
|
||||
for i, segment in enumerate(segments):
|
||||
frag_index += 1
|
||||
if frag_index <= ctx['fragment_index']:
|
||||
continue
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
|
||||
if not success:
|
||||
return False
|
||||
if not track_written:
|
||||
tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
|
||||
info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
|
||||
write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
|
||||
track_written = True
|
||||
self._append_fragment(ctx, frag_content)
|
||||
break
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(err, frag_index, count, fragment_retries)
|
||||
if count > fragment_retries:
|
||||
if skip_unavailable_fragments:
|
||||
self.report_skip_fragment(frag_index)
|
||||
continue
|
||||
self.report_error('giving up after %s fragment retries' % fragment_retries)
|
||||
return False
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
return True
|
214
youtube_dl/downloader/rtmp.py
Normal file
214
youtube_dl/downloader/rtmp.py
Normal file
|
@ -0,0 +1,214 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
check_executable,
|
||||
encodeFilename,
|
||||
encodeArgument,
|
||||
get_exe_version,
|
||||
)
|
||||
|
||||
|
||||
def rtmpdump_version():
|
||||
return get_exe_version(
|
||||
'rtmpdump', ['--help'], r'(?i)RTMPDump\s*v?([0-9a-zA-Z._-]+)')
|
||||
|
||||
|
||||
class RtmpFD(FileDownloader):
|
||||
def real_download(self, filename, info_dict):
|
||||
def run_rtmpdump(args):
|
||||
start = time.time()
|
||||
resume_percent = None
|
||||
resume_downloaded_data_len = None
|
||||
proc = subprocess.Popen(args, stderr=subprocess.PIPE)
|
||||
cursor_in_new_line = True
|
||||
proc_stderr_closed = False
|
||||
try:
|
||||
while not proc_stderr_closed:
|
||||
# read line from stderr
|
||||
line = ''
|
||||
while True:
|
||||
char = proc.stderr.read(1)
|
||||
if not char:
|
||||
proc_stderr_closed = True
|
||||
break
|
||||
if char in [b'\r', b'\n']:
|
||||
break
|
||||
line += char.decode('ascii', 'replace')
|
||||
if not line:
|
||||
# proc_stderr_closed is True
|
||||
continue
|
||||
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
|
||||
if mobj:
|
||||
downloaded_data_len = int(float(mobj.group(1)) * 1024)
|
||||
percent = float(mobj.group(2))
|
||||
if not resume_percent:
|
||||
resume_percent = percent
|
||||
resume_downloaded_data_len = downloaded_data_len
|
||||
time_now = time.time()
|
||||
eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
|
||||
speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
|
||||
data_len = None
|
||||
if percent > 0:
|
||||
data_len = int(downloaded_data_len * 100 / percent)
|
||||
self._hook_progress({
|
||||
'status': 'downloading',
|
||||
'downloaded_bytes': downloaded_data_len,
|
||||
'total_bytes_estimate': data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'filename': filename,
|
||||
'eta': eta,
|
||||
'elapsed': time_now - start,
|
||||
'speed': speed,
|
||||
})
|
||||
cursor_in_new_line = False
|
||||
else:
|
||||
# no percent for live streams
|
||||
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
|
||||
if mobj:
|
||||
downloaded_data_len = int(float(mobj.group(1)) * 1024)
|
||||
time_now = time.time()
|
||||
speed = self.calc_speed(start, time_now, downloaded_data_len)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': downloaded_data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'filename': filename,
|
||||
'status': 'downloading',
|
||||
'elapsed': time_now - start,
|
||||
'speed': speed,
|
||||
})
|
||||
cursor_in_new_line = False
|
||||
elif self.params.get('verbose', False):
|
||||
if not cursor_in_new_line:
|
||||
self.to_screen('')
|
||||
cursor_in_new_line = True
|
||||
self.to_screen('[rtmpdump] ' + line)
|
||||
finally:
|
||||
proc.wait()
|
||||
if not cursor_in_new_line:
|
||||
self.to_screen('')
|
||||
return proc.returncode
|
||||
|
||||
url = info_dict['url']
|
||||
player_url = info_dict.get('player_url')
|
||||
page_url = info_dict.get('page_url')
|
||||
app = info_dict.get('app')
|
||||
play_path = info_dict.get('play_path')
|
||||
tc_url = info_dict.get('tc_url')
|
||||
flash_version = info_dict.get('flash_version')
|
||||
live = info_dict.get('rtmp_live', False)
|
||||
conn = info_dict.get('rtmp_conn')
|
||||
protocol = info_dict.get('rtmp_protocol')
|
||||
real_time = info_dict.get('rtmp_real_time', False)
|
||||
no_resume = info_dict.get('no_resume', False)
|
||||
continue_dl = self.params.get('continuedl', True)
|
||||
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
test = self.params.get('test', False)
|
||||
|
||||
# Check for rtmpdump first
|
||||
if not check_executable('rtmpdump', ['-h']):
|
||||
self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it.')
|
||||
return False
|
||||
|
||||
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
||||
# the connection was interrupted and resuming appears to be
|
||||
# possible. This is part of rtmpdump's normal usage, AFAIK.
|
||||
basic_args = [
|
||||
'rtmpdump', '--verbose', '-r', url,
|
||||
'-o', tmpfilename]
|
||||
if player_url is not None:
|
||||
basic_args += ['--swfVfy', player_url]
|
||||
if page_url is not None:
|
||||
basic_args += ['--pageUrl', page_url]
|
||||
if app is not None:
|
||||
basic_args += ['--app', app]
|
||||
if play_path is not None:
|
||||
basic_args += ['--playpath', play_path]
|
||||
if tc_url is not None:
|
||||
basic_args += ['--tcUrl', tc_url]
|
||||
if test:
|
||||
basic_args += ['--stop', '1']
|
||||
if flash_version is not None:
|
||||
basic_args += ['--flashVer', flash_version]
|
||||
if live:
|
||||
basic_args += ['--live']
|
||||
if isinstance(conn, list):
|
||||
for entry in conn:
|
||||
basic_args += ['--conn', entry]
|
||||
elif isinstance(conn, compat_str):
|
||||
basic_args += ['--conn', conn]
|
||||
if protocol is not None:
|
||||
basic_args += ['--protocol', protocol]
|
||||
if real_time:
|
||||
basic_args += ['--realtime']
|
||||
|
||||
args = basic_args
|
||||
if not no_resume and continue_dl and not live:
|
||||
args += ['--resume']
|
||||
if not live and continue_dl:
|
||||
args += ['--skip', '1']
|
||||
|
||||
args = [encodeArgument(a) for a in args]
|
||||
|
||||
self._debug_cmd(args, exe='rtmpdump')
|
||||
|
||||
RD_SUCCESS = 0
|
||||
RD_FAILED = 1
|
||||
RD_INCOMPLETE = 2
|
||||
RD_NO_CONNECT = 3
|
||||
|
||||
started = time.time()
|
||||
|
||||
try:
|
||||
retval = run_rtmpdump(args)
|
||||
except KeyboardInterrupt:
|
||||
if not info_dict.get('is_live'):
|
||||
raise
|
||||
retval = RD_SUCCESS
|
||||
self.to_screen('\n[rtmpdump] Interrupted by user')
|
||||
|
||||
if retval == RD_NO_CONNECT:
|
||||
self.report_error('[rtmpdump] Could not connect to RTMP server.')
|
||||
return False
|
||||
|
||||
while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
|
||||
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize)
|
||||
time.sleep(5.0) # This seems to be needed
|
||||
args = basic_args + ['--resume']
|
||||
if retval == RD_FAILED:
|
||||
args += ['--skip', '1']
|
||||
args = [encodeArgument(a) for a in args]
|
||||
retval = run_rtmpdump(args)
|
||||
cursize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
if prevsize == cursize and retval == RD_FAILED:
|
||||
break
|
||||
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
|
||||
if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024:
|
||||
self.to_screen('[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
|
||||
retval = RD_SUCCESS
|
||||
break
|
||||
if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize)
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
'elapsed': time.time() - started,
|
||||
})
|
||||
return True
|
||||
else:
|
||||
self.to_stderr('\n')
|
||||
self.report_error('rtmpdump exited with code %d' % retval)
|
||||
return False
|
47
youtube_dl/downloader/rtsp.py
Normal file
47
youtube_dl/downloader/rtsp.py
Normal file
|
@ -0,0 +1,47 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..utils import (
|
||||
check_executable,
|
||||
encodeFilename,
|
||||
)
|
||||
|
||||
|
||||
class RtspFD(FileDownloader):
|
||||
def real_download(self, filename, info_dict):
|
||||
url = info_dict['url']
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
if check_executable('mplayer', ['-h']):
|
||||
args = [
|
||||
'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
|
||||
'-dumpstream', '-dumpfile', tmpfilename, url]
|
||||
elif check_executable('mpv', ['-h']):
|
||||
args = [
|
||||
'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url]
|
||||
else:
|
||||
self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.')
|
||||
return False
|
||||
|
||||
self._debug_cmd(args)
|
||||
|
||||
retval = subprocess.call(args)
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
return True
|
||||
else:
|
||||
self.to_stderr('\n')
|
||||
self.report_error('%s exited with code %d' % (args[0], retval))
|
||||
return False
|
94
youtube_dl/downloader/youtube_live_chat.py
Normal file
94
youtube_dl/downloader/youtube_live_chat.py
Normal file
|
@ -0,0 +1,94 @@
|
|||
from __future__ import division, unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .fragment import FragmentFD
|
||||
|
||||
|
||||
class YoutubeLiveChatReplayFD(FragmentFD):
|
||||
""" Downloads YouTube live chat replays fragment by fragment """
|
||||
|
||||
FD_NAME = 'youtube_live_chat_replay'
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
video_id = info_dict['video_id']
|
||||
self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
|
||||
|
||||
test = self.params.get('test', False)
|
||||
|
||||
ctx = {
|
||||
'filename': filename,
|
||||
'live': True,
|
||||
'total_frags': None,
|
||||
}
|
||||
|
||||
def dl_fragment(url):
|
||||
headers = info_dict.get('http_headers', {})
|
||||
return self._download_fragment(ctx, url, info_dict, headers)
|
||||
|
||||
def parse_yt_initial_data(data):
|
||||
window_patt = b'window\\["ytInitialData"\\]\\s*=\\s*(.*?)(?<=});'
|
||||
var_patt = b'var\\s+ytInitialData\\s*=\\s*(.*?)(?<=});'
|
||||
for patt in window_patt, var_patt:
|
||||
try:
|
||||
raw_json = re.search(patt, data).group(1)
|
||||
return json.loads(raw_json)
|
||||
except AttributeError:
|
||||
continue
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
|
||||
success, raw_fragment = dl_fragment(
|
||||
'https://www.youtube.com/watch?v={}'.format(video_id))
|
||||
if not success:
|
||||
return False
|
||||
data = parse_yt_initial_data(raw_fragment)
|
||||
continuation_id = data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
|
||||
# no data yet but required to call _append_fragment
|
||||
self._append_fragment(ctx, b'')
|
||||
|
||||
first = True
|
||||
offset = None
|
||||
while continuation_id is not None:
|
||||
data = None
|
||||
if first:
|
||||
url = 'https://www.youtube.com/live_chat_replay?continuation={}'.format(continuation_id)
|
||||
success, raw_fragment = dl_fragment(url)
|
||||
if not success:
|
||||
return False
|
||||
data = parse_yt_initial_data(raw_fragment)
|
||||
else:
|
||||
url = ('https://www.youtube.com/live_chat_replay/get_live_chat_replay'
|
||||
+ '?continuation={}'.format(continuation_id)
|
||||
+ '&playerOffsetMs={}'.format(offset - 5000)
|
||||
+ '&hidden=false'
|
||||
+ '&pbj=1')
|
||||
success, raw_fragment = dl_fragment(url)
|
||||
if not success:
|
||||
return False
|
||||
data = json.loads(raw_fragment)['response']
|
||||
|
||||
first = False
|
||||
continuation_id = None
|
||||
|
||||
live_chat_continuation = data['continuationContents']['liveChatContinuation']
|
||||
offset = None
|
||||
processed_fragment = bytearray()
|
||||
if 'actions' in live_chat_continuation:
|
||||
for action in live_chat_continuation['actions']:
|
||||
if 'replayChatItemAction' in action:
|
||||
replay_chat_item_action = action['replayChatItemAction']
|
||||
offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
|
||||
processed_fragment.extend(
|
||||
json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
|
||||
continuation_id = live_chat_continuation['continuations'][0]['liveChatReplayContinuationData']['continuation']
|
||||
|
||||
self._append_fragment(ctx, processed_fragment)
|
||||
|
||||
if test or offset is None:
|
||||
break
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
return True
|
46
youtube_dl/extractor/__init__.py
Normal file
46
youtube_dl/extractor/__init__.py
Normal file
|
@ -0,0 +1,46 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
try:
|
||||
from .lazy_extractors import *
|
||||
from .lazy_extractors import _ALL_CLASSES
|
||||
_LAZY_LOADER = True
|
||||
except ImportError:
|
||||
_LAZY_LOADER = False
|
||||
from .extractors import *
|
||||
|
||||
_ALL_CLASSES = [
|
||||
klass
|
||||
for name, klass in globals().items()
|
||||
if name.endswith('IE') and name != 'GenericIE'
|
||||
]
|
||||
_ALL_CLASSES.append(GenericIE)
|
||||
|
||||
|
||||
def gen_extractor_classes():
|
||||
""" Return a list of supported extractors.
|
||||
The order does matter; the first extractor matched is the one handling the URL.
|
||||
"""
|
||||
return _ALL_CLASSES
|
||||
|
||||
|
||||
def gen_extractors():
|
||||
""" Return a list of an instance of every supported extractor.
|
||||
The order does matter; the first extractor matched is the one handling the URL.
|
||||
"""
|
||||
return [klass() for klass in gen_extractor_classes()]
|
||||
|
||||
|
||||
def list_extractors(age_limit):
|
||||
"""
|
||||
Return a list of extractors that are suitable for the given age,
|
||||
sorted by extractor ID.
|
||||
"""
|
||||
|
||||
return sorted(
|
||||
filter(lambda ie: ie.is_suitable(age_limit), gen_extractors()),
|
||||
key=lambda ie: ie.IE_NAME.lower())
|
||||
|
||||
|
||||
def get_info_extractor(ie_name):
|
||||
"""Returns the info extractor class with the given ie_name"""
|
||||
return globals()[ie_name + 'IE']
|
193
youtube_dl/extractor/abc.py
Normal file
193
youtube_dl/extractor/abc.py
Normal file
|
@ -0,0 +1,193 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import hmac
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class ABCIE(InfoExtractor):
|
||||
IE_NAME = 'abc.net.au'
|
||||
_VALID_URL = r'https?://(?:www\.)?abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
|
||||
'md5': 'cb3dd03b18455a661071ee1e28344d9f',
|
||||
'info_dict': {
|
||||
'id': '5868334',
|
||||
'ext': 'mp4',
|
||||
'title': 'Australia to help staff Ebola treatment centre in Sierra Leone',
|
||||
'description': 'md5:809ad29c67a05f54eb41f2a105693a67',
|
||||
},
|
||||
'skip': 'this video has expired',
|
||||
}, {
|
||||
'url': 'http://www.abc.net.au/news/2015-08-17/warren-entsch-introduces-same-sex-marriage-bill/6702326',
|
||||
'md5': 'db2a5369238b51f9811ad815b69dc086',
|
||||
'info_dict': {
|
||||
'id': 'NvqvPeNZsHU',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20150816',
|
||||
'uploader': 'ABC News (Australia)',
|
||||
'description': 'Government backbencher Warren Entsch introduces a cross-party sponsored bill to legalise same-sex marriage, saying the bill is designed to promote "an inclusive Australia, not a divided one.". Read more here: http://ab.co/1Mwc6ef',
|
||||
'uploader_id': 'NewsOnABC',
|
||||
'title': 'Marriage Equality: Warren Entsch introduces same sex marriage bill',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
}, {
|
||||
'url': 'http://www.abc.net.au/news/2015-10-23/nab-lifts-interest-rates-following-westpac-and-cba/6880080',
|
||||
'md5': 'b96eee7c9edf4fc5a358a0252881cc1f',
|
||||
'info_dict': {
|
||||
'id': '6880080',
|
||||
'ext': 'mp3',
|
||||
'title': 'NAB lifts interest rates, following Westpac and CBA',
|
||||
'description': 'md5:f13d8edc81e462fce4a0437c7dc04728',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.abc.net.au/news/2015-10-19/6866214',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
mobj = re.search(
|
||||
r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
|
||||
webpage)
|
||||
if mobj is None:
|
||||
expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?<span>(.+?)</span>', webpage, 'expired', None)
|
||||
if expired:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, expired), expected=True)
|
||||
raise ExtractorError('Unable to extract video urls')
|
||||
|
||||
urls_info = self._parse_json(
|
||||
mobj.group('json_data'), video_id, transform_source=js_to_json)
|
||||
|
||||
if not isinstance(urls_info, list):
|
||||
urls_info = [urls_info]
|
||||
|
||||
if mobj.group('type') == 'YouTube':
|
||||
return self.playlist_result([
|
||||
self.url_result(url_info['url']) for url_info in urls_info])
|
||||
|
||||
formats = [{
|
||||
'url': url_info['url'],
|
||||
'vcodec': url_info.get('codec') if mobj.group('type') == 'Video' else 'none',
|
||||
'width': int_or_none(url_info.get('width')),
|
||||
'height': int_or_none(url_info.get('height')),
|
||||
'tbr': int_or_none(url_info.get('bitrate')),
|
||||
'filesize': int_or_none(url_info.get('filesize')),
|
||||
} for url_info in urls_info]
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'formats': formats,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
||||
|
||||
|
||||
class ABCIViewIE(InfoExtractor):
|
||||
IE_NAME = 'abc.net.au:iview'
|
||||
_VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
|
||||
_GEO_COUNTRIES = ['AU']
|
||||
|
||||
# ABC iview programs are normally available for 14 days only.
|
||||
_TESTS = [{
|
||||
'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00',
|
||||
'md5': '67715ce3c78426b11ba167d875ac6abf',
|
||||
'info_dict': {
|
||||
'id': 'LE1927H001S00',
|
||||
'ext': 'mp4',
|
||||
'title': "Series 11 Ep 1",
|
||||
'series': "Gruen",
|
||||
'description': 'md5:52cc744ad35045baf6aded2ce7287f67',
|
||||
'upload_date': '20190925',
|
||||
'uploader_id': 'abc1',
|
||||
'timestamp': 1569445289,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_params = self._download_json(
|
||||
'https://iview.abc.net.au/api/programs/' + video_id, video_id)
|
||||
title = unescapeHTML(video_params.get('title') or video_params['seriesTitle'])
|
||||
stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream'))
|
||||
|
||||
house_number = video_params.get('episodeHouseNumber') or video_id
|
||||
path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format(
|
||||
int(time.time()), house_number)
|
||||
sig = hmac.new(
|
||||
b'android.content.res.Resources',
|
||||
path.encode('utf-8'), hashlib.sha256).hexdigest()
|
||||
token = self._download_webpage(
|
||||
'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
|
||||
|
||||
def tokenize_url(url, token):
|
||||
return update_url_query(url, {
|
||||
'hdnea': token,
|
||||
})
|
||||
|
||||
for sd in ('720', 'sd', 'sd-low'):
|
||||
sd_url = try_get(
|
||||
stream, lambda x: x['streams']['hls'][sd], compat_str)
|
||||
if not sd_url:
|
||||
continue
|
||||
formats = self._extract_m3u8_formats(
|
||||
tokenize_url(sd_url, token), video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
|
||||
if formats:
|
||||
break
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
src_vtt = stream.get('captions', {}).get('src-vtt')
|
||||
if src_vtt:
|
||||
subtitles['en'] = [{
|
||||
'url': src_vtt,
|
||||
'ext': 'vtt',
|
||||
}]
|
||||
|
||||
is_live = video_params.get('livestream') == '1'
|
||||
if is_live:
|
||||
title = self._live_title(title)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': video_params.get('description'),
|
||||
'thumbnail': video_params.get('thumbnail'),
|
||||
'duration': int_or_none(video_params.get('eventDuration')),
|
||||
'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
|
||||
'series': unescapeHTML(video_params.get('seriesTitle')),
|
||||
'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
|
||||
'season_number': int_or_none(self._search_regex(
|
||||
r'\bSeries\s+(\d+)\b', title, 'season number', default=None)),
|
||||
'episode_number': int_or_none(self._search_regex(
|
||||
r'\bEp\s+(\d+)\b', title, 'episode number', default=None)),
|
||||
'episode_id': house_number,
|
||||
'uploader_id': video_params.get('channel'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'is_live': is_live,
|
||||
}
|
148
youtube_dl/extractor/abcnews.py
Normal file
148
youtube_dl/extractor/abcnews.py
Normal file
|
@ -0,0 +1,148 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import calendar
|
||||
import re
|
||||
import time
|
||||
|
||||
from .amp import AMPIE
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import compat_urlparse
|
||||
|
||||
|
||||
class AbcNewsVideoIE(AMPIE):
|
||||
IE_NAME = 'abcnews:video'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
abcnews\.go\.com/
|
||||
(?:
|
||||
[^/]+/video/(?P<display_id>[0-9a-z-]+)-|
|
||||
video/embed\?.*?\bid=
|
||||
)|
|
||||
fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
|
||||
'info_dict': {
|
||||
'id': '20411932',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'week-exclusive-irans-foreign-minister-zarif',
|
||||
'title': '\'This Week\' Exclusive: Iran\'s Foreign Minister Zarif',
|
||||
'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
|
||||
'duration': 180,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://abcnews.go.com/video/embed?id=46979033',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('display_id')
|
||||
video_id = mobj.group('id')
|
||||
info_dict = self._extract_feed_info(
|
||||
'http://abcnews.go.com/video/itemfeed?id=%s' % video_id)
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
})
|
||||
return info_dict
|
||||
|
||||
|
||||
class AbcNewsIE(InfoExtractor):
|
||||
IE_NAME = 'abcnews'
|
||||
_VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
|
||||
'info_dict': {
|
||||
'id': '10505354',
|
||||
'ext': 'flv',
|
||||
'display_id': 'dramatic-video-rare-death-job-america',
|
||||
'title': 'Occupational Hazards',
|
||||
'description': 'Nightline investigates the dangers that lurk at various jobs.',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20100428',
|
||||
'timestamp': 1272412800,
|
||||
},
|
||||
'add_ie': ['AbcNewsVideo'],
|
||||
}, {
|
||||
'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
|
||||
'info_dict': {
|
||||
'id': '38897857',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
|
||||
'title': 'Justin Timberlake Drops Hints For Secret Single',
|
||||
'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
|
||||
'upload_date': '20160515',
|
||||
'timestamp': 1463329500,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
# The embedded YouTube video is blocked due to copyright issues
|
||||
'playlist_items': '1',
|
||||
},
|
||||
'add_ie': ['AbcNewsVideo'],
|
||||
}, {
|
||||
'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('display_id')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_url = self._search_regex(
|
||||
r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
|
||||
full_video_url = compat_urlparse.urljoin(url, video_url)
|
||||
|
||||
youtube_url = YoutubeIE._extract_url(webpage)
|
||||
|
||||
timestamp = None
|
||||
date_str = self._html_search_regex(
|
||||
r'<span[^>]+class="timestamp">([^<]+)</span>',
|
||||
webpage, 'timestamp', fatal=False)
|
||||
if date_str:
|
||||
tz_offset = 0
|
||||
if date_str.endswith(' ET'): # Eastern Time
|
||||
tz_offset = -5
|
||||
date_str = date_str[:-3]
|
||||
date_formats = ['%b. %d, %Y', '%b %d, %Y, %I:%M %p']
|
||||
for date_format in date_formats:
|
||||
try:
|
||||
timestamp = calendar.timegm(time.strptime(date_str.strip(), date_format))
|
||||
except ValueError:
|
||||
continue
|
||||
if timestamp is not None:
|
||||
timestamp -= tz_offset * 3600
|
||||
|
||||
entry = {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': AbcNewsVideoIE.ie_key(),
|
||||
'url': full_video_url,
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'timestamp': timestamp,
|
||||
}
|
||||
|
||||
if youtube_url:
|
||||
entries = [entry, self.url_result(youtube_url, ie=YoutubeIE.ie_key())]
|
||||
return self.playlist_result(entries)
|
||||
|
||||
return entry
|
137
youtube_dl/extractor/abcotvs.py
Normal file
137
youtube_dl/extractor/abcotvs.py
Normal file
|
@ -0,0 +1,137 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
int_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class ABCOTVSIE(InfoExtractor):
|
||||
IE_NAME = 'abcotvs'
|
||||
IE_DESC = 'ABC Owned Television Stations'
|
||||
_VALID_URL = r'https?://(?P<site>abc(?:7(?:news|ny|chicago)?|11|13|30)|6abc)\.com(?:(?:/[^/]+)*/(?P<display_id>[^/]+))?/(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/',
|
||||
'info_dict': {
|
||||
'id': '472548',
|
||||
'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
|
||||
'ext': 'mp4',
|
||||
'title': 'East Bay museum celebrates synthesized music',
|
||||
'description': 'md5:24ed2bd527096ec2a5c67b9d5a9005f3',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 1421118520,
|
||||
'upload_date': '20150113',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://abc7news.com/472581',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://6abc.com/man-75-killed-after-being-struck-by-vehicle-in-chester/5725182/',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
_SITE_MAP = {
|
||||
'6abc': 'wpvi',
|
||||
'abc11': 'wtvd',
|
||||
'abc13': 'ktrk',
|
||||
'abc30': 'kfsn',
|
||||
'abc7': 'kabc',
|
||||
'abc7chicago': 'wls',
|
||||
'abc7news': 'kgo',
|
||||
'abc7ny': 'wabc',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
site, display_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||
display_id = display_id or video_id
|
||||
station = self._SITE_MAP[site]
|
||||
|
||||
data = self._download_json(
|
||||
'https://api.abcotvs.com/v2/content', display_id, query={
|
||||
'id': video_id,
|
||||
'key': 'otv.web.%s.story' % station,
|
||||
'station': station,
|
||||
})['data']
|
||||
video = try_get(data, lambda x: x['featuredMedia']['video'], dict) or data
|
||||
video_id = compat_str(dict_get(video, ('id', 'publishedKey'), video_id))
|
||||
title = video.get('title') or video['linkText']
|
||||
|
||||
formats = []
|
||||
m3u8_url = video.get('m3u8')
|
||||
if m3u8_url:
|
||||
formats = self._extract_m3u8_formats(
|
||||
video['m3u8'].split('?')[0], display_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
mp4_url = video.get('mp4')
|
||||
if mp4_url:
|
||||
formats.append({
|
||||
'abr': 128,
|
||||
'format_id': 'https',
|
||||
'height': 360,
|
||||
'url': mp4_url,
|
||||
'width': 640,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
image = video.get('image') or {}
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': dict_get(video, ('description', 'caption'), try_get(video, lambda x: x['meta']['description'])),
|
||||
'thumbnail': dict_get(image, ('source', 'dynamicSource')),
|
||||
'timestamp': int_or_none(video.get('date')),
|
||||
'duration': int_or_none(video.get('length')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class ABCOTVSClipsIE(InfoExtractor):
|
||||
IE_NAME = 'abcotvs:clips'
|
||||
_VALID_URL = r'https?://clips\.abcotvs\.com/(?:[^/]+/)*video/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://clips.abcotvs.com/kabc/video/214814',
|
||||
'info_dict': {
|
||||
'id': '214814',
|
||||
'ext': 'mp4',
|
||||
'title': 'SpaceX launch pad explosion destroys rocket, satellite',
|
||||
'description': 'md5:9f186e5ad8f490f65409965ee9c7be1b',
|
||||
'upload_date': '20160901',
|
||||
'timestamp': 1472756695,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json('https://clips.abcotvs.com/vogo/video/getByIds?ids=' + video_id, video_id)['results'][0]
|
||||
title = video_data['title']
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_data['videoURL'].split('?')[0], video_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': video_data.get('thumbnailURL'),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'timestamp': int_or_none(video_data.get('pubDate')),
|
||||
'formats': formats,
|
||||
}
|
41
youtube_dl/extractor/academicearth.py
Normal file
41
youtube_dl/extractor/academicearth.py
Normal file
|
@ -0,0 +1,41 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class AcademicEarthCourseIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
|
||||
IE_NAME = 'AcademicEarth:Course'
|
||||
_TEST = {
|
||||
'url': 'http://academicearth.org/playlists/laws-of-nature/',
|
||||
'info_dict': {
|
||||
'id': 'laws-of-nature',
|
||||
'title': 'Laws of Nature',
|
||||
'description': 'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
title = self._html_search_regex(
|
||||
r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'<p class="excerpt"[^>]*?>(.*?)</p>',
|
||||
webpage, 'description', fatal=False)
|
||||
urls = re.findall(
|
||||
r'<li class="lecture-preview">\s*?<a target="_blank" href="([^"]+)">',
|
||||
webpage)
|
||||
entries = [self.url_result(u) for u in urls]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': playlist_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'entries': entries,
|
||||
}
|
135
youtube_dl/extractor/acast.py
Normal file
135
youtube_dl/extractor/acast.py
Normal file
|
@ -0,0 +1,135 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
OnDemandPagedList,
|
||||
)
|
||||
|
||||
|
||||
class ACastIE(InfoExtractor):
|
||||
IE_NAME = 'acast'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:embed|www)\.)?acast\.com/|
|
||||
play\.acast\.com/s/
|
||||
)
|
||||
(?P<channel>[^/]+)/(?P<id>[^/#?]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
||||
'md5': '16d936099ec5ca2d5869e3a813ee8dc4',
|
||||
'info_dict': {
|
||||
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||
'ext': 'mp3',
|
||||
'title': '2. Raggarmordet - Röster ur det förflutna',
|
||||
'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4',
|
||||
'timestamp': 1477346700,
|
||||
'upload_date': '20161024',
|
||||
'duration': 2766.602563,
|
||||
'creator': 'Anton Berg & Martin Johnson',
|
||||
'series': 'Spår',
|
||||
'episode': '2. Raggarmordet - Röster ur det förflutna',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.acast.com/s/rattegangspodden/s04e09-styckmordet-i-helenelund-del-22',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel, display_id = re.match(self._VALID_URL, url).groups()
|
||||
s = self._download_json(
|
||||
'https://feeder.acast.com/api/v1/shows/%s/episodes/%s' % (channel, display_id),
|
||||
display_id)
|
||||
media_url = s['url']
|
||||
if re.search(r'[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}', display_id):
|
||||
episode_url = s.get('episodeUrl')
|
||||
if episode_url:
|
||||
display_id = episode_url
|
||||
else:
|
||||
channel, display_id = re.match(self._VALID_URL, s['link']).groups()
|
||||
cast_data = self._download_json(
|
||||
'https://play-api.acast.com/splash/%s/%s' % (channel, display_id),
|
||||
display_id)['result']
|
||||
e = cast_data['episode']
|
||||
title = e.get('name') or s['title']
|
||||
return {
|
||||
'id': compat_str(e['id']),
|
||||
'display_id': display_id,
|
||||
'url': media_url,
|
||||
'title': title,
|
||||
'description': e.get('summary') or clean_html(e.get('description') or s.get('description')),
|
||||
'thumbnail': e.get('image'),
|
||||
'timestamp': unified_timestamp(e.get('publishingDate') or s.get('publishDate')),
|
||||
'duration': float_or_none(e.get('duration') or s.get('duration')),
|
||||
'filesize': int_or_none(e.get('contentLength')),
|
||||
'creator': try_get(cast_data, lambda x: x['show']['author'], compat_str),
|
||||
'series': try_get(cast_data, lambda x: x['show']['name'], compat_str),
|
||||
'season_number': int_or_none(e.get('seasonNumber')),
|
||||
'episode': title,
|
||||
'episode_number': int_or_none(e.get('episodeNumber')),
|
||||
}
|
||||
|
||||
|
||||
class ACastChannelIE(InfoExtractor):
|
||||
IE_NAME = 'acast:channel'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?acast\.com/|
|
||||
play\.acast\.com/s/
|
||||
)
|
||||
(?P<id>[^/#?]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.acast.com/todayinfocus',
|
||||
'info_dict': {
|
||||
'id': '4efc5294-5385-4847-98bd-519799ce5786',
|
||||
'title': 'Today in Focus',
|
||||
'description': 'md5:9ba5564de5ce897faeb12963f4537a64',
|
||||
},
|
||||
'playlist_mincount': 35,
|
||||
}, {
|
||||
'url': 'http://play.acast.com/s/ft-banking-weekly',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_BASE_URL = 'https://play.acast.com/api/'
|
||||
_PAGE_SIZE = 10
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url)
|
||||
|
||||
def _fetch_page(self, channel_slug, page):
|
||||
casts = self._download_json(
|
||||
self._API_BASE_URL + 'channels/%s/acasts?page=%s' % (channel_slug, page),
|
||||
channel_slug, note='Download page %d of channel data' % page)
|
||||
for cast in casts:
|
||||
yield self.url_result(
|
||||
'https://play.acast.com/s/%s/%s' % (channel_slug, cast['url']),
|
||||
'ACast', cast['id'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_slug = self._match_id(url)
|
||||
channel_data = self._download_json(
|
||||
self._API_BASE_URL + 'channels/%s' % channel_slug, channel_slug)
|
||||
entries = OnDemandPagedList(functools.partial(
|
||||
self._fetch_page, channel_slug), self._PAGE_SIZE)
|
||||
return self.playlist_result(entries, compat_str(
|
||||
channel_data['id']), channel_data['name'], channel_data.get('description'))
|
207
youtube_dl/extractor/adn.py
Normal file
207
youtube_dl/extractor/adn.py
Normal file
|
@ -0,0 +1,207 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import binascii
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_ord,
|
||||
)
|
||||
from ..utils import (
|
||||
bytes_to_intlist,
|
||||
bytes_to_long,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
intlist_to_bytes,
|
||||
long_to_bytes,
|
||||
pkcs1pad,
|
||||
strip_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class ADNIE(InfoExtractor):
|
||||
IE_DESC = 'Anime Digital Network'
|
||||
_VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
||||
'md5': 'e497370d847fd79d9d4c74be55575c7a',
|
||||
'info_dict': {
|
||||
'id': '7778',
|
||||
'ext': 'mp4',
|
||||
'title': 'Blue Exorcist - Kyôto Saga - Épisode 1',
|
||||
'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
|
||||
}
|
||||
}
|
||||
_BASE_URL = 'http://animedigitalnetwork.fr'
|
||||
_RSA_KEY = (0xc35ae1e4356b65a73b551493da94b8cb443491c0aa092a357a5aee57ffc14dda85326f42d716e539a34542a0d3f363adf16c5ec222d713d5997194030ee2e4f0d1fb328c01a81cf6868c090d50de8e169c6b13d1675b9eeed1cbc51e1fffca9b38af07f37abd790924cd3bee59d0257cfda4fe5f3f0534877e21ce5821447d1b, 65537)
|
||||
_POS_ALIGN_MAP = {
|
||||
'start': 1,
|
||||
'end': 3,
|
||||
}
|
||||
_LINE_ALIGN_MAP = {
|
||||
'middle': 8,
|
||||
'end': 4,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _ass_subtitles_timecode(seconds):
|
||||
return '%01d:%02d:%02d.%02d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 100)
|
||||
|
||||
def _get_subtitles(self, sub_path, video_id):
|
||||
if not sub_path:
|
||||
return None
|
||||
|
||||
enc_subtitles = self._download_webpage(
|
||||
urljoin(self._BASE_URL, sub_path),
|
||||
video_id, 'Downloading subtitles location', fatal=False) or '{}'
|
||||
subtitle_location = (self._parse_json(enc_subtitles, video_id, fatal=False) or {}).get('location')
|
||||
if subtitle_location:
|
||||
enc_subtitles = self._download_webpage(
|
||||
urljoin(self._BASE_URL, subtitle_location),
|
||||
video_id, 'Downloading subtitles data', fatal=False,
|
||||
headers={'Origin': 'https://animedigitalnetwork.fr'})
|
||||
if not enc_subtitles:
|
||||
return None
|
||||
|
||||
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
||||
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
|
||||
bytes_to_intlist(binascii.unhexlify(self._K + '4b8ef13ec1872730')),
|
||||
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
|
||||
))
|
||||
subtitles_json = self._parse_json(
|
||||
dec_subtitles[:-compat_ord(dec_subtitles[-1])].decode(),
|
||||
None, fatal=False)
|
||||
if not subtitles_json:
|
||||
return None
|
||||
|
||||
subtitles = {}
|
||||
for sub_lang, sub in subtitles_json.items():
|
||||
ssa = '''[Script Info]
|
||||
ScriptType:V4.00
|
||||
[V4 Styles]
|
||||
Format: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,TertiaryColour,BackColour,Bold,Italic,BorderStyle,Outline,Shadow,Alignment,MarginL,MarginR,MarginV,AlphaLevel,Encoding
|
||||
Style: Default,Arial,18,16777215,16777215,16777215,0,-1,0,1,1,0,2,20,20,20,0,0
|
||||
[Events]
|
||||
Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
for current in sub:
|
||||
start, end, text, line_align, position_align = (
|
||||
float_or_none(current.get('startTime')),
|
||||
float_or_none(current.get('endTime')),
|
||||
current.get('text'), current.get('lineAlign'),
|
||||
current.get('positionAlign'))
|
||||
if start is None or end is None or text is None:
|
||||
continue
|
||||
alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0)
|
||||
ssa += os.linesep + 'Dialogue: Marked=0,%s,%s,Default,,0,0,0,,%s%s' % (
|
||||
self._ass_subtitles_timecode(start),
|
||||
self._ass_subtitles_timecode(end),
|
||||
'{\\a%d}' % alignment if alignment != 2 else '',
|
||||
text.replace('\n', '\\N').replace('<i>', '{\\i1}').replace('</i>', '{\\i0}'))
|
||||
|
||||
if sub_lang == 'vostf':
|
||||
sub_lang = 'fr'
|
||||
subtitles.setdefault(sub_lang, []).extend([{
|
||||
'ext': 'json',
|
||||
'data': json.dumps(sub),
|
||||
}, {
|
||||
'ext': 'ssa',
|
||||
'data': ssa,
|
||||
}])
|
||||
return subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player_config = self._parse_json(self._search_regex(
|
||||
r'playerConfig\s*=\s*({.+});', webpage,
|
||||
'player config', default='{}'), video_id, fatal=False)
|
||||
if not player_config:
|
||||
config_url = urljoin(self._BASE_URL, self._search_regex(
|
||||
r'(?:id="player"|class="[^"]*adn-player-container[^"]*")[^>]+data-url="([^"]+)"',
|
||||
webpage, 'config url'))
|
||||
player_config = self._download_json(
|
||||
config_url, video_id,
|
||||
'Downloading player config JSON metadata')['player']
|
||||
|
||||
video_info = {}
|
||||
video_info_str = self._search_regex(
|
||||
r'videoInfo\s*=\s*({.+});', webpage,
|
||||
'video info', fatal=False)
|
||||
if video_info_str:
|
||||
video_info = self._parse_json(
|
||||
video_info_str, video_id, fatal=False) or {}
|
||||
|
||||
options = player_config.get('options') or {}
|
||||
metas = options.get('metas') or {}
|
||||
links = player_config.get('links') or {}
|
||||
sub_path = player_config.get('subtitles')
|
||||
error = None
|
||||
if not links:
|
||||
links_url = player_config.get('linksurl') or options['videoUrl']
|
||||
token = options['token']
|
||||
self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)])
|
||||
message = bytes_to_intlist(json.dumps({
|
||||
'k': self._K,
|
||||
'e': 60,
|
||||
't': token,
|
||||
}))
|
||||
padded_message = intlist_to_bytes(pkcs1pad(message, 128))
|
||||
n, e = self._RSA_KEY
|
||||
encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
|
||||
authorization = base64.b64encode(encrypted_message).decode()
|
||||
links_data = self._download_json(
|
||||
urljoin(self._BASE_URL, links_url), video_id,
|
||||
'Downloading links JSON metadata', headers={
|
||||
'Authorization': 'Bearer ' + authorization,
|
||||
})
|
||||
links = links_data.get('links') or {}
|
||||
metas = metas or links_data.get('meta') or {}
|
||||
sub_path = sub_path or links_data.get('subtitles') or \
|
||||
'index.php?option=com_vodapi&task=subtitles.getJSON&format=json&id=' + video_id
|
||||
sub_path += '&token=' + token
|
||||
error = links_data.get('error')
|
||||
title = metas.get('title') or video_info['title']
|
||||
|
||||
formats = []
|
||||
for format_id, qualities in links.items():
|
||||
if not isinstance(qualities, dict):
|
||||
continue
|
||||
for quality, load_balancer_url in qualities.items():
|
||||
load_balancer_data = self._download_json(
|
||||
load_balancer_url, video_id,
|
||||
'Downloading %s %s JSON metadata' % (format_id, quality),
|
||||
fatal=False) or {}
|
||||
m3u8_url = load_balancer_data.get('location')
|
||||
if not m3u8_url:
|
||||
continue
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=format_id, fatal=False)
|
||||
if format_id == 'vf':
|
||||
for f in m3u8_formats:
|
||||
f['language'] = 'fr'
|
||||
formats.extend(m3u8_formats)
|
||||
if not error:
|
||||
error = options.get('error')
|
||||
if not formats and error:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': strip_or_none(metas.get('summary') or video_info.get('resume')),
|
||||
'thumbnail': video_info.get('image'),
|
||||
'formats': formats,
|
||||
'subtitles': self.extract_subtitles(sub_path, video_id),
|
||||
'episode': metas.get('subtitle') or video_info.get('videoTitle'),
|
||||
'series': video_info.get('playlistTitle'),
|
||||
}
|
37
youtube_dl/extractor/adobeconnect.py
Normal file
37
youtube_dl/extractor/adobeconnect.py
Normal file
|
@ -0,0 +1,37 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
class AdobeConnectIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://\w+\.adobeconnect\.com/(?P<id>[\w-]+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
|
||||
qs = compat_parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1])
|
||||
is_live = qs.get('isLive', ['false'])[0] == 'true'
|
||||
formats = []
|
||||
for con_string in qs['conStrings'][0].split(','):
|
||||
formats.append({
|
||||
'format_id': con_string.split('://')[0],
|
||||
'app': compat_urlparse.quote('?' + con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]),
|
||||
'ext': 'flv',
|
||||
'play_path': 'mp4:' + qs['streamName'][0],
|
||||
'rtmp_conn': 'S:' + qs['ticket'][0],
|
||||
'rtmp_live': is_live,
|
||||
'url': con_string,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'formats': formats,
|
||||
'is_live': is_live,
|
||||
}
|
1572
youtube_dl/extractor/adobepass.py
Normal file
1572
youtube_dl/extractor/adobepass.py
Normal file
File diff suppressed because it is too large
Load diff
288
youtube_dl/extractor/adobetv.py
Normal file
288
youtube_dl/extractor/adobetv.py
Normal file
|
@ -0,0 +1,288 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
ISO639Utils,
|
||||
OnDemandPagedList,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class AdobeTVBaseIE(InfoExtractor):
|
||||
def _call_api(self, path, video_id, query, note=None):
|
||||
return self._download_json(
|
||||
'http://tv.adobe.com/api/v4/' + path,
|
||||
video_id, note, query=query)['data']
|
||||
|
||||
def _parse_subtitles(self, video_data, url_key):
|
||||
subtitles = {}
|
||||
for translation in video_data.get('translations', []):
|
||||
vtt_path = translation.get(url_key)
|
||||
if not vtt_path:
|
||||
continue
|
||||
lang = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'ext': 'vtt',
|
||||
'url': vtt_path,
|
||||
})
|
||||
return subtitles
|
||||
|
||||
def _parse_video_data(self, video_data):
|
||||
video_id = compat_str(video_data['id'])
|
||||
title = video_data['title']
|
||||
|
||||
s3_extracted = False
|
||||
formats = []
|
||||
for source in video_data.get('videos', []):
|
||||
source_url = source.get('url')
|
||||
if not source_url:
|
||||
continue
|
||||
f = {
|
||||
'format_id': source.get('quality_level'),
|
||||
'fps': int_or_none(source.get('frame_rate')),
|
||||
'height': int_or_none(source.get('height')),
|
||||
'tbr': int_or_none(source.get('video_data_rate')),
|
||||
'width': int_or_none(source.get('width')),
|
||||
'url': source_url,
|
||||
}
|
||||
original_filename = source.get('original_filename')
|
||||
if original_filename:
|
||||
if not (f.get('height') and f.get('width')):
|
||||
mobj = re.search(r'_(\d+)x(\d+)', original_filename)
|
||||
if mobj:
|
||||
f.update({
|
||||
'height': int(mobj.group(2)),
|
||||
'width': int(mobj.group(1)),
|
||||
})
|
||||
if original_filename.startswith('s3://') and not s3_extracted:
|
||||
formats.append({
|
||||
'format_id': 'original',
|
||||
'preference': 1,
|
||||
'url': original_filename.replace('s3://', 'https://s3.amazonaws.com/'),
|
||||
})
|
||||
s3_extracted = True
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': video_data.get('thumbnail'),
|
||||
'upload_date': unified_strdate(video_data.get('start_date')),
|
||||
'duration': parse_duration(video_data.get('duration')),
|
||||
'view_count': str_to_int(video_data.get('playcount')),
|
||||
'formats': formats,
|
||||
'subtitles': self._parse_subtitles(video_data, 'vtt'),
|
||||
}
|
||||
|
||||
|
||||
class AdobeTVEmbedIE(AdobeTVBaseIE):
|
||||
IE_NAME = 'adobetv:embed'
|
||||
_VALID_URL = r'https?://tv\.adobe\.com/embed/\d+/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://tv.adobe.com/embed/22/4153',
|
||||
'md5': 'c8c0461bf04d54574fc2b4d07ac6783a',
|
||||
'info_dict': {
|
||||
'id': '4153',
|
||||
'ext': 'flv',
|
||||
'title': 'Creating Graphics Optimized for BlackBerry',
|
||||
'description': 'md5:eac6e8dced38bdaae51cd94447927459',
|
||||
'thumbnail': r're:https?://.*\.jpg$',
|
||||
'upload_date': '20091109',
|
||||
'duration': 377,
|
||||
'view_count': int,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video_data = self._call_api(
|
||||
'episode/' + video_id, video_id, {'disclosure': 'standard'})[0]
|
||||
return self._parse_video_data(video_data)
|
||||
|
||||
|
||||
class AdobeTVIE(AdobeTVBaseIE):
|
||||
IE_NAME = 'adobetv'
|
||||
_VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',
|
||||
'md5': '9bc5727bcdd55251f35ad311ca74fa1e',
|
||||
'info_dict': {
|
||||
'id': '10981',
|
||||
'ext': 'mp4',
|
||||
'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',
|
||||
'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311',
|
||||
'thumbnail': r're:https?://.*\.jpg$',
|
||||
'upload_date': '20110914',
|
||||
'duration': 60,
|
||||
'view_count': int,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
language, show_urlname, urlname = re.match(self._VALID_URL, url).groups()
|
||||
if not language:
|
||||
language = 'en'
|
||||
|
||||
video_data = self._call_api(
|
||||
'episode/get', urlname, {
|
||||
'disclosure': 'standard',
|
||||
'language': language,
|
||||
'show_urlname': show_urlname,
|
||||
'urlname': urlname,
|
||||
})[0]
|
||||
return self._parse_video_data(video_data)
|
||||
|
||||
|
||||
class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
|
||||
_PAGE_SIZE = 25
|
||||
|
||||
def _fetch_page(self, display_id, query, page):
|
||||
page += 1
|
||||
query['page'] = page
|
||||
for element_data in self._call_api(
|
||||
self._RESOURCE, display_id, query, 'Download Page %d' % page):
|
||||
yield self._process_data(element_data)
|
||||
|
||||
def _extract_playlist_entries(self, display_id, query):
|
||||
return OnDemandPagedList(functools.partial(
|
||||
self._fetch_page, display_id, query), self._PAGE_SIZE)
|
||||
|
||||
|
||||
class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
|
||||
IE_NAME = 'adobetv:show'
|
||||
_VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?show/(?P<id>[^/]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://tv.adobe.com/show/the-complete-picture-with-julieanne-kost',
|
||||
'info_dict': {
|
||||
'id': '36',
|
||||
'title': 'The Complete Picture with Julieanne Kost',
|
||||
'description': 'md5:fa50867102dcd1aa0ddf2ab039311b27',
|
||||
},
|
||||
'playlist_mincount': 136,
|
||||
}
|
||||
_RESOURCE = 'episode'
|
||||
_process_data = AdobeTVBaseIE._parse_video_data
|
||||
|
||||
def _real_extract(self, url):
|
||||
language, show_urlname = re.match(self._VALID_URL, url).groups()
|
||||
if not language:
|
||||
language = 'en'
|
||||
query = {
|
||||
'disclosure': 'standard',
|
||||
'language': language,
|
||||
'show_urlname': show_urlname,
|
||||
}
|
||||
|
||||
show_data = self._call_api(
|
||||
'show/get', show_urlname, query)[0]
|
||||
|
||||
return self.playlist_result(
|
||||
self._extract_playlist_entries(show_urlname, query),
|
||||
str_or_none(show_data.get('id')),
|
||||
show_data.get('show_name'),
|
||||
show_data.get('show_description'))
|
||||
|
||||
|
||||
class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
|
||||
IE_NAME = 'adobetv:channel'
|
||||
_VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?channel/(?P<id>[^/]+)(?:/(?P<category_urlname>[^/]+))?'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://tv.adobe.com/channel/development',
|
||||
'info_dict': {
|
||||
'id': 'development',
|
||||
},
|
||||
'playlist_mincount': 96,
|
||||
}
|
||||
_RESOURCE = 'show'
|
||||
|
||||
def _process_data(self, show_data):
|
||||
return self.url_result(
|
||||
show_data['url'], 'AdobeTVShow', str_or_none(show_data.get('id')))
|
||||
|
||||
def _real_extract(self, url):
|
||||
language, channel_urlname, category_urlname = re.match(self._VALID_URL, url).groups()
|
||||
if not language:
|
||||
language = 'en'
|
||||
query = {
|
||||
'channel_urlname': channel_urlname,
|
||||
'language': language,
|
||||
}
|
||||
if category_urlname:
|
||||
query['category_urlname'] = category_urlname
|
||||
|
||||
return self.playlist_result(
|
||||
self._extract_playlist_entries(channel_urlname, query),
|
||||
channel_urlname)
|
||||
|
||||
|
||||
class AdobeTVVideoIE(AdobeTVBaseIE):
|
||||
IE_NAME = 'adobetv:video'
|
||||
_VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
# From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners
|
||||
'url': 'https://video.tv.adobe.com/v/2456/',
|
||||
'md5': '43662b577c018ad707a63766462b1e87',
|
||||
'info_dict': {
|
||||
'id': '2456',
|
||||
'ext': 'mp4',
|
||||
'title': 'New experience with Acrobat DC',
|
||||
'description': 'New experience with Acrobat DC',
|
||||
'duration': 248.667,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_data = self._parse_json(self._search_regex(
|
||||
r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id)
|
||||
title = video_data['title']
|
||||
|
||||
formats = []
|
||||
sources = video_data.get('sources') or []
|
||||
for source in sources:
|
||||
source_src = source.get('src')
|
||||
if not source_src:
|
||||
continue
|
||||
formats.append({
|
||||
'filesize': int_or_none(source.get('kilobytes') or None, invscale=1000),
|
||||
'format_id': '-'.join(filter(None, [source.get('format'), source.get('label')])),
|
||||
'height': int_or_none(source.get('height') or None),
|
||||
'tbr': int_or_none(source.get('bitrate') or None),
|
||||
'width': int_or_none(source.get('width') or None),
|
||||
'url': source_src,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
# For both metadata and downloaded files the duration varies among
|
||||
# formats. I just pick the max one
|
||||
duration = max(filter(None, [
|
||||
float_or_none(source.get('duration'), scale=1000)
|
||||
for source in sources]))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': video_data.get('video', {}).get('poster'),
|
||||
'duration': duration,
|
||||
'subtitles': self._parse_subtitles(video_data, 'vttPath'),
|
||||
}
|
202
youtube_dl/extractor/adultswim.py
Normal file
202
youtube_dl/extractor/adultswim.py
Normal file
|
@ -0,0 +1,202 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .turner import TurnerBaseIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class AdultSwimIE(TurnerBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<show_path>[^/?#]+)(?:/(?P<episode_path>[^/?#]+))?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://adultswim.com/videos/rick-and-morty/pilot',
|
||||
'info_dict': {
|
||||
'id': 'rQxZvXQ4ROaSOqq-or2Mow',
|
||||
'ext': 'mp4',
|
||||
'title': 'Rick and Morty - Pilot',
|
||||
'description': 'Rick moves in with his daughter\'s family and establishes himself as a bad influence on his grandson, Morty.',
|
||||
'timestamp': 1543294800,
|
||||
'upload_date': '20181127',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
|
||||
'info_dict': {
|
||||
'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
|
||||
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.',
|
||||
'upload_date': '20080124',
|
||||
'timestamp': 1201150800,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/',
|
||||
'info_dict': {
|
||||
'id': 'I0LQFQkaSUaFp8PnAWHhoQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Decker - Inside Decker: A New Hero',
|
||||
'description': 'The guys recap the conclusion of the season. They announce a new hero, take a peek into the Victorville Film Archive and welcome back the talented James Dean.',
|
||||
'timestamp': 1469480460,
|
||||
'upload_date': '20160725',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'http://www.adultswim.com/videos/attack-on-titan',
|
||||
'info_dict': {
|
||||
'id': 'attack-on-titan',
|
||||
'title': 'Attack on Titan',
|
||||
'description': 'md5:41caa9416906d90711e31dc00cb7db7e',
|
||||
},
|
||||
'playlist_mincount': 12,
|
||||
}, {
|
||||
'url': 'http://www.adultswim.com/videos/streams/williams-stream',
|
||||
'info_dict': {
|
||||
'id': 'd8DEBj7QRfetLsRgFnGEyg',
|
||||
'ext': 'mp4',
|
||||
'title': r're:^Williams Stream \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'description': 'original programming',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_path, episode_path = re.match(self._VALID_URL, url).groups()
|
||||
display_id = episode_path or show_path
|
||||
query = '''query {
|
||||
getShowBySlug(slug:"%s") {
|
||||
%%s
|
||||
}
|
||||
}''' % show_path
|
||||
if episode_path:
|
||||
query = query % '''title
|
||||
getVideoBySlug(slug:"%s") {
|
||||
_id
|
||||
auth
|
||||
description
|
||||
duration
|
||||
episodeNumber
|
||||
launchDate
|
||||
mediaID
|
||||
seasonNumber
|
||||
poster
|
||||
title
|
||||
tvRating
|
||||
}''' % episode_path
|
||||
['getVideoBySlug']
|
||||
else:
|
||||
query = query % '''metaDescription
|
||||
title
|
||||
videos(first:1000,sort:["episode_number"]) {
|
||||
edges {
|
||||
node {
|
||||
_id
|
||||
slug
|
||||
}
|
||||
}
|
||||
}'''
|
||||
show_data = self._download_json(
|
||||
'https://www.adultswim.com/api/search', display_id,
|
||||
data=json.dumps({'query': query}).encode(),
|
||||
headers={'Content-Type': 'application/json'})['data']['getShowBySlug']
|
||||
if episode_path:
|
||||
video_data = show_data['getVideoBySlug']
|
||||
video_id = video_data['_id']
|
||||
episode_title = title = video_data['title']
|
||||
series = show_data.get('title')
|
||||
if series:
|
||||
title = '%s - %s' % (series, title)
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': strip_or_none(video_data.get('description')),
|
||||
'duration': float_or_none(video_data.get('duration')),
|
||||
'formats': [],
|
||||
'subtitles': {},
|
||||
'age_limit': parse_age_limit(video_data.get('tvRating')),
|
||||
'thumbnail': video_data.get('poster'),
|
||||
'timestamp': parse_iso8601(video_data.get('launchDate')),
|
||||
'series': series,
|
||||
'season_number': int_or_none(video_data.get('seasonNumber')),
|
||||
'episode': episode_title,
|
||||
'episode_number': int_or_none(video_data.get('episodeNumber')),
|
||||
}
|
||||
|
||||
auth = video_data.get('auth')
|
||||
media_id = video_data.get('mediaID')
|
||||
if media_id:
|
||||
info.update(self._extract_ngtv_info(media_id, {
|
||||
# CDN_TOKEN_APP_ID from:
|
||||
# https://d2gg02c3xr550i.cloudfront.net/assets/asvp.e9c8bef24322d060ef87.bundle.js
|
||||
'appId': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhcHBJZCI6ImFzLXR2ZS1kZXNrdG9wLXB0enQ2bSIsInByb2R1Y3QiOiJ0dmUiLCJuZXR3b3JrIjoiYXMiLCJwbGF0Zm9ybSI6ImRlc2t0b3AiLCJpYXQiOjE1MzI3MDIyNzl9.BzSCk-WYOZ2GMCIaeVb8zWnzhlgnXuJTCu0jGp_VaZE',
|
||||
}, {
|
||||
'url': url,
|
||||
'site_name': 'AdultSwim',
|
||||
'auth_required': auth,
|
||||
}))
|
||||
|
||||
if not auth:
|
||||
extract_data = self._download_json(
|
||||
'https://www.adultswim.com/api/shows/v1/videos/' + video_id,
|
||||
video_id, query={'fields': 'stream'}, fatal=False) or {}
|
||||
assets = try_get(extract_data, lambda x: x['data']['video']['stream']['assets'], list) or []
|
||||
for asset in assets:
|
||||
asset_url = asset.get('url')
|
||||
if not asset_url:
|
||||
continue
|
||||
ext = determine_ext(asset_url, mimetype2ext(asset.get('mime_type')))
|
||||
if ext == 'm3u8':
|
||||
info['formats'].extend(self._extract_m3u8_formats(
|
||||
asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
elif ext == 'f4m':
|
||||
continue
|
||||
# info['formats'].extend(self._extract_f4m_formats(
|
||||
# asset_url, video_id, f4m_id='hds', fatal=False))
|
||||
elif ext in ('scc', 'ttml', 'vtt'):
|
||||
info['subtitles'].setdefault('en', []).append({
|
||||
'url': asset_url,
|
||||
})
|
||||
self._sort_formats(info['formats'])
|
||||
|
||||
return info
|
||||
else:
|
||||
entries = []
|
||||
for edge in show_data.get('videos', {}).get('edges', []):
|
||||
video = edge.get('node') or {}
|
||||
slug = video.get('slug')
|
||||
if not slug:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
'http://adultswim.com/videos/%s/%s' % (show_path, slug),
|
||||
'AdultSwim', video.get('_id')))
|
||||
return self.playlist_result(
|
||||
entries, show_path, show_data.get('title'),
|
||||
strip_or_none(show_data.get('metaDescription')))
|
247
youtube_dl/extractor/aenetworks.py
Normal file
247
youtube_dl/extractor/aenetworks.py
Normal file
|
@ -0,0 +1,247 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
class AENetworksBaseIE(ThePlatformIE):
|
||||
_THEPLATFORM_KEY = 'crazyjava'
|
||||
_THEPLATFORM_SECRET = 's3cr3t'
|
||||
|
||||
def _extract_aen_smil(self, smil_url, video_id, auth=None):
|
||||
query = {'mbr': 'true'}
|
||||
if auth:
|
||||
query['auth'] = auth
|
||||
TP_SMIL_QUERY = [{
|
||||
'assetTypes': 'high_video_ak',
|
||||
'switch': 'hls_high_ak'
|
||||
}, {
|
||||
'assetTypes': 'high_video_s3'
|
||||
}, {
|
||||
'assetTypes': 'high_video_s3',
|
||||
'switch': 'hls_ingest_fastly'
|
||||
}]
|
||||
formats = []
|
||||
subtitles = {}
|
||||
last_e = None
|
||||
for q in TP_SMIL_QUERY:
|
||||
q.update(query)
|
||||
m_url = update_url_query(smil_url, q)
|
||||
m_url = self._sign_url(m_url, self._THEPLATFORM_KEY, self._THEPLATFORM_SECRET)
|
||||
try:
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
m_url, video_id, 'Downloading %s SMIL data' % (q.get('switch') or q['assetTypes']))
|
||||
except ExtractorError as e:
|
||||
last_e = e
|
||||
continue
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
if last_e and not formats:
|
||||
raise last_e
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class AENetworksIE(AENetworksBaseIE):
|
||||
IE_NAME = 'aenetworks'
|
||||
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?P<domain>
|
||||
(?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
|
||||
fyi\.tv
|
||||
)/
|
||||
(?:
|
||||
shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|
|
||||
movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?|
|
||||
specials/(?P<special_display_id>[^/]+)/(?:full-special|preview-)|
|
||||
collections/[^/]+/(?P<collection_display_id>[^/]+)
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
|
||||
'info_dict': {
|
||||
'id': '22253814',
|
||||
'ext': 'mp4',
|
||||
'title': 'Winter is Coming',
|
||||
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
|
||||
'timestamp': 1338306241,
|
||||
'upload_date': '20120529',
|
||||
'uploader': 'AENE-NEW',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}, {
|
||||
'url': 'http://www.history.com/shows/ancient-aliens/season-1',
|
||||
'info_dict': {
|
||||
'id': '71889446852',
|
||||
},
|
||||
'playlist_mincount': 5,
|
||||
}, {
|
||||
'url': 'http://www.mylifetime.com/shows/atlanta-plastic',
|
||||
'info_dict': {
|
||||
'id': 'SERIES4317',
|
||||
'title': 'Atlanta Plastic',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
}, {
|
||||
'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://www.lifetimemovieclub.com/movies/a-killer-among-us',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://www.historyvault.com/collections/america-the-story-of-us/westward',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
|
||||
'only_matching': True
|
||||
}]
|
||||
_DOMAIN_TO_REQUESTOR_ID = {
|
||||
'history.com': 'HISTORY',
|
||||
'aetv.com': 'AETV',
|
||||
'mylifetime.com': 'LIFETIME',
|
||||
'lifetimemovieclub.com': 'LIFETIMEMOVIECLUB',
|
||||
'fyi.tv': 'FYI',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, show_path, movie_display_id, special_display_id, collection_display_id = re.match(self._VALID_URL, url).groups()
|
||||
display_id = show_path or movie_display_id or special_display_id or collection_display_id
|
||||
webpage = self._download_webpage(url, display_id, headers=self.geo_verification_headers())
|
||||
if show_path:
|
||||
url_parts = show_path.split('/')
|
||||
url_parts_len = len(url_parts)
|
||||
if url_parts_len == 1:
|
||||
entries = []
|
||||
for season_url_path in re.findall(r'(?s)<li[^>]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage):
|
||||
entries.append(self.url_result(
|
||||
compat_urlparse.urljoin(url, season_url_path), 'AENetworks'))
|
||||
if entries:
|
||||
return self.playlist_result(
|
||||
entries, self._html_search_meta('aetn:SeriesId', webpage),
|
||||
self._html_search_meta('aetn:SeriesTitle', webpage))
|
||||
else:
|
||||
# single season
|
||||
url_parts_len = 2
|
||||
if url_parts_len == 2:
|
||||
entries = []
|
||||
for episode_item in re.findall(r'(?s)<[^>]+class="[^"]*(?:episode|program)-item[^"]*"[^>]*>', webpage):
|
||||
episode_attributes = extract_attributes(episode_item)
|
||||
episode_url = compat_urlparse.urljoin(
|
||||
url, episode_attributes['data-canonical'])
|
||||
entries.append(self.url_result(
|
||||
episode_url, 'AENetworks',
|
||||
episode_attributes.get('data-videoid') or episode_attributes.get('data-video-id')))
|
||||
return self.playlist_result(
|
||||
entries, self._html_search_meta('aetn:SeasonId', webpage))
|
||||
|
||||
video_id = self._html_search_meta('aetn:VideoID', webpage)
|
||||
media_url = self._search_regex(
|
||||
[r"media_url\s*=\s*'(?P<url>[^']+)'",
|
||||
r'data-media-url=(?P<url>(?:https?:)?//[^\s>]+)',
|
||||
r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'],
|
||||
webpage, 'video url', group='url')
|
||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||
r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
auth = None
|
||||
if theplatform_metadata.get('AETN$isBehindWall'):
|
||||
requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, theplatform_metadata['title'],
|
||||
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
|
||||
theplatform_metadata['ratings'][0]['rating'])
|
||||
auth = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
info.update(self._search_json_ld(webpage, video_id, fatal=False))
|
||||
info.update(self._extract_aen_smil(media_url, video_id, auth))
|
||||
return info
|
||||
|
||||
|
||||
class HistoryTopicIE(AENetworksBaseIE):
|
||||
IE_NAME = 'history:topic'
|
||||
IE_DESC = 'History.com Topic'
|
||||
_VALID_URL = r'https?://(?:www\.)?history\.com/topics/[^/]+/(?P<id>[\w+-]+?)-video'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.history.com/topics/valentines-day/history-of-valentines-day-video',
|
||||
'info_dict': {
|
||||
'id': '40700995724',
|
||||
'ext': 'mp4',
|
||||
'title': "History of Valentine’s Day",
|
||||
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
|
||||
'timestamp': 1375819729,
|
||||
'upload_date': '20130806',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}]
|
||||
|
||||
def theplatform_url_result(self, theplatform_url, video_id, query):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': smuggle_url(
|
||||
update_url_query(theplatform_url, query),
|
||||
{
|
||||
'sig': {
|
||||
'key': self._THEPLATFORM_KEY,
|
||||
'secret': self._THEPLATFORM_SECRET,
|
||||
},
|
||||
'force_smil_url': True
|
||||
}),
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
r'<phoenix-iframe[^>]+src="[^"]+\btpid=(\d+)', webpage, 'tpid')
|
||||
result = self._download_json(
|
||||
'https://feeds.video.aetnd.com/api/v2/history/videos',
|
||||
video_id, query={'filter[id]': video_id})['results'][0]
|
||||
title = result['title']
|
||||
info = self._extract_aen_smil(result['publicUrl'], video_id)
|
||||
info.update({
|
||||
'title': title,
|
||||
'description': result.get('description'),
|
||||
'duration': int_or_none(result.get('duration')),
|
||||
'timestamp': int_or_none(result.get('added'), 1000),
|
||||
})
|
||||
return info
|
367
youtube_dl/extractor/afreecatv.py
Normal file
367
youtube_dl/extractor/afreecatv.py
Normal file
|
@ -0,0 +1,367 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_xpath
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class AfreecaTVIE(InfoExtractor):
|
||||
IE_NAME = 'afreecatv'
|
||||
IE_DESC = 'afreecatv.com'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
|
||||
(?:
|
||||
/app/(?:index|read_ucc_bbs)\.cgi|
|
||||
/player/[Pp]layer\.(?:swf|html)
|
||||
)\?.*?\bnTitleNo=|
|
||||
vod\.afreecatv\.com/PLAYER/STATION/
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_NETRC_MACHINE = 'afreecatv'
|
||||
_TESTS = [{
|
||||
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
|
||||
'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
|
||||
'info_dict': {
|
||||
'id': '36164052',
|
||||
'ext': 'mp4',
|
||||
'title': '데일리 에이프릴 요정들의 시상식!',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
'upload_date': '20160503',
|
||||
},
|
||||
'skip': 'Video is gone',
|
||||
}, {
|
||||
'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
|
||||
'info_dict': {
|
||||
'id': '36153164',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist': [{
|
||||
'md5': 'd8b7c174568da61d774ef0203159bf97',
|
||||
'info_dict': {
|
||||
'id': '36153164_1',
|
||||
'ext': 'mp4',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'upload_date': '20160502',
|
||||
},
|
||||
}, {
|
||||
'md5': '58f2ce7f6044e34439ab2d50612ab02b',
|
||||
'info_dict': {
|
||||
'id': '36153164_2',
|
||||
'ext': 'mp4',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'upload_date': '20160502',
|
||||
},
|
||||
}],
|
||||
'skip': 'Video is gone',
|
||||
}, {
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/18650793',
|
||||
'info_dict': {
|
||||
'id': '18650793',
|
||||
'ext': 'mp4',
|
||||
'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': '윈아디',
|
||||
'uploader_id': 'badkids',
|
||||
'duration': 107,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/10481652',
|
||||
'info_dict': {
|
||||
'id': '10481652',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
'duration': 6492,
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist': [{
|
||||
'md5': 'd8b7c174568da61d774ef0203159bf97',
|
||||
'info_dict': {
|
||||
'id': '20160502_c4c62b9d_174361386_1',
|
||||
'ext': 'mp4',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 1)",
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
'upload_date': '20160502',
|
||||
'duration': 3601,
|
||||
},
|
||||
}, {
|
||||
'md5': '58f2ce7f6044e34439ab2d50612ab02b',
|
||||
'info_dict': {
|
||||
'id': '20160502_39e739bb_174361386_2',
|
||||
'ext': 'mp4',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 2)",
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
'upload_date': '20160502',
|
||||
'duration': 2891,
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# non standard key
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605',
|
||||
'info_dict': {
|
||||
'id': '20170411_BE689A0E_190960999_1_2_h',
|
||||
'ext': 'mp4',
|
||||
'title': '혼자사는여자집',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': '♥이슬이',
|
||||
'uploader_id': 'dasl8121',
|
||||
'upload_date': '20170411',
|
||||
'duration': 213,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# PARTIAL_ADULT
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/32028439',
|
||||
'info_dict': {
|
||||
'id': '20180327_27901457_202289533_1',
|
||||
'ext': 'mp4',
|
||||
'title': '[생]빨개요♥ (part 1)',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': '[SA]서아',
|
||||
'uploader_id': 'bjdyrksu',
|
||||
'upload_date': '20180327',
|
||||
'duration': 3601,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['adult content'],
|
||||
}, {
|
||||
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/15055030',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def parse_video_key(key):
|
||||
video_key = {}
|
||||
m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
|
||||
if m:
|
||||
video_key['upload_date'] = m.group('upload_date')
|
||||
video_key['part'] = int(m.group('part'))
|
||||
return video_key
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login_form = {
|
||||
'szWork': 'login',
|
||||
'szType': 'json',
|
||||
'szUid': username,
|
||||
'szPassword': password,
|
||||
'isSaveId': 'false',
|
||||
'szScriptVar': 'oLoginRet',
|
||||
'szAction': '',
|
||||
}
|
||||
|
||||
response = self._download_json(
|
||||
'https://login.afreecatv.com/app/LoginAction.php', None,
|
||||
'Logging in', data=urlencode_postdata(login_form))
|
||||
|
||||
_ERRORS = {
|
||||
-4: 'Your account has been suspended due to a violation of our terms and policies.',
|
||||
-5: 'https://member.afreecatv.com/app/user_delete_progress.php',
|
||||
-6: 'https://login.afreecatv.com/membership/changeMember.php',
|
||||
-8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
|
||||
-9: 'https://member.afreecatv.com/app/pop_login_block.php',
|
||||
-11: 'https://login.afreecatv.com/afreeca/second_login.php',
|
||||
-12: 'https://member.afreecatv.com/app/user_security.php',
|
||||
0: 'The username does not exist or you have entered the wrong password.',
|
||||
-1: 'The username does not exist or you have entered the wrong password.',
|
||||
-3: 'You have entered your username/password incorrectly.',
|
||||
-7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.',
|
||||
-10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
|
||||
-32008: 'You have failed to log in. Please contact our Help Center.',
|
||||
}
|
||||
|
||||
result = int_or_none(response.get('RESULT'))
|
||||
if result != 1:
|
||||
error = _ERRORS.get(result, 'You have failed to log in.')
|
||||
raise ExtractorError(
|
||||
'Unable to login: %s said: %s' % (self.IE_NAME, error),
|
||||
expected=True)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if re.search(r'alert\(["\']This video has been deleted', webpage):
|
||||
raise ExtractorError(
|
||||
'Video %s has been deleted' % video_id, expected=True)
|
||||
|
||||
station_id = self._search_regex(
|
||||
r'nStationNo\s*=\s*(\d+)', webpage, 'station')
|
||||
bbs_id = self._search_regex(
|
||||
r'nBbsNo\s*=\s*(\d+)', webpage, 'bbs')
|
||||
video_id = self._search_regex(
|
||||
r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id)
|
||||
|
||||
partial_view = False
|
||||
for _ in range(2):
|
||||
query = {
|
||||
'nTitleNo': video_id,
|
||||
'nStationNo': station_id,
|
||||
'nBbsNo': bbs_id,
|
||||
}
|
||||
if partial_view:
|
||||
query['partialView'] = 'SKIP_ADULT'
|
||||
video_xml = self._download_xml(
|
||||
'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
|
||||
video_id, 'Downloading video info XML%s'
|
||||
% (' (skipping adult)' if partial_view else ''),
|
||||
video_id, headers={
|
||||
'Referer': url,
|
||||
}, query=query)
|
||||
|
||||
flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
|
||||
if flag and flag == 'SUCCEED':
|
||||
break
|
||||
if flag == 'PARTIAL_ADULT':
|
||||
self._downloader.report_warning(
|
||||
'In accordance with local laws and regulations, underage users are restricted from watching adult content. '
|
||||
'Only content suitable for all ages will be downloaded. '
|
||||
'Provide account credentials if you wish to download restricted content.')
|
||||
partial_view = True
|
||||
continue
|
||||
elif flag == 'ADULT':
|
||||
error = 'Only users older than 19 are able to watch this video. Provide account credentials to download this content.'
|
||||
else:
|
||||
error = flag
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
else:
|
||||
raise ExtractorError('Unable to download video info')
|
||||
|
||||
video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
|
||||
if video_element is None or video_element.text is None:
|
||||
raise ExtractorError(
|
||||
'Video %s video does not exist' % video_id, expected=True)
|
||||
|
||||
video_url = video_element.text.strip()
|
||||
|
||||
title = xpath_text(video_xml, './track/title', 'title', fatal=True)
|
||||
|
||||
uploader = xpath_text(video_xml, './track/nickname', 'uploader')
|
||||
uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
|
||||
duration = int_or_none(xpath_text(
|
||||
video_xml, './track/duration', 'duration'))
|
||||
thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')
|
||||
|
||||
common_entry = {
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
info = common_entry.copy()
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
})
|
||||
|
||||
if not video_url:
|
||||
entries = []
|
||||
file_elements = video_element.findall(compat_xpath('./file'))
|
||||
one = len(file_elements) == 1
|
||||
for file_num, file_element in enumerate(file_elements, start=1):
|
||||
file_url = url_or_none(file_element.text)
|
||||
if not file_url:
|
||||
continue
|
||||
key = file_element.get('key', '')
|
||||
upload_date = self._search_regex(
|
||||
r'^(\d{8})_', key, 'upload date', default=None)
|
||||
file_duration = int_or_none(file_element.get('duration'))
|
||||
format_id = key if key else '%s_%s' % (video_id, file_num)
|
||||
if determine_ext(file_url) == 'm3u8':
|
||||
formats = self._extract_m3u8_formats(
|
||||
file_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls',
|
||||
note='Downloading part %d m3u8 information' % file_num)
|
||||
else:
|
||||
formats = [{
|
||||
'url': file_url,
|
||||
'format_id': 'http',
|
||||
}]
|
||||
if not formats:
|
||||
continue
|
||||
self._sort_formats(formats)
|
||||
file_info = common_entry.copy()
|
||||
file_info.update({
|
||||
'id': format_id,
|
||||
'title': title if one else '%s (part %d)' % (title, file_num),
|
||||
'upload_date': upload_date,
|
||||
'duration': file_duration,
|
||||
'formats': formats,
|
||||
})
|
||||
entries.append(file_info)
|
||||
entries_info = info.copy()
|
||||
entries_info.update({
|
||||
'_type': 'multi_video',
|
||||
'entries': entries,
|
||||
})
|
||||
return entries_info
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
if determine_ext(video_url) == 'm3u8':
|
||||
info['formats'] = self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
else:
|
||||
app, playpath = video_url.split('mp4:')
|
||||
info.update({
|
||||
'url': app,
|
||||
'ext': 'flv',
|
||||
'play_path': 'mp4:' + playpath,
|
||||
'rtmp_live': True, # downloading won't end without this
|
||||
})
|
||||
|
||||
return info
|
66
youtube_dl/extractor/airmozilla.py
Normal file
66
youtube_dl/extractor/airmozilla.py
Normal file
|
@ -0,0 +1,66 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class AirMozillaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
|
||||
_TEST = {
|
||||
'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
|
||||
'md5': '8d02f53ee39cf006009180e21df1f3ba',
|
||||
'info_dict': {
|
||||
'id': '6x4q2w',
|
||||
'ext': 'mp4',
|
||||
'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
|
||||
'thumbnail': r're:https?://.*/poster\.jpg',
|
||||
'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
|
||||
'timestamp': 1422487800,
|
||||
'upload_date': '20150128',
|
||||
'location': 'SFO Commons',
|
||||
'duration': 3780,
|
||||
'view_count': int,
|
||||
'categories': ['Main', 'Privacy'],
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._html_search_regex(r'//vid\.ly/(.*?)/embed', webpage, 'id')
|
||||
|
||||
embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id)
|
||||
jwconfig = self._parse_json(self._search_regex(
|
||||
r'initCallback\((.*)\);', embed_script, 'metadata'), video_id)['config']
|
||||
|
||||
info_dict = self._parse_jwplayer_data(jwconfig, video_id)
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
r'Views since archived: ([0-9]+)',
|
||||
webpage, 'view count', fatal=False))
|
||||
timestamp = parse_iso8601(self._html_search_regex(
|
||||
r'<time datetime="(.*?)"', webpage, 'timestamp', fatal=False))
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'url': self._og_search_url(webpage),
|
||||
'display_id': display_id,
|
||||
'description': self._og_search_description(webpage),
|
||||
'timestamp': timestamp,
|
||||
'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
|
||||
})
|
||||
|
||||
return info_dict
|
53
youtube_dl/extractor/aliexpress.py
Normal file
53
youtube_dl/extractor/aliexpress.py
Normal file
|
@ -0,0 +1,53 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class AliExpressLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://live\.aliexpress\.com/live/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://live.aliexpress.com/live/2800002704436634',
|
||||
'md5': 'e729e25d47c5e557f2630eaf99b740a5',
|
||||
'info_dict': {
|
||||
'id': '2800002704436634',
|
||||
'ext': 'mp4',
|
||||
'title': 'CASIMA7.22',
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
'uploader': 'CASIMA Official Store',
|
||||
'timestamp': 1500717600,
|
||||
'upload_date': '20170722',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)runParams\s*=\s*({.+?})\s*;?\s*var',
|
||||
webpage, 'runParams'),
|
||||
video_id)
|
||||
|
||||
title = data['title']
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
data['replyStreamUrl'], video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': data.get('coverUrl'),
|
||||
'uploader': try_get(
|
||||
data, lambda x: x['followBar']['name'], compat_str),
|
||||
'timestamp': float_or_none(data.get('startTimeLong'), scale=1000),
|
||||
'formats': formats,
|
||||
}
|
33
youtube_dl/extractor/aljazeera.py
Normal file
33
youtube_dl/extractor/aljazeera.py
Normal file
|
@ -0,0 +1,33 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class AlJazeeraIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?:programmes|video)/.*?/(?P<id>[^/]+)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
|
||||
'info_dict': {
|
||||
'id': '3792260579001',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Slum - Episode 1: Deliverance',
|
||||
'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.',
|
||||
'uploader_id': '665003303001',
|
||||
'timestamp': 1411116829,
|
||||
'upload_date': '20140919',
|
||||
},
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
}, {
|
||||
'url': 'http://www.aljazeera.com/video/news/2017/05/sierra-leone-709-carat-diamond-auctioned-170511100111930.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
program_name = self._match_id(url)
|
||||
webpage = self._download_webpage(url, program_name)
|
||||
brightcove_id = self._search_regex(
|
||||
r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id')
|
||||
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
|
132
youtube_dl/extractor/allocine.py
Normal file
132
youtube_dl/extractor/allocine.py
Normal file
|
@ -0,0 +1,132 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
remove_end,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
class AllocineIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?:article|video|film)/(?:fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P<id>[0-9]+)(?:\.html)?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html',
|
||||
'md5': '0c9fcf59a841f65635fa300ac43d8269',
|
||||
'info_dict': {
|
||||
'id': '19546517',
|
||||
'display_id': '18635087',
|
||||
'ext': 'mp4',
|
||||
'title': 'Astérix - Le Domaine des Dieux Teaser VF',
|
||||
'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
'duration': 39,
|
||||
'timestamp': 1404273600,
|
||||
'upload_date': '20140702',
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html',
|
||||
'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0',
|
||||
'info_dict': {
|
||||
'id': '19540403',
|
||||
'display_id': '19540403',
|
||||
'ext': 'mp4',
|
||||
'title': 'Planes 2 Bande-annonce VF',
|
||||
'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway',
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
'duration': 69,
|
||||
'timestamp': 1385659800,
|
||||
'upload_date': '20131128',
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html',
|
||||
'md5': '101250fb127ef9ca3d73186ff22a47ce',
|
||||
'info_dict': {
|
||||
'id': '19544709',
|
||||
'display_id': '19544709',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dragons 2 - Bande annonce finale VF',
|
||||
'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a',
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
'duration': 144,
|
||||
'timestamp': 1397589900,
|
||||
'upload_date': '20140415',
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.allocine.fr/video/video-19550147/',
|
||||
'md5': '3566c0668c0235e2d224fd8edb389f67',
|
||||
'info_dict': {
|
||||
'id': '19550147',
|
||||
'ext': 'mp4',
|
||||
'title': 'Faux Raccord N°123 - Les gaffes de Cliffhanger',
|
||||
'description': 'md5:bc734b83ffa2d8a12188d9eb48bb6354',
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
formats = []
|
||||
quality = qualities(['ld', 'md', 'hd'])
|
||||
|
||||
model = self._html_search_regex(
|
||||
r'data-model="([^"]+)"', webpage, 'data model', default=None)
|
||||
if model:
|
||||
model_data = self._parse_json(model, display_id)
|
||||
video = model_data['videos'][0]
|
||||
title = video['title']
|
||||
for video_url in video['sources'].values():
|
||||
video_id, format_id = url_basename(video_url).split('_')[:2]
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id),
|
||||
'url': video_url,
|
||||
})
|
||||
duration = int_or_none(video.get('duration'))
|
||||
view_count = int_or_none(video.get('view_count'))
|
||||
timestamp = unified_timestamp(try_get(
|
||||
video, lambda x: x['added_at']['date'], compat_str))
|
||||
else:
|
||||
video_id = display_id
|
||||
media_data = self._download_json(
|
||||
'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
|
||||
title = remove_end(
|
||||
self._html_search_regex(
|
||||
r'(?s)<title>(.+?)</title>', webpage, 'title').strip(),
|
||||
' - AlloCiné')
|
||||
for key, value in media_data['video'].items():
|
||||
if not key.endswith('Path'):
|
||||
continue
|
||||
format_id = key[:-len('Path')]
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id),
|
||||
'url': value,
|
||||
})
|
||||
duration, view_count, timestamp = [None] * 3
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
|
77
youtube_dl/extractor/alphaporno.py
Normal file
77
youtube_dl/extractor/alphaporno.py
Normal file
|
@ -0,0 +1,77 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
parse_duration,
|
||||
parse_filesize,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AlphaPornoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?alphaporno\.com/videos/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.alphaporno.com/videos/sensual-striptease-porn-with-samantha-alexandra/',
|
||||
'md5': 'feb6d3bba8848cd54467a87ad34bd38e',
|
||||
'info_dict': {
|
||||
'id': '258807',
|
||||
'display_id': 'sensual-striptease-porn-with-samantha-alexandra',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sensual striptease porn with Samantha Alexandra',
|
||||
'thumbnail': r're:https?://.*\.jpg$',
|
||||
'timestamp': 1418694611,
|
||||
'upload_date': '20141216',
|
||||
'duration': 387,
|
||||
'filesize_approx': 54120000,
|
||||
'tbr': 1145,
|
||||
'categories': list,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r"video_id\s*:\s*'([^']+)'", webpage, 'video id', default=None)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r"video_url\s*:\s*'([^']+)'", webpage, 'video url')
|
||||
ext = self._html_search_meta(
|
||||
'encodingFormat', webpage, 'ext', default='.mp4')[1:]
|
||||
|
||||
title = self._search_regex(
|
||||
[r'<meta content="([^"]+)" itemprop="description">',
|
||||
r'class="title" itemprop="name">([^<]+)<'],
|
||||
webpage, 'title')
|
||||
thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail')
|
||||
timestamp = parse_iso8601(self._html_search_meta(
|
||||
'uploadDate', webpage, 'upload date'))
|
||||
duration = parse_duration(self._html_search_meta(
|
||||
'duration', webpage, 'duration'))
|
||||
filesize_approx = parse_filesize(self._html_search_meta(
|
||||
'contentSize', webpage, 'file size'))
|
||||
bitrate = int_or_none(self._html_search_meta(
|
||||
'bitrate', webpage, 'bitrate'))
|
||||
categories = self._html_search_meta(
|
||||
'keywords', webpage, 'categories', default='').split(',')
|
||||
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'ext': ext,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'filesize_approx': filesize_approx,
|
||||
'tbr': bitrate,
|
||||
'categories': categories,
|
||||
'age_limit': age_limit,
|
||||
}
|
118
youtube_dl/extractor/amcnetworks.py
Normal file
118
youtube_dl/extractor/amcnetworks.py
Normal file
|
@ -0,0 +1,118 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
try_get,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class AMCNetworksIE(ThePlatformIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
|
||||
'md5': '',
|
||||
'info_dict': {
|
||||
'id': 's3MX01Nl4vPH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Maron - Season 4 - Step 1',
|
||||
'description': 'In denial about his current situation, Marc is reluctantly convinced by his friends to enter rehab. Starring Marc Maron and Constance Zimmer.',
|
||||
'age_limit': 17,
|
||||
'upload_date': '20160505',
|
||||
'timestamp': 1462468831,
|
||||
'uploader': 'AMCN',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Requires TV provider accounts',
|
||||
}, {
|
||||
'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.amc.com/shows/preacher/full-episodes/season-01/episode-00/pilot',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.wetv.com/shows/million-dollar-matchmaker/season-01/episode-06-the-dumped-dj-and-shallow-hal',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.ifc.com/movies/chaos',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bbcamerica.com/shows/doctor-who/full-episodes/the-power-of-the-daleks/episode-01-episode-1-color-version',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.wetv.com/shows/mama-june-from-not-to-hot/full-episode/season-01/thin-tervention',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.wetv.com/shows/la-hair/videos/season-05/episode-09-episode-9-2/episode-9-sneak-peek-3',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
media_url = self._search_regex(
|
||||
r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)',
|
||||
webpage, 'media url')
|
||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||
r'link\.theplatform\.com/s/([^?]+)',
|
||||
media_url, 'theplatform_path'), display_id)
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
video_id = theplatform_metadata['pid']
|
||||
title = theplatform_metadata['title']
|
||||
rating = try_get(
|
||||
theplatform_metadata, lambda x: x['ratings'][0]['rating'])
|
||||
auth_required = self._search_regex(
|
||||
r'window\.authRequired\s*=\s*(true|false);',
|
||||
webpage, 'auth required')
|
||||
if auth_required == 'true':
|
||||
requestor_id = self._search_regex(
|
||||
r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)',
|
||||
webpage, 'requestor id')
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, title, video_id, rating)
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
media_url = update_url_query(media_url, query)
|
||||
formats, subtitles = self._extract_theplatform_smil(
|
||||
media_url, video_id)
|
||||
self._sort_formats(formats)
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
'age_limit': parse_age_limit(parse_age_limit(rating)),
|
||||
})
|
||||
ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
|
||||
if ns_keys:
|
||||
ns = list(ns_keys)[0]
|
||||
series = theplatform_metadata.get(ns + '$show')
|
||||
season_number = int_or_none(
|
||||
theplatform_metadata.get(ns + '$season'))
|
||||
episode = theplatform_metadata.get(ns + '$episodeTitle')
|
||||
episode_number = int_or_none(
|
||||
theplatform_metadata.get(ns + '$episode'))
|
||||
if season_number:
|
||||
title = 'Season %d - %s' % (season_number, title)
|
||||
if series:
|
||||
title = '%s - %s' % (series, title)
|
||||
info.update({
|
||||
'title': title,
|
||||
'series': series,
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
})
|
||||
return info
|
82
youtube_dl/extractor/americastestkitchen.py
Normal file
82
youtube_dl/extractor/americastestkitchen.py
Normal file
|
@ -0,0 +1,82 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class AmericasTestKitchenIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
|
||||
'md5': 'b861c3e365ac38ad319cfd509c30577f',
|
||||
'info_dict': {
|
||||
'id': '5b400b9ee338f922cb06450c',
|
||||
'title': 'Weeknight Japanese Suppers',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:3d0c1a44bb3b27607ce82652db25b4a8',
|
||||
'thumbnail': r're:^https?://',
|
||||
'timestamp': 1523664000,
|
||||
'upload_date': '20180414',
|
||||
'release_date': '20180414',
|
||||
'series': "America's Test Kitchen",
|
||||
'season_number': 18,
|
||||
'episode': 'Weeknight Japanese Suppers',
|
||||
'episode_number': 15,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
|
||||
webpage, 'initial context'),
|
||||
video_id, js_to_json)
|
||||
|
||||
ep_data = try_get(
|
||||
video_data,
|
||||
(lambda x: x['episodeDetail']['content']['data'],
|
||||
lambda x: x['videoDetail']['content']['data']), dict)
|
||||
ep_meta = ep_data.get('full_video', {})
|
||||
|
||||
zype_id = ep_data.get('zype_id') or ep_meta['zype_id']
|
||||
|
||||
title = ep_data.get('title') or ep_meta.get('title')
|
||||
description = clean_html(ep_meta.get('episode_description') or ep_data.get(
|
||||
'description') or ep_meta.get('description'))
|
||||
thumbnail = try_get(ep_meta, lambda x: x['photo']['image_url'])
|
||||
release_date = unified_strdate(ep_data.get('aired_at'))
|
||||
|
||||
season_number = int_or_none(ep_meta.get('season_number'))
|
||||
episode = ep_meta.get('title')
|
||||
episode_number = int_or_none(ep_meta.get('episode_number'))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id,
|
||||
'ie_key': 'Zype',
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'release_date': release_date,
|
||||
'series': "America's Test Kitchen",
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
}
|
102
youtube_dl/extractor/amp.py
Normal file
102
youtube_dl/extractor/amp.py
Normal file
|
@ -0,0 +1,102 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AMPIE(InfoExtractor):
|
||||
# parse Akamai Adaptive Media Player feed
|
||||
def _extract_feed_info(self, url):
|
||||
feed = self._download_json(
|
||||
url, None, 'Downloading Akamai AMP feed',
|
||||
'Unable to download Akamai AMP feed')
|
||||
item = feed.get('channel', {}).get('item')
|
||||
if not item:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error']))
|
||||
|
||||
video_id = item['guid']
|
||||
|
||||
def get_media_node(name, default=None):
|
||||
media_name = 'media-%s' % name
|
||||
media_group = item.get('media-group') or item
|
||||
return media_group.get(media_name) or item.get(media_name) or item.get(name, default)
|
||||
|
||||
thumbnails = []
|
||||
media_thumbnail = get_media_node('thumbnail')
|
||||
if media_thumbnail:
|
||||
if isinstance(media_thumbnail, dict):
|
||||
media_thumbnail = [media_thumbnail]
|
||||
for thumbnail_data in media_thumbnail:
|
||||
thumbnail = thumbnail_data.get('@attributes', {})
|
||||
thumbnail_url = url_or_none(thumbnail.get('url'))
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': self._proto_relative_url(thumbnail_url, 'http:'),
|
||||
'width': int_or_none(thumbnail.get('width')),
|
||||
'height': int_or_none(thumbnail.get('height')),
|
||||
})
|
||||
|
||||
subtitles = {}
|
||||
media_subtitle = get_media_node('subTitle')
|
||||
if media_subtitle:
|
||||
if isinstance(media_subtitle, dict):
|
||||
media_subtitle = [media_subtitle]
|
||||
for subtitle_data in media_subtitle:
|
||||
subtitle = subtitle_data.get('@attributes', {})
|
||||
subtitle_href = url_or_none(subtitle.get('href'))
|
||||
if not subtitle_href:
|
||||
continue
|
||||
subtitles.setdefault(subtitle.get('lang') or 'en', []).append({
|
||||
'url': subtitle_href,
|
||||
'ext': mimetype2ext(subtitle.get('type')) or determine_ext(subtitle_href),
|
||||
})
|
||||
|
||||
formats = []
|
||||
media_content = get_media_node('content')
|
||||
if isinstance(media_content, dict):
|
||||
media_content = [media_content]
|
||||
for media_data in media_content:
|
||||
media = media_data.get('@attributes', {})
|
||||
media_url = url_or_none(media.get('url'))
|
||||
if not media_url:
|
||||
continue
|
||||
ext = mimetype2ext(media.get('type')) or determine_ext(media_url)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
media_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
|
||||
'url': media_url,
|
||||
'tbr': int_or_none(media.get('bitrate')),
|
||||
'filesize': int_or_none(media.get('fileSize')),
|
||||
'ext': ext,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
timestamp = parse_iso8601(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': get_media_node('title'),
|
||||
'description': get_media_node('description'),
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': timestamp,
|
||||
'duration': int_or_none(media_content[0].get('@attributes', {}).get('duration')),
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
293
youtube_dl/extractor/animeondemand.py
Normal file
293
youtube_dl/extractor/animeondemand.py
Normal file
|
@ -0,0 +1,293 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class AnimeOnDemandIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?anime-on-demand\.de/anime/(?P<id>\d+)'
|
||||
_LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
|
||||
_APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
|
||||
_NETRC_MACHINE = 'animeondemand'
|
||||
# German-speaking countries of Europe
|
||||
_GEO_COUNTRIES = ['AT', 'CH', 'DE', 'LI', 'LU']
|
||||
_TESTS = [{
|
||||
# jap, OmU
|
||||
'url': 'https://www.anime-on-demand.de/anime/161',
|
||||
'info_dict': {
|
||||
'id': '161',
|
||||
'title': 'Grimgar, Ashes and Illusions (OmU)',
|
||||
'description': 'md5:6681ce3c07c7189d255ac6ab23812d31',
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
}, {
|
||||
# Film wording is used instead of Episode, ger/jap, Dub/OmU
|
||||
'url': 'https://www.anime-on-demand.de/anime/39',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Episodes without titles, jap, OmU
|
||||
'url': 'https://www.anime-on-demand.de/anime/162',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# ger/jap, Dub/OmU, account required
|
||||
'url': 'https://www.anime-on-demand.de/anime/169',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Full length film, non-series, ger/jap, Dub/OmU, account required
|
||||
'url': 'https://www.anime-on-demand.de/anime/185',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Flash videos
|
||||
'url': 'https://www.anime-on-demand.de/anime/12',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading login page')
|
||||
|
||||
if '>Our licensing terms allow the distribution of animes only to German-speaking countries of Europe' in login_page:
|
||||
self.raise_geo_restricted(
|
||||
'%s is only available in German-speaking countries of Europe' % self.IE_NAME)
|
||||
|
||||
login_form = self._form_hidden_inputs('new_user', login_page)
|
||||
|
||||
login_form.update({
|
||||
'user[login]': username,
|
||||
'user[password]': password,
|
||||
})
|
||||
|
||||
post_url = self._search_regex(
|
||||
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
|
||||
'post url', default=self._LOGIN_URL, group='url')
|
||||
|
||||
if not post_url.startswith('http'):
|
||||
post_url = urljoin(self._LOGIN_URL, post_url)
|
||||
|
||||
response = self._download_webpage(
|
||||
post_url, None, 'Logging in',
|
||||
data=urlencode_postdata(login_form), headers={
|
||||
'Referer': self._LOGIN_URL,
|
||||
})
|
||||
|
||||
if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')):
|
||||
error = self._search_regex(
|
||||
r'<p[^>]+\bclass=(["\'])(?:(?!\1).)*\balert\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</p>',
|
||||
response, 'error', default=None, group='error')
|
||||
if error:
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _real_extract(self, url):
|
||||
anime_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, anime_id)
|
||||
|
||||
if 'data-playlist=' not in webpage:
|
||||
self._download_webpage(
|
||||
self._APPLY_HTML5_URL, anime_id,
|
||||
'Activating HTML5 beta', 'Unable to apply HTML5 beta')
|
||||
webpage = self._download_webpage(url, anime_id)
|
||||
|
||||
csrf_token = self._html_search_meta(
|
||||
'csrf-token', webpage, 'csrf token', fatal=True)
|
||||
|
||||
anime_title = self._html_search_regex(
|
||||
r'(?s)<h1[^>]+itemprop="name"[^>]*>(.+?)</h1>',
|
||||
webpage, 'anime name')
|
||||
anime_description = self._html_search_regex(
|
||||
r'(?s)<div[^>]+itemprop="description"[^>]*>(.+?)</div>',
|
||||
webpage, 'anime description', default=None)
|
||||
|
||||
entries = []
|
||||
|
||||
def extract_info(html, video_id, num=None):
|
||||
title, description = [None] * 2
|
||||
formats = []
|
||||
|
||||
for input_ in re.findall(
|
||||
r'<input[^>]+class=["\'].*?streamstarter[^>]+>', html):
|
||||
attributes = extract_attributes(input_)
|
||||
title = attributes.get('data-dialog-header')
|
||||
playlist_urls = []
|
||||
for playlist_key in ('data-playlist', 'data-otherplaylist', 'data-stream'):
|
||||
playlist_url = attributes.get(playlist_key)
|
||||
if isinstance(playlist_url, compat_str) and re.match(
|
||||
r'/?[\da-zA-Z]+', playlist_url):
|
||||
playlist_urls.append(attributes[playlist_key])
|
||||
if not playlist_urls:
|
||||
continue
|
||||
|
||||
lang = attributes.get('data-lang')
|
||||
lang_note = attributes.get('value')
|
||||
|
||||
for playlist_url in playlist_urls:
|
||||
kind = self._search_regex(
|
||||
r'videomaterialurl/\d+/([^/]+)/',
|
||||
playlist_url, 'media kind', default=None)
|
||||
format_id_list = []
|
||||
if lang:
|
||||
format_id_list.append(lang)
|
||||
if kind:
|
||||
format_id_list.append(kind)
|
||||
if not format_id_list and num is not None:
|
||||
format_id_list.append(compat_str(num))
|
||||
format_id = '-'.join(format_id_list)
|
||||
format_note = ', '.join(filter(None, (kind, lang_note)))
|
||||
item_id_list = []
|
||||
if format_id:
|
||||
item_id_list.append(format_id)
|
||||
item_id_list.append('videomaterial')
|
||||
playlist = self._download_json(
|
||||
urljoin(url, playlist_url), video_id,
|
||||
'Downloading %s JSON' % ' '.join(item_id_list),
|
||||
headers={
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'X-CSRF-Token': csrf_token,
|
||||
'Referer': url,
|
||||
'Accept': 'application/json, text/javascript, */*; q=0.01',
|
||||
}, fatal=False)
|
||||
if not playlist:
|
||||
continue
|
||||
stream_url = url_or_none(playlist.get('streamurl'))
|
||||
if stream_url:
|
||||
rtmp = re.search(
|
||||
r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',
|
||||
stream_url)
|
||||
if rtmp:
|
||||
formats.append({
|
||||
'url': rtmp.group('url'),
|
||||
'app': rtmp.group('app'),
|
||||
'play_path': rtmp.group('playpath'),
|
||||
'page_url': url,
|
||||
'player_url': 'https://www.anime-on-demand.de/assets/jwplayer.flash-55abfb34080700304d49125ce9ffb4a6.swf',
|
||||
'rtmp_real_time': True,
|
||||
'format_id': 'rtmp',
|
||||
'ext': 'flv',
|
||||
})
|
||||
continue
|
||||
start_video = playlist.get('startvideo', 0)
|
||||
playlist = playlist.get('playlist')
|
||||
if not playlist or not isinstance(playlist, list):
|
||||
continue
|
||||
playlist = playlist[start_video]
|
||||
title = playlist.get('title')
|
||||
if not title:
|
||||
continue
|
||||
description = playlist.get('description')
|
||||
for source in playlist.get('sources', []):
|
||||
file_ = source.get('file')
|
||||
if not file_:
|
||||
continue
|
||||
ext = determine_ext(file_)
|
||||
format_id_list = [lang, kind]
|
||||
if ext == 'm3u8':
|
||||
format_id_list.append('hls')
|
||||
elif source.get('type') == 'video/dash' or ext == 'mpd':
|
||||
format_id_list.append('dash')
|
||||
format_id = '-'.join(filter(None, format_id_list))
|
||||
if ext == 'm3u8':
|
||||
file_formats = self._extract_m3u8_formats(
|
||||
file_, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id=format_id, fatal=False)
|
||||
elif source.get('type') == 'video/dash' or ext == 'mpd':
|
||||
continue
|
||||
file_formats = self._extract_mpd_formats(
|
||||
file_, video_id, mpd_id=format_id, fatal=False)
|
||||
else:
|
||||
continue
|
||||
for f in file_formats:
|
||||
f.update({
|
||||
'language': lang,
|
||||
'format_note': format_note,
|
||||
})
|
||||
formats.extend(file_formats)
|
||||
|
||||
return {
|
||||
'title': title,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
def extract_entries(html, video_id, common_info, num=None):
|
||||
info = extract_info(html, video_id, num)
|
||||
|
||||
if info['formats']:
|
||||
self._sort_formats(info['formats'])
|
||||
f = common_info.copy()
|
||||
f.update(info)
|
||||
entries.append(f)
|
||||
|
||||
# Extract teaser/trailer only when full episode is not available
|
||||
if not info['formats']:
|
||||
m = re.search(
|
||||
r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>(?P<kind>Teaser|Trailer)<',
|
||||
html)
|
||||
if m:
|
||||
f = common_info.copy()
|
||||
f.update({
|
||||
'id': '%s-%s' % (f['id'], m.group('kind').lower()),
|
||||
'title': m.group('title'),
|
||||
'url': urljoin(url, m.group('href')),
|
||||
})
|
||||
entries.append(f)
|
||||
|
||||
def extract_episodes(html):
|
||||
for num, episode_html in enumerate(re.findall(
|
||||
r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', html), 1):
|
||||
episodebox_title = self._search_regex(
|
||||
(r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
|
||||
r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
|
||||
episode_html, 'episodebox title', default=None, group='title')
|
||||
if not episodebox_title:
|
||||
continue
|
||||
|
||||
episode_number = int(self._search_regex(
|
||||
r'(?:Episode|Film)\s*(\d+)',
|
||||
episodebox_title, 'episode number', default=num))
|
||||
episode_title = self._search_regex(
|
||||
r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
|
||||
episodebox_title, 'episode title', default=None)
|
||||
|
||||
video_id = 'episode-%d' % episode_number
|
||||
|
||||
common_info = {
|
||||
'id': video_id,
|
||||
'series': anime_title,
|
||||
'episode': episode_title,
|
||||
'episode_number': episode_number,
|
||||
}
|
||||
|
||||
extract_entries(episode_html, video_id, common_info)
|
||||
|
||||
def extract_film(html, video_id):
|
||||
common_info = {
|
||||
'id': anime_id,
|
||||
'title': anime_title,
|
||||
'description': anime_description,
|
||||
}
|
||||
extract_entries(html, video_id, common_info)
|
||||
|
||||
extract_episodes(webpage)
|
||||
|
||||
if not entries:
|
||||
extract_film(webpage, anime_id)
|
||||
|
||||
return self.playlist_result(entries, anime_id, anime_title, anime_description)
|
314
youtube_dl/extractor/anvato.py
Normal file
314
youtube_dl/extractor/anvato.py
Normal file
|
@ -0,0 +1,314 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_encrypt
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
bytes_to_intlist,
|
||||
determine_ext,
|
||||
intlist_to_bytes,
|
||||
int_or_none,
|
||||
strip_jsonp,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
)
|
||||
|
||||
|
||||
def md5_text(s):
|
||||
if not isinstance(s, compat_str):
|
||||
s = compat_str(s)
|
||||
return hashlib.md5(s.encode('utf-8')).hexdigest()
|
||||
|
||||
|
||||
class AnvatoIE(InfoExtractor):
|
||||
_VALID_URL = r'anvato:(?P<access_key_or_mcp>[^:]+):(?P<id>\d+)'
|
||||
|
||||
# Copied from anvplayer.min.js
|
||||
_ANVACK_TABLE = {
|
||||
'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ',
|
||||
'nbcu_nbcd_desktop_web_qa_1a6f01bdd0dc45a439043b694c8a031d': 'eSxJUbA2UUKBTXryyQ2d6NuM8oEqaPySvaPzfKNA',
|
||||
'nbcu_nbcd_desktop_web_acc_eb2ff240a5d4ae9a63d4c297c32716b6c523a129': '89JR3RtUGbvKuuJIiKOMK0SoarLb5MUx8v89RcbP',
|
||||
'nbcu_nbcd_watchvod_web_prod_e61107507180976724ec8e8319fe24ba5b4b60e1': 'Uc7dFt7MJ9GsBWB5T7iPvLaMSOt8BBxv4hAXk5vv',
|
||||
'nbcu_nbcd_watchvod_web_qa_42afedba88a36203db5a4c09a5ba29d045302232': 'T12oDYVFP2IaFvxkmYMy5dKxswpLHtGZa4ZAXEi7',
|
||||
'nbcu_nbcd_watchvod_web_acc_9193214448e2e636b0ffb78abacfd9c4f937c6ca': 'MmobcxUxMedUpohNWwXaOnMjlbiyTOBLL6d46ZpR',
|
||||
'nbcu_local_monitor_web_acc_f998ad54eaf26acd8ee033eb36f39a7b791c6335': 'QvfIoPYrwsjUCcASiw3AIkVtQob2LtJHfidp9iWg',
|
||||
'nbcu_cable_monitor_web_acc_a413759603e8bedfcd3c61b14767796e17834077': 'uwVPJLShvJWSs6sWEIuVem7MTF8A4IknMMzIlFto',
|
||||
'nbcu_nbcd_mcpstage_web_qa_4c43a8f6e95a88dbb40276c0630ba9f693a63a4e': 'PxVYZVwjhgd5TeoPRxL3whssb5OUPnM3zyAzq8GY',
|
||||
'nbcu_comcast_comcast_web_prod_074080762ad4ce956b26b43fb22abf153443a8c4': 'afnaRZfDyg1Z3WZHdupKfy6xrbAG2MHqe3VfuSwh',
|
||||
'nbcu_comcast_comcast_web_qa_706103bb93ead3ef70b1de12a0e95e3c4481ade0': 'DcjsVbX9b3uoPlhdriIiovgFQZVxpISZwz0cx1ZK',
|
||||
'nbcu_comcast_comcastcable_web_prod_669f04817536743563d7331c9293e59fbdbe3d07': '0RwMN2cWy10qhAhOscq3eK7aEe0wqnKt3vJ0WS4D',
|
||||
'nbcu_comcast_comcastcable_web_qa_3d9d2d66219094127f0f6b09cc3c7bb076e3e1ca': '2r8G9DEya7PCqBceKZgrn2XkXgASjwLMuaFE1Aad',
|
||||
'hearst_hearst_demo_web_stage_960726dfef3337059a01a78816e43b29ec04dfc7': 'cuZBPXTR6kSdoTCVXwk5KGA8rk3NrgGn4H6e9Dsp',
|
||||
'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922': 'IOaaLQ8ymqVyem14QuAvE5SndQynTcH5CrLkU2Ih',
|
||||
'anvato_nextmedia_demo_web_stage_9787d56a02ff6b9f43e9a2b0920d8ca88beb5818': 'Pqu9zVzI1ApiIzbVA3VkGBEQHvdKSUuKpD6s2uaR',
|
||||
'anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a': 'du1ccmn7RxzgizwbWU7hyUaGodNlJn7HtXI0WgXW',
|
||||
'anvato_scripps_app_web_stage_360797e00fe2826be142155c4618cc52fce6c26c': '2PMrQ0BRoqCWl7nzphj0GouIMEh2mZYivAT0S1Su',
|
||||
'fs2go_fs2go_go_all_prod_21934911ccfafc03a075894ead2260d11e2ddd24': 'RcuHlKikW2IJw6HvVoEkqq2UsuEJlbEl11pWXs4Q',
|
||||
'fs2go_fs2go_go_web_prod_ead4b0eec7460c1a07783808db21b49cf1f2f9a7': '4K0HTT2u1zkQA2MaGaZmkLa1BthGSBdr7jllrhk5',
|
||||
'fs2go_fs2go_go_web_stage_407585454a4400355d4391691c67f361': 'ftnc37VKRJBmHfoGGi3kT05bHyeJzilEzhKJCyl3',
|
||||
'fs2go_fs2go_go_android_stage_44b714db6f8477f29afcba15a41e1d30': 'CtxpPvVpo6AbZGomYUhkKs7juHZwNml9b9J0J2gI',
|
||||
'anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67': 'Pw0XX5KBDsyRnPS0R2JrSrXftsy8Jnz5pAjaYC8s',
|
||||
'anvato_cbslocal_app_web_stage_547a5f096594cd3e00620c6f825cad1096d28c80': '37OBUhX2uwNyKhhrNzSSNHSRPZpApC3trdqDBpuz',
|
||||
'fs2go_att_att_web_prod_1042dddd089a05438b6a08f972941176f699ffd8': 'JLcF20JwYvpv6uAGcLWIaV12jKwaL1R8us4b6Zkg',
|
||||
'fs2go_att_att_web_stage_807c5001955fc114a3331fe027ddc76e': 'gbu1oO1y0JiOFh4SUipt86P288JHpyjSqolrrT1x',
|
||||
'fs2go_fs2go_tudor_web_prod_a7dd8e5a7cdc830cae55eae6f3e9fee5ee49eb9b': 'ipcp87VCEZXPPe868j3orLqzc03oTy7DXsGkAXXH',
|
||||
'anvato_mhz_app_web_prod_b808218b30de7fdf60340cbd9831512bc1bf6d37': 'Stlm5Gs6BEhJLRTZHcNquyzxGqr23EuFmE5DCgjX',
|
||||
'fs2go_charter_charter_web_stage_c2c6e5a68375a1bf00fff213d3ff8f61a835a54c': 'Lz4hbJp1fwL6jlcz4M2PMzghM4jp4aAmybtT5dPc',
|
||||
'fs2go_charter_charter_web_prod_ebfe3b10f1af215a7321cd3d629e0b81dfa6fa8c': 'vUJsK345A1bVmyYDRhZX0lqFIgVXuqhmuyp1EtPK',
|
||||
'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b': 'GDKq1ixvX3MoBNdU5IOYmYa2DTUXYOozPjrCJnW7',
|
||||
'anvato_epfox_app_web_stage_a3c2ce60f8f83ef374a88b68ee73a950f8ab87ce': '2jz2NH4BsXMaDsoJ5qkHMbcczAfIReo2eFYuVC1C',
|
||||
'fs2go_verizon_verizon_web_stage_08e6df0354a4803f1b1f2428b5a9a382e8dbcd62': 'rKTVapNaAcmnUbGL4ZcuOoY4SE7VmZSQsblPFr7e',
|
||||
'fs2go_verizon_verizon_web_prod_f909564cb606eff1f731b5e22e0928676732c445': 'qLSUuHerM3u9eNPzaHyUK52obai5MvE4XDJfqYe1',
|
||||
'fs2go_foxcom_synd_web_stage_f7b9091f00ea25a4fdaaae77fca5b54cdc7e7043': '96VKF2vLd24fFiDfwPFpzM5llFN4TiIGAlodE0Re',
|
||||
'fs2go_foxcom_synd_web_prod_0f2cdd64d87e4ab6a1d54aada0ff7a7c8387a064': 'agiPjbXEyEZUkbuhcnmVPhe9NNVbDjCFq2xkcx51',
|
||||
'anvato_own_app_web_stage_1214ade5d28422c4dae9d03c1243aba0563c4dba': 'mzhamNac3swG4WsJAiUTacnGIODi6SWeVWk5D7ho',
|
||||
'anvato_own_app_web_prod_944e162ed927ec3e9ed13eb68ed2f1008ee7565e': '9TSxh6G2TXOLBoYm9ro3LdNjjvnXpKb8UR8KoIP9',
|
||||
'anvato_scripps_app_ftv_prod_a10a10468edd5afb16fb48171c03b956176afad1': 'COJ2i2UIPK7xZqIWswxe7FaVBOVgRkP1F6O6qGoH',
|
||||
'anvato_scripps_app_ftv_stage_77d3ad2bdb021ec37ca2e35eb09acd396a974c9a': 'Q7nnopNLe2PPfGLOTYBqxSaRpl209IhqaEuDZi1F',
|
||||
'anvato_univision_app_web_stage_551236ef07a0e17718c3995c35586b5ed8cb5031': 'D92PoLS6UitwxDRA191HUGT9OYcOjV6mPMa5wNyo',
|
||||
'anvato_univision_app_web_prod_039a5c0a6009e637ae8ac906718a79911e0e65e1': '5mVS5u4SQjtw6NGw2uhMbKEIONIiLqRKck5RwQLR',
|
||||
'nbcu_cnbc_springfield_ios_prod_670207fae43d6e9a94c351688851a2ce': 'M7fqCCIP9lW53oJbHs19OlJlpDrVyc2OL8gNeuTa',
|
||||
'nbcu_cnbc_springfieldvod_ios_prod_7a5f04b1ceceb0e9c9e2264a44aa236e08e034c2': 'Yia6QbJahW0S7K1I0drksimhZb4UFq92xLBmmMvk',
|
||||
'anvato_cox_app_web_prod_ce45cda237969f93e7130f50ee8bb6280c1484ab': 'cc0miZexpFtdoqZGvdhfXsLy7FXjRAOgb9V0f5fZ',
|
||||
'anvato_cox_app_web_stage_c23dbe016a8e9d8c7101d10172b92434f6088bf9': 'yivU3MYHd2eDZcOfmLbINVtqxyecKTOp8OjOuoGJ',
|
||||
'anvato_chnzero_app_web_stage_b1164d1352b579e792e542fddf13ee34c0eeb46b': 'A76QkXMmVH8lTCfU15xva1mZnSVcqeY4Xb22Kp7m',
|
||||
'anvato_chnzero_app_web_prod_253d358928dc08ec161eda2389d53707288a730c': 'OA5QI3ZWZZkdtUEDqh28AH8GedsF6FqzJI32596b',
|
||||
'anvato_discovery_vodpoc_web_stage_9fa7077b5e8af1f8355f65d4fb8d2e0e9d54e2b7': 'q3oT191tTQ5g3JCP67PkjLASI9s16DuWZ6fYmry3',
|
||||
'anvato_discovery_vodpoc_web_prod_688614983167a1af6cdf6d76343fda10a65223c1': 'qRvRQCTVHd0VVOHsMvvfidyWmlYVrTbjby7WqIuK',
|
||||
'nbcu_cnbc_springfieldvod_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua',
|
||||
'nbcu_cnbc_springfield_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua',
|
||||
'nbcu_nbcd_capture_web_stage_4dd9d585bfb984ebf856dee35db027b2465cc4ae': '0j1Ov4Vopyi2HpBZJYdL2m8ERJVGYh3nNpzPiO8F',
|
||||
'nbcu_nbcd_watch3_android_prod_7712ca5fcf1c22f19ec1870a9650f9c37db22dcf': '3LN2UB3rPUAMu7ZriWkHky9vpLMXYha8JbSnxBlx',
|
||||
'nbcu_nbcd_watchvod3_android_prod_0910a3a4692d57c0b5ff4316075bc5d096be45b9': 'mJagcQ2II30vUOAauOXne7ERwbf5S9nlB3IP17lQ',
|
||||
'anvato_scripps_app_atv_prod_790deda22e16e71e83df58f880cd389908a45d52': 'CB6trI1mpoDIM5o54DNTsji90NDBQPZ4z4RqBNSH',
|
||||
'nbcu_nbcd_watchv4_android_prod_ff67cef9cb409158c6f8c3533edddadd0b750507': 'j8CHQCUWjlYERj4NFRmUYOND85QNbHViH09UwuKm',
|
||||
'nbcu_nbcd_watchvodv4_android_prod_a814d781609989dea6a629d50ae4c7ad8cc8e907': 'rkVnUXxdA9rawVLUlDQtMue9Y4Q7lFEaIotcUhjt',
|
||||
'rvVKpA50qlOPLFxMjrCGf5pdkdQDm7qn': '1J7ZkY5Qz5lMLi93QOH9IveE7EYB3rLl',
|
||||
'nbcu_dtv_local_web_prod_b266cf49defe255fd4426a97e27c09e513e9f82f': 'HuLnJDqzLa4saCzYMJ79zDRSQpEduw1TzjMNQu2b',
|
||||
'nbcu_att_local_web_prod_4cef038b2d969a6b7d700a56a599040b6a619f67': 'Q0Em5VDc2KpydUrVwzWRXAwoNBulWUxCq2faK0AV',
|
||||
'nbcu_dish_local_web_prod_c56dcaf2da2e9157a4266c82a78195f1dd570f6b': 'bC1LWmRz9ayj2AlzizeJ1HuhTfIaJGsDBnZNgoRg',
|
||||
'nbcu_verizon_local_web_prod_88bebd2ce006d4ed980de8133496f9a74cb9b3e1': 'wzhDKJZpgvUSS1EQvpCQP8Q59qVzcPixqDGJefSk',
|
||||
'nbcu_charter_local_web_prod_9ad90f7fc4023643bb718f0fe0fd5beea2382a50': 'PyNbxNhEWLzy1ZvWEQelRuIQY88Eub7xbSVRMdfT',
|
||||
'nbcu_suddenlink_local_web_prod_20fb711725cac224baa1c1cb0b1c324d25e97178': '0Rph41lPXZbb3fqeXtHjjbxfSrNbtZp1Ygq7Jypa',
|
||||
'nbcu_wow_local_web_prod_652d9ce4f552d9c2e7b5b1ed37b8cb48155174ad': 'qayIBZ70w1dItm2zS42AptXnxW15mkjRrwnBjMPv',
|
||||
'nbcu_centurylink_local_web_prod_2034402b029bf3e837ad46814d9e4b1d1345ccd5': 'StePcPMkjsX51PcizLdLRMzxMEl5k2FlsMLUNV4k',
|
||||
'nbcu_atlanticbrd_local_web_prod_8d5f5ecbf7f7b2f5e6d908dd75d90ae3565f682e': 'NtYLb4TFUS0pRs3XTkyO5sbVGYjVf17bVbjaGscI',
|
||||
'nbcu_nbcd_watchvod_web_dev_08bc05699be47c4f31d5080263a8cfadc16d0f7c': 'hwxi2dgDoSWgfmVVXOYZm14uuvku4QfopstXckhr',
|
||||
'anvato_nextmedia_app_web_prod_a4fa8c7204aa65e71044b57aaf63711980cfe5a0': 'tQN1oGPYY1nM85rJYePWGcIb92TG0gSqoVpQTWOw',
|
||||
'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749': 'GUXNf5ZDX2jFUpu4WT2Go4DJ5nhUCzpnwDRRUx1K',
|
||||
'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa': 'bLDYF8JqfG42b7bwKEgQiU9E2LTIAtnKzSgYpFUH',
|
||||
'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a': 'icgGoYGipQMMSEvhplZX1pwbN69srwKYWksz3xWK',
|
||||
'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336': 'fA2iQdI7RDpynqzQYIpXALVS83NTPr8LLFK4LFsu',
|
||||
'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg',
|
||||
'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg',
|
||||
'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99': 'P3uXJ0fXXditBPCGkfvlnVScpPEfKmc64Zv7ZgbK',
|
||||
'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe': 'mGPvo5ZA5SgjOFAPEPXv7AnOpFUICX8hvFQVz69n',
|
||||
'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582': 'qyT6PXXLjVNCrHaRVj0ugAhalNRS7Ee9BP7LUokD',
|
||||
'nbcu_nbcd_watchvodv4_web_stage_4108362fba2d4ede21f262fea3c4162cbafd66c7': 'DhaU5lj0W2gEdcSSsnxURq8t7KIWtJfD966crVDk',
|
||||
'anvato_scripps_app_ios_prod_409c41960c60b308db43c3cc1da79cab9f1c3d93': 'WPxj5GraLTkYCyj3M7RozLqIycjrXOEcDGFMIJPn',
|
||||
'EZqvRyKBJLrgpClDPDF8I7Xpdp40Vx73': '4OxGd2dEakylntVKjKF0UK9PDPYB6A9W',
|
||||
'M2v78QkpleXm9hPp9jUXI63x5vA6BogR': 'ka6K32k7ZALmpINkjJUGUo0OE42Md1BQ',
|
||||
'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ'
|
||||
}
|
||||
|
||||
_MCP_TO_ACCESS_KEY_TABLE = {
|
||||
'qa': 'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922',
|
||||
'lin': 'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749',
|
||||
'univison': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa',
|
||||
'uni': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa',
|
||||
'dev': 'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a',
|
||||
'sps': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336',
|
||||
'spsstg': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336',
|
||||
'anv': 'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3',
|
||||
'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900',
|
||||
'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99',
|
||||
'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe',
|
||||
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
|
||||
}
|
||||
|
||||
_API_KEY = '3hwbSuqqT690uxjNYBktSQpa5ZrpYYR0Iofx7NcJHyA'
|
||||
|
||||
_ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
|
||||
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'
|
||||
|
||||
_TESTS = [{
|
||||
# from https://www.boston25news.com/news/watch-humpback-whale-breaches-right-next-to-fishing-boat-near-nh/817484874
|
||||
'url': 'anvato:8v9BEynrwx8EFLYpgfOWcG1qJqyXKlRM:4465496',
|
||||
'info_dict': {
|
||||
'id': '4465496',
|
||||
'ext': 'mp4',
|
||||
'title': 'VIDEO: Humpback whale breaches right next to NH boat',
|
||||
'description': 'VIDEO: Humpback whale breaches right next to NH boat. Footage courtesy: Zach Fahey.',
|
||||
'duration': 22,
|
||||
'timestamp': 1534855680,
|
||||
'upload_date': '20180821',
|
||||
'uploader': 'ANV',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# from https://sanfrancisco.cbslocal.com/2016/06/17/source-oakland-cop-on-leave-for-having-girlfriend-help-with-police-reports/
|
||||
'url': 'anvato:DVzl9QRzox3ZZsP9bNu5Li3X7obQOnqP:3417601',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(AnvatoIE, self).__init__(*args, **kwargs)
|
||||
self.__server_time = None
|
||||
|
||||
def _server_time(self, access_key, video_id):
|
||||
if self.__server_time is not None:
|
||||
return self.__server_time
|
||||
|
||||
self.__server_time = int(self._download_json(
|
||||
self._api_prefix(access_key) + 'server_time?anvack=' + access_key, video_id,
|
||||
note='Fetching server time')['server_time'])
|
||||
|
||||
return self.__server_time
|
||||
|
||||
def _api_prefix(self, access_key):
|
||||
return 'https://tkx2-%s.anvato.net/rest/v2/' % ('prod' if 'prod' in access_key else 'stage')
|
||||
|
||||
def _get_video_json(self, access_key, video_id):
|
||||
# See et() in anvplayer.min.js, which is an alias of getVideoJSON()
|
||||
video_data_url = self._api_prefix(access_key) + 'mcp/video/%s?anvack=%s' % (video_id, access_key)
|
||||
server_time = self._server_time(access_key, video_id)
|
||||
input_data = '%d~%s~%s' % (server_time, md5_text(video_data_url), md5_text(server_time))
|
||||
|
||||
auth_secret = intlist_to_bytes(aes_encrypt(
|
||||
bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY)))
|
||||
|
||||
video_data_url += '&X-Anvato-Adst-Auth=' + base64.b64encode(auth_secret).decode('ascii')
|
||||
anvrid = md5_text(time.time() * 1000 * random.random())[:30]
|
||||
payload = {
|
||||
'api': {
|
||||
'anvrid': anvrid,
|
||||
'anvstk': md5_text('%s|%s|%d|%s' % (
|
||||
access_key, anvrid, server_time,
|
||||
self._ANVACK_TABLE.get(access_key, self._API_KEY))),
|
||||
'anvts': server_time,
|
||||
},
|
||||
}
|
||||
|
||||
return self._download_json(
|
||||
video_data_url, video_id, transform_source=strip_jsonp,
|
||||
data=json.dumps(payload).encode('utf-8'))
|
||||
|
||||
def _get_anvato_videos(self, access_key, video_id):
|
||||
video_data = self._get_video_json(access_key, video_id)
|
||||
|
||||
formats = []
|
||||
for published_url in video_data['published_urls']:
|
||||
video_url = published_url['embed_url']
|
||||
media_format = published_url.get('format')
|
||||
ext = determine_ext(video_url)
|
||||
|
||||
if ext == 'smil' or media_format == 'smil':
|
||||
formats.extend(self._extract_smil_formats(video_url, video_id))
|
||||
continue
|
||||
|
||||
tbr = int_or_none(published_url.get('kbps'))
|
||||
a_format = {
|
||||
'url': video_url,
|
||||
'format_id': ('-'.join(filter(None, ['http', published_url.get('cdn_name')]))).lower(),
|
||||
'tbr': tbr if tbr != 0 else None,
|
||||
}
|
||||
|
||||
if media_format == 'm3u8' and tbr is not None:
|
||||
a_format.update({
|
||||
'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
|
||||
'ext': 'mp4',
|
||||
})
|
||||
elif media_format == 'm3u8-variant' or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
elif ext == 'mp3' or media_format == 'mp3':
|
||||
a_format['vcodec'] = 'none'
|
||||
else:
|
||||
a_format.update({
|
||||
'width': int_or_none(published_url.get('width')),
|
||||
'height': int_or_none(published_url.get('height')),
|
||||
})
|
||||
formats.append(a_format)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for caption in video_data.get('captions', []):
|
||||
a_caption = {
|
||||
'url': caption['url'],
|
||||
'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None
|
||||
}
|
||||
subtitles.setdefault(caption['language'], []).append(a_caption)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': video_data.get('def_title'),
|
||||
'description': video_data.get('def_description'),
|
||||
'tags': video_data.get('def_tags', '').split(','),
|
||||
'categories': video_data.get('categories'),
|
||||
'thumbnail': video_data.get('thumbnail'),
|
||||
'timestamp': int_or_none(video_data.get(
|
||||
'ts_published') or video_data.get('ts_added')),
|
||||
'uploader': video_data.get('mcp_id'),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(ie, webpage, video_id):
|
||||
entries = []
|
||||
for mobj in re.finditer(AnvatoIE._ANVP_RE, webpage):
|
||||
anvplayer_data = ie._parse_json(
|
||||
mobj.group('anvp'), video_id, transform_source=unescapeHTML,
|
||||
fatal=False)
|
||||
if not anvplayer_data:
|
||||
continue
|
||||
video = anvplayer_data.get('video')
|
||||
if not isinstance(video, compat_str) or not video.isdigit():
|
||||
continue
|
||||
access_key = anvplayer_data.get('accessKey')
|
||||
if not access_key:
|
||||
mcp = anvplayer_data.get('mcp')
|
||||
if mcp:
|
||||
access_key = AnvatoIE._MCP_TO_ACCESS_KEY_TABLE.get(
|
||||
mcp.lower())
|
||||
if not access_key:
|
||||
continue
|
||||
entries.append(ie.url_result(
|
||||
'anvato:%s:%s' % (access_key, video), ie=AnvatoIE.ie_key(),
|
||||
video_id=video))
|
||||
return entries
|
||||
|
||||
def _extract_anvato_videos(self, webpage, video_id):
|
||||
anvplayer_data = self._parse_json(
|
||||
self._html_search_regex(
|
||||
self._ANVP_RE, webpage, 'Anvato player data', group='anvp'),
|
||||
video_id)
|
||||
return self._get_anvato_videos(
|
||||
anvplayer_data['accessKey'], anvplayer_data['video'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
self._initialize_geo_bypass({
|
||||
'countries': smuggled_data.get('geo_countries'),
|
||||
})
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
access_key, video_id = mobj.group('access_key_or_mcp', 'id')
|
||||
if access_key not in self._ANVACK_TABLE:
|
||||
access_key = self._MCP_TO_ACCESS_KEY_TABLE.get(
|
||||
access_key) or access_key
|
||||
return self._get_anvato_videos(access_key, video_id)
|
133
youtube_dl/extractor/aol.py
Normal file
133
youtube_dl/extractor/aol.py
Normal file
|
@ -0,0 +1,133 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AolIE(InfoExtractor):
|
||||
IE_NAME = 'aol.com'
|
||||
_VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>[0-9a-f]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# video with 5min ID
|
||||
'url': 'https://www.aol.com/video/view/u-s--official-warns-of-largest-ever-irs-phone-scam/518167793/',
|
||||
'md5': '18ef68f48740e86ae94b98da815eec42',
|
||||
'info_dict': {
|
||||
'id': '518167793',
|
||||
'ext': 'mp4',
|
||||
'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
|
||||
'description': 'A major phone scam has cost thousands of taxpayers more than $1 million, with less than a month until income tax returns are due to the IRS.',
|
||||
'timestamp': 1395405060,
|
||||
'upload_date': '20140321',
|
||||
'uploader': 'Newsy Studio',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# video with vidible ID
|
||||
'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/',
|
||||
'info_dict': {
|
||||
'id': '5707d6b8e4b090497b04f706',
|
||||
'ext': 'mp4',
|
||||
'title': 'Netflix is Raising Rates',
|
||||
'description': 'Netflix is rewarding millions of it’s long-standing members with an increase in cost. Veuer’s Carly Figueroa has more.',
|
||||
'upload_date': '20160408',
|
||||
'timestamp': 1460123280,
|
||||
'uploader': 'Veuer',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.aol.com/video/view/donald-trump-spokeswoman-tones-down-megyn-kelly-attacks/519442220/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'aol-video:5707d6b8e4b090497b04f706',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.aol.com/video/playlist/PL8245/5ca79d19d21f1a04035db606/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.aol.ca/video/view/u-s-woman-s-family-arrested-for-murder-first-pinned-on-panhandler-police/5c7ccf45bc03931fa04b2fe1/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.aol.co.uk/video/view/-one-dead-and-22-hurt-in-bus-crash-/5cb3a6f3d21f1a072b457347/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.aol.de/video/view/eva-braun-privataufnahmen-von-hitlers-geliebter-werden-digitalisiert/5cb2d49de98ab54c113d3d5d/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.aol.jp/video/playlist/5a28e936a1334d000137da0c/5a28f3151e642219fde19831/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
response = self._download_json(
|
||||
'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
|
||||
video_id)['response']
|
||||
if response['statusText'] != 'Ok':
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusText']), expected=True)
|
||||
|
||||
video_data = response['data']
|
||||
formats = []
|
||||
m3u8_url = url_or_none(video_data.get('videoMasterPlaylist'))
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
for rendition in video_data.get('renditions', []):
|
||||
video_url = url_or_none(rendition.get('url'))
|
||||
if not video_url:
|
||||
continue
|
||||
ext = rendition.get('format')
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
f = {
|
||||
'url': video_url,
|
||||
'format_id': rendition.get('quality'),
|
||||
}
|
||||
mobj = re.search(r'(\d+)x(\d+)', video_url)
|
||||
if mobj:
|
||||
f.update({
|
||||
'width': int(mobj.group(1)),
|
||||
'height': int(mobj.group(2)),
|
||||
})
|
||||
else:
|
||||
qs = compat_parse_qs(compat_urllib_parse_urlparse(video_url).query)
|
||||
f.update({
|
||||
'width': int_or_none(qs.get('w', [None])[0]),
|
||||
'height': int_or_none(qs.get('h', [None])[0]),
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_data['title'],
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'timestamp': int_or_none(video_data.get('publishDate')),
|
||||
'view_count': int_or_none(video_data.get('views')),
|
||||
'description': video_data.get('description'),
|
||||
'uploader': video_data.get('videoOwner'),
|
||||
'formats': formats,
|
||||
}
|
94
youtube_dl/extractor/apa.py
Normal file
94
youtube_dl/extractor/apa.py
Normal file
|
@ -0,0 +1,94 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
js_to_json,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class APAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://[^/]+\.apa\.at/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029',
|
||||
'md5': '2b12292faeb0a7d930c778c7a5b4759b',
|
||||
'info_dict': {
|
||||
'id': 'jjv85FdZ',
|
||||
'ext': 'mp4',
|
||||
'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 254,
|
||||
'timestamp': 1519211149,
|
||||
'upload_date': '20180221',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://uvp-apapublisher.sf.apa.at/embed/2f94e9e6-d945-4db2-9548-f9a41ebf7b78',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://uvp-rma.sf.apa.at/embed/70404cca-2f47-4855-bbb8-20b1fae58f76',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://uvp-kleinezeitung.sf.apa.at/embed/f1c44979-dba2-4ebf-b021-e4cf2cac3c81',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [
|
||||
mobj.group('url')
|
||||
for mobj in re.finditer(
|
||||
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//[^/]+\.apa\.at/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}.*?)\1',
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
jwplatform_id = self._search_regex(
|
||||
r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
|
||||
'jwplatform id', default=None)
|
||||
|
||||
if jwplatform_id:
|
||||
return self.url_result(
|
||||
'jwplatform:' + jwplatform_id, ie='JWPlatform',
|
||||
video_id=video_id)
|
||||
|
||||
sources = self._parse_json(
|
||||
self._search_regex(
|
||||
r'sources\s*=\s*(\[.+?\])\s*;', webpage, 'sources'),
|
||||
video_id, transform_source=js_to_json)
|
||||
|
||||
formats = []
|
||||
for source in sources:
|
||||
if not isinstance(source, dict):
|
||||
continue
|
||||
source_url = url_or_none(source.get('file'))
|
||||
if not source_url:
|
||||
continue
|
||||
ext = determine_ext(source_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r'image\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'thumbnail', fatal=False, group='url')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_id,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
}
|
95
youtube_dl/extractor/aparat.py
Normal file
95
youtube_dl/extractor/aparat.py
Normal file
|
@ -0,0 +1,95 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
mimetype2ext,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AparatIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.aparat.com/v/wP8On',
|
||||
'md5': '131aca2e14fe7c4dcb3c4877ba300c89',
|
||||
'info_dict': {
|
||||
'id': 'wP8On',
|
||||
'ext': 'mp4',
|
||||
'title': 'تیم گلکسی 11 - زومیت',
|
||||
'description': 'md5:096bdabcdcc4569f2b8a5e903a3b3028',
|
||||
'duration': 231,
|
||||
'timestamp': 1387394859,
|
||||
'upload_date': '20131218',
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
# multiple formats
|
||||
'url': 'https://www.aparat.com/v/8dflw/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
# Provides more metadata
|
||||
webpage = self._download_webpage(url, video_id, fatal=False)
|
||||
|
||||
if not webpage:
|
||||
# Note: There is an easier-to-parse configuration at
|
||||
# http://www.aparat.com/video/video/config/videohash/%video_id
|
||||
# but the URL in there does not work
|
||||
webpage = self._download_webpage(
|
||||
'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
|
||||
video_id)
|
||||
|
||||
options = self._parse_json(
|
||||
self._search_regex(
|
||||
r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1\s*\)',
|
||||
webpage, 'options', group='value'),
|
||||
video_id)
|
||||
|
||||
player = options['plugins']['sabaPlayerPlugin']
|
||||
|
||||
formats = []
|
||||
for sources in player['multiSRC']:
|
||||
for item in sources:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
file_url = url_or_none(item.get('src'))
|
||||
if not file_url:
|
||||
continue
|
||||
item_type = item.get('type')
|
||||
if item_type == 'application/vnd.apple.mpegurl':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
file_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||
fatal=False))
|
||||
else:
|
||||
ext = mimetype2ext(item.get('type'))
|
||||
label = item.get('label')
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
'ext': ext,
|
||||
'format_id': 'http-%s' % (label or ext),
|
||||
'height': int_or_none(self._search_regex(
|
||||
r'(\d+)[pP]', label or '', 'height',
|
||||
default=None)),
|
||||
})
|
||||
self._sort_formats(
|
||||
formats, field_preference=('height', 'width', 'tbr', 'format_id'))
|
||||
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
|
||||
if not info.get('title'):
|
||||
info['title'] = player['title']
|
||||
|
||||
return merge_dicts(info, {
|
||||
'id': video_id,
|
||||
'thumbnail': url_or_none(options.get('poster')),
|
||||
'duration': int_or_none(player.get('duration')),
|
||||
'formats': formats,
|
||||
})
|
50
youtube_dl/extractor/appleconnect.py
Normal file
50
youtube_dl/extractor/appleconnect.py
Normal file
|
@ -0,0 +1,50 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
str_to_int,
|
||||
ExtractorError
|
||||
)
|
||||
|
||||
|
||||
class AppleConnectIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
|
||||
_TEST = {
|
||||
'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
|
||||
'md5': 'e7c38568a01ea45402570e6029206723',
|
||||
'info_dict': {
|
||||
'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
|
||||
'ext': 'm4v',
|
||||
'title': 'Energy',
|
||||
'uploader': 'Drake',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20150710',
|
||||
'timestamp': 1436545535,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
try:
|
||||
video_json = self._html_search_regex(
|
||||
r'class="auc-video-data">(\{.*?\})', webpage, 'json')
|
||||
except ExtractorError:
|
||||
raise ExtractorError('This post doesn\'t contain a video', expected=True)
|
||||
|
||||
video_data = self._parse_json(video_json, video_id)
|
||||
timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
|
||||
like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_data['sslSrc'],
|
||||
'title': video_data['title'],
|
||||
'description': video_data['description'],
|
||||
'uploader': video_data['artistName'],
|
||||
'thumbnail': video_data['artworkUrl'],
|
||||
'timestamp': timestamp,
|
||||
'like_count': like_count,
|
||||
}
|
283
youtube_dl/extractor/appletrailers.py
Normal file
283
youtube_dl/extractor/appletrailers.py
Normal file
|
@ -0,0 +1,283 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class AppleTrailersIE(InfoExtractor):
|
||||
IE_NAME = 'appletrailers'
|
||||
_VALID_URL = r'https?://(?:www\.|movie)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
|
||||
'info_dict': {
|
||||
'id': '5111',
|
||||
'title': 'Man of Steel',
|
||||
},
|
||||
'playlist': [
|
||||
{
|
||||
'md5': 'd97a8e575432dbcb81b7c3acb741f8a8',
|
||||
'info_dict': {
|
||||
'id': 'manofsteel-trailer4',
|
||||
'ext': 'mov',
|
||||
'duration': 111,
|
||||
'title': 'Trailer 4',
|
||||
'upload_date': '20130523',
|
||||
'uploader_id': 'wb',
|
||||
},
|
||||
},
|
||||
{
|
||||
'md5': 'b8017b7131b721fb4e8d6f49e1df908c',
|
||||
'info_dict': {
|
||||
'id': 'manofsteel-trailer3',
|
||||
'ext': 'mov',
|
||||
'duration': 182,
|
||||
'title': 'Trailer 3',
|
||||
'upload_date': '20130417',
|
||||
'uploader_id': 'wb',
|
||||
},
|
||||
},
|
||||
{
|
||||
'md5': 'd0f1e1150989b9924679b441f3404d48',
|
||||
'info_dict': {
|
||||
'id': 'manofsteel-trailer',
|
||||
'ext': 'mov',
|
||||
'duration': 148,
|
||||
'title': 'Trailer',
|
||||
'upload_date': '20121212',
|
||||
'uploader_id': 'wb',
|
||||
},
|
||||
},
|
||||
{
|
||||
'md5': '5fe08795b943eb2e757fa95cb6def1cb',
|
||||
'info_dict': {
|
||||
'id': 'manofsteel-teaser',
|
||||
'ext': 'mov',
|
||||
'duration': 93,
|
||||
'title': 'Teaser',
|
||||
'upload_date': '20120721',
|
||||
'uploader_id': 'wb',
|
||||
},
|
||||
},
|
||||
]
|
||||
}, {
|
||||
'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
|
||||
'info_dict': {
|
||||
'id': '4489',
|
||||
'title': 'Blackthorn',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
'expected_warnings': ['Unable to download JSON metadata'],
|
||||
}, {
|
||||
# json data only available from http://trailers.apple.com/trailers/feeds/data/15881.json
|
||||
'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/',
|
||||
'info_dict': {
|
||||
'id': '15881',
|
||||
'title': 'Kung Fu Panda 3',
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
}, {
|
||||
'url': 'http://trailers.apple.com/ca/metropole/autrui/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://movietrailers.apple.com/trailers/focus_features/kuboandthetwostrings/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_JSON_RE = r'iTunes.playURL\((.*?)\);'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
movie = mobj.group('movie')
|
||||
uploader_id = mobj.group('company')
|
||||
|
||||
webpage = self._download_webpage(url, movie)
|
||||
film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')
|
||||
film_data = self._download_json(
|
||||
'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id,
|
||||
film_id, fatal=False)
|
||||
|
||||
if film_data:
|
||||
entries = []
|
||||
for clip in film_data.get('clips', []):
|
||||
clip_title = clip['title']
|
||||
|
||||
formats = []
|
||||
for version, version_data in clip.get('versions', {}).items():
|
||||
for size, size_data in version_data.get('sizes', {}).items():
|
||||
src = size_data.get('src')
|
||||
if not src:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': '%s-%s' % (version, size),
|
||||
'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src),
|
||||
'width': int_or_none(size_data.get('width')),
|
||||
'height': int_or_none(size_data.get('height')),
|
||||
'language': version[:2],
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(),
|
||||
'formats': formats,
|
||||
'title': clip_title,
|
||||
'thumbnail': clip.get('screen') or clip.get('thumb'),
|
||||
'duration': parse_duration(clip.get('runtime') or clip.get('faded')),
|
||||
'upload_date': unified_strdate(clip.get('posted')),
|
||||
'uploader_id': uploader_id,
|
||||
})
|
||||
|
||||
page_data = film_data.get('page', {})
|
||||
return self.playlist_result(entries, film_id, page_data.get('movie_title'))
|
||||
|
||||
playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
|
||||
|
||||
def fix_html(s):
|
||||
s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
|
||||
s = re.sub(r'<img ([^<]*?)/?>', r'<img \1/>', s)
|
||||
# The ' in the onClick attributes are not escaped, it couldn't be parsed
|
||||
# like: http://trailers.apple.com/trailers/wb/gravity/
|
||||
|
||||
def _clean_json(m):
|
||||
return 'iTunes.playURL(%s);' % m.group(1).replace('\'', ''')
|
||||
s = re.sub(self._JSON_RE, _clean_json, s)
|
||||
s = '<html>%s</html>' % s
|
||||
return s
|
||||
doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
|
||||
|
||||
playlist = []
|
||||
for li in doc.findall('./div/ul/li'):
|
||||
on_click = li.find('.//a').attrib['onClick']
|
||||
trailer_info_json = self._search_regex(self._JSON_RE,
|
||||
on_click, 'trailer info')
|
||||
trailer_info = json.loads(trailer_info_json)
|
||||
first_url = trailer_info.get('url')
|
||||
if not first_url:
|
||||
continue
|
||||
title = trailer_info['title']
|
||||
video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
|
||||
thumbnail = li.find('.//img').attrib['src']
|
||||
upload_date = trailer_info['posted'].replace('-', '')
|
||||
|
||||
runtime = trailer_info['runtime']
|
||||
m = re.search(r'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime)
|
||||
duration = None
|
||||
if m:
|
||||
duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
|
||||
|
||||
trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
|
||||
settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
|
||||
settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')
|
||||
|
||||
formats = []
|
||||
for format in settings['metadata']['sizes']:
|
||||
# The src is a file pointing to the real video file
|
||||
format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src'])
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format': format['type'],
|
||||
'width': int_or_none(format['width']),
|
||||
'height': int_or_none(format['height']),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
playlist.append({
|
||||
'_type': 'video',
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'uploader_id': uploader_id,
|
||||
'http_headers': {
|
||||
'User-Agent': 'QuickTime compatible (youtube-dlc)',
|
||||
},
|
||||
})
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': movie,
|
||||
'entries': playlist,
|
||||
}
|
||||
|
||||
|
||||
class AppleTrailersSectionIE(InfoExtractor):
|
||||
IE_NAME = 'appletrailers:section'
|
||||
_SECTIONS = {
|
||||
'justadded': {
|
||||
'feed_path': 'just_added',
|
||||
'title': 'Just Added',
|
||||
},
|
||||
'exclusive': {
|
||||
'feed_path': 'exclusive',
|
||||
'title': 'Exclusive',
|
||||
},
|
||||
'justhd': {
|
||||
'feed_path': 'just_hd',
|
||||
'title': 'Just HD',
|
||||
},
|
||||
'mostpopular': {
|
||||
'feed_path': 'most_pop',
|
||||
'title': 'Most Popular',
|
||||
},
|
||||
'moviestudios': {
|
||||
'feed_path': 'studios',
|
||||
'title': 'Movie Studios',
|
||||
},
|
||||
}
|
||||
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS)
|
||||
_TESTS = [{
|
||||
'url': 'http://trailers.apple.com/#section=justadded',
|
||||
'info_dict': {
|
||||
'title': 'Just Added',
|
||||
'id': 'justadded',
|
||||
},
|
||||
'playlist_mincount': 80,
|
||||
}, {
|
||||
'url': 'http://trailers.apple.com/#section=exclusive',
|
||||
'info_dict': {
|
||||
'title': 'Exclusive',
|
||||
'id': 'exclusive',
|
||||
},
|
||||
'playlist_mincount': 80,
|
||||
}, {
|
||||
'url': 'http://trailers.apple.com/#section=justhd',
|
||||
'info_dict': {
|
||||
'title': 'Just HD',
|
||||
'id': 'justhd',
|
||||
},
|
||||
'playlist_mincount': 80,
|
||||
}, {
|
||||
'url': 'http://trailers.apple.com/#section=mostpopular',
|
||||
'info_dict': {
|
||||
'title': 'Most Popular',
|
||||
'id': 'mostpopular',
|
||||
},
|
||||
'playlist_mincount': 30,
|
||||
}, {
|
||||
'url': 'http://trailers.apple.com/#section=moviestudios',
|
||||
'info_dict': {
|
||||
'title': 'Movie Studios',
|
||||
'id': 'moviestudios',
|
||||
},
|
||||
'playlist_mincount': 80,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
section = self._match_id(url)
|
||||
section_data = self._download_json(
|
||||
'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'],
|
||||
section)
|
||||
entries = [
|
||||
self.url_result('http://trailers.apple.com' + e['location'])
|
||||
for e in section_data]
|
||||
return self.playlist_result(entries, section, self._SECTIONS[section]['title'])
|
65
youtube_dl/extractor/archiveorg.py
Normal file
65
youtube_dl/extractor/archiveorg.py
Normal file
|
@ -0,0 +1,65 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
clean_html,
|
||||
)
|
||||
|
||||
|
||||
class ArchiveOrgIE(InfoExtractor):
|
||||
IE_NAME = 'archive.org'
|
||||
IE_DESC = 'archive.org videos'
|
||||
_VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$'
|
||||
_TESTS = [{
|
||||
'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||
'md5': '8af1d4cf447933ed3c7f4871162602db',
|
||||
'info_dict': {
|
||||
'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||
'ext': 'ogg',
|
||||
'title': '1968 Demo - FJCC Conference Presentation Reel #1',
|
||||
'description': 'md5:da45c349df039f1cc8075268eb1b5c25',
|
||||
'upload_date': '19681210',
|
||||
'uploader': 'SRI International'
|
||||
}
|
||||
}, {
|
||||
'url': 'https://archive.org/details/Cops1922',
|
||||
'md5': '0869000b4ce265e8ca62738b336b268a',
|
||||
'info_dict': {
|
||||
'id': 'Cops1922',
|
||||
'ext': 'mp4',
|
||||
'title': 'Buster Keaton\'s "Cops" (1922)',
|
||||
'description': 'md5:89e7c77bf5d965dd5c0372cfb49470f6',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
'http://archive.org/embed/' + video_id, video_id)
|
||||
jwplayer_playlist = self._parse_json(self._search_regex(
|
||||
r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)",
|
||||
webpage, 'jwplayer playlist'), video_id)
|
||||
info = self._parse_jwplayer_data(
|
||||
{'playlist': jwplayer_playlist}, video_id, base_url=url)
|
||||
|
||||
def get_optional(metadata, field):
|
||||
return metadata.get(field, [None])[0]
|
||||
|
||||
metadata = self._download_json(
|
||||
'http://archive.org/details/' + video_id, video_id, query={
|
||||
'output': 'json',
|
||||
})['metadata']
|
||||
info.update({
|
||||
'title': get_optional(metadata, 'title') or info.get('title'),
|
||||
'description': clean_html(get_optional(metadata, 'description')),
|
||||
})
|
||||
if info.get('_type') != 'playlist':
|
||||
info.update({
|
||||
'uploader': get_optional(metadata, 'creator'),
|
||||
'upload_date': unified_strdate(get_optional(metadata, 'date')),
|
||||
})
|
||||
return info
|
422
youtube_dl/extractor/ard.py
Normal file
422
youtube_dl/extractor/ard.py
Normal file
|
@ -0,0 +1,422 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .generic import GenericIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
qualities,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
from ..compat import compat_etree_fromstring
|
||||
|
||||
|
||||
class ARDMediathekBaseIE(InfoExtractor):
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
|
||||
def _extract_media_info(self, media_info_url, webpage, video_id):
|
||||
media_info = self._download_json(
|
||||
media_info_url, video_id, 'Downloading media JSON')
|
||||
return self._parse_media_info(media_info, video_id, '"fsk"' in webpage)
|
||||
|
||||
def _parse_media_info(self, media_info, video_id, fsk):
|
||||
formats = self._extract_formats(media_info, video_id)
|
||||
|
||||
if not formats:
|
||||
if fsk:
|
||||
raise ExtractorError(
|
||||
'This video is only available after 20:00', expected=True)
|
||||
elif media_info.get('_geoblocked'):
|
||||
self.raise_geo_restricted(
|
||||
'This video is not available due to geoblocking',
|
||||
countries=self._GEO_COUNTRIES)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
subtitle_url = media_info.get('_subtitleUrl')
|
||||
if subtitle_url:
|
||||
subtitles['de'] = [{
|
||||
'ext': 'ttml',
|
||||
'url': subtitle_url,
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'duration': int_or_none(media_info.get('_duration')),
|
||||
'thumbnail': media_info.get('_previewImage'),
|
||||
'is_live': media_info.get('_isLive') is True,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _extract_formats(self, media_info, video_id):
|
||||
type_ = media_info.get('_type')
|
||||
media_array = media_info.get('_mediaArray', [])
|
||||
formats = []
|
||||
for num, media in enumerate(media_array):
|
||||
for stream in media.get('_mediaStreamArray', []):
|
||||
stream_urls = stream.get('_stream')
|
||||
if not stream_urls:
|
||||
continue
|
||||
if not isinstance(stream_urls, list):
|
||||
stream_urls = [stream_urls]
|
||||
quality = stream.get('_quality')
|
||||
server = stream.get('_server')
|
||||
for stream_url in stream_urls:
|
||||
if not url_or_none(stream_url):
|
||||
continue
|
||||
ext = determine_ext(stream_url)
|
||||
if quality != 'auto' and ext in ('f4m', 'm3u8'):
|
||||
continue
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
update_url_query(stream_url, {
|
||||
'hdcore': '3.1.1',
|
||||
'plugin': 'aasp-3.1.1.69.124'
|
||||
}), video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
if server and server.startswith('rtmp'):
|
||||
f = {
|
||||
'url': server,
|
||||
'play_path': stream_url,
|
||||
'format_id': 'a%s-rtmp-%s' % (num, quality),
|
||||
}
|
||||
else:
|
||||
f = {
|
||||
'url': stream_url,
|
||||
'format_id': 'a%s-%s-%s' % (num, ext, quality)
|
||||
}
|
||||
m = re.search(
|
||||
r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$',
|
||||
stream_url)
|
||||
if m:
|
||||
f.update({
|
||||
'width': int(m.group('width')),
|
||||
'height': int(m.group('height')),
|
||||
})
|
||||
if type_ == 'audio':
|
||||
f['vcodec'] = 'none'
|
||||
formats.append(f)
|
||||
return formats
|
||||
|
||||
|
||||
class ARDMediathekIE(ARDMediathekBaseIE):
|
||||
IE_NAME = 'ARD:mediathek'
|
||||
_VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
|
||||
_TESTS = [{
|
||||
# available till 26.07.2022
|
||||
'url': 'http://www.ardmediathek.de/tv/S%C3%9CDLICHT/Was-ist-die-Kunst-der-Zukunft-liebe-Ann/BR-Fernsehen/Video?bcastId=34633636&documentId=44726822',
|
||||
'info_dict': {
|
||||
'id': '44726822',
|
||||
'ext': 'mp4',
|
||||
'title': 'Was ist die Kunst der Zukunft, liebe Anna McCarthy?',
|
||||
'description': 'md5:4ada28b3e3b5df01647310e41f3a62f5',
|
||||
'duration': 1740,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://one.ard.de/tv/Mord-mit-Aussicht/Mord-mit-Aussicht-6-39-T%C3%B6dliche-Nach/ONE/Video?bcastId=46384294&documentId=55586872',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if ARDBetaMediathekIE.suitable(url) else super(ARDMediathekIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
# determine video id from url
|
||||
m = re.match(self._VALID_URL, url)
|
||||
|
||||
document_id = None
|
||||
|
||||
numid = re.search(r'documentId=([0-9]+)', url)
|
||||
if numid:
|
||||
document_id = video_id = numid.group(1)
|
||||
else:
|
||||
video_id = m.group('video_id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
ERRORS = (
|
||||
('>Leider liegt eine Störung vor.', 'Video %s is unavailable'),
|
||||
('>Der gewünschte Beitrag ist nicht mehr verfügbar.<',
|
||||
'Video %s is no longer available'),
|
||||
)
|
||||
|
||||
for pattern, message in ERRORS:
|
||||
if pattern in webpage:
|
||||
raise ExtractorError(message % video_id, expected=True)
|
||||
|
||||
if re.search(r'[\?&]rss($|[=&])', url):
|
||||
doc = compat_etree_fromstring(webpage.encode('utf-8'))
|
||||
if doc.tag == 'rss':
|
||||
return GenericIE()._extract_rss(url, video_id, doc)
|
||||
|
||||
title = self._html_search_regex(
|
||||
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
|
||||
r'<meta name="dcterms\.title" content="(.*?)"/>',
|
||||
r'<h4 class="headline">(.*?)</h4>',
|
||||
r'<title[^>]*>(.*?)</title>'],
|
||||
webpage, 'title')
|
||||
description = self._html_search_meta(
|
||||
'dcterms.abstract', webpage, 'description', default=None)
|
||||
if description is None:
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, 'meta description', default=None)
|
||||
if description is None:
|
||||
description = self._html_search_regex(
|
||||
r'<p\s+class="teasertext">(.+?)</p>',
|
||||
webpage, 'teaser text', default=None)
|
||||
|
||||
# Thumbnail is sometimes not present.
|
||||
# It is in the mobile version, but that seems to use a different URL
|
||||
# structure altogether.
|
||||
thumbnail = self._og_search_thumbnail(webpage, default=None)
|
||||
|
||||
media_streams = re.findall(r'''(?x)
|
||||
mediaCollection\.addMediaStream\([0-9]+,\s*[0-9]+,\s*"[^"]*",\s*
|
||||
"([^"]+)"''', webpage)
|
||||
|
||||
if media_streams:
|
||||
QUALITIES = qualities(['lo', 'hi', 'hq'])
|
||||
formats = []
|
||||
for furl in set(media_streams):
|
||||
if furl.endswith('.f4m'):
|
||||
fid = 'f4m'
|
||||
else:
|
||||
fid_m = re.match(r'.*\.([^.]+)\.[^.]+$', furl)
|
||||
fid = fid_m.group(1) if fid_m else None
|
||||
formats.append({
|
||||
'quality': QUALITIES(fid),
|
||||
'format_id': fid,
|
||||
'url': furl,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
info = {
|
||||
'formats': formats,
|
||||
}
|
||||
else: # request JSON file
|
||||
if not document_id:
|
||||
video_id = self._search_regex(
|
||||
r'/play/(?:config|media)/(\d+)', webpage, 'media id')
|
||||
info = self._extract_media_info(
|
||||
'http://www.ardmediathek.de/play/media/%s' % video_id,
|
||||
webpage, video_id)
|
||||
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': self._live_title(title) if info.get('is_live') else title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
})
|
||||
|
||||
return info
|
||||
|
||||
|
||||
class ARDIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
|
||||
_TESTS = [{
|
||||
# available till 14.02.2019
|
||||
'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
|
||||
'md5': '8e4ec85f31be7c7fc08a26cdbc5a1f49',
|
||||
'info_dict': {
|
||||
'display_id': 'das-groko-drama-zerlegen-sich-die-volksparteien-video',
|
||||
'id': '102',
|
||||
'ext': 'mp4',
|
||||
'duration': 4435.0,
|
||||
'title': 'Das GroKo-Drama: Zerlegen sich die Volksparteien?',
|
||||
'upload_date': '20180214',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.daserste.de/information/reportage-dokumentation/erlebnis-erde/videosextern/woelfe-und-herdenschutzhunde-ungleiche-brueder-102.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
player_url = mobj.group('mainurl') + '~playerXml.xml'
|
||||
doc = self._download_xml(player_url, display_id)
|
||||
video_node = doc.find('./video')
|
||||
upload_date = unified_strdate(xpath_text(
|
||||
video_node, './broadcastDate'))
|
||||
thumbnail = xpath_text(video_node, './/teaserImage//variant/url')
|
||||
|
||||
formats = []
|
||||
for a in video_node.findall('.//asset'):
|
||||
f = {
|
||||
'format_id': a.attrib['type'],
|
||||
'width': int_or_none(a.find('./frameWidth').text),
|
||||
'height': int_or_none(a.find('./frameHeight').text),
|
||||
'vbr': int_or_none(a.find('./bitrateVideo').text),
|
||||
'abr': int_or_none(a.find('./bitrateAudio').text),
|
||||
'vcodec': a.find('./codecVideo').text,
|
||||
'tbr': int_or_none(a.find('./totalBitrate').text),
|
||||
}
|
||||
if a.find('./serverPrefix').text:
|
||||
f['url'] = a.find('./serverPrefix').text
|
||||
f['playpath'] = a.find('./fileName').text
|
||||
else:
|
||||
f['url'] = a.find('./fileName').text
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': mobj.group('id'),
|
||||
'formats': formats,
|
||||
'display_id': display_id,
|
||||
'title': video_node.find('./title').text,
|
||||
'duration': parse_duration(video_node.find('./duration').text),
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
|
||||
class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
_VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?:player|live|video)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://ardmediathek.de/ard/video/die-robuste-roswita/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||
'md5': 'dfdc87d2e7e09d073d5a80770a9ce88f',
|
||||
'info_dict': {
|
||||
'display_id': 'die-robuste-roswita',
|
||||
'id': '70153354',
|
||||
'title': 'Die robuste Roswita',
|
||||
'description': r're:^Der Mord.*trüber ist als die Ilm.',
|
||||
'duration': 5316,
|
||||
'thumbnail': 'https://img.ardmediathek.de/standard/00/70/15/33/90/-1852531467/16x9/960?mandant=ard',
|
||||
'timestamp': 1577047500,
|
||||
'upload_date': '20191222',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://ardmediathek.de/ard/video/saartalk/saartalk-gesellschaftsgift-haltung-gegen-hass/sr-fernsehen/Y3JpZDovL3NyLW9ubGluZS5kZS9TVF84MTY4MA/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/ard/video/trailer/private-eyes-s01-e01/one/Y3JpZDovL3dkci5kZS9CZWl0cmFnLTE1MTgwYzczLWNiMTEtNGNkMS1iMjUyLTg5MGYzOWQxZmQ1YQ/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3N3ci5kZS9hZXgvbzEwNzE5MTU/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('video_id')
|
||||
display_id = mobj.group('display_id')
|
||||
if display_id:
|
||||
display_id = display_id.rstrip('/')
|
||||
if not display_id:
|
||||
display_id = video_id
|
||||
|
||||
player_page = self._download_json(
|
||||
'https://api.ardmediathek.de/public-gateway',
|
||||
display_id, data=json.dumps({
|
||||
'query': '''{
|
||||
playerPage(client:"%s", clipId: "%s") {
|
||||
blockedByFsk
|
||||
broadcastedOn
|
||||
maturityContentRating
|
||||
mediaCollection {
|
||||
_duration
|
||||
_geoblocked
|
||||
_isLive
|
||||
_mediaArray {
|
||||
_mediaStreamArray {
|
||||
_quality
|
||||
_server
|
||||
_stream
|
||||
}
|
||||
}
|
||||
_previewImage
|
||||
_subtitleUrl
|
||||
_type
|
||||
}
|
||||
show {
|
||||
title
|
||||
}
|
||||
synopsis
|
||||
title
|
||||
tracking {
|
||||
atiCustomVars {
|
||||
contentId
|
||||
}
|
||||
}
|
||||
}
|
||||
}''' % (mobj.group('client'), video_id),
|
||||
}).encode(), headers={
|
||||
'Content-Type': 'application/json'
|
||||
})['data']['playerPage']
|
||||
title = player_page['title']
|
||||
content_id = str_or_none(try_get(
|
||||
player_page, lambda x: x['tracking']['atiCustomVars']['contentId']))
|
||||
media_collection = player_page.get('mediaCollection') or {}
|
||||
if not media_collection and content_id:
|
||||
media_collection = self._download_json(
|
||||
'https://www.ardmediathek.de/play/media/' + content_id,
|
||||
content_id, fatal=False) or {}
|
||||
info = self._parse_media_info(
|
||||
media_collection, content_id or video_id,
|
||||
player_page.get('blockedByFsk'))
|
||||
age_limit = None
|
||||
description = player_page.get('synopsis')
|
||||
maturity_content_rating = player_page.get('maturityContentRating')
|
||||
if maturity_content_rating:
|
||||
age_limit = int_or_none(maturity_content_rating.lstrip('FSK'))
|
||||
if not age_limit and description:
|
||||
age_limit = int_or_none(self._search_regex(
|
||||
r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None))
|
||||
info.update({
|
||||
'age_limit': age_limit,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
|
||||
'series': try_get(player_page, lambda x: x['show']['title']),
|
||||
})
|
||||
return info
|
133
youtube_dl/extractor/arkena.py
Normal file
133
youtube_dl/extractor/arkena.py
Normal file
|
@ -0,0 +1,133 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
strip_jsonp,
|
||||
)
|
||||
|
||||
|
||||
class ArkenaIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
video\.arkena\.com/play2/embed/player\?|
|
||||
play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P<id>[^/]+)/[^/]+/(?P<account_id>\d+)
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411',
|
||||
'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
|
||||
'info_dict': {
|
||||
'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
|
||||
'ext': 'mp4',
|
||||
'title': 'Big Buck Bunny',
|
||||
'description': 'Royalty free test video',
|
||||
'timestamp': 1432816365,
|
||||
'upload_date': '20150528',
|
||||
'is_live': False,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://play.arkena.com/config/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411/?callbackMethod=jQuery1111023664739129262213_1469227693893',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://play.arkena.com/config/avp/v1/player/media/327336/darkmatter/131064/?callbackMethod=jQuery1111002221189684892677_1469227595972',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://play.arkena.com/embed/avp/v1/player/media/327336/darkmatter/131064/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://video.arkena.com/play2/embed/player?accountId=472718&mediaId=35763b3b-00090078-bf604299&pageStyling=styled',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
# See https://support.arkena.com/display/PLAY/Ways+to+embed+your+video
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//play\.arkena\.com/embed/avp/.+?)\1',
|
||||
webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
account_id = mobj.group('account_id')
|
||||
|
||||
# Handle http://video.arkena.com/play2/embed/player URL
|
||||
if not video_id:
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
video_id = qs.get('mediaId', [None])[0]
|
||||
account_id = qs.get('accountId', [None])[0]
|
||||
if not video_id or not account_id:
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
|
||||
playlist = self._download_json(
|
||||
'https://play.arkena.com/config/avp/v2/player/media/%s/0/%s/?callbackMethod=_'
|
||||
% (video_id, account_id),
|
||||
video_id, transform_source=strip_jsonp)['Playlist'][0]
|
||||
|
||||
media_info = playlist['MediaInfo']
|
||||
title = media_info['Title']
|
||||
media_files = playlist['MediaFiles']
|
||||
|
||||
is_live = False
|
||||
formats = []
|
||||
for kind_case, kind_formats in media_files.items():
|
||||
kind = kind_case.lower()
|
||||
for f in kind_formats:
|
||||
f_url = f.get('Url')
|
||||
if not f_url:
|
||||
continue
|
||||
is_live = f.get('Live') == 'true'
|
||||
exts = (mimetype2ext(f.get('Type')), determine_ext(f_url, None))
|
||||
if kind == 'm3u8' or 'm3u8' in exts:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
f_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=kind, fatal=False, live=is_live))
|
||||
elif kind == 'flash' or 'f4m' in exts:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
f_url, video_id, f4m_id=kind, fatal=False))
|
||||
elif kind == 'dash' or 'mpd' in exts:
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
f_url, video_id, mpd_id=kind, fatal=False))
|
||||
elif kind == 'silverlight':
|
||||
# TODO: process when ism is supported (see
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/8118)
|
||||
continue
|
||||
else:
|
||||
tbr = float_or_none(f.get('Bitrate'), 1000)
|
||||
formats.append({
|
||||
'url': f_url,
|
||||
'format_id': '%s-%d' % (kind, tbr) if tbr else kind,
|
||||
'tbr': tbr,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = media_info.get('Description')
|
||||
video_id = media_info.get('VideoId') or video_id
|
||||
timestamp = parse_iso8601(media_info.get('PublishDate'))
|
||||
thumbnails = [{
|
||||
'url': thumbnail['Url'],
|
||||
'width': int_or_none(thumbnail.get('Size')),
|
||||
} for thumbnail in (media_info.get('Poster') or []) if thumbnail.get('Url')]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'is_live': is_live,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
}
|
201
youtube_dl/extractor/arte.py
Normal file
201
youtube_dl/extractor/arte.py
Normal file
|
@ -0,0 +1,201 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
qualities,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
# There are different sources of video in arte.tv, the extraction process
|
||||
# is different for each one. The videos usually expire in 7 days, so we can't
|
||||
# add tests.
|
||||
|
||||
|
||||
class ArteTVBaseIE(InfoExtractor):
|
||||
def _extract_from_json_url(self, json_url, video_id, lang, title=None):
|
||||
info = self._download_json(json_url, video_id)
|
||||
player_info = info['videoJsonPlayer']
|
||||
|
||||
vsr = try_get(player_info, lambda x: x['VSR'], dict)
|
||||
if not vsr:
|
||||
error = None
|
||||
if try_get(player_info, lambda x: x['custom_msg']['type']) == 'error':
|
||||
error = try_get(
|
||||
player_info, lambda x: x['custom_msg']['msg'], compat_str)
|
||||
if not error:
|
||||
error = 'Video %s is not available' % player_info.get('VID') or video_id
|
||||
raise ExtractorError(error, expected=True)
|
||||
|
||||
upload_date_str = player_info.get('shootingDate')
|
||||
if not upload_date_str:
|
||||
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
|
||||
|
||||
title = (player_info.get('VTI') or title or player_info['VID']).strip()
|
||||
subtitle = player_info.get('VSU', '').strip()
|
||||
if subtitle:
|
||||
title += ' - %s' % subtitle
|
||||
|
||||
info_dict = {
|
||||
'id': player_info['VID'],
|
||||
'title': title,
|
||||
'description': player_info.get('VDE'),
|
||||
'upload_date': unified_strdate(upload_date_str),
|
||||
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
||||
}
|
||||
qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])
|
||||
|
||||
LANGS = {
|
||||
'fr': 'F',
|
||||
'de': 'A',
|
||||
'en': 'E[ANG]',
|
||||
'es': 'E[ESP]',
|
||||
'it': 'E[ITA]',
|
||||
'pl': 'E[POL]',
|
||||
}
|
||||
|
||||
langcode = LANGS.get(lang, lang)
|
||||
|
||||
formats = []
|
||||
for format_id, format_dict in vsr.items():
|
||||
f = dict(format_dict)
|
||||
versionCode = f.get('versionCode')
|
||||
l = re.escape(langcode)
|
||||
|
||||
# Language preference from most to least priority
|
||||
# Reference: section 6.8 of
|
||||
# https://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-07-1.pdf
|
||||
PREFERENCES = (
|
||||
# original version in requested language, without subtitles
|
||||
r'VO{0}$'.format(l),
|
||||
# original version in requested language, with partial subtitles in requested language
|
||||
r'VO{0}-ST{0}$'.format(l),
|
||||
# original version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
|
||||
r'VO{0}-STM{0}$'.format(l),
|
||||
# non-original (dubbed) version in requested language, without subtitles
|
||||
r'V{0}$'.format(l),
|
||||
# non-original (dubbed) version in requested language, with subtitles partial subtitles in requested language
|
||||
r'V{0}-ST{0}$'.format(l),
|
||||
# non-original (dubbed) version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
|
||||
r'V{0}-STM{0}$'.format(l),
|
||||
# original version in requested language, with partial subtitles in different language
|
||||
r'VO{0}-ST(?!{0}).+?$'.format(l),
|
||||
# original version in requested language, with subtitles for the deaf and hard-of-hearing in different language
|
||||
r'VO{0}-STM(?!{0}).+?$'.format(l),
|
||||
# original version in different language, with partial subtitles in requested language
|
||||
r'VO(?:(?!{0}).+?)?-ST{0}$'.format(l),
|
||||
# original version in different language, with subtitles for the deaf and hard-of-hearing in requested language
|
||||
r'VO(?:(?!{0}).+?)?-STM{0}$'.format(l),
|
||||
# original version in different language, without subtitles
|
||||
r'VO(?:(?!{0}))?$'.format(l),
|
||||
# original version in different language, with partial subtitles in different language
|
||||
r'VO(?:(?!{0}).+?)?-ST(?!{0}).+?$'.format(l),
|
||||
# original version in different language, with subtitles for the deaf and hard-of-hearing in different language
|
||||
r'VO(?:(?!{0}).+?)?-STM(?!{0}).+?$'.format(l),
|
||||
)
|
||||
|
||||
for pref, p in enumerate(PREFERENCES):
|
||||
if re.match(p, versionCode):
|
||||
lang_pref = len(PREFERENCES) - pref
|
||||
break
|
||||
else:
|
||||
lang_pref = -1
|
||||
|
||||
format = {
|
||||
'format_id': format_id,
|
||||
'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
|
||||
'language_preference': lang_pref,
|
||||
'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')),
|
||||
'width': int_or_none(f.get('width')),
|
||||
'height': int_or_none(f.get('height')),
|
||||
'tbr': int_or_none(f.get('bitrate')),
|
||||
'quality': qfunc(f.get('quality')),
|
||||
}
|
||||
|
||||
if f.get('mediaType') == 'rtmp':
|
||||
format['url'] = f['streamer']
|
||||
format['play_path'] = 'mp4:' + f['url']
|
||||
format['ext'] = 'flv'
|
||||
else:
|
||||
format['url'] = f['url']
|
||||
|
||||
formats.append(format)
|
||||
|
||||
self._check_formats(formats, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
info_dict['formats'] = formats
|
||||
return info_dict
|
||||
|
||||
|
||||
class ArteTVPlus7IE(ArteTVBaseIE):
|
||||
IE_NAME = 'arte.tv:+7'
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>\d{6}-\d{3}-[AF])'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
|
||||
'info_dict': {
|
||||
'id': '088501-000-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mexico: Stealing Petrol to Survive',
|
||||
'upload_date': '20190628',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, video_id = re.match(self._VALID_URL, url).groups()
|
||||
return self._extract_from_json_url(
|
||||
'https://api.arte.tv/api/player/v1/config/%s/%s' % (lang, video_id),
|
||||
video_id, lang)
|
||||
|
||||
|
||||
class ArteTVEmbedIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:embed'
|
||||
_VALID_URL = r'''(?x)
|
||||
https://www\.arte\.tv
|
||||
/player/v3/index\.php\?json_url=
|
||||
(?P<json_url>
|
||||
https?://api\.arte\.tv/api/player/v1/config/
|
||||
(?P<lang>[^/]+)/(?P<id>\d{6}-\d{3}-[AF])
|
||||
)
|
||||
'''
|
||||
|
||||
_TESTS = []
|
||||
|
||||
def _real_extract(self, url):
|
||||
json_url, lang, video_id = re.match(self._VALID_URL, url).groups()
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
|
||||
|
||||
class ArteTVPlaylistIE(ArteTVBaseIE):
|
||||
IE_NAME = 'arte.tv:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>RC-\d{6})'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
|
||||
'info_dict': {
|
||||
'id': 'RC-016954',
|
||||
'title': 'Earn a Living',
|
||||
'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, playlist_id = re.match(self._VALID_URL, url).groups()
|
||||
collection = self._download_json(
|
||||
'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos'
|
||||
% (lang, playlist_id), playlist_id)
|
||||
title = collection.get('title')
|
||||
description = collection.get('shortDescription') or collection.get('teaserText')
|
||||
entries = [
|
||||
self._extract_from_json_url(
|
||||
video['jsonUrl'], video.get('programId') or playlist_id, lang)
|
||||
for video in collection['videos'] if video.get('jsonUrl')]
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
145
youtube_dl/extractor/asiancrush.py
Normal file
145
youtube_dl/extractor/asiancrush.py
Normal file
|
@ -0,0 +1,145 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import extract_attributes
|
||||
|
||||
|
||||
class AsianCrushIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|cocoro\.tv))'
|
||||
_VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % _VALID_URL_BASE
|
||||
_TESTS = [{
|
||||
'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/',
|
||||
'md5': 'c3b740e48d0ba002a42c0b72857beae6',
|
||||
'info_dict': {
|
||||
'id': '1_y4tmjm5r',
|
||||
'ext': 'mp4',
|
||||
'title': 'Women Who Flirt',
|
||||
'description': 'md5:7e986615808bcfb11756eb503a751487',
|
||||
'timestamp': 1496936429,
|
||||
'upload_date': '20170608',
|
||||
'uploader_id': 'craig@crifkin.com',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.midnightpulp.com/video/010400v/drifters/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
entry_id, partner_id, title = [None] * 3
|
||||
|
||||
vars = self._parse_json(
|
||||
self._search_regex(
|
||||
r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars',
|
||||
default='{}'), video_id, fatal=False)
|
||||
if vars:
|
||||
entry_id = vars.get('entry_id')
|
||||
partner_id = vars.get('partner_id')
|
||||
title = vars.get('vid_label')
|
||||
|
||||
if not entry_id:
|
||||
entry_id = self._search_regex(
|
||||
r'\bentry_id["\']\s*:\s*["\'](\d+)', webpage, 'entry id')
|
||||
|
||||
player = self._download_webpage(
|
||||
'https://api.%s/embeddedVideoPlayer' % host, video_id,
|
||||
query={'id': entry_id})
|
||||
|
||||
kaltura_id = self._search_regex(
|
||||
r'entry_id["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1', player,
|
||||
'kaltura id', group='id')
|
||||
|
||||
if not partner_id:
|
||||
partner_id = self._search_regex(
|
||||
r'/p(?:artner_id)?/(\d+)', player, 'partner id',
|
||||
default='513551')
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div[^>]+\bclass=["\']description["\'][^>]*>(.+?)</div>',
|
||||
webpage, 'description', fatal=False)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'kaltura:%s:%s' % (partner_id, kaltura_id),
|
||||
'ie_key': KalturaIE.ie_key(),
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
}
|
||||
|
||||
|
||||
class AsianCrushPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushIE._VALID_URL_BASE
|
||||
_TESTS = [{
|
||||
'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/',
|
||||
'info_dict': {
|
||||
'id': '12481',
|
||||
'title': 'Scholar Who Walks the Night',
|
||||
'description': 'md5:7addd7c5132a09fd4741152d96cce886',
|
||||
},
|
||||
'playlist_count': 20,
|
||||
}, {
|
||||
'url': 'https://www.yuyutv.com/series/013920s/peep-show/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.midnightpulp.com/series/016375s/mononoke/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
entries = []
|
||||
|
||||
for mobj in re.finditer(
|
||||
r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL,
|
||||
webpage):
|
||||
attrs = extract_attributes(mobj.group(0))
|
||||
if attrs.get('class') == 'clearfix':
|
||||
entries.append(self.url_result(
|
||||
mobj.group('url'), ie=AsianCrushIE.ie_key()))
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
|
||||
'title', default=None) or self._og_search_title(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title',
|
||||
default=None) or self._search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
|
||||
if title:
|
||||
title = re.sub(r'\s*\|\s*.+?$', '', title)
|
||||
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:description', webpage, 'description', fatal=False)
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
118
youtube_dl/extractor/atresplayer.py
Normal file
118
youtube_dl/extractor/atresplayer.py
Normal file
|
@ -0,0 +1,118 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class AtresPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
|
||||
_NETRC_MACHINE = 'atresplayer'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.atresplayer.com/antena3/series/pequenas-coincidencias/temporada-1/capitulo-7-asuntos-pendientes_5d4aa2c57ed1a88fc715a615/',
|
||||
'info_dict': {
|
||||
'id': '5d4aa2c57ed1a88fc715a615',
|
||||
'ext': 'mp4',
|
||||
'title': 'Capítulo 7: Asuntos pendientes',
|
||||
'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc',
|
||||
'duration': 3413,
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
'skip': 'This video is only available for registered users'
|
||||
},
|
||||
{
|
||||
'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
_API_BASE = 'https://api.atresplayer.com/'
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _handle_error(self, e, code):
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == code:
|
||||
error = self._parse_json(e.cause.read(), None)
|
||||
if error.get('error') == 'required_registered':
|
||||
self.raise_login_required()
|
||||
raise ExtractorError(error['error_description'], expected=True)
|
||||
raise
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
self._request_webpage(
|
||||
self._API_BASE + 'login', None, 'Downloading login page')
|
||||
|
||||
try:
|
||||
target_url = self._download_json(
|
||||
'https://account.atresmedia.com/api/login', None,
|
||||
'Logging in', headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
}, data=urlencode_postdata({
|
||||
'username': username,
|
||||
'password': password,
|
||||
}))['targetUrl']
|
||||
except ExtractorError as e:
|
||||
self._handle_error(e, 400)
|
||||
|
||||
self._request_webpage(target_url, None, 'Following Target URL')
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
try:
|
||||
episode = self._download_json(
|
||||
self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
|
||||
except ExtractorError as e:
|
||||
self._handle_error(e, 403)
|
||||
|
||||
title = episode['titulo']
|
||||
|
||||
formats = []
|
||||
for source in episode.get('sources', []):
|
||||
src = source.get('src')
|
||||
if not src:
|
||||
continue
|
||||
src_type = source.get('type')
|
||||
if src_type == 'application/vnd.apple.mpegurl':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif src_type == 'application/dash+xml':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
src, video_id, mpd_id='dash', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
heartbeat = episode.get('heartbeat') or {}
|
||||
omniture = episode.get('omniture') or {}
|
||||
get_meta = lambda x: heartbeat.get(x) or omniture.get(x)
|
||||
|
||||
return {
|
||||
'display_id': display_id,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': episode.get('descripcion'),
|
||||
'thumbnail': episode.get('imgPoster'),
|
||||
'duration': int_or_none(episode.get('duration')),
|
||||
'formats': formats,
|
||||
'channel': get_meta('channel'),
|
||||
'season': get_meta('season'),
|
||||
'episode_number': int_or_none(get_meta('episodeNumber')),
|
||||
}
|
55
youtube_dl/extractor/atttechchannel.py
Normal file
55
youtube_dl/extractor/atttechchannel.py
Normal file
|
@ -0,0 +1,55 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class ATTTechChannelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://techchannel\.att\.com/play-video\.cfm/([^/]+/)*(?P<id>.+)'
|
||||
_TEST = {
|
||||
'url': 'http://techchannel.att.com/play-video.cfm/2014/1/27/ATT-Archives-The-UNIX-System-Making-Computers-Easier-to-Use',
|
||||
'info_dict': {
|
||||
'id': '11316',
|
||||
'display_id': 'ATT-Archives-The-UNIX-System-Making-Computers-Easier-to-Use',
|
||||
'ext': 'flv',
|
||||
'title': 'AT&T Archives : The UNIX System: Making Computers Easier to Use',
|
||||
'description': 'A 1982 film about UNIX is the foundation for software in use around Bell Labs and AT&T.',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20140127',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r"url\s*:\s*'(rtmp://[^']+)'",
|
||||
webpage, 'video URL')
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'mediaid\s*=\s*(\d+)',
|
||||
webpage, 'video id', fatal=False)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'[Rr]elease\s+date:\s*(\d{1,2}/\d{1,2}/\d{4})',
|
||||
webpage, 'upload date', fatal=False), False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
}
|
75
youtube_dl/extractor/atvat.py
Normal file
75
youtube_dl/extractor/atvat.py
Normal file
|
@ -0,0 +1,75 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class ATVAtIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?atv\.at/(?:[^/]+/){2}(?P<id>[dv]\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://atv.at/aktuell/di-210317-2005-uhr/v1698449/',
|
||||
'md5': 'c3b6b975fb3150fc628572939df205f2',
|
||||
'info_dict': {
|
||||
'id': '1698447',
|
||||
'ext': 'mp4',
|
||||
'title': 'DI, 21.03.17 | 20:05 Uhr 1/1',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://atv.at/aktuell/meinrad-knapp/d8416/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_data = self._parse_json(unescapeHTML(self._search_regex(
|
||||
[r'flashPlayerOptions\s*=\s*(["\'])(?P<json>(?:(?!\1).)+)\1',
|
||||
r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="(?P<json>[^"]+)"'],
|
||||
webpage, 'player data', group='json')),
|
||||
display_id)['config']['initial_video']
|
||||
|
||||
video_id = video_data['id']
|
||||
video_title = video_data['title']
|
||||
|
||||
parts = []
|
||||
for part in video_data.get('parts', []):
|
||||
part_id = part['id']
|
||||
part_title = part['title']
|
||||
|
||||
formats = []
|
||||
for source in part.get('sources', []):
|
||||
source_url = source.get('src')
|
||||
if not source_url:
|
||||
continue
|
||||
ext = determine_ext(source_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, part_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': source.get('delivery'),
|
||||
'url': source_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
parts.append({
|
||||
'id': part_id,
|
||||
'title': part_title,
|
||||
'thumbnail': part.get('preview_image_url'),
|
||||
'duration': int_or_none(part.get('duration')),
|
||||
'is_live': part.get('is_livestream'),
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
return {
|
||||
'_type': 'multi_video',
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'entries': parts,
|
||||
}
|
93
youtube_dl/extractor/audimedia.py
Normal file
93
youtube_dl/extractor/audimedia.py
Normal file
|
@ -0,0 +1,93 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class AudiMediaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?audi-mediacenter\.com/(?:en|de)/audimediatv/(?:video/)?(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.audi-mediacenter.com/en/audimediatv/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-1467',
|
||||
'md5': '79a8b71c46d49042609795ab59779b66',
|
||||
'info_dict': {
|
||||
'id': '1565',
|
||||
'ext': 'mp4',
|
||||
'title': '60 Seconds of Audi Sport 104/2015 - WEC Bahrain, Rookie Test',
|
||||
'description': 'md5:60e5d30a78ced725f7b8d34370762941',
|
||||
'upload_date': '20151124',
|
||||
'timestamp': 1448354940,
|
||||
'duration': 74022,
|
||||
'view_count': int,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.audi-mediacenter.com/en/audimediatv/video/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-2991',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
raw_payload = self._search_regex([
|
||||
r'class="amtv-embed"[^>]+id="([0-9a-z-]+)"',
|
||||
r'id="([0-9a-z-]+)"[^>]+class="amtv-embed"',
|
||||
r'class=\\"amtv-embed\\"[^>]+id=\\"([0-9a-z-]+)\\"',
|
||||
r'id=\\"([0-9a-z-]+)\\"[^>]+class=\\"amtv-embed\\"',
|
||||
r'id=(?:\\)?"(amtve-[a-z]-\d+-[a-z]{2})',
|
||||
], webpage, 'raw payload')
|
||||
_, stage_mode, video_id, _ = raw_payload.split('-')
|
||||
|
||||
# TODO: handle s and e stage_mode (live streams and ended live streams)
|
||||
if stage_mode not in ('s', 'e'):
|
||||
video_data = self._download_json(
|
||||
'https://www.audimedia.tv/api/video/v1/videos/' + video_id,
|
||||
video_id, query={
|
||||
'embed[]': ['video_versions', 'thumbnail_image'],
|
||||
})['results']
|
||||
formats = []
|
||||
|
||||
stream_url_hls = video_data.get('stream_url_hls')
|
||||
if stream_url_hls:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream_url_hls, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
||||
|
||||
stream_url_hds = video_data.get('stream_url_hds')
|
||||
if stream_url_hds:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stream_url_hds + '?hdcore=3.4.0',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
|
||||
for video_version in video_data.get('video_versions', []):
|
||||
video_version_url = video_version.get('download_url') or video_version.get('stream_url')
|
||||
if not video_version_url:
|
||||
continue
|
||||
f = {
|
||||
'url': video_version_url,
|
||||
'width': int_or_none(video_version.get('width')),
|
||||
'height': int_or_none(video_version.get('height')),
|
||||
'abr': int_or_none(video_version.get('audio_bitrate')),
|
||||
'vbr': int_or_none(video_version.get('video_bitrate')),
|
||||
}
|
||||
bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None)
|
||||
if bitrate:
|
||||
f.update({
|
||||
'format_id': 'http-%s' % bitrate,
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_data['title'],
|
||||
'description': video_data.get('subtitle'),
|
||||
'thumbnail': video_data.get('thumbnail_image', {}).get('file'),
|
||||
'timestamp': parse_iso8601(video_data.get('publication_date')),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'view_count': int_or_none(video_data.get('view_count')),
|
||||
'formats': formats,
|
||||
}
|
73
youtube_dl/extractor/audioboom.py
Normal file
73
youtube_dl/extractor/audioboom.py
Normal file
|
@ -0,0 +1,73 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
float_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AudioBoomIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://audioboom.com/posts/7398103-asim-chaudhry',
|
||||
'md5': '7b00192e593ff227e6a315486979a42d',
|
||||
'info_dict': {
|
||||
'id': '7398103',
|
||||
'ext': 'mp3',
|
||||
'title': 'Asim Chaudhry',
|
||||
'description': 'md5:2f3fef17dacc2595b5362e1d7d3602fc',
|
||||
'duration': 4000.99,
|
||||
'uploader': 'Sue Perkins: An hour or so with...',
|
||||
'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
clip = None
|
||||
|
||||
clip_store = self._parse_json(
|
||||
self._html_search_regex(
|
||||
r'data-new-clip-store=(["\'])(?P<json>{.+?})\1',
|
||||
webpage, 'clip store', default='{}', group='json'),
|
||||
video_id, fatal=False)
|
||||
if clip_store:
|
||||
clips = clip_store.get('clips')
|
||||
if clips and isinstance(clips, list) and isinstance(clips[0], dict):
|
||||
clip = clips[0]
|
||||
|
||||
def from_clip(field):
|
||||
if clip:
|
||||
return clip.get(field)
|
||||
|
||||
audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property(
|
||||
'audio', webpage, 'audio url')
|
||||
title = from_clip('title') or self._html_search_meta(
|
||||
['og:title', 'og:audio:title', 'audio_title'], webpage)
|
||||
description = from_clip('description') or clean_html(from_clip('formattedDescription')) or self._og_search_description(webpage)
|
||||
|
||||
duration = float_or_none(from_clip('duration') or self._html_search_meta(
|
||||
'weibo:audio:duration', webpage))
|
||||
|
||||
uploader = from_clip('author') or self._html_search_meta(
|
||||
['og:audio:artist', 'twitter:audio:artist_name', 'audio_artist'], webpage, 'uploader')
|
||||
uploader_url = from_clip('author_url') or self._html_search_meta(
|
||||
'audioboo:channel', webpage, 'uploader url')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': audio_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'uploader_url': uploader_url,
|
||||
}
|
145
youtube_dl/extractor/audiomack.py
Normal file
145
youtube_dl/extractor/audiomack.py
Normal file
|
@ -0,0 +1,145 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .soundcloud import SoundcloudIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
class AudiomackIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?audiomack\.com/song/(?P<id>[\w/-]+)'
|
||||
IE_NAME = 'audiomack'
|
||||
_TESTS = [
|
||||
# hosted on audiomack
|
||||
{
|
||||
'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary',
|
||||
'info_dict':
|
||||
{
|
||||
'id': '310086',
|
||||
'ext': 'mp3',
|
||||
'uploader': 'Roosh Williams',
|
||||
'title': 'Extraordinary'
|
||||
}
|
||||
},
|
||||
# audiomack wrapper around soundcloud song
|
||||
{
|
||||
'add_ie': ['Soundcloud'],
|
||||
'url': 'http://www.audiomack.com/song/hip-hop-daily/black-mamba-freestyle',
|
||||
'info_dict': {
|
||||
'id': '258901379',
|
||||
'ext': 'mp3',
|
||||
'description': 'mamba day freestyle for the legend Kobe Bryant ',
|
||||
'title': 'Black Mamba Freestyle [Prod. By Danny Wolf]',
|
||||
'uploader': 'ILOVEMAKONNEN',
|
||||
'upload_date': '20160414',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
# URLs end with [uploader name]/[uploader title]
|
||||
# this title is whatever the user types in, and is rarely
|
||||
# the proper song title. Real metadata is in the api response
|
||||
album_url_tag = self._match_id(url)
|
||||
|
||||
# Request the extended version of the api for extra fields like artist and title
|
||||
api_response = self._download_json(
|
||||
'http://www.audiomack.com/api/music/url/song/%s?extended=1&_=%d' % (
|
||||
album_url_tag, time.time()),
|
||||
album_url_tag)
|
||||
|
||||
# API is inconsistent with errors
|
||||
if 'url' not in api_response or not api_response['url'] or 'error' in api_response:
|
||||
raise ExtractorError('Invalid url %s' % url)
|
||||
|
||||
# Audiomack wraps a lot of soundcloud tracks in their branded wrapper
|
||||
# if so, pass the work off to the soundcloud extractor
|
||||
if SoundcloudIE.suitable(api_response['url']):
|
||||
return self.url_result(api_response['url'], SoundcloudIE.ie_key())
|
||||
|
||||
return {
|
||||
'id': compat_str(api_response.get('id', album_url_tag)),
|
||||
'uploader': api_response.get('artist'),
|
||||
'title': api_response.get('title'),
|
||||
'url': api_response['url'],
|
||||
}
|
||||
|
||||
|
||||
class AudiomackAlbumIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?audiomack\.com/album/(?P<id>[\w/-]+)'
|
||||
IE_NAME = 'audiomack:album'
|
||||
_TESTS = [
|
||||
# Standard album playlist
|
||||
{
|
||||
'url': 'http://www.audiomack.com/album/flytunezcom/tha-tour-part-2-mixtape',
|
||||
'playlist_count': 15,
|
||||
'info_dict':
|
||||
{
|
||||
'id': '812251',
|
||||
'title': 'Tha Tour: Part 2 (Official Mixtape)'
|
||||
}
|
||||
},
|
||||
# Album playlist ripped from fakeshoredrive with no metadata
|
||||
{
|
||||
'url': 'http://www.audiomack.com/album/fakeshoredrive/ppp-pistol-p-project',
|
||||
'info_dict': {
|
||||
'title': 'PPP (Pistol P Project)',
|
||||
'id': '837572',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'title': 'PPP (Pistol P Project) - 9. Heaven or Hell (CHIMACA) ft Zuse (prod by DJ FU)',
|
||||
'id': '837577',
|
||||
'ext': 'mp3',
|
||||
'uploader': 'Lil Herb a.k.a. G Herbo',
|
||||
}
|
||||
}],
|
||||
'params': {
|
||||
'playliststart': 9,
|
||||
'playlistend': 9,
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
# URLs end with [uploader name]/[uploader title]
|
||||
# this title is whatever the user types in, and is rarely
|
||||
# the proper song title. Real metadata is in the api response
|
||||
album_url_tag = self._match_id(url)
|
||||
result = {'_type': 'playlist', 'entries': []}
|
||||
# There is no one endpoint for album metadata - instead it is included/repeated in each song's metadata
|
||||
# Therefore we don't know how many songs the album has and must infi-loop until failure
|
||||
for track_no in itertools.count():
|
||||
# Get song's metadata
|
||||
api_response = self._download_json(
|
||||
'http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d'
|
||||
% (album_url_tag, track_no, time.time()), album_url_tag,
|
||||
note='Querying song information (%d)' % (track_no + 1))
|
||||
|
||||
# Total failure, only occurs when url is totally wrong
|
||||
# Won't happen in middle of valid playlist (next case)
|
||||
if 'url' not in api_response or 'error' in api_response:
|
||||
raise ExtractorError('Invalid url for track %d of album url %s' % (track_no, url))
|
||||
# URL is good but song id doesn't exist - usually means end of playlist
|
||||
elif not api_response['url']:
|
||||
break
|
||||
else:
|
||||
# Pull out the album metadata and add to result (if it exists)
|
||||
for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]:
|
||||
if apikey in api_response and resultkey not in result:
|
||||
result[resultkey] = api_response[apikey]
|
||||
song_id = url_basename(api_response['url']).rpartition('.')[0]
|
||||
result['entries'].append({
|
||||
'id': compat_str(api_response.get('id', song_id)),
|
||||
'uploader': api_response.get('artist'),
|
||||
'title': api_response.get('title', song_id),
|
||||
'url': api_response['url'],
|
||||
})
|
||||
return result
|
185
youtube_dl/extractor/awaan.py
Normal file
185
youtube_dl/extractor/awaan.py
Normal file
|
@ -0,0 +1,185 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class AWAANIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id, video_id, season_id = re.match(self._VALID_URL, url).groups()
|
||||
if video_id and int(video_id) > 0:
|
||||
return self.url_result(
|
||||
'http://awaan.ae/media/%s' % video_id, 'AWAANVideo')
|
||||
elif season_id and int(season_id) > 0:
|
||||
return self.url_result(smuggle_url(
|
||||
'http://awaan.ae/program/season/%s' % season_id,
|
||||
{'show_id': show_id}), 'AWAANSeason')
|
||||
else:
|
||||
return self.url_result(
|
||||
'http://awaan.ae/program/%s' % show_id, 'AWAANSeason')
|
||||
|
||||
|
||||
class AWAANBaseIE(InfoExtractor):
|
||||
def _parse_video_data(self, video_data, video_id, is_live):
|
||||
title = video_data.get('title_en') or video_data['title_ar']
|
||||
img = video_data.get('img')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'description': video_data.get('description_en') or video_data.get('description_ar'),
|
||||
'thumbnail': 'http://admin.mangomolo.com/analytics/%s' % img if img else None,
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
|
||||
class AWAANVideoIE(AWAANBaseIE):
|
||||
IE_NAME = 'awaan:video'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375',
|
||||
'md5': '5f61c33bfc7794315c671a62d43116aa',
|
||||
'info_dict':
|
||||
{
|
||||
'id': '17375',
|
||||
'ext': 'mp4',
|
||||
'title': 'رحلة العمر : الحلقة 1',
|
||||
'description': 'md5:0156e935d870acb8ef0a66d24070c6d6',
|
||||
'duration': 2041,
|
||||
'timestamp': 1227504126,
|
||||
'upload_date': '20081124',
|
||||
'uploader_id': '71',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video_data = self._download_json(
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
|
||||
video_id, headers={'Origin': 'http://awaan.ae'})
|
||||
info = self._parse_video_data(video_data, video_id, False)
|
||||
|
||||
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + compat_urllib_parse_urlencode({
|
||||
'id': video_data['id'],
|
||||
'user_id': video_data['user_id'],
|
||||
'signature': video_data['signature'],
|
||||
'countries': 'Q0M=',
|
||||
'filter': 'DENY',
|
||||
})
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': embed_url,
|
||||
'ie_key': 'MangomoloVideo',
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class AWAANLiveIE(AWAANBaseIE):
|
||||
IE_NAME = 'awaan:live'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://awaan.ae/live/6/dubai-tv',
|
||||
'info_dict': {
|
||||
'id': '6',
|
||||
'ext': 'mp4',
|
||||
'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'upload_date': '20150107',
|
||||
'timestamp': 1420588800,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
channel_data = self._download_json(
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id,
|
||||
channel_id, headers={'Origin': 'http://awaan.ae'})
|
||||
info = self._parse_video_data(channel_data, channel_id, True)
|
||||
|
||||
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + compat_urllib_parse_urlencode({
|
||||
'id': base64.b64encode(channel_data['user_id'].encode()).decode(),
|
||||
'channelid': base64.b64encode(channel_data['id'].encode()).decode(),
|
||||
'signature': channel_data['signature'],
|
||||
'countries': 'Q0M=',
|
||||
'filter': 'DENY',
|
||||
})
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': embed_url,
|
||||
'ie_key': 'MangomoloLive',
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class AWAANSeasonIE(InfoExtractor):
|
||||
IE_NAME = 'awaan:season'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
|
||||
_TEST = {
|
||||
'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A',
|
||||
'info_dict':
|
||||
{
|
||||
'id': '7910',
|
||||
'title': 'محاضرات الشيخ الشعراوي',
|
||||
},
|
||||
'playlist_mincount': 27,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
show_id, season_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
data = {}
|
||||
if season_id:
|
||||
data['season'] = season_id
|
||||
show_id = smuggled_data.get('show_id')
|
||||
if show_id is None:
|
||||
season = self._download_json(
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id,
|
||||
season_id, headers={'Origin': 'http://awaan.ae'})
|
||||
show_id = season['id']
|
||||
data['show_id'] = show_id
|
||||
show = self._download_json(
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/show',
|
||||
show_id, data=urlencode_postdata(data), headers={
|
||||
'Origin': 'http://awaan.ae',
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
})
|
||||
if not season_id:
|
||||
season_id = show['default_season']
|
||||
for season in show['seasons']:
|
||||
if season['id'] == season_id:
|
||||
title = season.get('title_en') or season['title_ar']
|
||||
|
||||
entries = []
|
||||
for video in show['videos']:
|
||||
video_id = compat_str(video['id'])
|
||||
entries.append(self.url_result(
|
||||
'http://awaan.ae/media/%s' % video_id, 'AWAANVideo', video_id))
|
||||
|
||||
return self.playlist_result(entries, season_id, title)
|
78
youtube_dl/extractor/aws.py
Normal file
78
youtube_dl/extractor/aws.py
Normal file
|
@ -0,0 +1,78 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import hashlib
|
||||
import hmac
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
|
||||
|
||||
class AWSIE(InfoExtractor):
|
||||
_AWS_ALGORITHM = 'AWS4-HMAC-SHA256'
|
||||
_AWS_REGION = 'us-east-1'
|
||||
|
||||
def _aws_execute_api(self, aws_dict, video_id, query=None):
|
||||
query = query or {}
|
||||
amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
|
||||
date = amz_date[:8]
|
||||
headers = {
|
||||
'Accept': 'application/json',
|
||||
'Host': self._AWS_PROXY_HOST,
|
||||
'X-Amz-Date': amz_date,
|
||||
'X-Api-Key': self._AWS_API_KEY
|
||||
}
|
||||
session_token = aws_dict.get('session_token')
|
||||
if session_token:
|
||||
headers['X-Amz-Security-Token'] = session_token
|
||||
|
||||
def aws_hash(s):
|
||||
return hashlib.sha256(s.encode('utf-8')).hexdigest()
|
||||
|
||||
# Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
|
||||
canonical_querystring = compat_urllib_parse_urlencode(query)
|
||||
canonical_headers = ''
|
||||
for header_name, header_value in sorted(headers.items()):
|
||||
canonical_headers += '%s:%s\n' % (header_name.lower(), header_value)
|
||||
signed_headers = ';'.join([header.lower() for header in sorted(headers.keys())])
|
||||
canonical_request = '\n'.join([
|
||||
'GET',
|
||||
aws_dict['uri'],
|
||||
canonical_querystring,
|
||||
canonical_headers,
|
||||
signed_headers,
|
||||
aws_hash('')
|
||||
])
|
||||
|
||||
# Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
|
||||
credential_scope_list = [date, self._AWS_REGION, 'execute-api', 'aws4_request']
|
||||
credential_scope = '/'.join(credential_scope_list)
|
||||
string_to_sign = '\n'.join([self._AWS_ALGORITHM, amz_date, credential_scope, aws_hash(canonical_request)])
|
||||
|
||||
# Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
|
||||
def aws_hmac(key, msg):
|
||||
return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)
|
||||
|
||||
def aws_hmac_digest(key, msg):
|
||||
return aws_hmac(key, msg).digest()
|
||||
|
||||
def aws_hmac_hexdigest(key, msg):
|
||||
return aws_hmac(key, msg).hexdigest()
|
||||
|
||||
k_signing = ('AWS4' + aws_dict['secret_key']).encode('utf-8')
|
||||
for value in credential_scope_list:
|
||||
k_signing = aws_hmac_digest(k_signing, value)
|
||||
|
||||
signature = aws_hmac_hexdigest(k_signing, string_to_sign)
|
||||
|
||||
# Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html
|
||||
headers['Authorization'] = ', '.join([
|
||||
'%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope),
|
||||
'SignedHeaders=%s' % signed_headers,
|
||||
'Signature=%s' % signature,
|
||||
])
|
||||
|
||||
return self._download_json(
|
||||
'https://%s%s%s' % (self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''),
|
||||
video_id, headers=headers)
|
66
youtube_dl/extractor/azmedien.py
Normal file
66
youtube_dl/extractor/azmedien.py
Normal file
|
@ -0,0 +1,66 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
|
||||
|
||||
class AZMedienIE(InfoExtractor):
|
||||
IE_DESC = 'AZ Medien videos'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?P<host>
|
||||
telezueri\.ch|
|
||||
telebaern\.tv|
|
||||
telem1\.ch
|
||||
)/
|
||||
[^/]+/
|
||||
(?P<id>
|
||||
[^/]+-(?P<article_id>\d+)
|
||||
)
|
||||
(?:
|
||||
\#video=
|
||||
(?P<kaltura_id>
|
||||
[_0-9a-z]+
|
||||
)
|
||||
)?
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569',
|
||||
'info_dict': {
|
||||
'id': '1_anruz3wy',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen',
|
||||
'uploader_id': 'TVOnline',
|
||||
'upload_date': '20180930',
|
||||
'timestamp': 1538328802,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
|
||||
'only_matching': True
|
||||
}]
|
||||
_API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/cb9f2f81ed22e9b47f4ca64ea3cc5a5d13e88d1d'
|
||||
_PARTNER_ID = '1719221'
|
||||
|
||||
def _real_extract(self, url):
|
||||
host, display_id, article_id, entry_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
if not entry_id:
|
||||
entry_id = self._download_json(
|
||||
self._API_TEMPL % (host, host.split('.')[0]), display_id, query={
|
||||
'variables': json.dumps({
|
||||
'contextId': 'NewsArticle:' + article_id,
|
||||
}),
|
||||
})['data']['context']['mainAsset']['video']['kaltura']['kalturaId']
|
||||
|
||||
return self.url_result(
|
||||
'kaltura:%s:%s' % (self._PARTNER_ID, entry_id),
|
||||
ie=KalturaIE.ie_key(), video_id=entry_id)
|
56
youtube_dl/extractor/baidu.py
Normal file
56
youtube_dl/extractor/baidu.py
Normal file
|
@ -0,0 +1,56 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unescapeHTML
|
||||
|
||||
|
||||
class BaiduVideoIE(InfoExtractor):
|
||||
IE_DESC = '百度视频'
|
||||
_VALID_URL = r'https?://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm'
|
||||
_TESTS = [{
|
||||
'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6',
|
||||
'info_dict': {
|
||||
'id': '1069',
|
||||
'title': '中华小当家 TV版国语',
|
||||
'description': 'md5:51be07afe461cf99fa61231421b5397c',
|
||||
},
|
||||
'playlist_count': 52,
|
||||
}, {
|
||||
'url': 'http://v.baidu.com/show/11595.htm?frp=bdbrand',
|
||||
'info_dict': {
|
||||
'id': '11595',
|
||||
'title': 're:^奔跑吧兄弟',
|
||||
'description': 'md5:1bf88bad6d850930f542d51547c089b8',
|
||||
},
|
||||
'playlist_mincount': 12,
|
||||
}]
|
||||
|
||||
def _call_api(self, path, category, playlist_id, note):
|
||||
return self._download_json('http://app.video.baidu.com/%s/?worktype=adnative%s&id=%s' % (
|
||||
path, category, playlist_id), playlist_id, note)
|
||||
|
||||
def _real_extract(self, url):
|
||||
category, playlist_id = re.match(self._VALID_URL, url).groups()
|
||||
if category == 'show':
|
||||
category = 'tvshow'
|
||||
if category == 'tv':
|
||||
category = 'tvplay'
|
||||
|
||||
playlist_detail = self._call_api(
|
||||
'xqinfo', category, playlist_id, 'Download playlist JSON metadata')
|
||||
|
||||
playlist_title = playlist_detail['title']
|
||||
playlist_description = unescapeHTML(playlist_detail.get('intro'))
|
||||
|
||||
episodes_detail = self._call_api(
|
||||
'xqsingle', category, playlist_id, 'Download episodes JSON metadata')
|
||||
|
||||
entries = [self.url_result(
|
||||
episode['url'], video_title=episode['title']
|
||||
) for episode in episodes_detail['videos']]
|
||||
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
417
youtube_dl/extractor/bandcamp.py
Normal file
417
youtube_dl/extractor/bandcamp.py
Normal file
|
@ -0,0 +1,417 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
KNOWN_EXTENSIONS,
|
||||
parse_filesize,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BandcampIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://youtube-dlc.bandcamp.com/track/youtube-dlc-test-song',
|
||||
'md5': 'c557841d5e50261777a6585648adf439',
|
||||
'info_dict': {
|
||||
'id': '1812978515',
|
||||
'ext': 'mp3',
|
||||
'title': "youtube-dlc \"'/\\\u00e4\u21ad - youtube-dlc test song \"'/\\\u00e4\u21ad",
|
||||
'duration': 9.8485,
|
||||
},
|
||||
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
||||
}, {
|
||||
# free download
|
||||
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
||||
'md5': '853e35bf34aa1d6fe2615ae612564b36',
|
||||
'info_dict': {
|
||||
'id': '2650410135',
|
||||
'ext': 'aiff',
|
||||
'title': 'Ben Prunty - Lanius (Battle)',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Ben Prunty',
|
||||
'timestamp': 1396508491,
|
||||
'upload_date': '20140403',
|
||||
'release_date': '20140403',
|
||||
'duration': 260.877,
|
||||
'track': 'Lanius (Battle)',
|
||||
'track_number': 1,
|
||||
'track_id': '2650410135',
|
||||
'artist': 'Ben Prunty',
|
||||
'album': 'FTL: Advanced Edition Soundtrack',
|
||||
},
|
||||
}, {
|
||||
# no free download, mp3 128
|
||||
'url': 'https://relapsealumni.bandcamp.com/track/hail-to-fire',
|
||||
'md5': 'fec12ff55e804bb7f7ebeb77a800c8b7',
|
||||
'info_dict': {
|
||||
'id': '2584466013',
|
||||
'ext': 'mp3',
|
||||
'title': 'Mastodon - Hail to Fire',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Mastodon',
|
||||
'timestamp': 1322005399,
|
||||
'upload_date': '20111122',
|
||||
'release_date': '20040207',
|
||||
'duration': 120.79,
|
||||
'track': 'Hail to Fire',
|
||||
'track_number': 5,
|
||||
'track_id': '2584466013',
|
||||
'artist': 'Mastodon',
|
||||
'album': 'Call of the Mastodon',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, title)
|
||||
thumbnail = self._html_search_meta('og:image', webpage, default=None)
|
||||
|
||||
track_id = None
|
||||
track = None
|
||||
track_number = None
|
||||
duration = None
|
||||
|
||||
formats = []
|
||||
track_info = self._parse_json(
|
||||
self._search_regex(
|
||||
r'trackinfo\s*:\s*\[\s*({.+?})\s*\]\s*,\s*?\n',
|
||||
webpage, 'track info', default='{}'), title)
|
||||
if track_info:
|
||||
file_ = track_info.get('file')
|
||||
if isinstance(file_, dict):
|
||||
for format_id, format_url in file_.items():
|
||||
if not url_or_none(format_url):
|
||||
continue
|
||||
ext, abr_str = format_id.split('-', 1)
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': self._proto_relative_url(format_url, 'http:'),
|
||||
'ext': ext,
|
||||
'vcodec': 'none',
|
||||
'acodec': ext,
|
||||
'abr': int_or_none(abr_str),
|
||||
})
|
||||
track = track_info.get('title')
|
||||
track_id = str_or_none(track_info.get('track_id') or track_info.get('id'))
|
||||
track_number = int_or_none(track_info.get('track_num'))
|
||||
duration = float_or_none(track_info.get('duration'))
|
||||
|
||||
def extract(key):
|
||||
return self._search_regex(
|
||||
r'\b%s\s*["\']?\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % key,
|
||||
webpage, key, default=None, group='value')
|
||||
|
||||
artist = extract('artist')
|
||||
album = extract('album_title')
|
||||
timestamp = unified_timestamp(
|
||||
extract('publish_date') or extract('album_publish_date'))
|
||||
release_date = unified_strdate(extract('album_release_date'))
|
||||
|
||||
download_link = self._search_regex(
|
||||
r'freeDownloadPage\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'download link', default=None, group='url')
|
||||
if download_link:
|
||||
track_id = self._search_regex(
|
||||
r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
|
||||
webpage, 'track id')
|
||||
|
||||
download_webpage = self._download_webpage(
|
||||
download_link, track_id, 'Downloading free downloads page')
|
||||
|
||||
blob = self._parse_json(
|
||||
self._search_regex(
|
||||
r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
|
||||
'blob', group='blob'),
|
||||
track_id, transform_source=unescapeHTML)
|
||||
|
||||
info = try_get(
|
||||
blob, (lambda x: x['digital_items'][0],
|
||||
lambda x: x['download_items'][0]), dict)
|
||||
if info:
|
||||
downloads = info.get('downloads')
|
||||
if isinstance(downloads, dict):
|
||||
if not track:
|
||||
track = info.get('title')
|
||||
if not artist:
|
||||
artist = info.get('artist')
|
||||
if not thumbnail:
|
||||
thumbnail = info.get('thumb_url')
|
||||
|
||||
download_formats = {}
|
||||
download_formats_list = blob.get('download_formats')
|
||||
if isinstance(download_formats_list, list):
|
||||
for f in blob['download_formats']:
|
||||
name, ext = f.get('name'), f.get('file_extension')
|
||||
if all(isinstance(x, compat_str) for x in (name, ext)):
|
||||
download_formats[name] = ext.strip('.')
|
||||
|
||||
for format_id, f in downloads.items():
|
||||
format_url = f.get('url')
|
||||
if not format_url:
|
||||
continue
|
||||
# Stat URL generation algorithm is reverse engineered from
|
||||
# download_*_bundle_*.js
|
||||
stat_url = update_url_query(
|
||||
format_url.replace('/download/', '/statdownload/'), {
|
||||
'.rand': int(time.time() * 1000 * random.random()),
|
||||
})
|
||||
format_id = f.get('encoding_name') or format_id
|
||||
stat = self._download_json(
|
||||
stat_url, track_id, 'Downloading %s JSON' % format_id,
|
||||
transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1],
|
||||
fatal=False)
|
||||
if not stat:
|
||||
continue
|
||||
retry_url = url_or_none(stat.get('retry_url'))
|
||||
if not retry_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': self._proto_relative_url(retry_url, 'http:'),
|
||||
'ext': download_formats.get(format_id),
|
||||
'format_id': format_id,
|
||||
'format_note': f.get('description'),
|
||||
'filesize': parse_filesize(f.get('size_mb')),
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = '%s - %s' % (artist, track) if artist else track
|
||||
|
||||
if not duration:
|
||||
duration = float_or_none(self._html_search_meta(
|
||||
'duration', webpage, default=None))
|
||||
|
||||
return {
|
||||
'id': track_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': artist,
|
||||
'timestamp': timestamp,
|
||||
'release_date': release_date,
|
||||
'duration': duration,
|
||||
'track': track,
|
||||
'track_number': track_number,
|
||||
'track_id': track_id,
|
||||
'artist': artist,
|
||||
'album': album,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class BandcampAlbumIE(InfoExtractor):
|
||||
IE_NAME = 'Bandcamp:album'
|
||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||
'playlist': [
|
||||
{
|
||||
'md5': '39bc1eded3476e927c724321ddf116cf',
|
||||
'info_dict': {
|
||||
'id': '1353101989',
|
||||
'ext': 'mp3',
|
||||
'title': 'Intro',
|
||||
}
|
||||
},
|
||||
{
|
||||
'md5': '1a2c32e2691474643e912cc6cd4bffaa',
|
||||
'info_dict': {
|
||||
'id': '38097443',
|
||||
'ext': 'mp3',
|
||||
'title': 'Kero One - Keep It Alive (Blazo remix)',
|
||||
}
|
||||
},
|
||||
],
|
||||
'info_dict': {
|
||||
'title': 'Jazz Format Mixtape vol.1',
|
||||
'id': 'jazz-format-mixtape-vol-1',
|
||||
'uploader_id': 'blazo',
|
||||
},
|
||||
'params': {
|
||||
'playlistend': 2
|
||||
},
|
||||
'skip': 'Bandcamp imposes download limits.'
|
||||
}, {
|
||||
'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
|
||||
'info_dict': {
|
||||
'title': 'Hierophany of the Open Grave',
|
||||
'uploader_id': 'nightbringer',
|
||||
'id': 'hierophany-of-the-open-grave',
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
}, {
|
||||
'url': 'http://dotscale.bandcamp.com',
|
||||
'info_dict': {
|
||||
'title': 'Loom',
|
||||
'id': 'dotscale',
|
||||
'uploader_id': 'dotscale',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
}, {
|
||||
# with escaped quote in title
|
||||
'url': 'https://jstrecords.bandcamp.com/album/entropy-ep',
|
||||
'info_dict': {
|
||||
'title': '"Entropy" EP',
|
||||
'uploader_id': 'jstrecords',
|
||||
'id': 'entropy-ep',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}, {
|
||||
# not all tracks have songs
|
||||
'url': 'https://insulters.bandcamp.com/album/we-are-the-plague',
|
||||
'info_dict': {
|
||||
'id': 'we-are-the-plague',
|
||||
'title': 'WE ARE THE PLAGUE',
|
||||
'uploader_id': 'insulters',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (False
|
||||
if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url)
|
||||
else super(BandcampAlbumIE, cls).suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
uploader_id = mobj.group('subdomain')
|
||||
album_id = mobj.group('album_id')
|
||||
playlist_id = album_id or uploader_id
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
track_elements = re.findall(
|
||||
r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage)
|
||||
if not track_elements:
|
||||
raise ExtractorError('The page doesn\'t contain any tracks')
|
||||
# Only tracks with duration info have songs
|
||||
entries = [
|
||||
self.url_result(
|
||||
compat_urlparse.urljoin(url, t_path),
|
||||
ie=BandcampIE.ie_key(),
|
||||
video_title=self._search_regex(
|
||||
r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
|
||||
elem_content, 'track title', fatal=False))
|
||||
for elem_content, t_path in track_elements
|
||||
if self._html_search_meta('duration', elem_content, default=None)]
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
|
||||
webpage, 'title', fatal=False)
|
||||
if title:
|
||||
title = title.replace(r'\"', '"')
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'uploader_id': uploader_id,
|
||||
'id': playlist_id,
|
||||
'title': title,
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
|
||||
class BandcampWeeklyIE(InfoExtractor):
|
||||
IE_NAME = 'Bandcamp:weekly'
|
||||
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://bandcamp.com/?show=224',
|
||||
'md5': 'b00df799c733cf7e0c567ed187dea0fd',
|
||||
'info_dict': {
|
||||
'id': '224',
|
||||
'ext': 'opus',
|
||||
'title': 'BC Weekly April 4th 2017 - Magic Moments',
|
||||
'description': 'md5:5d48150916e8e02d030623a48512c874',
|
||||
'duration': 5829.77,
|
||||
'release_date': '20170404',
|
||||
'series': 'Bandcamp Weekly',
|
||||
'episode': 'Magic Moments',
|
||||
'episode_number': 208,
|
||||
'episode_id': '224',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://bandcamp.com/?blah/blah@&show=228',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
blob = self._parse_json(
|
||||
self._search_regex(
|
||||
r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage,
|
||||
'blob', group='blob'),
|
||||
video_id, transform_source=unescapeHTML)
|
||||
|
||||
show = blob['bcw_show']
|
||||
|
||||
# This is desired because any invalid show id redirects to `bandcamp.com`
|
||||
# which happens to expose the latest Bandcamp Weekly episode.
|
||||
show_id = int_or_none(show.get('show_id')) or int_or_none(video_id)
|
||||
|
||||
formats = []
|
||||
for format_id, format_url in show['audio_stream'].items():
|
||||
if not url_or_none(format_url):
|
||||
continue
|
||||
for known_ext in KNOWN_EXTENSIONS:
|
||||
if known_ext in format_id:
|
||||
ext = known_ext
|
||||
break
|
||||
else:
|
||||
ext = None
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': format_url,
|
||||
'ext': ext,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = show.get('audio_title') or 'Bandcamp Weekly'
|
||||
subtitle = show.get('subtitle')
|
||||
if subtitle:
|
||||
title += ' - %s' % subtitle
|
||||
|
||||
episode_number = None
|
||||
seq = blob.get('bcw_seq')
|
||||
|
||||
if seq and isinstance(seq, list):
|
||||
try:
|
||||
episode_number = next(
|
||||
int_or_none(e.get('episode_number'))
|
||||
for e in seq
|
||||
if isinstance(e, dict) and int_or_none(e.get('id')) == show_id)
|
||||
except StopIteration:
|
||||
pass
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': show.get('desc') or show.get('short_desc'),
|
||||
'duration': float_or_none(show.get('audio_duration')),
|
||||
'is_live': False,
|
||||
'release_date': unified_strdate(show.get('published_date')),
|
||||
'series': 'Bandcamp Weekly',
|
||||
'episode': show.get('subtitle'),
|
||||
'episode_number': episode_number,
|
||||
'episode_id': compat_str(video_id),
|
||||
'formats': formats
|
||||
}
|
1359
youtube_dl/extractor/bbc.py
Normal file
1359
youtube_dl/extractor/bbc.py
Normal file
File diff suppressed because it is too large
Load diff
194
youtube_dl/extractor/beampro.py
Normal file
194
youtube_dl/extractor/beampro.py
Normal file
|
@ -0,0 +1,194 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
compat_str,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class BeamProBaseIE(InfoExtractor):
|
||||
_API_BASE = 'https://mixer.com/api/v1'
|
||||
_RATINGS = {'family': 0, 'teen': 13, '18+': 18}
|
||||
|
||||
def _extract_channel_info(self, chan):
|
||||
user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id'])
|
||||
return {
|
||||
'uploader': chan.get('token') or try_get(
|
||||
chan, lambda x: x['user']['username'], compat_str),
|
||||
'uploader_id': compat_str(user_id) if user_id else None,
|
||||
'age_limit': self._RATINGS.get(chan.get('audience')),
|
||||
}
|
||||
|
||||
|
||||
class BeamProLiveIE(BeamProBaseIE):
|
||||
IE_NAME = 'Mixer:live'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'http://mixer.com/niterhayven',
|
||||
'info_dict': {
|
||||
'id': '261562',
|
||||
'ext': 'mp4',
|
||||
'title': 'Introducing The Witcher 3 // The Grind Starts Now!',
|
||||
'description': 'md5:0b161ac080f15fe05d18a07adb44a74d',
|
||||
'thumbnail': r're:https://.*\.jpg$',
|
||||
'timestamp': 1483477281,
|
||||
'upload_date': '20170103',
|
||||
'uploader': 'niterhayven',
|
||||
'uploader_id': '373396',
|
||||
'age_limit': 18,
|
||||
'is_live': True,
|
||||
'view_count': int,
|
||||
},
|
||||
'skip': 'niterhayven is offline',
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
_MANIFEST_URL_TEMPLATE = '%s/channels/%%s/manifest.%%s' % BeamProBaseIE._API_BASE
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if BeamProVodIE.suitable(url) else super(BeamProLiveIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_name = self._match_id(url)
|
||||
|
||||
chan = self._download_json(
|
||||
'%s/channels/%s' % (self._API_BASE, channel_name), channel_name)
|
||||
|
||||
if chan.get('online') is False:
|
||||
raise ExtractorError(
|
||||
'{0} is offline'.format(channel_name), expected=True)
|
||||
|
||||
channel_id = chan['id']
|
||||
|
||||
def manifest_url(kind):
|
||||
return self._MANIFEST_URL_TEMPLATE % (channel_id, kind)
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
manifest_url('m3u8'), channel_name, ext='mp4', m3u8_id='hls',
|
||||
fatal=False)
|
||||
formats.extend(self._extract_smil_formats(
|
||||
manifest_url('smil'), channel_name, fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = {
|
||||
'id': compat_str(chan.get('id') or channel_name),
|
||||
'title': self._live_title(chan.get('name') or channel_name),
|
||||
'description': clean_html(chan.get('description')),
|
||||
'thumbnail': try_get(
|
||||
chan, lambda x: x['thumbnail']['url'], compat_str),
|
||||
'timestamp': parse_iso8601(chan.get('updatedAt')),
|
||||
'is_live': True,
|
||||
'view_count': int_or_none(chan.get('viewersTotal')),
|
||||
'formats': formats,
|
||||
}
|
||||
info.update(self._extract_channel_info(chan))
|
||||
|
||||
return info
|
||||
|
||||
|
||||
class BeamProVodIE(BeamProBaseIE):
|
||||
IE_NAME = 'Mixer:vod'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>[^?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://mixer.com/willow8714?vod=2259830',
|
||||
'md5': 'b2431e6e8347dc92ebafb565d368b76b',
|
||||
'info_dict': {
|
||||
'id': '2259830',
|
||||
'ext': 'mp4',
|
||||
'title': 'willow8714\'s Channel',
|
||||
'duration': 6828.15,
|
||||
'thumbnail': r're:https://.*source\.png$',
|
||||
'timestamp': 1494046474,
|
||||
'upload_date': '20170506',
|
||||
'uploader': 'willow8714',
|
||||
'uploader_id': '6085379',
|
||||
'age_limit': 13,
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://mixer.com/streamer?vod=IxFno1rqC0S_XJ1a2yGgNw',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://mixer.com/streamer?vod=Rh3LY0VAqkGpEQUe2pN-ig',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_format(vod, vod_type):
|
||||
if not vod.get('baseUrl'):
|
||||
return []
|
||||
|
||||
if vod_type == 'hls':
|
||||
filename, protocol = 'manifest.m3u8', 'm3u8_native'
|
||||
elif vod_type == 'raw':
|
||||
filename, protocol = 'source.mp4', 'https'
|
||||
else:
|
||||
assert False
|
||||
|
||||
data = vod.get('data') if isinstance(vod.get('data'), dict) else {}
|
||||
|
||||
format_id = [vod_type]
|
||||
if isinstance(data.get('Height'), compat_str):
|
||||
format_id.append('%sp' % data['Height'])
|
||||
|
||||
return [{
|
||||
'url': urljoin(vod['baseUrl'], filename),
|
||||
'format_id': '-'.join(format_id),
|
||||
'ext': 'mp4',
|
||||
'protocol': protocol,
|
||||
'width': int_or_none(data.get('Width')),
|
||||
'height': int_or_none(data.get('Height')),
|
||||
'fps': int_or_none(data.get('Fps')),
|
||||
'tbr': int_or_none(data.get('Bitrate'), 1000),
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
vod_id = self._match_id(url)
|
||||
|
||||
vod_info = self._download_json(
|
||||
'%s/recordings/%s' % (self._API_BASE, vod_id), vod_id)
|
||||
|
||||
state = vod_info.get('state')
|
||||
if state != 'AVAILABLE':
|
||||
raise ExtractorError(
|
||||
'VOD %s is not available (state: %s)' % (vod_id, state),
|
||||
expected=True)
|
||||
|
||||
formats = []
|
||||
thumbnail_url = None
|
||||
|
||||
for vod in vod_info['vods']:
|
||||
vod_type = vod.get('format')
|
||||
if vod_type in ('hls', 'raw'):
|
||||
formats.extend(self._extract_format(vod, vod_type))
|
||||
elif vod_type == 'thumbnail':
|
||||
thumbnail_url = urljoin(vod.get('baseUrl'), 'source.png')
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = {
|
||||
'id': vod_id,
|
||||
'title': vod_info.get('name') or vod_id,
|
||||
'duration': float_or_none(vod_info.get('duration')),
|
||||
'thumbnail': thumbnail_url,
|
||||
'timestamp': parse_iso8601(vod_info.get('createdAt')),
|
||||
'view_count': int_or_none(vod_info.get('viewsTotal')),
|
||||
'formats': formats,
|
||||
}
|
||||
info.update(self._extract_channel_info(vod_info.get('channel') or {}))
|
||||
|
||||
return info
|
103
youtube_dl/extractor/beatport.py
Normal file
103
youtube_dl/extractor/beatport.py
Normal file
|
@ -0,0 +1,103 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class BeatportIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|pro\.)?beatport\.com/track/(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://beatport.com/track/synesthesia-original-mix/5379371',
|
||||
'md5': 'b3c34d8639a2f6a7f734382358478887',
|
||||
'info_dict': {
|
||||
'id': '5379371',
|
||||
'display_id': 'synesthesia-original-mix',
|
||||
'ext': 'mp4',
|
||||
'title': 'Froxic - Synesthesia (Original Mix)',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://beatport.com/track/love-and-war-original-mix/3756896',
|
||||
'md5': 'e44c3025dfa38c6577fbaeb43da43514',
|
||||
'info_dict': {
|
||||
'id': '3756896',
|
||||
'display_id': 'love-and-war-original-mix',
|
||||
'ext': 'mp3',
|
||||
'title': 'Wolfgang Gartner - Love & War (Original Mix)',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://beatport.com/track/birds-original-mix/4991738',
|
||||
'md5': 'a1fd8e8046de3950fd039304c186c05f',
|
||||
'info_dict': {
|
||||
'id': '4991738',
|
||||
'display_id': 'birds-original-mix',
|
||||
'ext': 'mp4',
|
||||
'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)",
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
track_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
playables = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.Playables\s*=\s*({.+?});', webpage,
|
||||
'playables info', flags=re.DOTALL),
|
||||
track_id)
|
||||
|
||||
track = next(t for t in playables['tracks'] if t['id'] == int(track_id))
|
||||
|
||||
title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name']
|
||||
if track['mix']:
|
||||
title += ' (' + track['mix'] + ')'
|
||||
|
||||
formats = []
|
||||
for ext, info in track['preview'].items():
|
||||
if not info['url']:
|
||||
continue
|
||||
fmt = {
|
||||
'url': info['url'],
|
||||
'ext': ext,
|
||||
'format_id': ext,
|
||||
'vcodec': 'none',
|
||||
}
|
||||
if ext == 'mp3':
|
||||
fmt['preference'] = 0
|
||||
fmt['acodec'] = 'mp3'
|
||||
fmt['abr'] = 96
|
||||
fmt['asr'] = 44100
|
||||
elif ext == 'mp4':
|
||||
fmt['preference'] = 1
|
||||
fmt['acodec'] = 'aac'
|
||||
fmt['abr'] = 96
|
||||
fmt['asr'] = 44100
|
||||
formats.append(fmt)
|
||||
self._sort_formats(formats)
|
||||
|
||||
images = []
|
||||
for name, info in track['images'].items():
|
||||
image_url = info.get('url')
|
||||
if name == 'dynamic' or not image_url:
|
||||
continue
|
||||
image = {
|
||||
'id': name,
|
||||
'url': image_url,
|
||||
'height': int_or_none(info.get('height')),
|
||||
'width': int_or_none(info.get('width')),
|
||||
}
|
||||
images.append(image)
|
||||
|
||||
return {
|
||||
'id': compat_str(track.get('id')) or track_id,
|
||||
'display_id': track.get('slug') or display_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnails': images,
|
||||
}
|
116
youtube_dl/extractor/beeg.py
Normal file
116
youtube_dl/extractor/beeg.py
Normal file
|
@ -0,0 +1,116 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class BeegIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?beeg\.(?:com|porn(?:/video)?)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# api/v6 v1
|
||||
'url': 'http://beeg.com/5416503',
|
||||
'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820',
|
||||
'info_dict': {
|
||||
'id': '5416503',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sultry Striptease',
|
||||
'description': 'md5:d22219c09da287c14bed3d6c37ce4bc2',
|
||||
'timestamp': 1391813355,
|
||||
'upload_date': '20140207',
|
||||
'duration': 383,
|
||||
'tags': list,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}, {
|
||||
# api/v6 v2
|
||||
'url': 'https://beeg.com/1941093077?t=911-1391',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# api/v6 v2 w/o t
|
||||
'url': 'https://beeg.com/1277207756',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://beeg.porn/video/5416503',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://beeg.porn/5416503',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
beeg_version = self._search_regex(
|
||||
r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version',
|
||||
default='1546225636701')
|
||||
|
||||
if len(video_id) >= 10:
|
||||
query = {
|
||||
'v': 2,
|
||||
}
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
t = qs.get('t', [''])[0].split('-')
|
||||
if len(t) > 1:
|
||||
query.update({
|
||||
's': t[0],
|
||||
'e': t[1],
|
||||
})
|
||||
else:
|
||||
query = {'v': 1}
|
||||
|
||||
for api_path in ('', 'api.'):
|
||||
video = self._download_json(
|
||||
'https://%sbeeg.com/api/v6/%s/video/%s'
|
||||
% (api_path, beeg_version, video_id), video_id,
|
||||
fatal=api_path == 'api.', query=query)
|
||||
if video:
|
||||
break
|
||||
|
||||
formats = []
|
||||
for format_id, video_url in video.items():
|
||||
if not video_url:
|
||||
continue
|
||||
height = self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None)
|
||||
if not height:
|
||||
continue
|
||||
formats.append({
|
||||
'url': self._proto_relative_url(
|
||||
video_url.replace('{DATA_MARKERS}', 'data=pc_XX__%s_0' % beeg_version), 'https:'),
|
||||
'format_id': format_id,
|
||||
'height': int(height),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = video['title']
|
||||
video_id = compat_str(video.get('id') or video_id)
|
||||
display_id = video.get('code')
|
||||
description = video.get('desc')
|
||||
series = video.get('ps_name')
|
||||
|
||||
timestamp = unified_timestamp(video.get('date'))
|
||||
duration = int_or_none(video.get('duration'))
|
||||
|
||||
tags = [tag.strip() for tag in video['tags'].split(',')] if video.get('tags') else None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'series': series,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'tags': tags,
|
||||
'formats': formats,
|
||||
'age_limit': self._rta_search(webpage),
|
||||
}
|
46
youtube_dl/extractor/behindkink.py
Normal file
46
youtube_dl/extractor/behindkink.py
Normal file
|
@ -0,0 +1,46 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import url_basename
|
||||
|
||||
|
||||
class BehindKinkIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/',
|
||||
'md5': '507b57d8fdcd75a41a9a7bdb7989c762',
|
||||
'info_dict': {
|
||||
'id': '37127',
|
||||
'ext': 'mp4',
|
||||
'title': 'What are you passionate about – Marley Blaze',
|
||||
'description': 'md5:aee8e9611b4ff70186f752975d9b94b4',
|
||||
'upload_date': '20141205',
|
||||
'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'<source src="([^"]+)"', webpage, 'video URL')
|
||||
video_id = url_basename(video_url).split('_')[0]
|
||||
upload_date = mobj.group('year') + mobj.group('month') + mobj.group('day')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'title': self._og_search_title(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'upload_date': upload_date,
|
||||
'age_limit': 18,
|
||||
}
|
88
youtube_dl/extractor/bellmedia.py
Normal file
88
youtube_dl/extractor/bellmedia.py
Normal file
|
@ -0,0 +1,88 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class BellMediaIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?
|
||||
(?P<domain>
|
||||
(?:
|
||||
ctv|
|
||||
tsn|
|
||||
bnn(?:bloomberg)?|
|
||||
thecomedynetwork|
|
||||
discovery|
|
||||
discoveryvelocity|
|
||||
sciencechannel|
|
||||
investigationdiscovery|
|
||||
animalplanet|
|
||||
bravo|
|
||||
mtv|
|
||||
space|
|
||||
etalk|
|
||||
marilyn
|
||||
)\.ca|
|
||||
(?:much|cp24)\.com
|
||||
)/.*?(?:\b(?:vid(?:eoid)?|clipId)=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070',
|
||||
'md5': '36d3ef559cfe8af8efe15922cd3ce950',
|
||||
'info_dict': {
|
||||
'id': '1403070',
|
||||
'ext': 'flv',
|
||||
'title': 'David Cockfield\'s Top Picks',
|
||||
'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3',
|
||||
'upload_date': '20180525',
|
||||
'timestamp': 1527288600,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.tsn.ca/video/expectations-high-for-milos-raonic-at-us-open~939549',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bnn.ca/video/berman-s-call-part-two-viewer-questions~939654',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.much.com/shows/atmidnight/episode948007/tuesday-september-13-2016',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.etalk.ca/video?videoid=663455',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cp24.com/video?clipId=1982548',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_DOMAINS = {
|
||||
'thecomedynetwork': 'comedy',
|
||||
'discoveryvelocity': 'discvel',
|
||||
'sciencechannel': 'discsci',
|
||||
'investigationdiscovery': 'invdisc',
|
||||
'animalplanet': 'aniplan',
|
||||
'etalk': 'ctv',
|
||||
'bnnbloomberg': 'bnn',
|
||||
'marilyn': 'ctv_marilyn',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, video_id = re.match(self._VALID_URL, url).groups()
|
||||
domain = domain.split('.')[0]
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': '9c9media:%s_web:%s' % (self._DOMAINS.get(domain, domain), video_id),
|
||||
'ie_key': 'NineCNineMedia',
|
||||
}
|
80
youtube_dl/extractor/bet.py
Normal file
80
youtube_dl/extractor/bet.py
Normal file
|
@ -0,0 +1,80 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class BetIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
|
||||
'info_dict': {
|
||||
'id': '07e96bd3-8850-3051-b856-271b457f0ab8',
|
||||
'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
|
||||
'ext': 'flv',
|
||||
'title': 'A Conversation With President Obama',
|
||||
'description': 'President Obama urges persistence in confronting racism and bias.',
|
||||
'duration': 1534,
|
||||
'upload_date': '20141208',
|
||||
'thumbnail': r're:(?i)^https?://.*\.jpg$',
|
||||
'subtitles': {
|
||||
'en': 'mincount:2',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
|
||||
'info_dict': {
|
||||
'id': '9f516bf1-7543-39c4-8076-dd441b459ba9',
|
||||
'display_id': 'justice-for-ferguson-a-community-reacts',
|
||||
'ext': 'flv',
|
||||
'title': 'Justice for Ferguson: A Community Reacts',
|
||||
'description': 'A BET News special.',
|
||||
'duration': 1696,
|
||||
'upload_date': '20141125',
|
||||
'thumbnail': r're:(?i)^https?://.*\.jpg$',
|
||||
'subtitles': {
|
||||
'en': 'mincount:2',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
_FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player"
|
||||
|
||||
def _get_feed_query(self, uri):
|
||||
return {
|
||||
'uuid': uri,
|
||||
}
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
return self._search_regex(r'data-uri="([^"]+)', webpage, 'mgid')
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
mgid = self._extract_mgid(webpage)
|
||||
videos_info = self._get_videos_info(mgid)
|
||||
|
||||
info_dict = videos_info['entries'][0]
|
||||
|
||||
upload_date = unified_strdate(self._html_search_meta('date', webpage))
|
||||
description = self._html_search_meta('description', webpage)
|
||||
|
||||
info_dict.update({
|
||||
'display_id': display_id,
|
||||
'description': description,
|
||||
'upload_date': upload_date,
|
||||
})
|
||||
|
||||
return info_dict
|
37
youtube_dl/extractor/bfi.py
Normal file
37
youtube_dl/extractor/bfi.py
Normal file
|
@ -0,0 +1,37 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import extract_attributes
|
||||
|
||||
|
||||
class BFIPlayerIE(InfoExtractor):
|
||||
IE_NAME = 'bfi:player'
|
||||
_VALID_URL = r'https?://player\.bfi\.org\.uk/[^/]+/film/watch-(?P<id>[\w-]+)-online'
|
||||
_TEST = {
|
||||
'url': 'https://player.bfi.org.uk/free/film/watch-computer-doctor-1974-online',
|
||||
'md5': 'e8783ebd8e061ec4bc6e9501ed547de8',
|
||||
'info_dict': {
|
||||
'id': 'htNnhlZjE60C9VySkQEIBtU-cNV1Xx63',
|
||||
'ext': 'mp4',
|
||||
'title': 'Computer Doctor',
|
||||
'description': 'md5:fb6c240d40c4dbe40428bdd62f78203b',
|
||||
},
|
||||
'skip': 'BFI Player films cannot be played outside of the UK',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
entries = []
|
||||
for player_el in re.findall(r'(?s)<[^>]+class="player"[^>]*>', webpage):
|
||||
player_attr = extract_attributes(player_el)
|
||||
ooyala_id = player_attr.get('data-video-id')
|
||||
if not ooyala_id:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
'ooyala:' + ooyala_id, 'Ooyala',
|
||||
ooyala_id, player_attr.get('data-label')))
|
||||
return self.playlist_result(entries)
|
78
youtube_dl/extractor/bigflix.py
Normal file
78
youtube_dl/extractor/bigflix.py
Normal file
|
@ -0,0 +1,78 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
|
||||
|
||||
class BigflixIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
# 2 formats
|
||||
'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070',
|
||||
'info_dict': {
|
||||
'id': '16070',
|
||||
'ext': 'mp4',
|
||||
'title': 'Madarasapatinam',
|
||||
'description': 'md5:9f0470b26a4ba8e824c823b5d95c2f6b',
|
||||
'formats': 'mincount:2',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# multiple formats
|
||||
'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<div[^>]+class=["\']pagetitle["\'][^>]*>(.+?)</div>',
|
||||
webpage, 'title')
|
||||
|
||||
def decode_url(quoted_b64_url):
|
||||
return compat_b64decode(compat_urllib_parse_unquote(
|
||||
quoted_b64_url)).decode('utf-8')
|
||||
|
||||
formats = []
|
||||
for height, encoded_url in re.findall(
|
||||
r'ContentURL_(\d{3,4})[pP][^=]+=([^&]+)', webpage):
|
||||
video_url = decode_url(encoded_url)
|
||||
f = {
|
||||
'url': video_url,
|
||||
'format_id': '%sp' % height,
|
||||
'height': int(height),
|
||||
}
|
||||
if video_url.startswith('rtmp'):
|
||||
f['ext'] = 'flv'
|
||||
formats.append(f)
|
||||
|
||||
file_url = self._search_regex(
|
||||
r'file=([^&]+)', webpage, 'video url', default=None)
|
||||
if file_url:
|
||||
video_url = decode_url(file_url)
|
||||
if all(f['url'] != video_url for f in formats):
|
||||
formats.append({
|
||||
'url': decode_url(file_url),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = self._html_search_meta('description', webpage)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'formats': formats
|
||||
}
|
40
youtube_dl/extractor/bild.py
Normal file
40
youtube_dl/extractor/bild.py
Normal file
|
@ -0,0 +1,40 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class BildIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+)(?:,auto=true)?\.bild\.html'
|
||||
IE_DESC = 'Bild.de'
|
||||
_TEST = {
|
||||
'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html',
|
||||
'md5': 'dd495cbd99f2413502a1713a1156ac8a',
|
||||
'info_dict': {
|
||||
'id': '38184146',
|
||||
'ext': 'mp4',
|
||||
'title': 'Das können die neuen iPads',
|
||||
'description': 'md5:a4058c4fa2a804ab59c00d7244bbf62f',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 196,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video_data = self._download_json(
|
||||
url.split('.bild.html')[0] + ',view=json.bild.html', video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': unescapeHTML(video_data['title']).strip(),
|
||||
'description': unescapeHTML(video_data.get('description')),
|
||||
'url': video_data['clipList'][0]['srces'][0]['src'],
|
||||
'thumbnail': video_data.get('poster'),
|
||||
'duration': int_or_none(video_data.get('durationSec')),
|
||||
}
|
450
youtube_dl/extractor/bilibili.py
Normal file
450
youtube_dl/extractor/bilibili.py
Normal file
|
@ -0,0 +1,450 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
strip_jsonp,
|
||||
unified_timestamp,
|
||||
unsmuggle_url,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class BiliBiliIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:(?:www|bangumi)\.)?
|
||||
bilibili\.(?:tv|com)/
|
||||
(?:
|
||||
(?:
|
||||
video/[aA][vV]|
|
||||
anime/(?P<anime_id>\d+)/play\#
|
||||
)(?P<id_bv>\d+)|
|
||||
video/[bB][vV](?P<id>[^/?#&]+)
|
||||
)
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bilibili.tv/video/av1074402/',
|
||||
'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
|
||||
'info_dict': {
|
||||
'id': '1074402',
|
||||
'ext': 'flv',
|
||||
'title': '【金坷垃】金泡沫',
|
||||
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
|
||||
'duration': 308.067,
|
||||
'timestamp': 1398012678,
|
||||
'upload_date': '20140420',
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'uploader': '菊子桑',
|
||||
'uploader_id': '156160',
|
||||
},
|
||||
}, {
|
||||
# Tested in BiliBiliBangumiIE
|
||||
'url': 'http://bangumi.bilibili.com/anime/1869/play#40062',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
|
||||
'md5': '3f721ad1e75030cc06faf73587cfec57',
|
||||
'info_dict': {
|
||||
'id': '100643',
|
||||
'ext': 'mp4',
|
||||
'title': 'CHAOS;CHILD',
|
||||
'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
|
||||
},
|
||||
'skip': 'Geo-restricted to China',
|
||||
}, {
|
||||
# Title with double quotes
|
||||
'url': 'http://www.bilibili.com/video/av8903802/',
|
||||
'info_dict': {
|
||||
'id': '8903802',
|
||||
'title': '阿滴英文|英文歌分享#6 "Closer',
|
||||
'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '8903802_part1',
|
||||
'ext': 'flv',
|
||||
'title': '阿滴英文|英文歌分享#6 "Closer',
|
||||
'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
|
||||
'uploader': '阿滴英文',
|
||||
'uploader_id': '65880958',
|
||||
'timestamp': 1488382634,
|
||||
'upload_date': '20170301',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Test metadata only
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '8903802_part2',
|
||||
'ext': 'flv',
|
||||
'title': '阿滴英文|英文歌分享#6 "Closer',
|
||||
'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
|
||||
'uploader': '阿滴英文',
|
||||
'uploader_id': '65880958',
|
||||
'timestamp': 1488382634,
|
||||
'upload_date': '20170301',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Test metadata only
|
||||
},
|
||||
}]
|
||||
}, {
|
||||
# new BV video id format
|
||||
'url': 'https://www.bilibili.com/video/BV1JE411F741',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_APP_KEY = 'iVGUTjsxvpLeuDCf'
|
||||
_BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt'
|
||||
|
||||
def _report_error(self, result):
|
||||
if 'message' in result:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, result['message']), expected=True)
|
||||
elif 'code' in result:
|
||||
raise ExtractorError('%s returns error %d' % (self.IE_NAME, result['code']), expected=True)
|
||||
else:
|
||||
raise ExtractorError('Can\'t extract Bangumi episode ID')
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id') or mobj.group('id_bv')
|
||||
anime_id = mobj.group('anime_id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if 'anime/' not in url:
|
||||
cid = self._search_regex(
|
||||
r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
|
||||
default=None
|
||||
) or compat_parse_qs(self._search_regex(
|
||||
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
||||
r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
|
||||
r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
|
||||
webpage, 'player parameters'))['cid'][0]
|
||||
else:
|
||||
if 'no_bangumi_tip' not in smuggled_data:
|
||||
self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run youtube-dlc with %s' % (
|
||||
video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
|
||||
headers = {
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
'Referer': url
|
||||
}
|
||||
headers.update(self.geo_verification_headers())
|
||||
|
||||
js = self._download_json(
|
||||
'http://bangumi.bilibili.com/web_api/get_source', video_id,
|
||||
data=urlencode_postdata({'episode_id': video_id}),
|
||||
headers=headers)
|
||||
if 'result' not in js:
|
||||
self._report_error(js)
|
||||
cid = js['result']['cid']
|
||||
|
||||
headers = {
|
||||
'Referer': url
|
||||
}
|
||||
headers.update(self.geo_verification_headers())
|
||||
|
||||
entries = []
|
||||
|
||||
RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
|
||||
for num, rendition in enumerate(RENDITIONS, start=1):
|
||||
payload = 'appkey=%s&cid=%s&otype=json&%s' % (self._APP_KEY, cid, rendition)
|
||||
sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
|
||||
|
||||
video_info = self._download_json(
|
||||
'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign),
|
||||
video_id, note='Downloading video info page',
|
||||
headers=headers, fatal=num == len(RENDITIONS))
|
||||
|
||||
if not video_info:
|
||||
continue
|
||||
|
||||
if 'durl' not in video_info:
|
||||
if num < len(RENDITIONS):
|
||||
continue
|
||||
self._report_error(video_info)
|
||||
|
||||
for idx, durl in enumerate(video_info['durl']):
|
||||
formats = [{
|
||||
'url': durl['url'],
|
||||
'filesize': int_or_none(durl['size']),
|
||||
}]
|
||||
for backup_url in durl.get('backup_url', []):
|
||||
formats.append({
|
||||
'url': backup_url,
|
||||
# backup URLs have lower priorities
|
||||
'preference': -2 if 'hd.mp4' in backup_url else -3,
|
||||
})
|
||||
|
||||
for a_format in formats:
|
||||
a_format.setdefault('http_headers', {}).update({
|
||||
'Referer': url,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
'id': '%s_part%s' % (video_id, idx),
|
||||
'duration': float_or_none(durl.get('length'), 1000),
|
||||
'formats': formats,
|
||||
})
|
||||
break
|
||||
|
||||
title = self._html_search_regex(
|
||||
('<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
|
||||
'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
|
||||
group='title')
|
||||
description = self._html_search_meta('description', webpage)
|
||||
timestamp = unified_timestamp(self._html_search_regex(
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time',
|
||||
default=None) or self._html_search_meta(
|
||||
'uploadDate', webpage, 'timestamp', default=None))
|
||||
thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)
|
||||
|
||||
# TODO 'view_count' requires deobfuscating Javascript
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': float_or_none(video_info.get('timelength'), scale=1000),
|
||||
}
|
||||
|
||||
uploader_mobj = re.search(
|
||||
r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>(?P<name>[^<]+)',
|
||||
webpage)
|
||||
if uploader_mobj:
|
||||
info.update({
|
||||
'uploader': uploader_mobj.group('name'),
|
||||
'uploader_id': uploader_mobj.group('id'),
|
||||
})
|
||||
if not info.get('uploader'):
|
||||
info['uploader'] = self._html_search_meta(
|
||||
'author', webpage, 'uploader', default=None)
|
||||
|
||||
for entry in entries:
|
||||
entry.update(info)
|
||||
|
||||
if len(entries) == 1:
|
||||
return entries[0]
|
||||
else:
|
||||
for idx, entry in enumerate(entries):
|
||||
entry['id'] = '%s_part%d' % (video_id, (idx + 1))
|
||||
|
||||
return {
|
||||
'_type': 'multi_video',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
|
||||
class BiliBiliBangumiIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P<id>\d+)'
|
||||
|
||||
IE_NAME = 'bangumi.bilibili.com'
|
||||
IE_DESC = 'BiliBili番剧'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://bangumi.bilibili.com/anime/1869',
|
||||
'info_dict': {
|
||||
'id': '1869',
|
||||
'title': '混沌武士',
|
||||
'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
|
||||
},
|
||||
'playlist_count': 26,
|
||||
}, {
|
||||
'url': 'http://bangumi.bilibili.com/anime/1869',
|
||||
'info_dict': {
|
||||
'id': '1869',
|
||||
'title': '混沌武士',
|
||||
'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '91da8621454dd58316851c27c68b0c13',
|
||||
'info_dict': {
|
||||
'id': '40062',
|
||||
'ext': 'mp4',
|
||||
'title': '混沌武士',
|
||||
'description': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子...',
|
||||
'timestamp': 1414538739,
|
||||
'upload_date': '20141028',
|
||||
'episode': '疾风怒涛 Tempestuous Temperaments',
|
||||
'episode_number': 1,
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
'playlist_items': '1',
|
||||
},
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if BiliBiliIE.suitable(url) else super(BiliBiliBangumiIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
bangumi_id = self._match_id(url)
|
||||
|
||||
# Sometimes this API returns a JSONP response
|
||||
season_info = self._download_json(
|
||||
'http://bangumi.bilibili.com/jsonp/seasoninfo/%s.ver' % bangumi_id,
|
||||
bangumi_id, transform_source=strip_jsonp)['result']
|
||||
|
||||
entries = [{
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(episode['webplay_url'], {'no_bangumi_tip': 1}),
|
||||
'ie_key': BiliBiliIE.ie_key(),
|
||||
'timestamp': parse_iso8601(episode.get('update_time'), delimiter=' '),
|
||||
'episode': episode.get('index_title'),
|
||||
'episode_number': int_or_none(episode.get('index')),
|
||||
} for episode in season_info['episodes']]
|
||||
|
||||
entries = sorted(entries, key=lambda entry: entry.get('episode_number'))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, bangumi_id,
|
||||
season_info.get('bangumi_title'), season_info.get('evaluate'))
|
||||
|
||||
|
||||
class BilibiliAudioBaseIE(InfoExtractor):
|
||||
def _call_api(self, path, sid, query=None):
|
||||
if not query:
|
||||
query = {'sid': sid}
|
||||
return self._download_json(
|
||||
'https://www.bilibili.com/audio/music-service-c/web/' + path,
|
||||
sid, query=query)['data']
|
||||
|
||||
|
||||
class BilibiliAudioIE(BilibiliAudioBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.bilibili.com/audio/au1003142',
|
||||
'md5': 'fec4987014ec94ef9e666d4d158ad03b',
|
||||
'info_dict': {
|
||||
'id': '1003142',
|
||||
'ext': 'm4a',
|
||||
'title': '【tsukimi】YELLOW / 神山羊',
|
||||
'artist': 'tsukimi',
|
||||
'comment_count': int,
|
||||
'description': 'YELLOW的mp3版!',
|
||||
'duration': 183,
|
||||
'subtitles': {
|
||||
'origin': [{
|
||||
'ext': 'lrc',
|
||||
}],
|
||||
},
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'timestamp': 1564836614,
|
||||
'upload_date': '20190803',
|
||||
'uploader': 'tsukimi-つきみぐー',
|
||||
'view_count': int,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
au_id = self._match_id(url)
|
||||
|
||||
play_data = self._call_api('url', au_id)
|
||||
formats = [{
|
||||
'url': play_data['cdns'][0],
|
||||
'filesize': int_or_none(play_data.get('size')),
|
||||
}]
|
||||
|
||||
song = self._call_api('song/info', au_id)
|
||||
title = song['title']
|
||||
statistic = song.get('statistic') or {}
|
||||
|
||||
subtitles = None
|
||||
lyric = song.get('lyric')
|
||||
if lyric:
|
||||
subtitles = {
|
||||
'origin': [{
|
||||
'url': lyric,
|
||||
}]
|
||||
}
|
||||
|
||||
return {
|
||||
'id': au_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'artist': song.get('author'),
|
||||
'comment_count': int_or_none(statistic.get('comment')),
|
||||
'description': song.get('intro'),
|
||||
'duration': int_or_none(song.get('duration')),
|
||||
'subtitles': subtitles,
|
||||
'thumbnail': song.get('cover'),
|
||||
'timestamp': int_or_none(song.get('passtime')),
|
||||
'uploader': song.get('uname'),
|
||||
'view_count': int_or_none(statistic.get('play')),
|
||||
}
|
||||
|
||||
|
||||
class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.bilibili.com/audio/am10624',
|
||||
'info_dict': {
|
||||
'id': '10624',
|
||||
'title': '每日新曲推荐(每日11:00更新)',
|
||||
'description': '每天11:00更新,为你推送最新音乐',
|
||||
},
|
||||
'playlist_count': 19,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
am_id = self._match_id(url)
|
||||
|
||||
songs = self._call_api(
|
||||
'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']
|
||||
|
||||
entries = []
|
||||
for song in songs:
|
||||
sid = str_or_none(song.get('id'))
|
||||
if not sid:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
'https://www.bilibili.com/audio/au' + sid,
|
||||
BilibiliAudioIE.ie_key(), sid))
|
||||
|
||||
if entries:
|
||||
album_data = self._call_api('menu/info', am_id) or {}
|
||||
album_title = album_data.get('title')
|
||||
if album_title:
|
||||
for entry in entries:
|
||||
entry['album'] = album_title
|
||||
return self.playlist_result(
|
||||
entries, am_id, album_title, album_data.get('intro'))
|
||||
|
||||
return self.playlist_result(entries, am_id)
|
||||
|
||||
|
||||
class BiliBiliPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
|
||||
'only_matching': True,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
'http://www.bilibili.tv/video/av%s/' % video_id,
|
||||
ie=BiliBiliIE.ie_key(), video_id=video_id)
|
86
youtube_dl/extractor/biobiochiletv.py
Normal file
86
youtube_dl/extractor/biobiochiletv.py
Normal file
|
@ -0,0 +1,86 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class BioBioChileTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:tv|www)\.biobiochile\.cl/(?:notas|noticias)/(?:[^/]+/)+(?P<id>[^/]+)\.shtml'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml',
|
||||
'md5': '26f51f03cf580265defefb4518faec09',
|
||||
'info_dict': {
|
||||
'id': 'sobre-camaras-y-camarillas-parlamentarias',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sobre Cámaras y camarillas parlamentarias',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Fernando Atria',
|
||||
},
|
||||
'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html',
|
||||
}, {
|
||||
# different uploader layout
|
||||
'url': 'http://tv.biobiochile.cl/notas/2016/03/18/natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades.shtml',
|
||||
'md5': 'edc2e6b58974c46d5b047dea3c539ff3',
|
||||
'info_dict': {
|
||||
'id': 'natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades',
|
||||
'ext': 'mp4',
|
||||
'title': 'Natalia Valdebenito repasa a diputado Hasbún: Pasó a la categoría de hablar brutalidades',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Piangella Obrador',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html',
|
||||
}, {
|
||||
'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml',
|
||||
'info_dict': {
|
||||
'id': 'b4xd0LK3SK',
|
||||
'ext': 'mp4',
|
||||
# TODO: fix url_transparent information overriding
|
||||
# 'uploader': 'Juan Pablo Echenique',
|
||||
'title': 'Comentario Oscar Cáceres',
|
||||
},
|
||||
'params': {
|
||||
# empty m3u8 manifest
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://tv.biobiochile.cl/notas/2015/10/21/exclusivo-hector-pinto-formador-de-chupete-revela-version-del-ex-delantero-albo.shtml',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
rudo_url = self._search_regex(
|
||||
r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)',
|
||||
webpage, 'embed URL', None, group='url')
|
||||
if not rudo_url:
|
||||
raise ExtractorError('No videos found')
|
||||
|
||||
title = remove_end(self._og_search_title(webpage), ' - BioBioChile TV')
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
uploader = self._html_search_regex(
|
||||
r'<a[^>]+href=["\'](?:https?://(?:busca|www)\.biobiochile\.cl)?/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>',
|
||||
webpage, 'uploader', fatal=False)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': rudo_url,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
}
|
105
youtube_dl/extractor/biqle.py
Normal file
105
youtube_dl/extractor/biqle.py
Normal file
|
@ -0,0 +1,105 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .vk import VKIE
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class BIQLEIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)'
|
||||
_TESTS = [{
|
||||
# Youtube embed
|
||||
'url': 'https://biqle.ru/watch/-115995369_456239081',
|
||||
'md5': '97af5a06ee4c29bbf9c001bdb1cf5c06',
|
||||
'info_dict': {
|
||||
'id': '8v4f-avW-VI',
|
||||
'ext': 'mp4',
|
||||
'title': "PASSE-PARTOUT - L'ete c'est fait pour jouer",
|
||||
'description': 'Passe-Partout',
|
||||
'uploader_id': 'mrsimpsonstef3',
|
||||
'uploader': 'Phanolito',
|
||||
'upload_date': '20120822',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://biqle.org/watch/-44781847_168547604',
|
||||
'md5': '7f24e72af1db0edf7c1aaba513174f97',
|
||||
'info_dict': {
|
||||
'id': '-44781847_168547604',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ребенок в шоке от автоматической мойки',
|
||||
'timestamp': 1396633454,
|
||||
'uploader': 'Dmitry Kotov',
|
||||
'upload_date': '20140404',
|
||||
'uploader_id': '47850140',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
embed_url = self._proto_relative_url(self._search_regex(
|
||||
r'<iframe.+?src="((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^"]+)".*?></iframe>',
|
||||
webpage, 'embed url'))
|
||||
if VKIE.suitable(embed_url):
|
||||
return self.url_result(embed_url, VKIE.ie_key(), video_id)
|
||||
|
||||
embed_page = self._download_webpage(
|
||||
embed_url, video_id, headers={'Referer': url})
|
||||
video_ext = self._get_cookies(embed_url).get('video_ext')
|
||||
if video_ext:
|
||||
video_ext = compat_urllib_parse_unquote(video_ext.value)
|
||||
if not video_ext:
|
||||
video_ext = compat_b64decode(self._search_regex(
|
||||
r'video_ext\s*:\s*[\'"]([A-Za-z0-9+/=]+)',
|
||||
embed_page, 'video_ext')).decode()
|
||||
video_id, sig, _, access_token = video_ext.split(':')
|
||||
item = self._download_json(
|
||||
'https://api.vk.com/method/video.get', video_id,
|
||||
headers={'User-Agent': 'okhttp/3.4.1'}, query={
|
||||
'access_token': access_token,
|
||||
'sig': sig,
|
||||
'v': 5.44,
|
||||
'videos': video_id,
|
||||
})['response']['items'][0]
|
||||
title = item['title']
|
||||
|
||||
formats = []
|
||||
for f_id, f_url in item.get('files', {}).items():
|
||||
if f_id == 'external':
|
||||
return self.url_result(f_url)
|
||||
ext, height = f_id.split('_')
|
||||
formats.append({
|
||||
'format_id': height + 'p',
|
||||
'url': f_url,
|
||||
'height': int_or_none(height),
|
||||
'ext': ext,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
for k, v in item.items():
|
||||
if k.startswith('photo_') and v:
|
||||
width = k.replace('photo_', '')
|
||||
thumbnails.append({
|
||||
'id': width,
|
||||
'url': v,
|
||||
'width': int_or_none(width),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'comment_count': int_or_none(item.get('comments')),
|
||||
'description': item.get('description'),
|
||||
'duration': int_or_none(item.get('duration')),
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': int_or_none(item.get('date')),
|
||||
'uploader': item.get('owner_id'),
|
||||
'view_count': int_or_none(item.get('views')),
|
||||
}
|
142
youtube_dl/extractor/bitchute.py
Normal file
142
youtube_dl/extractor/bitchute.py
Normal file
|
@ -0,0 +1,142 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
orderedSet,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class BitChuteIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bitchute.com/video/szoMrox2JEI/',
|
||||
'md5': '66c4a70e6bfc40dcb6be3eb1d74939eb',
|
||||
'info_dict': {
|
||||
'id': 'szoMrox2JEI',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fuck bitches get money',
|
||||
'description': 'md5:3f21f6fb5b1d17c3dee9cf6b5fe60b3a',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Victoria X Rave',
|
||||
'upload_date': '20170813',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'https://www.bitchute.com/video/%s' % video_id, video_id, headers={
|
||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
|
||||
})
|
||||
|
||||
title = self._html_search_regex(
|
||||
(r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
|
||||
webpage, 'title', default=None) or self._html_search_meta(
|
||||
'description', webpage, 'title',
|
||||
default=None) or self._og_search_description(webpage)
|
||||
|
||||
format_urls = []
|
||||
for mobj in re.finditer(
|
||||
r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
|
||||
format_urls.append(mobj.group('url'))
|
||||
format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage))
|
||||
|
||||
formats = [
|
||||
{'url': format_url}
|
||||
for format_url in orderedSet(format_urls)]
|
||||
|
||||
if not formats:
|
||||
formats = self._parse_html5_media_entries(
|
||||
url, webpage, video_id)[0]['formats']
|
||||
|
||||
self._check_formats(formats, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div\b[^>]+\bclass=["\']full hidden[^>]+>(.+?)</div>',
|
||||
webpage, 'description', fatal=False)
|
||||
thumbnail = self._og_search_thumbnail(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:image:src', webpage, 'thumbnail')
|
||||
uploader = self._html_search_regex(
|
||||
(r'(?s)<div class=["\']channel-banner.*?<p\b[^>]+\bclass=["\']name[^>]+>(.+?)</p>',
|
||||
r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'),
|
||||
webpage, 'uploader', fatal=False)
|
||||
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'class=["\']video-publish-date[^>]+>[^<]+ at \d+:\d+ UTC on (.+?)\.',
|
||||
webpage, 'upload date', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class BitChuteChannelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/channel/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.bitchute.com/channel/victoriaxrave/',
|
||||
'playlist_mincount': 185,
|
||||
'info_dict': {
|
||||
'id': 'victoriaxrave',
|
||||
},
|
||||
}
|
||||
|
||||
_TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
|
||||
|
||||
def _entries(self, channel_id):
|
||||
channel_url = 'https://www.bitchute.com/channel/%s/' % channel_id
|
||||
offset = 0
|
||||
for page_num in itertools.count(1):
|
||||
data = self._download_json(
|
||||
'%sextend/' % channel_url, channel_id,
|
||||
'Downloading channel page %d' % page_num,
|
||||
data=urlencode_postdata({
|
||||
'csrfmiddlewaretoken': self._TOKEN,
|
||||
'name': '',
|
||||
'offset': offset,
|
||||
}), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
'Referer': channel_url,
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'Cookie': 'csrftoken=%s' % self._TOKEN,
|
||||
})
|
||||
if data.get('success') is False:
|
||||
break
|
||||
html = data.get('html')
|
||||
if not html:
|
||||
break
|
||||
video_ids = re.findall(
|
||||
r'class=["\']channel-videos-image-container[^>]+>\s*<a\b[^>]+\bhref=["\']/video/([^"\'/]+)',
|
||||
html)
|
||||
if not video_ids:
|
||||
break
|
||||
offset += len(video_ids)
|
||||
for video_id in video_ids:
|
||||
yield self.url_result(
|
||||
'https://www.bitchute.com/video/%s' % video_id,
|
||||
ie=BitChuteIE.ie_key(), video_id=video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
return self.playlist_result(
|
||||
self._entries(channel_id), playlist_id=channel_id)
|
106
youtube_dl/extractor/bleacherreport.py
Normal file
106
youtube_dl/extractor/bleacherreport.py
Normal file
|
@ -0,0 +1,106 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .amp import AMPIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class BleacherReportIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/articles/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://bleacherreport.com/articles/2496438-fsu-stat-projections-is-jalen-ramsey-best-defensive-player-in-college-football',
|
||||
'md5': 'a3ffc3dc73afdbc2010f02d98f990f20',
|
||||
'info_dict': {
|
||||
'id': '2496438',
|
||||
'ext': 'mp4',
|
||||
'title': 'FSU Stat Projections: Is Jalen Ramsey Best Defensive Player in College Football?',
|
||||
'uploader_id': 3992341,
|
||||
'description': 'CFB, ACC, Florida State',
|
||||
'timestamp': 1434380212,
|
||||
'upload_date': '20150615',
|
||||
'uploader': 'Team Stream Now ',
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo',
|
||||
'md5': '6a5cd403418c7b01719248ca97fb0692',
|
||||
'info_dict': {
|
||||
'id': '2586817',
|
||||
'ext': 'webm',
|
||||
'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
|
||||
'timestamp': 1446839961,
|
||||
'uploader': 'Sean Fay',
|
||||
'description': 'md5:b1601e2314c4d8eec23b6eafe086a757',
|
||||
'uploader_id': 6466954,
|
||||
'upload_date': '20151011',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
article_id = self._match_id(url)
|
||||
|
||||
article_data = self._download_json('http://api.bleacherreport.com/api/v1/articles/%s' % article_id, article_id)['article']
|
||||
|
||||
thumbnails = []
|
||||
primary_photo = article_data.get('primaryPhoto')
|
||||
if primary_photo:
|
||||
thumbnails = [{
|
||||
'url': primary_photo['url'],
|
||||
'width': primary_photo.get('width'),
|
||||
'height': primary_photo.get('height'),
|
||||
}]
|
||||
|
||||
info = {
|
||||
'_type': 'url_transparent',
|
||||
'id': article_id,
|
||||
'title': article_data['title'],
|
||||
'uploader': article_data.get('author', {}).get('name'),
|
||||
'uploader_id': article_data.get('authorId'),
|
||||
'timestamp': parse_iso8601(article_data.get('createdAt')),
|
||||
'thumbnails': thumbnails,
|
||||
'comment_count': int_or_none(article_data.get('commentsCount')),
|
||||
'view_count': int_or_none(article_data.get('hitCount')),
|
||||
}
|
||||
|
||||
video = article_data.get('video')
|
||||
if video:
|
||||
video_type = video['type']
|
||||
if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'):
|
||||
info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
|
||||
elif video_type == 'ooyala.com':
|
||||
info['url'] = 'ooyala:%s' % video['id']
|
||||
elif video_type == 'youtube.com':
|
||||
info['url'] = video['id']
|
||||
elif video_type == 'vine.co':
|
||||
info['url'] = 'https://vine.co/v/%s' % video['id']
|
||||
else:
|
||||
info['url'] = video_type + video['id']
|
||||
return info
|
||||
else:
|
||||
raise ExtractorError('no video in the article', expected=True)
|
||||
|
||||
|
||||
class BleacherReportCMSIE(AMPIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})'
|
||||
_TESTS = [{
|
||||
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms',
|
||||
'md5': '2e4b0a997f9228ffa31fada5c53d1ed1',
|
||||
'info_dict': {
|
||||
'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
|
||||
'ext': 'flv',
|
||||
'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
|
||||
'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
info = self._extract_feed_info('http://vid.bleacherreport.com/videos/%s.akamai' % video_id)
|
||||
info['id'] = video_id
|
||||
return info
|
86
youtube_dl/extractor/blinkx.py
Normal file
86
youtube_dl/extractor/blinkx.py
Normal file
|
@ -0,0 +1,86 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
remove_start,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BlinkxIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
|
||||
IE_NAME = 'blinkx'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ',
|
||||
'md5': '337cf7a344663ec79bf93a526a2e06c7',
|
||||
'info_dict': {
|
||||
'id': 'Da0Gw3xc',
|
||||
'ext': 'mp4',
|
||||
'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News',
|
||||
'uploader': 'IGN News',
|
||||
'upload_date': '20150217',
|
||||
'timestamp': 1424215740,
|
||||
'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.',
|
||||
'duration': 47.743333,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
display_id = video_id[:8]
|
||||
|
||||
api_url = ('https://apib4.blinkx.com/api.php?action=play_video&'
|
||||
+ 'video=%s' % video_id)
|
||||
data_json = self._download_webpage(api_url, display_id)
|
||||
data = json.loads(data_json)['api']['results'][0]
|
||||
duration = None
|
||||
thumbnails = []
|
||||
formats = []
|
||||
for m in data['media']:
|
||||
if m['type'] == 'jpg':
|
||||
thumbnails.append({
|
||||
'url': m['link'],
|
||||
'width': int(m['w']),
|
||||
'height': int(m['h']),
|
||||
})
|
||||
elif m['type'] == 'original':
|
||||
duration = float(m['d'])
|
||||
elif m['type'] == 'youtube':
|
||||
yt_id = m['link']
|
||||
self.to_screen('Youtube video detected: %s' % yt_id)
|
||||
return self.url_result(yt_id, 'Youtube', video_id=yt_id)
|
||||
elif m['type'] in ('flv', 'mp4'):
|
||||
vcodec = remove_start(m['vcodec'], 'ff')
|
||||
acodec = remove_start(m['acodec'], 'ff')
|
||||
vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000)
|
||||
abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000)
|
||||
tbr = vbr + abr if vbr and abr else None
|
||||
format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': m['link'],
|
||||
'vcodec': vcodec,
|
||||
'acodec': acodec,
|
||||
'abr': abr,
|
||||
'vbr': vbr,
|
||||
'tbr': tbr,
|
||||
'width': int_or_none(m.get('w')),
|
||||
'height': int_or_none(m.get('h')),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': display_id,
|
||||
'fullid': video_id,
|
||||
'title': data['title'],
|
||||
'formats': formats,
|
||||
'uploader': data['channel_name'],
|
||||
'timestamp': data['pubdate_epoch'],
|
||||
'description': data.get('description'),
|
||||
'thumbnails': thumbnails,
|
||||
'duration': duration,
|
||||
}
|
83
youtube_dl/extractor/bloomberg.py
Normal file
83
youtube_dl/extractor/bloomberg.py
Normal file
|
@ -0,0 +1,83 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class BloombergIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bloomberg\.com/(?:[^/]+/)*(?P<id>[^/?#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
|
||||
# The md5 checksum changes
|
||||
'info_dict': {
|
||||
'id': 'qurhIVlJSB6hzkVi229d8g',
|
||||
'ext': 'flv',
|
||||
'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
|
||||
'description': 'md5:a8ba0302912d03d246979735c17d2761',
|
||||
},
|
||||
'params': {
|
||||
'format': 'best[format_id^=hds]',
|
||||
},
|
||||
}, {
|
||||
# video ID in BPlayer(...)
|
||||
'url': 'http://www.bloomberg.com/features/2016-hello-world-new-zealand/',
|
||||
'info_dict': {
|
||||
'id': '938c7e72-3f25-4ddb-8b85-a9be731baa74',
|
||||
'ext': 'flv',
|
||||
'title': 'Meet the Real-Life Tech Wizards of Middle Earth',
|
||||
'description': 'Hello World, Episode 1: New Zealand’s freaky AI babies, robot exoskeletons, and a virtual you.',
|
||||
},
|
||||
'params': {
|
||||
'format': 'best[format_id^=hds]',
|
||||
},
|
||||
}, {
|
||||
# data-bmmrid=
|
||||
'url': 'https://www.bloomberg.com/politics/articles/2017-02-08/le-pen-aide-briefed-french-central-banker-on-plan-to-print-money',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bloomberg.com/politics/videos/2015-11-25/karl-rove-on-jeb-bush-s-struggles-stopping-trump',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
name = self._match_id(url)
|
||||
webpage = self._download_webpage(url, name)
|
||||
video_id = self._search_regex(
|
||||
(r'["\']bmmrId["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||
r'videoId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||
r'data-bmmrid=(["\'])(?P<id>(?:(?!\1).)+)\1'),
|
||||
webpage, 'id', group='id', default=None)
|
||||
if not video_id:
|
||||
bplayer_data = self._parse_json(self._search_regex(
|
||||
r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name)
|
||||
video_id = bplayer_data['id']
|
||||
title = re.sub(': Video$', '', self._og_search_title(webpage))
|
||||
|
||||
embed_info = self._download_json(
|
||||
'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id)
|
||||
formats = []
|
||||
for stream in embed_info['streams']:
|
||||
stream_url = stream.get('url')
|
||||
if not stream_url:
|
||||
continue
|
||||
if stream['muxing_format'] == 'TS':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stream_url, video_id, f4m_id='hds', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
60
youtube_dl/extractor/bokecc.py
Normal file
60
youtube_dl/extractor/bokecc.py
Normal file
|
@ -0,0 +1,60 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_parse_qs
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class BokeCCBaseIE(InfoExtractor):
|
||||
def _extract_bokecc_formats(self, webpage, video_id, format_id=None):
|
||||
player_params_str = self._html_search_regex(
|
||||
r'<(?:script|embed)[^>]+src=(?P<q>["\'])(?:https?:)?//p\.bokecc\.com/(?:player|flash/player\.swf)\?(?P<query>.+?)(?P=q)',
|
||||
webpage, 'player params', group='query')
|
||||
|
||||
player_params = compat_parse_qs(player_params_str)
|
||||
|
||||
info_xml = self._download_xml(
|
||||
'http://p.bokecc.com/servlet/playinfo?uid=%s&vid=%s&m=1' % (
|
||||
player_params['siteid'][0], player_params['vid'][0]), video_id)
|
||||
|
||||
formats = [{
|
||||
'format_id': format_id,
|
||||
'url': quality.find('./copy').attrib['playurl'],
|
||||
'preference': int(quality.attrib['value']),
|
||||
} for quality in info_xml.findall('./video/quality')]
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return formats
|
||||
|
||||
|
||||
class BokeCCIE(BokeCCBaseIE):
|
||||
_IE_DESC = 'CC视频'
|
||||
_VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://union.bokecc.com/playvideo.bo?vid=E0ABAE9D4F509B189C33DC5901307461&uid=FE644790DE9D154A',
|
||||
'info_dict': {
|
||||
'id': 'FE644790DE9D154A_E0ABAE9D4F509B189C33DC5901307461',
|
||||
'ext': 'flv',
|
||||
'title': 'BokeCC Video',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
qs = compat_parse_qs(re.match(self._VALID_URL, url).group('query'))
|
||||
if not qs.get('vid') or not qs.get('uid'):
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
|
||||
video_id = '%s_%s' % (qs['uid'][0], qs['vid'][0])
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': 'BokeCC Video', # no title provided in the webpage
|
||||
'formats': self._extract_bokecc_formats(webpage, video_id),
|
||||
}
|
72
youtube_dl/extractor/bostonglobe.py
Normal file
72
youtube_dl/extractor/bostonglobe.py
Normal file
|
@ -0,0 +1,72 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
)
|
||||
|
||||
|
||||
class BostonGlobeIE(InfoExtractor):
|
||||
_VALID_URL = r'(?i)https?://(?:www\.)?bostonglobe\.com/.*/(?P<id>[^/]+)/\w+(?:\.html)?'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.bostonglobe.com/metro/2017/02/11/tree-finally-succumbs-disease-leaving-hole-neighborhood/h1b4lviqzMTIn9sVy8F3gP/story.html',
|
||||
'md5': '0a62181079c85c2d2b618c9a738aedaf',
|
||||
'info_dict': {
|
||||
'title': 'A tree finally succumbs to disease, leaving a hole in a neighborhood',
|
||||
'id': '5320421710001',
|
||||
'ext': 'mp4',
|
||||
'description': 'It arrived as a sapling when the Back Bay was in its infancy, a spindly American elm tamped down into a square of dirt cut into the brick sidewalk of 1880s Marlborough Street, no higher than the first bay window of the new brownstone behind it.',
|
||||
'timestamp': 1486877593,
|
||||
'upload_date': '20170212',
|
||||
'uploader_id': '245991542',
|
||||
},
|
||||
},
|
||||
{
|
||||
# Embedded youtube video; we hand it off to the Generic extractor.
|
||||
'url': 'https://www.bostonglobe.com/lifestyle/names/2017/02/17/does-ben-affleck-play-matt-damon-favorite-version-batman/ruqkc9VxKBYmh5txn1XhSI/story.html',
|
||||
'md5': '582b40327089d5c0c949b3c54b13c24b',
|
||||
'info_dict': {
|
||||
'title': "Who Is Matt Damon's Favorite Batman?",
|
||||
'id': 'ZW1QCnlA6Qc',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20170217',
|
||||
'description': 'md5:3b3dccb9375867e0b4d527ed87d307cb',
|
||||
'uploader': 'The Late Late Show with James Corden',
|
||||
'uploader_id': 'TheLateLateShow',
|
||||
},
|
||||
'expected_warnings': ['404'],
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
|
||||
page_title = self._og_search_title(webpage, default=None)
|
||||
|
||||
# <video data-brightcove-video-id="5320421710001" data-account="245991542" data-player="SJWAiyYWg" data-embed="default" class="video-js" controls itemscope itemtype="http://schema.org/VideoObject">
|
||||
entries = []
|
||||
for video in re.findall(r'(?i)(<video[^>]+>)', webpage):
|
||||
attrs = extract_attributes(video)
|
||||
|
||||
video_id = attrs.get('data-brightcove-video-id')
|
||||
account_id = attrs.get('data-account')
|
||||
player_id = attrs.get('data-player')
|
||||
embed = attrs.get('data-embed')
|
||||
|
||||
if video_id and account_id and player_id and embed:
|
||||
entries.append(
|
||||
'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
|
||||
% (account_id, player_id, embed, video_id))
|
||||
|
||||
if len(entries) == 0:
|
||||
return self.url_result(url, 'Generic')
|
||||
elif len(entries) == 1:
|
||||
return self.url_result(entries[0], 'BrightcoveNew')
|
||||
else:
|
||||
return self.playlist_from_matches(entries, page_id, page_title, ie='BrightcoveNew')
|
62
youtube_dl/extractor/bpb.py
Normal file
62
youtube_dl/extractor/bpb.py
Normal file
|
@ -0,0 +1,62 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class BpbIE(InfoExtractor):
|
||||
IE_DESC = 'Bundeszentrale für politische Bildung'
|
||||
_VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P<id>[0-9]+)/'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
|
||||
# md5 fails in Python 2.6 due to buggy server response and wrong handling of urllib2
|
||||
'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f',
|
||||
'info_dict': {
|
||||
'id': '297',
|
||||
'ext': 'mp4',
|
||||
'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
|
||||
'description': 'Joachim Gauck, erster Beauftragter für die Stasi-Unterlagen, spricht auf dem Geschichtsforum über die friedliche Revolution 1989 und eine "gewisse Traurigkeit" im Umgang mit der DDR-Vergangenheit.'
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h2 class="white">(.*?)</h2>', webpage, 'title')
|
||||
video_info_dicts = re.findall(
|
||||
r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage)
|
||||
|
||||
formats = []
|
||||
for video_info in video_info_dicts:
|
||||
video_info = self._parse_json(
|
||||
video_info, video_id, transform_source=js_to_json, fatal=False)
|
||||
if not video_info:
|
||||
continue
|
||||
video_url = video_info.get('src')
|
||||
if not video_url:
|
||||
continue
|
||||
quality = 'high' if '_high' in video_url else 'low'
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'preference': 10 if quality == 'high' else 0,
|
||||
'format_note': quality,
|
||||
'format_id': '%s-%s' % (quality, determine_ext(video_url)),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
}
|
311
youtube_dl/extractor/br.py
Normal file
311
youtube_dl/extractor/br.py
Normal file
|
@ -0,0 +1,311 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class BRIE(InfoExtractor):
|
||||
IE_DESC = 'Bayerischer Rundfunk'
|
||||
_VALID_URL = r'(?P<base_url>https?://(?:www\.)?br(?:-klassik)?\.de)/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.br.de/mediathek/video/sendungen/abendschau/betriebliche-altersvorsorge-104.html',
|
||||
'md5': '83a0477cf0b8451027eb566d88b51106',
|
||||
'info_dict': {
|
||||
'id': '48f656ef-287e-486f-be86-459122db22cc',
|
||||
'ext': 'mp4',
|
||||
'title': 'Die böse Überraschung',
|
||||
'description': 'md5:ce9ac81b466ce775b8018f6801b48ac9',
|
||||
'duration': 180,
|
||||
'uploader': 'Reinhard Weber',
|
||||
'upload_date': '20150422',
|
||||
},
|
||||
'skip': '404 not found',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.br.de/nachrichten/oberbayern/inhalt/muenchner-polizeipraesident-schreiber-gestorben-100.html',
|
||||
'md5': 'af3a3a4aa43ff0ce6a89504c67f427ef',
|
||||
'info_dict': {
|
||||
'id': 'a4b83e34-123d-4b81-9f4e-c0d3121a4e05',
|
||||
'ext': 'flv',
|
||||
'title': 'Manfred Schreiber ist tot',
|
||||
'description': 'md5:b454d867f2a9fc524ebe88c3f5092d97',
|
||||
'duration': 26,
|
||||
},
|
||||
'skip': '404 not found',
|
||||
},
|
||||
{
|
||||
'url': 'https://www.br-klassik.de/audio/peeping-tom-premierenkritik-dance-festival-muenchen-100.html',
|
||||
'md5': '8b5b27c0b090f3b35eac4ab3f7a73d3d',
|
||||
'info_dict': {
|
||||
'id': '74c603c9-26d3-48bb-b85b-079aeed66e0b',
|
||||
'ext': 'aac',
|
||||
'title': 'Kurzweilig und sehr bewegend',
|
||||
'description': 'md5:0351996e3283d64adeb38ede91fac54e',
|
||||
'duration': 296,
|
||||
},
|
||||
'skip': '404 not found',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.br.de/radio/bayern1/service/team/videos/team-video-erdelt100.html',
|
||||
'md5': 'dbab0aef2e047060ea7a21fc1ce1078a',
|
||||
'info_dict': {
|
||||
'id': '6ba73750-d405-45d3-861d-1ce8c524e059',
|
||||
'ext': 'mp4',
|
||||
'title': 'Umweltbewusster Häuslebauer',
|
||||
'description': 'md5:d52dae9792d00226348c1dbb13c9bae2',
|
||||
'duration': 116,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.br.de/fernsehen/br-alpha/sendungen/kant-fuer-anfaenger/kritik-der-reinen-vernunft/kant-kritik-01-metaphysik100.html',
|
||||
'md5': '23bca295f1650d698f94fc570977dae3',
|
||||
'info_dict': {
|
||||
'id': 'd982c9ce-8648-4753-b358-98abb8aec43d',
|
||||
'ext': 'mp4',
|
||||
'title': 'Folge 1 - Metaphysik',
|
||||
'description': 'md5:bb659990e9e59905c3d41e369db1fbe3',
|
||||
'duration': 893,
|
||||
'uploader': 'Eva Maria Steimle',
|
||||
'upload_date': '20170208',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
base_url, display_id = re.search(self._VALID_URL, url).groups()
|
||||
page = self._download_webpage(url, display_id)
|
||||
xml_url = self._search_regex(
|
||||
r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL')
|
||||
xml = self._download_xml(base_url + xml_url, display_id)
|
||||
|
||||
medias = []
|
||||
|
||||
for xml_media in xml.findall('video') + xml.findall('audio'):
|
||||
media_id = xml_media.get('externalId')
|
||||
media = {
|
||||
'id': media_id,
|
||||
'title': xpath_text(xml_media, 'title', 'title', True),
|
||||
'duration': parse_duration(xpath_text(xml_media, 'duration')),
|
||||
'formats': self._extract_formats(xpath_element(
|
||||
xml_media, 'assets'), media_id),
|
||||
'thumbnails': self._extract_thumbnails(xpath_element(
|
||||
xml_media, 'teaserImage/variants'), base_url),
|
||||
'description': xpath_text(xml_media, 'desc'),
|
||||
'webpage_url': xpath_text(xml_media, 'permalink'),
|
||||
'uploader': xpath_text(xml_media, 'author'),
|
||||
}
|
||||
broadcast_date = xpath_text(xml_media, 'broadcastDate')
|
||||
if broadcast_date:
|
||||
media['upload_date'] = ''.join(reversed(broadcast_date.split('.')))
|
||||
medias.append(media)
|
||||
|
||||
if len(medias) > 1:
|
||||
self._downloader.report_warning(
|
||||
'found multiple medias; please '
|
||||
'report this with the video URL to http://yt-dl.org/bug')
|
||||
if not medias:
|
||||
raise ExtractorError('No media entries found')
|
||||
return medias[0]
|
||||
|
||||
def _extract_formats(self, assets, media_id):
|
||||
formats = []
|
||||
for asset in assets.findall('asset'):
|
||||
format_url = xpath_text(asset, ['downloadUrl', 'url'])
|
||||
asset_type = asset.get('type')
|
||||
if asset_type.startswith('HDS'):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url + '?hdcore=3.2.0', media_id, f4m_id='hds', fatal=False))
|
||||
elif asset_type.startswith('HLS'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, media_id, 'mp4', 'm3u8_native', m3u8_id='hds', fatal=False))
|
||||
else:
|
||||
format_info = {
|
||||
'ext': xpath_text(asset, 'mediaType'),
|
||||
'width': int_or_none(xpath_text(asset, 'frameWidth')),
|
||||
'height': int_or_none(xpath_text(asset, 'frameHeight')),
|
||||
'tbr': int_or_none(xpath_text(asset, 'bitrateVideo')),
|
||||
'abr': int_or_none(xpath_text(asset, 'bitrateAudio')),
|
||||
'vcodec': xpath_text(asset, 'codecVideo'),
|
||||
'acodec': xpath_text(asset, 'codecAudio'),
|
||||
'container': xpath_text(asset, 'mediaType'),
|
||||
'filesize': int_or_none(xpath_text(asset, 'size')),
|
||||
}
|
||||
format_url = self._proto_relative_url(format_url)
|
||||
if format_url:
|
||||
http_format_info = format_info.copy()
|
||||
http_format_info.update({
|
||||
'url': format_url,
|
||||
'format_id': 'http-%s' % asset_type,
|
||||
})
|
||||
formats.append(http_format_info)
|
||||
server_prefix = xpath_text(asset, 'serverPrefix')
|
||||
if server_prefix:
|
||||
rtmp_format_info = format_info.copy()
|
||||
rtmp_format_info.update({
|
||||
'url': server_prefix,
|
||||
'play_path': xpath_text(asset, 'fileName'),
|
||||
'format_id': 'rtmp-%s' % asset_type,
|
||||
})
|
||||
formats.append(rtmp_format_info)
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
def _extract_thumbnails(self, variants, base_url):
|
||||
thumbnails = [{
|
||||
'url': base_url + xpath_text(variant, 'url'),
|
||||
'width': int_or_none(xpath_text(variant, 'width')),
|
||||
'height': int_or_none(xpath_text(variant, 'height')),
|
||||
} for variant in variants.findall('variant') if xpath_text(variant, 'url')]
|
||||
thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True)
|
||||
return thumbnails
|
||||
|
||||
|
||||
class BRMediathekIE(InfoExtractor):
|
||||
IE_DESC = 'Bayerischer Rundfunk Mediathek'
|
||||
_VALID_URL = r'https?://(?:www\.)?br\.de/mediathek/video/[^/?&#]*?-(?P<id>av:[0-9a-f]{24})'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.br.de/mediathek/video/gesundheit-die-sendung-vom-28112017-av:5a1e6a6e8fce6d001871cc8e',
|
||||
'md5': 'fdc3d485835966d1622587d08ba632ec',
|
||||
'info_dict': {
|
||||
'id': 'av:5a1e6a6e8fce6d001871cc8e',
|
||||
'ext': 'mp4',
|
||||
'title': 'Die Sendung vom 28.11.2017',
|
||||
'description': 'md5:6000cdca5912ab2277e5b7339f201ccc',
|
||||
'timestamp': 1511942766,
|
||||
'upload_date': '20171129',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
clip_id = self._match_id(url)
|
||||
|
||||
clip = self._download_json(
|
||||
'https://proxy-base.master.mango.express/graphql',
|
||||
clip_id, data=json.dumps({
|
||||
"query": """{
|
||||
viewer {
|
||||
clip(id: "%s") {
|
||||
title
|
||||
description
|
||||
duration
|
||||
createdAt
|
||||
ageRestriction
|
||||
videoFiles {
|
||||
edges {
|
||||
node {
|
||||
publicLocation
|
||||
fileSize
|
||||
videoProfile {
|
||||
width
|
||||
height
|
||||
bitrate
|
||||
encoding
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
captionFiles {
|
||||
edges {
|
||||
node {
|
||||
publicLocation
|
||||
}
|
||||
}
|
||||
}
|
||||
teaserImages {
|
||||
edges {
|
||||
node {
|
||||
imageFiles {
|
||||
edges {
|
||||
node {
|
||||
publicLocation
|
||||
width
|
||||
height
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}""" % clip_id}).encode(), headers={
|
||||
'Content-Type': 'application/json',
|
||||
})['data']['viewer']['clip']
|
||||
title = clip['title']
|
||||
|
||||
formats = []
|
||||
for edge in clip.get('videoFiles', {}).get('edges', []):
|
||||
node = edge.get('node', {})
|
||||
n_url = node.get('publicLocation')
|
||||
if not n_url:
|
||||
continue
|
||||
ext = determine_ext(n_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
n_url, clip_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
video_profile = node.get('videoProfile', {})
|
||||
tbr = int_or_none(video_profile.get('bitrate'))
|
||||
format_id = 'http'
|
||||
if tbr:
|
||||
format_id += '-%d' % tbr
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': n_url,
|
||||
'width': int_or_none(video_profile.get('width')),
|
||||
'height': int_or_none(video_profile.get('height')),
|
||||
'tbr': tbr,
|
||||
'filesize': int_or_none(node.get('fileSize')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for edge in clip.get('captionFiles', {}).get('edges', []):
|
||||
node = edge.get('node', {})
|
||||
n_url = node.get('publicLocation')
|
||||
if not n_url:
|
||||
continue
|
||||
subtitles.setdefault('de', []).append({
|
||||
'url': n_url,
|
||||
})
|
||||
|
||||
thumbnails = []
|
||||
for edge in clip.get('teaserImages', {}).get('edges', []):
|
||||
for image_edge in edge.get('node', {}).get('imageFiles', {}).get('edges', []):
|
||||
node = image_edge.get('node', {})
|
||||
n_url = node.get('publicLocation')
|
||||
if not n_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': n_url,
|
||||
'width': int_or_none(node.get('width')),
|
||||
'height': int_or_none(node.get('height')),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': clip_id,
|
||||
'title': title,
|
||||
'description': clip.get('description'),
|
||||
'duration': int_or_none(clip.get('duration')),
|
||||
'timestamp': parse_iso8601(clip.get('createdAt')),
|
||||
'age_limit': int_or_none(clip.get('ageRestriction')),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnails': thumbnails,
|
||||
}
|
84
youtube_dl/extractor/bravotv.py
Normal file
84
youtube_dl/extractor/bravotv.py
Normal file
|
@ -0,0 +1,84 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BravoTVIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
|
||||
'md5': 'e34684cfea2a96cd2ee1ef3a60909de9',
|
||||
'info_dict': {
|
||||
'id': 'epL0pmK1kQlT',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Top Chef Season 16 Winner Is...',
|
||||
'description': 'Find out who takes the title of Top Chef!',
|
||||
'uploader': 'NBCU-BRAV',
|
||||
'upload_date': '20190314',
|
||||
'timestamp': 1552591860,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
settings = self._parse_json(self._search_regex(
|
||||
r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'),
|
||||
display_id)
|
||||
info = {}
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
}
|
||||
account_pid, release_pid = [None] * 2
|
||||
tve = settings.get('ls_tve')
|
||||
if tve:
|
||||
query['manifest'] = 'm3u'
|
||||
mobj = re.search(r'<[^>]+id="pdk-player"[^>]+data-url=["\']?(?:https?:)?//player\.theplatform\.com/p/([^/]+)/(?:[^/]+/)*select/([^?#&"\']+)', webpage)
|
||||
if mobj:
|
||||
account_pid, tp_path = mobj.groups()
|
||||
release_pid = tp_path.strip('/').split('/')[-1]
|
||||
else:
|
||||
account_pid = 'HNK2IC'
|
||||
tp_path = release_pid = tve['release_pid']
|
||||
if tve.get('entitlement') == 'auth':
|
||||
adobe_pass = settings.get('tve_adobe_auth', {})
|
||||
resource = self._get_mvpd_resource(
|
||||
adobe_pass.get('adobePassResourceId', 'bravo'),
|
||||
tve['title'], release_pid, tve.get('rating'))
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, release_pid, adobe_pass.get('adobePassRequestorId', 'bravo'), resource)
|
||||
else:
|
||||
shared_playlist = settings['ls_playlist']
|
||||
account_pid = shared_playlist['account_pid']
|
||||
metadata = shared_playlist['video_metadata'][shared_playlist['default_clip']]
|
||||
tp_path = release_pid = metadata.get('release_pid')
|
||||
if not release_pid:
|
||||
release_pid = metadata['guid']
|
||||
tp_path = 'media/guid/2140479951/' + release_pid
|
||||
info.update({
|
||||
'title': metadata['title'],
|
||||
'description': metadata.get('description'),
|
||||
'season_number': int_or_none(metadata.get('season_num')),
|
||||
'episode_number': int_or_none(metadata.get('episode_num')),
|
||||
})
|
||||
query['switch'] = 'progressive'
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'id': release_pid,
|
||||
'url': smuggle_url(update_url_query(
|
||||
'http://link.theplatform.com/s/%s/%s' % (account_pid, tp_path),
|
||||
query), {'force_smil_url': True}),
|
||||
'ie_key': 'ThePlatform',
|
||||
})
|
||||
return info
|
91
youtube_dl/extractor/breakcom.py
Normal file
91
youtube_dl/extractor/breakcom.py
Normal file
|
@ -0,0 +1,91 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BreakIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?break\.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',
|
||||
'info_dict': {
|
||||
'id': '2468056',
|
||||
'ext': 'mp4',
|
||||
'title': 'When Girls Act Like D-Bags',
|
||||
'age_limit': 13,
|
||||
},
|
||||
}, {
|
||||
# youtube embed
|
||||
'url': 'http://www.break.com/video/someone-forgot-boat-brakes-work',
|
||||
'info_dict': {
|
||||
'id': 'RrrDLdeL2HQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Whale Watching Boat Crashing Into San Diego Dock',
|
||||
'description': 'md5:afc1b2772f0a8468be51dd80eb021069',
|
||||
'upload_date': '20160331',
|
||||
'uploader': 'Steve Holden',
|
||||
'uploader_id': 'sdholden07',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.break.com/video/ugc/baby-flex-2773063',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
youtube_url = YoutubeIE._extract_url(webpage)
|
||||
if youtube_url:
|
||||
return self.url_result(youtube_url, ie=YoutubeIE.ie_key())
|
||||
|
||||
content = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)content["\']\s*:\s*(\[.+?\])\s*[,\n]', webpage,
|
||||
'content'),
|
||||
display_id)
|
||||
|
||||
formats = []
|
||||
for video in content:
|
||||
video_url = url_or_none(video.get('url'))
|
||||
if not video_url:
|
||||
continue
|
||||
bitrate = int_or_none(self._search_regex(
|
||||
r'(\d+)_kbps', video_url, 'tbr', default=None))
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': 'http-%d' % bitrate if bitrate else 'http',
|
||||
'tbr': bitrate,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._search_regex(
|
||||
(r'title["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
|
||||
r'<h1[^>]*>(?P<value>[^<]+)'), webpage, 'title', group='value')
|
||||
|
||||
def get(key, name):
|
||||
return int_or_none(self._search_regex(
|
||||
r'%s["\']\s*:\s*["\'](\d+)' % key, webpage, name,
|
||||
default=None))
|
||||
|
||||
age_limit = get('ratings', 'age limit')
|
||||
video_id = video_id or get('pid', 'video id') or display_id
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
}
|
677
youtube_dl/extractor/brightcove.py
Normal file
677
youtube_dl/extractor/brightcove.py
Normal file
|
@ -0,0 +1,677 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import re
|
||||
import struct
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_HTTPError,
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
compat_xml_parse_error,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BrightcoveLegacyIE(InfoExtractor):
|
||||
IE_NAME = 'brightcove:legacy'
|
||||
_VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
# From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
|
||||
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
|
||||
'md5': '5423e113865d26e40624dce2e4b45d95',
|
||||
'note': 'Test Brightcove downloads and detection in GenericIE',
|
||||
'info_dict': {
|
||||
'id': '2371591881001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
|
||||
'uploader': '8TV',
|
||||
'description': 'md5:a950cc4285c43e44d763d036710cd9cd',
|
||||
'timestamp': 1368213670,
|
||||
'upload_date': '20130510',
|
||||
'uploader_id': '1589608506001',
|
||||
},
|
||||
'skip': 'The player has been deactivated by the content owner',
|
||||
},
|
||||
{
|
||||
# From http://medianetwork.oracle.com/video/player/1785452137001
|
||||
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001',
|
||||
'info_dict': {
|
||||
'id': '1785452137001',
|
||||
'ext': 'flv',
|
||||
'title': 'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges',
|
||||
'description': 'John Rose speaks at the JVM Language Summit, August 1, 2012.',
|
||||
'uploader': 'Oracle',
|
||||
'timestamp': 1344975024,
|
||||
'upload_date': '20120814',
|
||||
'uploader_id': '1460825906',
|
||||
},
|
||||
'skip': 'video not playable',
|
||||
},
|
||||
{
|
||||
# From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
|
||||
'url': 'http://c.brightcove.com/services/viewer/federated_f9?&playerID=1265504713001&publisherID=AQ%7E%7E%2CAAABBzUwv1E%7E%2CxP-xFHVUstiMFlNYfvF4G9yFnNaqCw_9&videoID=2750934548001',
|
||||
'info_dict': {
|
||||
'id': '2750934548001',
|
||||
'ext': 'mp4',
|
||||
'title': 'This Bracelet Acts as a Personal Thermostat',
|
||||
'description': 'md5:547b78c64f4112766ccf4e151c20b6a0',
|
||||
# 'uploader': 'Mashable',
|
||||
'timestamp': 1382041798,
|
||||
'upload_date': '20131017',
|
||||
'uploader_id': '1130468786001',
|
||||
},
|
||||
},
|
||||
{
|
||||
# test that the default referer works
|
||||
# from http://national.ballet.ca/interact/video/Lost_in_Motion_II/
|
||||
'url': 'http://link.brightcove.com/services/player/bcpid756015033001?bckey=AQ~~,AAAApYJi_Ck~,GxhXCegT1Dp39ilhXuxMJxasUhVNZiil&bctid=2878862109001',
|
||||
'info_dict': {
|
||||
'id': '2878862109001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Lost in Motion II',
|
||||
'description': 'md5:363109c02998fee92ec02211bd8000df',
|
||||
'uploader': 'National Ballet of Canada',
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
},
|
||||
{
|
||||
# test flv videos served by akamaihd.net
|
||||
# From http://www.redbull.com/en/bike/stories/1331655643987/replay-uci-dh-world-cup-2014-from-fort-william
|
||||
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?%40videoPlayer=ref%3Aevent-stream-356&linkBaseURL=http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fvideos%2F1331655630249%2Freplay-uci-fort-william-2014-dh&playerKey=AQ%7E%7E%2CAAAApYJ7UqE%7E%2Cxqr_zXk0I-zzNndy8NlHogrCb5QdyZRf&playerID=1398061561001#__youtubedl_smuggle=%7B%22Referer%22%3A+%22http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fstories%2F1331655643987%2Freplay-uci-dh-world-cup-2014-from-fort-william%22%7D',
|
||||
# The md5 checksum changes on each download
|
||||
'info_dict': {
|
||||
'id': '3750436379001',
|
||||
'ext': 'flv',
|
||||
'title': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
|
||||
'uploader': 'RBTV Old (do not use)',
|
||||
'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
|
||||
'timestamp': 1409122195,
|
||||
'upload_date': '20140827',
|
||||
'uploader_id': '710858724001',
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
},
|
||||
{
|
||||
# playlist with 'videoList'
|
||||
# from http://support.brightcove.com/en/video-cloud/docs/playlist-support-single-video-players
|
||||
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
|
||||
'info_dict': {
|
||||
'title': 'Sealife',
|
||||
'id': '3550319591001',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
'skip': 'Unsupported URL',
|
||||
},
|
||||
{
|
||||
# playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965)
|
||||
'url': 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=AQ%7E%7E,AAABXlLMdok%7E,NJ4EoMlZ4rZdx9eU1rkMVd8EaYPBBUlg',
|
||||
'info_dict': {
|
||||
'id': '1522758701001',
|
||||
'title': 'Lesson 08',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
'skip': 'Unsupported URL',
|
||||
},
|
||||
{
|
||||
# playerID inferred from bcpid
|
||||
# from http://www.un.org/chinese/News/story.asp?NewsID=27724
|
||||
'url': 'https://link.brightcove.com/services/player/bcpid1722935254001/?bctid=5360463607001&autoStart=false&secureConnections=true&width=650&height=350',
|
||||
'only_matching': True, # Tested in GenericIE
|
||||
}
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def _build_brighcove_url(cls, object_str):
|
||||
"""
|
||||
Build a Brightcove url from a xml string containing
|
||||
<object class="BrightcoveExperience">{params}</object>
|
||||
"""
|
||||
|
||||
# Fix up some stupid HTML, see https://github.com/ytdl-org/youtube-dl/issues/1553
|
||||
object_str = re.sub(r'(<param(?:\s+[a-zA-Z0-9_]+="[^"]*")*)>',
|
||||
lambda m: m.group(1) + '/>', object_str)
|
||||
# Fix up some stupid XML, see https://github.com/ytdl-org/youtube-dl/issues/1608
|
||||
object_str = object_str.replace('<--', '<!--')
|
||||
# remove namespace to simplify extraction
|
||||
object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str)
|
||||
object_str = fix_xml_ampersands(object_str)
|
||||
|
||||
try:
|
||||
object_doc = compat_etree_fromstring(object_str.encode('utf-8'))
|
||||
except compat_xml_parse_error:
|
||||
return
|
||||
|
||||
fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
|
||||
if fv_el is not None:
|
||||
flashvars = dict(
|
||||
(k, v[0])
|
||||
for k, v in compat_parse_qs(fv_el.attrib['value']).items())
|
||||
else:
|
||||
flashvars = {}
|
||||
|
||||
data_url = object_doc.attrib.get('data', '')
|
||||
data_url_params = compat_parse_qs(compat_urllib_parse_urlparse(data_url).query)
|
||||
|
||||
def find_param(name):
|
||||
if name in flashvars:
|
||||
return flashvars[name]
|
||||
node = find_xpath_attr(object_doc, './param', 'name', name)
|
||||
if node is not None:
|
||||
return node.attrib['value']
|
||||
return data_url_params.get(name)
|
||||
|
||||
params = {}
|
||||
|
||||
playerID = find_param('playerID') or find_param('playerId')
|
||||
if playerID is None:
|
||||
raise ExtractorError('Cannot find player ID')
|
||||
params['playerID'] = playerID
|
||||
|
||||
playerKey = find_param('playerKey')
|
||||
# Not all pages define this value
|
||||
if playerKey is not None:
|
||||
params['playerKey'] = playerKey
|
||||
# These fields hold the id of the video
|
||||
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
|
||||
if videoPlayer is not None:
|
||||
if isinstance(videoPlayer, list):
|
||||
videoPlayer = videoPlayer[0]
|
||||
videoPlayer = videoPlayer.strip()
|
||||
# UUID is also possible for videoPlayer (e.g.
|
||||
# http://www.popcornflix.com/hoodies-vs-hooligans/7f2d2b87-bbf2-4623-acfb-ea942b4f01dd
|
||||
# or http://www8.hp.com/cn/zh/home.html)
|
||||
if not (re.match(
|
||||
r'^(?:\d+|[\da-fA-F]{8}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{12})$',
|
||||
videoPlayer) or videoPlayer.startswith('ref:')):
|
||||
return None
|
||||
params['@videoPlayer'] = videoPlayer
|
||||
linkBase = find_param('linkBaseURL')
|
||||
if linkBase is not None:
|
||||
params['linkBaseURL'] = linkBase
|
||||
return cls._make_brightcove_url(params)
|
||||
|
||||
@classmethod
|
||||
def _build_brighcove_url_from_js(cls, object_js):
|
||||
# The layout of JS is as follows:
|
||||
# customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) {
|
||||
# // build Brightcove <object /> XML
|
||||
# }
|
||||
m = re.search(
|
||||
r'''(?x)customBC\.createVideo\(
|
||||
.*? # skipping width and height
|
||||
["\'](?P<playerID>\d+)["\']\s*,\s* # playerID
|
||||
["\'](?P<playerKey>AQ[^"\']{48})[^"\']*["\']\s*,\s* # playerKey begins with AQ and is 50 characters
|
||||
# in length, however it's appended to itself
|
||||
# in places, so truncate
|
||||
["\'](?P<videoID>\d+)["\'] # @videoPlayer
|
||||
''', object_js)
|
||||
if m:
|
||||
return cls._make_brightcove_url(m.groupdict())
|
||||
|
||||
@classmethod
|
||||
def _make_brightcove_url(cls, params):
|
||||
return update_url_query(
|
||||
'http://c.brightcove.com/services/viewer/htmlFederated', params)
|
||||
|
||||
@classmethod
|
||||
def _extract_brightcove_url(cls, webpage):
|
||||
"""Try to extract the brightcove url from the webpage, returns None
|
||||
if it can't be found
|
||||
"""
|
||||
urls = cls._extract_brightcove_urls(webpage)
|
||||
return urls[0] if urls else None
|
||||
|
||||
@classmethod
|
||||
def _extract_brightcove_urls(cls, webpage):
|
||||
"""Return a list of all Brightcove URLs from the webpage """
|
||||
|
||||
url_m = re.search(
|
||||
r'''(?x)
|
||||
<meta\s+
|
||||
(?:property|itemprop)=([\'"])(?:og:video|embedURL)\1[^>]+
|
||||
content=([\'"])(?P<url>https?://(?:secure|c)\.brightcove.com/(?:(?!\2).)+)\2
|
||||
''', webpage)
|
||||
if url_m:
|
||||
url = unescapeHTML(url_m.group('url'))
|
||||
# Some sites don't add it, we can't download with this url, for example:
|
||||
# http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
|
||||
if 'playerKey' in url or 'videoId' in url or 'idVideo' in url:
|
||||
return [url]
|
||||
|
||||
matches = re.findall(
|
||||
r'''(?sx)<object
|
||||
(?:
|
||||
[^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
|
||||
[^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
|
||||
).+?>\s*</object>''',
|
||||
webpage)
|
||||
if matches:
|
||||
return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
|
||||
|
||||
matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)
|
||||
if matches:
|
||||
return list(filter(None, [
|
||||
cls._build_brighcove_url_from_js(custom_bc)
|
||||
for custom_bc in matches]))
|
||||
return [src for _, src in re.findall(
|
||||
r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
||||
# Change the 'videoId' and others field to '@videoPlayer'
|
||||
url = re.sub(r'(?<=[?&])(videoI(d|D)|idVideo|bctid)', '%40videoPlayer', url)
|
||||
# Change bckey (used by bcove.me urls) to playerKey
|
||||
url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
query_str = mobj.group('query')
|
||||
query = compat_urlparse.parse_qs(query_str)
|
||||
|
||||
videoPlayer = query.get('@videoPlayer')
|
||||
if videoPlayer:
|
||||
# We set the original url as the default 'Referer' header
|
||||
referer = query.get('linkBaseURL', [None])[0] or smuggled_data.get('Referer', url)
|
||||
video_id = videoPlayer[0]
|
||||
if 'playerID' not in query:
|
||||
mobj = re.search(r'/bcpid(\d+)', url)
|
||||
if mobj is not None:
|
||||
query['playerID'] = [mobj.group(1)]
|
||||
publisher_id = query.get('publisherId')
|
||||
if publisher_id and publisher_id[0].isdigit():
|
||||
publisher_id = publisher_id[0]
|
||||
if not publisher_id:
|
||||
player_key = query.get('playerKey')
|
||||
if player_key and ',' in player_key[0]:
|
||||
player_key = player_key[0]
|
||||
else:
|
||||
player_id = query.get('playerID')
|
||||
if player_id and player_id[0].isdigit():
|
||||
headers = {}
|
||||
if referer:
|
||||
headers['Referer'] = referer
|
||||
player_page = self._download_webpage(
|
||||
'http://link.brightcove.com/services/player/bcpid' + player_id[0],
|
||||
video_id, headers=headers, fatal=False)
|
||||
if player_page:
|
||||
player_key = self._search_regex(
|
||||
r'<param\s+name="playerKey"\s+value="([\w~,-]+)"',
|
||||
player_page, 'player key', fatal=False)
|
||||
if player_key:
|
||||
enc_pub_id = player_key.split(',')[1].replace('~', '=')
|
||||
publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
|
||||
if publisher_id:
|
||||
brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
|
||||
if referer:
|
||||
brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer})
|
||||
return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
|
||||
# TODO: figure out if it's possible to extract playlistId from playerKey
|
||||
# elif 'playerKey' in query:
|
||||
# player_key = query['playerKey']
|
||||
# return self._get_playlist_info(player_key[0])
|
||||
raise UnsupportedError(url)
|
||||
|
||||
|
||||
class BrightcoveNewIE(AdobePassIE):
|
||||
IE_NAME = 'brightcove:new'
|
||||
_VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*(?P<content_type>video|playlist)Id=(?P<video_id>\d+|ref:[^&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
|
||||
'md5': 'c8100925723840d4b0d243f7025703be',
|
||||
'info_dict': {
|
||||
'id': '4463358922001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Meet the man behind Popcorn Time',
|
||||
'description': 'md5:eac376a4fe366edc70279bfb681aea16',
|
||||
'duration': 165.768,
|
||||
'timestamp': 1441391203,
|
||||
'upload_date': '20150904',
|
||||
'uploader_id': '929656772001',
|
||||
'formats': 'mincount:20',
|
||||
},
|
||||
}, {
|
||||
# with rtmp streams
|
||||
'url': 'http://players.brightcove.net/4036320279001/5d112ed9-283f-485f-a7f9-33f42e8bc042_default/index.html?videoId=4279049078001',
|
||||
'info_dict': {
|
||||
'id': '4279049078001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Titansgrave: Chapter 0',
|
||||
'description': 'Titansgrave: Chapter 0',
|
||||
'duration': 1242.058,
|
||||
'timestamp': 1433556729,
|
||||
'upload_date': '20150606',
|
||||
'uploader_id': '4036320279001',
|
||||
'formats': 'mincount:39',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# playlist stream
|
||||
'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001',
|
||||
'info_dict': {
|
||||
'id': '5718313430001',
|
||||
'title': 'No Audio Playlist',
|
||||
},
|
||||
'playlist_count': 7,
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=5743160747001',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# ref: prefixed video id
|
||||
'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# non numeric ref: prefixed video id
|
||||
'url': 'http://players.brightcove.net/710858724001/default_default/index.html?videoId=ref:event-stream-356',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# unavailable video without message but with error_code
|
||||
'url': 'http://players.brightcove.net/1305187701/c832abfb-641b-44eb-9da0-2fe76786505f_default/index.html?videoId=4377407326001',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(ie, webpage):
|
||||
urls = BrightcoveNewIE._extract_urls(ie, webpage)
|
||||
return urls[0] if urls else None
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(ie, webpage):
|
||||
# Reference:
|
||||
# 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe
|
||||
# 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#tag
|
||||
# 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript
|
||||
# 4. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/in-page-embed-player-implementation.html
|
||||
# 5. https://support.brightcove.com/en/video-cloud/docs/dynamically-assigning-videos-player
|
||||
|
||||
entries = []
|
||||
|
||||
# Look for iframe embeds [1]
|
||||
for _, url in re.findall(
|
||||
r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
|
||||
entries.append(url if url.startswith('http') else 'http:' + url)
|
||||
|
||||
# Look for <video> tags [2] and embed_in_page embeds [3]
|
||||
# [2] looks like:
|
||||
for video, script_tag, account_id, player_id, embed in re.findall(
|
||||
r'''(?isx)
|
||||
(<video(?:-js)?\s+[^>]*\bdata-video-id\s*=\s*['"]?[^>]+>)
|
||||
(?:.*?
|
||||
(<script[^>]+
|
||||
src=["\'](?:https?:)?//players\.brightcove\.net/
|
||||
(\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js
|
||||
)
|
||||
)?
|
||||
''', webpage):
|
||||
attrs = extract_attributes(video)
|
||||
|
||||
# According to examples from [4] it's unclear whether video id
|
||||
# may be optional and what to do when it is
|
||||
video_id = attrs.get('data-video-id')
|
||||
if not video_id:
|
||||
continue
|
||||
|
||||
account_id = account_id or attrs.get('data-account')
|
||||
if not account_id:
|
||||
continue
|
||||
|
||||
player_id = player_id or attrs.get('data-player') or 'default'
|
||||
embed = embed or attrs.get('data-embed') or 'default'
|
||||
|
||||
bc_url = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' % (
|
||||
account_id, player_id, embed, video_id)
|
||||
|
||||
# Some brightcove videos may be embedded with video tag only and
|
||||
# without script tag or any mentioning of brightcove at all. Such
|
||||
# embeds are considered ambiguous since they are matched based only
|
||||
# on data-video-id and data-account attributes and in the wild may
|
||||
# not be brightcove embeds at all. Let's check reconstructed
|
||||
# brightcove URLs in case of such embeds and only process valid
|
||||
# ones. By this we ensure there is indeed a brightcove embed.
|
||||
if not script_tag and not ie._is_valid_url(
|
||||
bc_url, video_id, 'possible brightcove video'):
|
||||
continue
|
||||
|
||||
entries.append(bc_url)
|
||||
|
||||
return entries
|
||||
|
||||
def _parse_brightcove_metadata(self, json_data, video_id, headers={}):
|
||||
title = json_data['name'].strip()
|
||||
|
||||
formats = []
|
||||
for source in json_data.get('sources', []):
|
||||
container = source.get('container')
|
||||
ext = mimetype2ext(source.get('type'))
|
||||
src = source.get('src')
|
||||
# https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
|
||||
if ext == 'ism' or container == 'WVM' or source.get('key_systems'):
|
||||
continue
|
||||
elif ext == 'm3u8' or container == 'M2TS':
|
||||
if not src:
|
||||
continue
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
elif ext == 'mpd':
|
||||
if not src:
|
||||
continue
|
||||
formats.extend(self._extract_mpd_formats(src, video_id, 'dash', fatal=False))
|
||||
else:
|
||||
streaming_src = source.get('streaming_src')
|
||||
stream_name, app_name = source.get('stream_name'), source.get('app_name')
|
||||
if not src and not streaming_src and (not stream_name or not app_name):
|
||||
continue
|
||||
tbr = float_or_none(source.get('avg_bitrate'), 1000)
|
||||
height = int_or_none(source.get('height'))
|
||||
width = int_or_none(source.get('width'))
|
||||
f = {
|
||||
'tbr': tbr,
|
||||
'filesize': int_or_none(source.get('size')),
|
||||
'container': container,
|
||||
'ext': ext or container.lower(),
|
||||
}
|
||||
if width == 0 and height == 0:
|
||||
f.update({
|
||||
'vcodec': 'none',
|
||||
})
|
||||
else:
|
||||
f.update({
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vcodec': source.get('codec'),
|
||||
})
|
||||
|
||||
def build_format_id(kind):
|
||||
format_id = kind
|
||||
if tbr:
|
||||
format_id += '-%dk' % int(tbr)
|
||||
if height:
|
||||
format_id += '-%dp' % height
|
||||
return format_id
|
||||
|
||||
if src or streaming_src:
|
||||
f.update({
|
||||
'url': src or streaming_src,
|
||||
'format_id': build_format_id('http' if src else 'http-streaming'),
|
||||
'source_preference': 0 if src else -1,
|
||||
})
|
||||
else:
|
||||
f.update({
|
||||
'url': app_name,
|
||||
'play_path': stream_name,
|
||||
'format_id': build_format_id('rtmp'),
|
||||
})
|
||||
formats.append(f)
|
||||
if not formats:
|
||||
# for sonyliv.com DRM protected videos
|
||||
s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl')
|
||||
if s3_source_url:
|
||||
formats.append({
|
||||
'url': s3_source_url,
|
||||
'format_id': 'source',
|
||||
})
|
||||
|
||||
errors = json_data.get('errors')
|
||||
if not formats and errors:
|
||||
error = errors[0]
|
||||
raise ExtractorError(
|
||||
error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
for f in formats:
|
||||
f.setdefault('http_headers', {}).update(headers)
|
||||
|
||||
subtitles = {}
|
||||
for text_track in json_data.get('text_tracks', []):
|
||||
if text_track.get('kind') != 'captions':
|
||||
continue
|
||||
text_track_url = url_or_none(text_track.get('src'))
|
||||
if not text_track_url:
|
||||
continue
|
||||
lang = (str_or_none(text_track.get('srclang'))
|
||||
or str_or_none(text_track.get('label')) or 'en').lower()
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': text_track_url,
|
||||
})
|
||||
|
||||
is_live = False
|
||||
duration = float_or_none(json_data.get('duration'), 1000)
|
||||
if duration is not None and duration <= 0:
|
||||
is_live = True
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'description': clean_html(json_data.get('description')),
|
||||
'thumbnail': json_data.get('thumbnail') or json_data.get('poster'),
|
||||
'duration': duration,
|
||||
'timestamp': parse_iso8601(json_data.get('published_at')),
|
||||
'uploader_id': json_data.get('account_id'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'tags': json_data.get('tags', []),
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
self._initialize_geo_bypass({
|
||||
'countries': smuggled_data.get('geo_countries'),
|
||||
'ip_blocks': smuggled_data.get('geo_ip_blocks'),
|
||||
})
|
||||
|
||||
account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
policy_key_id = '%s_%s' % (account_id, player_id)
|
||||
policy_key = self._downloader.cache.load('brightcove', policy_key_id)
|
||||
policy_key_extracted = False
|
||||
store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x)
|
||||
|
||||
def extract_policy_key():
|
||||
webpage = self._download_webpage(
|
||||
'http://players.brightcove.net/%s/%s_%s/index.min.js'
|
||||
% (account_id, player_id, embed), video_id)
|
||||
|
||||
policy_key = None
|
||||
|
||||
catalog = self._search_regex(
|
||||
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
|
||||
if catalog:
|
||||
catalog = self._parse_json(
|
||||
js_to_json(catalog), video_id, fatal=False)
|
||||
if catalog:
|
||||
policy_key = catalog.get('policyKey')
|
||||
|
||||
if not policy_key:
|
||||
policy_key = self._search_regex(
|
||||
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
|
||||
webpage, 'policy key', group='pk')
|
||||
|
||||
store_pk(policy_key)
|
||||
return policy_key
|
||||
|
||||
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id)
|
||||
headers = {}
|
||||
referrer = smuggled_data.get('referrer')
|
||||
if referrer:
|
||||
headers.update({
|
||||
'Referer': referrer,
|
||||
'Origin': re.search(r'https?://[^/]+', referrer).group(0),
|
||||
})
|
||||
|
||||
for _ in range(2):
|
||||
if not policy_key:
|
||||
policy_key = extract_policy_key()
|
||||
policy_key_extracted = True
|
||||
headers['Accept'] = 'application/json;pk=%s' % policy_key
|
||||
try:
|
||||
json_data = self._download_json(api_url, video_id, headers=headers)
|
||||
break
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
|
||||
json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
|
||||
message = json_data.get('message') or json_data['error_code']
|
||||
if json_data.get('error_subcode') == 'CLIENT_GEO':
|
||||
self.raise_geo_restricted(msg=message)
|
||||
elif json_data.get('error_code') == 'INVALID_POLICY_KEY' and not policy_key_extracted:
|
||||
policy_key = None
|
||||
store_pk(None)
|
||||
continue
|
||||
raise ExtractorError(message, expected=True)
|
||||
raise
|
||||
|
||||
errors = json_data.get('errors')
|
||||
if errors and errors[0].get('error_subcode') == 'TVE_AUTH':
|
||||
custom_fields = json_data['custom_fields']
|
||||
tve_token = self._extract_mvpd_auth(
|
||||
smuggled_data['source_url'], video_id,
|
||||
custom_fields['bcadobepassrequestorid'],
|
||||
custom_fields['bcadobepassresourceid'])
|
||||
json_data = self._download_json(
|
||||
api_url, video_id, headers={
|
||||
'Accept': 'application/json;pk=%s' % policy_key
|
||||
}, query={
|
||||
'tveToken': tve_token,
|
||||
})
|
||||
|
||||
if content_type == 'playlist':
|
||||
return self.playlist_result(
|
||||
[self._parse_brightcove_metadata(vid, vid.get('id'), headers)
|
||||
for vid in json_data.get('videos', []) if vid.get('id')],
|
||||
json_data.get('id'), json_data.get('name'),
|
||||
json_data.get('description'))
|
||||
|
||||
return self._parse_brightcove_metadata(
|
||||
json_data, video_id, headers=headers)
|
48
youtube_dl/extractor/businessinsider.py
Normal file
48
youtube_dl/extractor/businessinsider.py
Normal file
|
@ -0,0 +1,48 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .jwplatform import JWPlatformIE
|
||||
|
||||
|
||||
class BusinessInsiderIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?businessinsider\.(?:com|nl)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://uk.businessinsider.com/how-much-radiation-youre-exposed-to-in-everyday-life-2016-6',
|
||||
'md5': 'ffed3e1e12a6f950aa2f7d83851b497a',
|
||||
'info_dict': {
|
||||
'id': 'cjGDb0X9',
|
||||
'ext': 'mp4',
|
||||
'title': "Bananas give you more radiation exposure than living next to a nuclear power plant",
|
||||
'description': 'md5:0175a3baf200dd8fa658f94cade841b3',
|
||||
'upload_date': '20160611',
|
||||
'timestamp': 1465675620,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.businessinsider.nl/5-scientifically-proven-things-make-you-less-attractive-2017-7/',
|
||||
'md5': '43f438dbc6da0b89f5ac42f68529d84a',
|
||||
'info_dict': {
|
||||
'id': '5zJwd4FK',
|
||||
'ext': 'mp4',
|
||||
'title': 'Deze dingen zorgen ervoor dat je minder snel een date scoort',
|
||||
'description': 'md5:2af8975825d38a4fed24717bbe51db49',
|
||||
'upload_date': '20170705',
|
||||
'timestamp': 1499270528,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
jwplatform_id = self._search_regex(
|
||||
(r'data-media-id=["\']([a-zA-Z0-9]{8})',
|
||||
r'id=["\']jwplayer_([a-zA-Z0-9]{8})',
|
||||
r'id["\']?\s*:\s*["\']?([a-zA-Z0-9]{8})',
|
||||
r'(?:jwplatform\.com/players/|jwplayer_)([a-zA-Z0-9]{8})'),
|
||||
webpage, 'jwplatform id')
|
||||
return self.url_result(
|
||||
'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(),
|
||||
video_id=video_id)
|
98
youtube_dl/extractor/buzzfeed.py
Normal file
98
youtube_dl/extractor/buzzfeed.py
Normal file
|
@ -0,0 +1,98 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .facebook import FacebookIE
|
||||
|
||||
|
||||
class BuzzFeedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?buzzfeed\.com/[^?#]*?/(?P<id>[^?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.buzzfeed.com/abagg/this-angry-ram-destroys-a-punching-bag-like-a-boss?utm_term=4ldqpia',
|
||||
'info_dict': {
|
||||
'id': 'this-angry-ram-destroys-a-punching-bag-like-a-boss',
|
||||
'title': 'This Angry Ram Destroys A Punching Bag Like A Boss',
|
||||
'description': 'Rambro!',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'aVCR29aE_OQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Angry Ram destroys a punching bag..',
|
||||
'description': 'md5:c59533190ef23fd4458a5e8c8c872345',
|
||||
'upload_date': '20141024',
|
||||
'uploader_id': 'Buddhanz1',
|
||||
'uploader': 'Angry Ram',
|
||||
}
|
||||
}]
|
||||
}, {
|
||||
'url': 'http://www.buzzfeed.com/sheridanwatson/look-at-this-cute-dog-omg?utm_term=4ldqpia',
|
||||
'params': {
|
||||
'skip_download': True, # Got enough YouTube download tests
|
||||
},
|
||||
'info_dict': {
|
||||
'id': 'look-at-this-cute-dog-omg',
|
||||
'description': 're:Munchkin the Teddy Bear is back ?!',
|
||||
'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'mVmBL8B-In0',
|
||||
'ext': 'mp4',
|
||||
'title': 're:Munchkin the Teddy Bear gets her exercise',
|
||||
'description': 'md5:28faab95cda6e361bcff06ec12fc21d8',
|
||||
'upload_date': '20141124',
|
||||
'uploader_id': 'CindysMunchkin',
|
||||
'uploader': 're:^Munchkin the',
|
||||
},
|
||||
}]
|
||||
}, {
|
||||
'url': 'http://www.buzzfeed.com/craigsilverman/the-most-adorable-crash-landing-ever#.eq7pX0BAmK',
|
||||
'info_dict': {
|
||||
'id': 'the-most-adorable-crash-landing-ever',
|
||||
'title': 'Watch This Baby Goose Make The Most Adorable Crash Landing',
|
||||
'description': 'This gosling knows how to stick a landing.',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '763ca415512f91ca62e4621086900a23',
|
||||
'info_dict': {
|
||||
'id': '971793786185728',
|
||||
'ext': 'mp4',
|
||||
'title': 'We set up crash pads so that the goslings on our roof would have a safe landi...',
|
||||
'uploader': 'Calgary Outdoor Centre-University of Calgary',
|
||||
},
|
||||
}],
|
||||
'add_ie': ['Facebook'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
all_buckets = re.findall(
|
||||
r'(?s)<div class="video-embed[^"]*"..*?rel:bf_bucket_data=\'([^\']+)\'',
|
||||
webpage)
|
||||
|
||||
entries = []
|
||||
for bd_json in all_buckets:
|
||||
bd = json.loads(bd_json)
|
||||
video = bd.get('video') or bd.get('progload_video')
|
||||
if not video:
|
||||
continue
|
||||
entries.append(self.url_result(video['url']))
|
||||
|
||||
facebook_urls = FacebookIE._extract_urls(webpage)
|
||||
entries.extend([
|
||||
self.url_result(facebook_url)
|
||||
for facebook_url in facebook_urls])
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': playlist_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'entries': entries,
|
||||
}
|
117
youtube_dl/extractor/byutv.py
Normal file
117
youtube_dl/extractor/byutv.py
Normal file
|
@ -0,0 +1,117 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BYUtvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?byutv\.org/(?:watch|player)/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'
|
||||
_TESTS = [{
|
||||
# ooyalaVOD
|
||||
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
|
||||
'info_dict': {
|
||||
'id': 'ZvanRocTpW-G5_yZFeltTAMv6jxOU9KH',
|
||||
'display_id': 'studio-c-season-5-episode-5',
|
||||
'ext': 'mp4',
|
||||
'title': 'Season 5 Episode 5',
|
||||
'description': 'md5:1d31dc18ef4f075b28f6a65937d22c65',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'duration': 1486.486,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
# dvr
|
||||
'url': 'https://www.byutv.org/player/8f1dab9b-b243-47c8-b525-3e2d021a3451/byu-softball-pacific-vs-byu-41219---game-2',
|
||||
'info_dict': {
|
||||
'id': '8f1dab9b-b243-47c8-b525-3e2d021a3451',
|
||||
'display_id': 'byu-softball-pacific-vs-byu-41219---game-2',
|
||||
'ext': 'mp4',
|
||||
'title': 'Pacific vs. BYU (4/12/19)',
|
||||
'description': 'md5:1ac7b57cb9a78015910a4834790ce1f3',
|
||||
'duration': 11645,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.byutv.org/player/27741493-dc83-40b0-8420-e7ae38a2ae98/byu-football-toledo-vs-byu-93016?listid=4fe0fee5-0d3c-4a29-b725-e4948627f472&listindex=0&q=toledo',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
video = self._download_json(
|
||||
'https://api.byutv.org/api3/catalog/getvideosforcontent',
|
||||
display_id, query={
|
||||
'contentid': video_id,
|
||||
'channel': 'byutv',
|
||||
'x-byutv-context': 'web$US',
|
||||
}, headers={
|
||||
'x-byutv-context': 'web$US',
|
||||
'x-byutv-platformkey': 'xsaaw9c7y5',
|
||||
})
|
||||
|
||||
ep = video.get('ooyalaVOD')
|
||||
if ep:
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'Ooyala',
|
||||
'url': 'ooyala:%s' % ep['providerId'],
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': ep.get('title'),
|
||||
'description': ep.get('description'),
|
||||
'thumbnail': ep.get('imageThumbnail'),
|
||||
}
|
||||
|
||||
info = {}
|
||||
formats = []
|
||||
for format_id, ep in video.items():
|
||||
if not isinstance(ep, dict):
|
||||
continue
|
||||
video_url = url_or_none(ep.get('videoUrl'))
|
||||
if not video_url:
|
||||
continue
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
video_url, video_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
})
|
||||
merge_dicts(info, {
|
||||
'title': ep.get('title'),
|
||||
'description': ep.get('description'),
|
||||
'thumbnail': ep.get('imageThumbnail'),
|
||||
'duration': parse_duration(ep.get('length')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return merge_dicts(info, {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': display_id,
|
||||
'formats': formats,
|
||||
})
|
65
youtube_dl/extractor/c56.py
Normal file
65
youtube_dl/extractor/c56.py
Normal file
|
@ -0,0 +1,65 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json
|
||||
|
||||
|
||||
class C56IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www|player)\.)?56\.com/(?:.+?/)?(?:v_|(?:play_album.+-))(?P<textid>.+?)\.(?:html|swf)'
|
||||
IE_NAME = '56.com'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html',
|
||||
'md5': 'e59995ac63d0457783ea05f93f12a866',
|
||||
'info_dict': {
|
||||
'id': '93440716',
|
||||
'ext': 'flv',
|
||||
'title': '网事知多少 第32期:车怒',
|
||||
'duration': 283.813,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.56.com/u47/v_MTM5NjQ5ODc2.html',
|
||||
'md5': '',
|
||||
'info_dict': {
|
||||
'id': '82247482',
|
||||
'title': '爱的诅咒之杜鹃花开',
|
||||
},
|
||||
'playlist_count': 7,
|
||||
'add_ie': ['Sohu'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
|
||||
text_id = mobj.group('textid')
|
||||
|
||||
webpage = self._download_webpage(url, text_id)
|
||||
sohu_video_info_str = self._search_regex(
|
||||
r'var\s+sohuVideoInfo\s*=\s*({[^}]+});', webpage, 'Sohu video info', default=None)
|
||||
if sohu_video_info_str:
|
||||
sohu_video_info = self._parse_json(
|
||||
sohu_video_info_str, text_id, transform_source=js_to_json)
|
||||
return self.url_result(sohu_video_info['url'], 'Sohu')
|
||||
|
||||
page = self._download_json(
|
||||
'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info')
|
||||
|
||||
info = page['info']
|
||||
|
||||
formats = [
|
||||
{
|
||||
'format_id': f['type'],
|
||||
'filesize': int(f['filesize']),
|
||||
'url': f['url']
|
||||
} for f in info['rfiles']
|
||||
]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': info['vid'],
|
||||
'title': info['Subject'],
|
||||
'duration': int(info['duration']) / 1000.0,
|
||||
'formats': formats,
|
||||
'thumbnail': info.get('bimg') or info.get('img'),
|
||||
}
|
161
youtube_dl/extractor/camdemy.py
Normal file
161
youtube_dl/extractor/camdemy.py
Normal file
|
@ -0,0 +1,161 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class CamdemyIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# single file
|
||||
'url': 'http://www.camdemy.com/media/5181/',
|
||||
'md5': '5a5562b6a98b37873119102e052e311b',
|
||||
'info_dict': {
|
||||
'id': '5181',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'creator': 'ss11spring',
|
||||
'duration': 1591,
|
||||
'upload_date': '20130114',
|
||||
'view_count': int,
|
||||
}
|
||||
}, {
|
||||
# With non-empty description
|
||||
# webpage returns "No permission or not login"
|
||||
'url': 'http://www.camdemy.com/media/13885',
|
||||
'md5': '4576a3bb2581f86c61044822adbd1249',
|
||||
'info_dict': {
|
||||
'id': '13885',
|
||||
'ext': 'mp4',
|
||||
'title': 'EverCam + Camdemy QuickStart',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'description': 'md5:2a9f989c2b153a2342acee579c6e7db6',
|
||||
'creator': 'evercam',
|
||||
'duration': 318,
|
||||
}
|
||||
}, {
|
||||
# External source (YouTube)
|
||||
'url': 'http://www.camdemy.com/media/14842',
|
||||
'info_dict': {
|
||||
'id': '2vsYQzNIsJo',
|
||||
'ext': 'mp4',
|
||||
'title': 'Excel 2013 Tutorial - How to add Password Protection',
|
||||
'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
|
||||
'upload_date': '20130211',
|
||||
'uploader': 'Hun Kim',
|
||||
'uploader_id': 'hunkimtutorials',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
src_from = self._html_search_regex(
|
||||
r"class=['\"]srcFrom['\"][^>]*>Sources?(?:\s+from)?\s*:\s*<a[^>]+(?:href|title)=(['\"])(?P<url>(?:(?!\1).)+)\1",
|
||||
webpage, 'external source', default=None, group='url')
|
||||
if src_from:
|
||||
return self.url_result(src_from)
|
||||
|
||||
oembed_obj = self._download_json(
|
||||
'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id)
|
||||
|
||||
title = oembed_obj['title']
|
||||
thumb_url = oembed_obj['thumbnail_url']
|
||||
video_folder = compat_urlparse.urljoin(thumb_url, 'video/')
|
||||
file_list_doc = self._download_xml(
|
||||
compat_urlparse.urljoin(video_folder, 'fileList.xml'),
|
||||
video_id, 'Downloading filelist XML')
|
||||
file_name = file_list_doc.find('./video/item/fileName').text
|
||||
video_url = compat_urlparse.urljoin(video_folder, file_name)
|
||||
|
||||
# Some URLs return "No permission or not login" in a webpage despite being
|
||||
# freely available via oembed JSON URL (e.g. http://www.camdemy.com/media/13885)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'>published on ([^<]+)<', webpage,
|
||||
'upload date', default=None))
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'role=["\']viewCnt["\'][^>]*>([\d,.]+) views',
|
||||
webpage, 'view count', default=None))
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, default=None) or clean_html(
|
||||
oembed_obj.get('description'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'thumbnail': thumb_url,
|
||||
'description': description,
|
||||
'creator': oembed_obj.get('author_name'),
|
||||
'duration': parse_duration(oembed_obj.get('duration')),
|
||||
'upload_date': upload_date,
|
||||
'view_count': view_count,
|
||||
}
|
||||
|
||||
|
||||
class CamdemyFolderIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?camdemy\.com/folder/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# links with trailing slash
|
||||
'url': 'http://www.camdemy.com/folder/450',
|
||||
'info_dict': {
|
||||
'id': '450',
|
||||
'title': '信號與系統 2012 & 2011 (Signals and Systems)',
|
||||
},
|
||||
'playlist_mincount': 145
|
||||
}, {
|
||||
# links without trailing slash
|
||||
# and multi-page
|
||||
'url': 'http://www.camdemy.com/folder/853',
|
||||
'info_dict': {
|
||||
'id': '853',
|
||||
'title': '科學計算 - 使用 Matlab'
|
||||
},
|
||||
'playlist_mincount': 20
|
||||
}, {
|
||||
# with displayMode parameter. For testing the codes to add parameters
|
||||
'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg',
|
||||
'info_dict': {
|
||||
'id': '853',
|
||||
'title': '科學計算 - 使用 Matlab'
|
||||
},
|
||||
'playlist_mincount': 20
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
folder_id = self._match_id(url)
|
||||
|
||||
# Add displayMode=list so that all links are displayed in a single page
|
||||
parsed_url = list(compat_urlparse.urlparse(url))
|
||||
query = dict(compat_urlparse.parse_qsl(parsed_url[4]))
|
||||
query.update({'displayMode': 'list'})
|
||||
parsed_url[4] = compat_urllib_parse_urlencode(query)
|
||||
final_url = compat_urlparse.urlunparse(parsed_url)
|
||||
|
||||
page = self._download_webpage(final_url, folder_id)
|
||||
matches = re.findall(r"href='(/media/\d+/?)'", page)
|
||||
|
||||
entries = [self.url_result('http://www.camdemy.com' + media_path)
|
||||
for media_path in matches]
|
||||
|
||||
folder_title = self._html_search_meta('keywords', page)
|
||||
|
||||
return self.playlist_result(entries, folder_id, folder_title)
|
98
youtube_dl/extractor/cammodels.py
Normal file
98
youtube_dl/extractor/cammodels.py
Normal file
|
@ -0,0 +1,98 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class CamModelsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cammodels.com/cam/AutumnKnight/',
|
||||
'only_matching': True,
|
||||
'age_limit': 18
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, user_id, headers=self.geo_verification_headers())
|
||||
|
||||
manifest_root = self._html_search_regex(
|
||||
r'manifestUrlRoot=([^&\']+)', webpage, 'manifest', default=None)
|
||||
|
||||
if not manifest_root:
|
||||
ERRORS = (
|
||||
("I'm offline, but let's stay connected", 'This user is currently offline'),
|
||||
('in a private show', 'This user is in a private show'),
|
||||
('is currently performing LIVE', 'This model is currently performing live'),
|
||||
)
|
||||
for pattern, message in ERRORS:
|
||||
if pattern in webpage:
|
||||
error = message
|
||||
expected = True
|
||||
break
|
||||
else:
|
||||
error = 'Unable to find manifest URL root'
|
||||
expected = False
|
||||
raise ExtractorError(error, expected=expected)
|
||||
|
||||
manifest = self._download_json(
|
||||
'%s%s.json' % (manifest_root, user_id), user_id)
|
||||
|
||||
formats = []
|
||||
for format_id, format_dict in manifest['formats'].items():
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
encodings = format_dict.get('encodings')
|
||||
if not isinstance(encodings, list):
|
||||
continue
|
||||
vcodec = format_dict.get('videoCodec')
|
||||
acodec = format_dict.get('audioCodec')
|
||||
for media in encodings:
|
||||
if not isinstance(media, dict):
|
||||
continue
|
||||
media_url = url_or_none(media.get('location'))
|
||||
if not media_url:
|
||||
continue
|
||||
|
||||
format_id_list = [format_id]
|
||||
height = int_or_none(media.get('videoHeight'))
|
||||
if height is not None:
|
||||
format_id_list.append('%dp' % height)
|
||||
f = {
|
||||
'url': media_url,
|
||||
'format_id': '-'.join(format_id_list),
|
||||
'width': int_or_none(media.get('videoWidth')),
|
||||
'height': height,
|
||||
'vbr': int_or_none(media.get('videoKbps')),
|
||||
'abr': int_or_none(media.get('audioKbps')),
|
||||
'fps': int_or_none(media.get('fps')),
|
||||
'vcodec': vcodec,
|
||||
'acodec': acodec,
|
||||
}
|
||||
if 'rtmp' in format_id:
|
||||
f['ext'] = 'flv'
|
||||
elif 'hls' in format_id:
|
||||
f.update({
|
||||
'ext': 'mp4',
|
||||
# hls skips fragments, preferring rtmp
|
||||
'preference': -1,
|
||||
})
|
||||
else:
|
||||
continue
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': user_id,
|
||||
'title': self._live_title(user_id),
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
'age_limit': 18
|
||||
}
|
71
youtube_dl/extractor/camtube.py
Normal file
71
youtube_dl/extractor/camtube.py
Normal file
|
@ -0,0 +1,71 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class CamTubeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www|api)\.)?camtube\.co/recordings?/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://camtube.co/recording/minafay-030618-1136-chaturbate-female',
|
||||
'info_dict': {
|
||||
'id': '42ad3956-dd5b-445a-8313-803ea6079fac',
|
||||
'display_id': 'minafay-030618-1136-chaturbate-female',
|
||||
'ext': 'mp4',
|
||||
'title': 'minafay-030618-1136-chaturbate-female',
|
||||
'duration': 1274,
|
||||
'timestamp': 1528018608,
|
||||
'upload_date': '20180603',
|
||||
'age_limit': 18
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
_API_BASE = 'https://api.camtube.co'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
token = self._download_json(
|
||||
'%s/rpc/session/new' % self._API_BASE, display_id,
|
||||
'Downloading session token')['token']
|
||||
|
||||
self._set_cookie('api.camtube.co', 'session', token)
|
||||
|
||||
video = self._download_json(
|
||||
'%s/recordings/%s' % (self._API_BASE, display_id), display_id,
|
||||
headers={'Referer': url})
|
||||
|
||||
video_id = video['uuid']
|
||||
timestamp = unified_timestamp(video.get('createdAt'))
|
||||
duration = int_or_none(video.get('duration'))
|
||||
view_count = int_or_none(video.get('viewCount'))
|
||||
like_count = int_or_none(video.get('likeCount'))
|
||||
creator = video.get('stageName')
|
||||
|
||||
formats = [{
|
||||
'url': '%s/recordings/%s/manifest.m3u8'
|
||||
% (self._API_BASE, video_id),
|
||||
'format_id': 'hls',
|
||||
'ext': 'mp4',
|
||||
'protocol': 'm3u8_native',
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': display_id,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'creator': creator,
|
||||
'formats': formats,
|
||||
'age_limit': 18
|
||||
}
|
89
youtube_dl/extractor/camwithher.py
Normal file
89
youtube_dl/extractor/camwithher.py
Normal file
|
@ -0,0 +1,89 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class CamWithHerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?camwithher\.tv/view_video\.php\?.*\bviewkey=(?P<id>\w+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://camwithher.tv/view_video.php?viewkey=6e9a24e2c0e842e1f177&page=&viewtype=&category=',
|
||||
'info_dict': {
|
||||
'id': '5644',
|
||||
'ext': 'flv',
|
||||
'title': 'Periscope Tease',
|
||||
'description': 'In the clouds teasing on periscope to my favorite song',
|
||||
'duration': 240,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'uploader': 'MileenaK',
|
||||
'upload_date': '20160322',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://camwithher.tv/view_video.php?viewkey=6dfd8b7c97531a459937',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://camwithher.tv/view_video.php?page=&viewkey=6e9a24e2c0e842e1f177&viewtype=&category=',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://camwithher.tv/view_video.php?viewkey=b6c3b5bea9515d1a1fc4&page=&viewtype=&category=mv',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
flv_id = self._html_search_regex(
|
||||
r'<a[^>]+href=["\']/download/\?v=(\d+)', webpage, 'video id')
|
||||
|
||||
# Video URL construction algorithm is reverse-engineered from cwhplayer.swf
|
||||
rtmp_url = 'rtmp://camwithher.tv/clipshare/%s' % (
|
||||
('mp4:%s.mp4' % flv_id) if int(flv_id) > 2010 else flv_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<div[^>]+style="float:left"[^>]*>\s*<h2>(.+?)</h2>', webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'>Description:</span>(.+?)</div>', webpage, 'description', default=None)
|
||||
|
||||
runtime = self._search_regex(
|
||||
r'Runtime\s*:\s*(.+?) \|', webpage, 'duration', default=None)
|
||||
if runtime:
|
||||
runtime = re.sub(r'[\s-]', '', runtime)
|
||||
duration = parse_duration(runtime)
|
||||
view_count = int_or_none(self._search_regex(
|
||||
r'Views\s*:\s*(\d+)', webpage, 'view count', default=None))
|
||||
comment_count = int_or_none(self._search_regex(
|
||||
r'Comments\s*:\s*(\d+)', webpage, 'comment count', default=None))
|
||||
|
||||
uploader = self._search_regex(
|
||||
r'Added by\s*:\s*<a[^>]+>([^<]+)</a>', webpage, 'uploader', default=None)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'Added on\s*:\s*([\d-]+)', webpage, 'upload date', default=None))
|
||||
|
||||
return {
|
||||
'id': flv_id,
|
||||
'url': rtmp_url,
|
||||
'ext': 'flv',
|
||||
'no_resume': True,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
'age_limit': 18
|
||||
}
|
73
youtube_dl/extractor/canalc2.py
Normal file
73
youtube_dl/extractor/canalc2.py
Normal file
|
@ -0,0 +1,73 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_duration
|
||||
|
||||
|
||||
class Canalc2IE(InfoExtractor):
|
||||
IE_NAME = 'canalc2.tv'
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?canalc2\.tv/video/|archives-canalc2\.u-strasbg\.fr/video\.asp\?.*\bidVideo=)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.canalc2.tv/video/12163',
|
||||
'md5': '060158428b650f896c542dfbb3d6487f',
|
||||
'info_dict': {
|
||||
'id': '12163',
|
||||
'ext': 'mp4',
|
||||
'title': 'Terrasses du Numérique',
|
||||
'duration': 122,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://archives-canalc2.u-strasbg.fr/video.asp?idVideo=11427&voir=oui',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://www.canalc2.tv/video/%s' % video_id, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.+?)</h3>',
|
||||
webpage, 'title')
|
||||
|
||||
formats = []
|
||||
for _, video_url in re.findall(r'file\s*=\s*(["\'])(.+?)\1', webpage):
|
||||
if video_url.startswith('rtmp://'):
|
||||
rtmp = re.search(
|
||||
r'^(?P<url>rtmp://[^/]+/(?P<app>.+/))(?P<play_path>mp4:.+)$', video_url)
|
||||
formats.append({
|
||||
'url': rtmp.group('url'),
|
||||
'format_id': 'rtmp',
|
||||
'ext': 'flv',
|
||||
'app': rtmp.group('app'),
|
||||
'play_path': rtmp.group('play_path'),
|
||||
'page_url': url,
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': 'http',
|
||||
})
|
||||
|
||||
if formats:
|
||||
info = {
|
||||
'formats': formats,
|
||||
}
|
||||
else:
|
||||
info = self._parse_html5_media_entries(url, webpage, url)[0]
|
||||
|
||||
self._sort_formats(info['formats'])
|
||||
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': parse_duration(self._search_regex(
|
||||
r'id=["\']video_duree["\'][^>]*>([^<]+)',
|
||||
webpage, 'duration', fatal=False)),
|
||||
})
|
||||
return info
|
116
youtube_dl/extractor/canalplus.py
Normal file
116
youtube_dl/extractor/canalplus.py
Normal file
|
@ -0,0 +1,116 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
# ExtractorError,
|
||||
# HEADRequest,
|
||||
int_or_none,
|
||||
qualities,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class CanalplusIE(InfoExtractor):
|
||||
IE_DESC = 'mycanal.fr and piwiplus.fr'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>mycanal|piwiplus)\.fr/(?:[^/]+/)*(?P<display_id>[^?/]+)(?:\.html\?.*\bvid=|/p/)(?P<id>\d+)'
|
||||
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s?format=json'
|
||||
_SITE_ID_MAP = {
|
||||
'mycanal': 'cplus',
|
||||
'piwiplus': 'teletoon',
|
||||
}
|
||||
|
||||
# Only works for direct mp4 URLs
|
||||
_GEO_COUNTRIES = ['FR']
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.mycanal.fr/d17-emissions/lolywood/p/1397061',
|
||||
'info_dict': {
|
||||
'id': '1397061',
|
||||
'display_id': 'lolywood',
|
||||
'ext': 'mp4',
|
||||
'title': 'Euro 2016 : Je préfère te prévenir - Lolywood - Episode 34',
|
||||
'description': 'md5:7d97039d455cb29cdba0d652a0efaa5e',
|
||||
'upload_date': '20160602',
|
||||
},
|
||||
}, {
|
||||
# geo restricted, bypassed
|
||||
'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
|
||||
'info_dict': {
|
||||
'id': '1108190',
|
||||
'display_id': 'pid1405-le-labyrinthe-boing-super-ranger',
|
||||
'ext': 'mp4',
|
||||
'title': 'BOING SUPER RANGER - Ep : Le labyrinthe',
|
||||
'description': 'md5:4cea7a37153be42c1ba2c1d3064376ff',
|
||||
'upload_date': '20140724',
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
site, display_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
site_id = self._SITE_ID_MAP[site]
|
||||
|
||||
info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)
|
||||
video_data = self._download_json(info_url, video_id, 'Downloading video JSON')
|
||||
|
||||
if isinstance(video_data, list):
|
||||
video_data = [video for video in video_data if video.get('ID') == video_id][0]
|
||||
media = video_data['MEDIA']
|
||||
infos = video_data['INFOS']
|
||||
|
||||
preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD'])
|
||||
|
||||
# _, fmt_url = next(iter(media['VIDEOS'].items()))
|
||||
# if '/geo' in fmt_url.lower():
|
||||
# response = self._request_webpage(
|
||||
# HEADRequest(fmt_url), video_id,
|
||||
# 'Checking if the video is georestricted')
|
||||
# if '/blocage' in response.geturl():
|
||||
# raise ExtractorError(
|
||||
# 'The video is not available in your country',
|
||||
# expected=True)
|
||||
|
||||
formats = []
|
||||
for format_id, format_url in media['VIDEOS'].items():
|
||||
if not format_url:
|
||||
continue
|
||||
if format_id == 'HLS':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False))
|
||||
elif format_id == 'HDS':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url + '?hdcore=2.11.3', video_id, f4m_id=format_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
# the secret extracted from ya function in http://player.canalplus.fr/common/js/canalPlayer.js
|
||||
'url': format_url + '?secret=pqzerjlsmdkjfoiuerhsdlfknaes',
|
||||
'format_id': format_id,
|
||||
'preference': preference(format_id),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = [{
|
||||
'id': image_id,
|
||||
'url': image_url,
|
||||
} for image_id, image_url in media.get('images', {}).items()]
|
||||
|
||||
titrage = infos['TITRAGE']
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': '%s - %s' % (titrage['TITRE'],
|
||||
titrage['SOUS_TITRE']),
|
||||
'upload_date': unified_strdate(infos.get('PUBLICATION', {}).get('DATE')),
|
||||
'thumbnails': thumbnails,
|
||||
'description': infos.get('DESCRIPTION'),
|
||||
'duration': int_or_none(infos.get('DURATION')),
|
||||
'view_count': int_or_none(infos.get('NB_VUES')),
|
||||
'like_count': int_or_none(infos.get('NB_LIKES')),
|
||||
'comment_count': int_or_none(infos.get('NB_COMMENTS')),
|
||||
'formats': formats,
|
||||
}
|
368
youtube_dl/extractor/canvas.py
Normal file
368
youtube_dl/extractor/canvas.py
Normal file
|
@ -0,0 +1,368 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .gigya import GigyaBaseIE
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
strip_or_none,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class CanvasIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza)/assets/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'md5': '68993eda72ef62386a15ea2cf3c93107',
|
||||
'info_dict': {
|
||||
'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nachtwacht: De Greystook',
|
||||
'description': 'Nachtwacht: De Greystook',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 1468.04,
|
||||
},
|
||||
'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
|
||||
}, {
|
||||
'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_HLS_ENTRY_PROTOCOLS_MAP = {
|
||||
'HLS': 'm3u8_native',
|
||||
'HLS_AES': 'm3u8',
|
||||
}
|
||||
_REST_API_BASE = 'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v1'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
site_id, video_id = mobj.group('site_id'), mobj.group('id')
|
||||
|
||||
# Old API endpoint, serves more formats but may fail for some videos
|
||||
data = self._download_json(
|
||||
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
|
||||
% (site_id, video_id), video_id, 'Downloading asset JSON',
|
||||
'Unable to download asset JSON', fatal=False)
|
||||
|
||||
# New API endpoint
|
||||
if not data:
|
||||
token = self._download_json(
|
||||
'%s/tokens' % self._REST_API_BASE, video_id,
|
||||
'Downloading token', data=b'',
|
||||
headers={'Content-Type': 'application/json'})['vrtPlayerToken']
|
||||
data = self._download_json(
|
||||
'%s/videos/%s' % (self._REST_API_BASE, video_id),
|
||||
video_id, 'Downloading video JSON', fatal=False, query={
|
||||
'vrtPlayerToken': token,
|
||||
'client': '%s@PROD' % site_id,
|
||||
}, expected_status=400)
|
||||
message = data.get('message')
|
||||
if message and not data.get('title'):
|
||||
if data.get('code') == 'AUTHENTICATION_REQUIRED':
|
||||
self.raise_login_required(message)
|
||||
raise ExtractorError(message, expected=True)
|
||||
|
||||
title = data['title']
|
||||
description = data.get('description')
|
||||
|
||||
formats = []
|
||||
for target in data['targetUrls']:
|
||||
format_url, format_type = url_or_none(target.get('url')), str_or_none(target.get('type'))
|
||||
if not format_url or not format_type:
|
||||
continue
|
||||
format_type = format_type.upper()
|
||||
if format_type in self._HLS_ENTRY_PROTOCOLS_MAP:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type],
|
||||
m3u8_id=format_type, fatal=False))
|
||||
elif format_type == 'HDS':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url, video_id, f4m_id=format_type, fatal=False))
|
||||
elif format_type == 'MPEG_DASH':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id=format_type, fatal=False))
|
||||
elif format_type == 'HSS':
|
||||
formats.extend(self._extract_ism_formats(
|
||||
format_url, video_id, ism_id='mss', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': format_type,
|
||||
'url': format_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
subtitle_urls = data.get('subtitleUrls')
|
||||
if isinstance(subtitle_urls, list):
|
||||
for subtitle in subtitle_urls:
|
||||
subtitle_url = subtitle.get('url')
|
||||
if subtitle_url and subtitle.get('type') == 'CLOSED':
|
||||
subtitles.setdefault('nl', []).append({'url': subtitle_url})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
'duration': float_or_none(data.get('duration'), 1000),
|
||||
'thumbnail': data.get('posterImageUrl'),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class CanvasEenIE(InfoExtractor):
|
||||
IE_DESC = 'canvas.be and een.be'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
|
||||
'md5': 'ed66976748d12350b118455979cca293',
|
||||
'info_dict': {
|
||||
'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
|
||||
'display_id': 'de-afspraak-veilt-voor-de-warmste-week',
|
||||
'ext': 'flv',
|
||||
'title': 'De afspraak veilt voor de Warmste Week',
|
||||
'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 49.02,
|
||||
},
|
||||
'expected_warnings': ['is not a supported codec'],
|
||||
}, {
|
||||
# with subtitles
|
||||
'url': 'http://www.canvas.be/video/panorama/2016/pieter-0167',
|
||||
'info_dict': {
|
||||
'id': 'mz-ast-5240ff21-2d30-4101-bba6-92b5ec67c625',
|
||||
'display_id': 'pieter-0167',
|
||||
'ext': 'mp4',
|
||||
'title': 'Pieter 0167',
|
||||
'description': 'md5:943cd30f48a5d29ba02c3a104dc4ec4e',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 2553.08,
|
||||
'subtitles': {
|
||||
'nl': [{
|
||||
'ext': 'vtt',
|
||||
}],
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Pagina niet gevonden',
|
||||
}, {
|
||||
'url': 'https://www.een.be/thuis/emma-pakt-thilly-aan',
|
||||
'info_dict': {
|
||||
'id': 'md-ast-3a24ced2-64d7-44fb-b4ed-ed1aafbf90b8',
|
||||
'display_id': 'emma-pakt-thilly-aan',
|
||||
'ext': 'mp4',
|
||||
'title': 'Emma pakt Thilly aan',
|
||||
'description': 'md5:c5c9b572388a99b2690030afa3f3bad7',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 118.24,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['is not a supported codec'],
|
||||
}, {
|
||||
'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
site_id, display_id = mobj.group('site_id'), mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = strip_or_none(self._search_regex(
|
||||
r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
|
||||
webpage, 'title', default=None) or self._og_search_title(
|
||||
webpage, default=None))
|
||||
|
||||
video_id = self._html_search_regex(
|
||||
r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
|
||||
group='id')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (site_id, video_id),
|
||||
'ie_key': CanvasIE.ie_key(),
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
}
|
||||
|
||||
|
||||
class VrtNUIE(GigyaBaseIE):
|
||||
IE_DESC = 'VrtNU.be'
|
||||
_VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
# Available via old API endpoint
|
||||
'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/',
|
||||
'info_dict': {
|
||||
'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
|
||||
'ext': 'mp4',
|
||||
'title': 'De zwarte weduwe',
|
||||
'description': 'md5:db1227b0f318c849ba5eab1fef895ee4',
|
||||
'duration': 1457.04,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
},
|
||||
'skip': 'This video is only available for registered users',
|
||||
'params': {
|
||||
'username': '<snip>',
|
||||
'password': '<snip>',
|
||||
},
|
||||
'expected_warnings': ['is not a supported codec'],
|
||||
}, {
|
||||
# Only available via new API endpoint
|
||||
'url': 'https://www.vrt.be/vrtnu/a-z/kamp-waes/1/kamp-waes-s1a5/',
|
||||
'info_dict': {
|
||||
'id': 'pbs-pub-0763b56c-64fb-4d38-b95b-af60bf433c71$vid-ad36a73c-4735-4f1f-b2c0-a38e6e6aa7e1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Aflevering 5',
|
||||
'description': 'Wie valt door de mand tijdens een missie?',
|
||||
'duration': 2967.06,
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'episode_number': 5,
|
||||
},
|
||||
'skip': 'This video is only available for registered users',
|
||||
'params': {
|
||||
'username': '<snip>',
|
||||
'password': '<snip>',
|
||||
},
|
||||
'expected_warnings': ['Unable to download asset JSON', 'is not a supported codec', 'Unknown MIME type'],
|
||||
}]
|
||||
_NETRC_MACHINE = 'vrtnu'
|
||||
_APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
|
||||
_CONTEXT_ID = 'R3595707040'
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
auth_data = {
|
||||
'APIKey': self._APIKEY,
|
||||
'targetEnv': 'jssdk',
|
||||
'loginID': username,
|
||||
'password': password,
|
||||
'authMode': 'cookie',
|
||||
}
|
||||
|
||||
auth_info = self._gigya_login(auth_data)
|
||||
|
||||
# Sometimes authentication fails for no good reason, retry
|
||||
login_attempt = 1
|
||||
while login_attempt <= 3:
|
||||
try:
|
||||
# When requesting a token, no actual token is returned, but the
|
||||
# necessary cookies are set.
|
||||
self._request_webpage(
|
||||
'https://token.vrt.be',
|
||||
None, note='Requesting a token', errnote='Could not get a token',
|
||||
headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Referer': 'https://www.vrt.be/vrtnu/',
|
||||
},
|
||||
data=json.dumps({
|
||||
'uid': auth_info['UID'],
|
||||
'uidsig': auth_info['UIDSignature'],
|
||||
'ts': auth_info['signatureTimestamp'],
|
||||
'email': auth_info['profile']['email'],
|
||||
}).encode('utf-8'))
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
login_attempt += 1
|
||||
self.report_warning('Authentication failed')
|
||||
self._sleep(1, None, msg_template='Waiting for %(timeout)s seconds before trying again')
|
||||
else:
|
||||
raise e
|
||||
else:
|
||||
break
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
||||
|
||||
info = self._search_json_ld(webpage, display_id, default={})
|
||||
|
||||
# title is optional here since it may be extracted by extractor
|
||||
# that is delegated from here
|
||||
title = strip_or_none(self._html_search_regex(
|
||||
r'(?ms)<h1 class="content__heading">(.+?)</h1>',
|
||||
webpage, 'title', default=None))
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'(?ms)<div class="content__description">(.+?)</div>',
|
||||
webpage, 'description', default=None)
|
||||
|
||||
season = self._html_search_regex(
|
||||
[r'''(?xms)<div\ class="tabs__tab\ tabs__tab--active">\s*
|
||||
<span>seizoen\ (.+?)</span>\s*
|
||||
</div>''',
|
||||
r'<option value="seizoen (\d{1,3})" data-href="[^"]+?" selected>'],
|
||||
webpage, 'season', default=None)
|
||||
|
||||
season_number = int_or_none(season)
|
||||
|
||||
episode_number = int_or_none(self._html_search_regex(
|
||||
r'''(?xms)<div\ class="content__episode">\s*
|
||||
<abbr\ title="aflevering">afl</abbr>\s*<span>(\d+)</span>
|
||||
</div>''',
|
||||
webpage, 'episode_number', default=None))
|
||||
|
||||
release_date = parse_iso8601(self._html_search_regex(
|
||||
r'(?ms)<div class="content__broadcastdate">\s*<time\ datetime="(.+?)"',
|
||||
webpage, 'release_date', default=None))
|
||||
|
||||
# If there's a ? or a # in the URL, remove them and everything after
|
||||
clean_url = urlh.geturl().split('?')[0].split('#')[0].strip('/')
|
||||
securevideo_url = clean_url + '.mssecurevideo.json'
|
||||
|
||||
try:
|
||||
video = self._download_json(securevideo_url, display_id)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
self.raise_login_required()
|
||||
raise
|
||||
|
||||
# We are dealing with a '../<show>.relevant' URL
|
||||
redirect_url = video.get('url')
|
||||
if redirect_url:
|
||||
return self.url_result(self._proto_relative_url(redirect_url, 'https:'))
|
||||
|
||||
# There is only one entry, but with an unknown key, so just get
|
||||
# the first one
|
||||
video_id = list(video.values())[0].get('videoid')
|
||||
|
||||
return merge_dicts(info, {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
|
||||
'ie_key': CanvasIE.ie_key(),
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'season': season,
|
||||
'season_number': season_number,
|
||||
'episode_number': episode_number,
|
||||
'release_date': release_date,
|
||||
})
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue