[niconico] Fix HLS formats

Closes #171

* The structure of the API JSON has changed
* Smile Video appears to no longer be available, so remove the warning
* Move ping to the downloader
* Change heartbeat interval to 40 sec (see the sketch after this list)
* Remove unnecessary API headers
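
For context on the 40 sec figure: the DMC session API reports a heartbeatLifetime in milliseconds, and the extractor derives the ping interval by dividing it by a scale factor (now 3000, previously 2000). A minimal sketch of that arithmetic, assuming a typical heartbeatLifetime of 120000 ms (the real value comes from the session API response; heartbeat_interval is an illustrative helper, not float_or_none itself):

    def heartbeat_interval(lifetime_ms, scale=3000, default=30):
        # float_or_none-style scaling: the millisecond lifetime divided by the scale factor
        if lifetime_ms is None:
            return default
        return lifetime_ms / scale

    assert heartbeat_interval(120000) == 40.0               # new behaviour (scale=3000)
    assert heartbeat_interval(120000, scale=2000) == 60.0   # old behaviour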

Authored-by: CXwudi, tsukumijima, nao20010128nao, pukkandan
Tested by: tsukumijima
Commit: 2291dbce2a (parent 58f197b76c)
Author: pukkandan
Date: 2021-04-05 13:11:21 +05:30
2 changed files with 54 additions and 54 deletions


@@ -24,16 +24,14 @@ def real_download(self, filename, info_dict):
         success = download_complete = False
         timer = [None]
         heartbeat_lock = threading.Lock()
 
         heartbeat_url = heartbeat_info_dict['url']
-        heartbeat_data = heartbeat_info_dict['data']
+        heartbeat_data = heartbeat_info_dict['data'].encode()
         heartbeat_interval = heartbeat_info_dict.get('interval', 30)
-        self.to_screen('[%s] Heartbeat with %s second interval ...' % (self.FD_NAME, heartbeat_interval))
 
         def heartbeat():
             try:
-                compat_urllib_request.urlopen(url=heartbeat_url, data=heartbeat_data.encode())
+                compat_urllib_request.urlopen(url=heartbeat_url, data=heartbeat_data)
             except Exception:
                 self.to_screen('[%s] Heartbeat failed' % self.FD_NAME)
@@ -42,13 +40,16 @@ def heartbeat():
                     timer[0] = threading.Timer(heartbeat_interval, heartbeat)
                     timer[0].start()
 
+        heartbeat_info_dict['ping']()
+        self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval))
         try:
             heartbeat()
+            if type(fd).__name__ == 'HlsFD':
+                info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0])
             success = fd.real_download(filename, info_dict)
         finally:
             if heartbeat_lock:
                 with heartbeat_lock:
                     timer[0].cancel()
                     download_complete = True
 
         return success
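
The downloader side is a re-arming threading.Timer: each heartbeat schedules the next one under a lock until the download flags completion, and the new ping() hook fires once before the first beat. A standalone sketch of the same pattern, with illustrative names (run_with_heartbeat is not part of the codebase):

    import threading

    def run_with_heartbeat(do_download, send_heartbeat, interval=40, ping=None):
        lock = threading.Lock()
        timer = [None]
        done = [False]

        def beat():
            send_heartbeat()  # keep the DMC session alive
            with lock:
                if not done[0]:
                    timer[0] = threading.Timer(interval, beat)
                    timer[0].start()

        if ping:
            ping()  # one-time permission call before the first heartbeat
        beat()
        try:
            return do_download()
        finally:
            with lock:  # stop the timer chain once the download returns
                done[0] = True
                if timer[0]:
                    timer[0].cancel()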


@@ -164,6 +164,11 @@ class NiconicoIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
     _NETRC_MACHINE = 'niconico'
 
+    _API_HEADERS = {
+        'X-Frontend-ID': '6',
+        'X-Frontend-Version': '0'
+    }
+
     def _real_initialize(self):
         self._login()
@@ -197,46 +202,48 @@ def _get_heartbeat_info(self, info_dict):
         video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')
 
-        # Get video webpage for API data.
-        webpage, handle = self._download_webpage_handle(
-            'http://www.nicovideo.jp/watch/' + video_id, video_id)
-
-        api_data = self._parse_json(self._html_search_regex(
-            'data-api-data="([^"]+)"', webpage,
-            'API data', default='{}'), video_id)
+        api_data = (
+            info_dict.get('_api_data')
+            or self._parse_json(
+                self._html_search_regex(
+                    'data-api-data="([^"]+)"',
+                    self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id),
+                    'API data', default='{}'),
+                video_id))
 
         session_api_data = try_get(api_data, lambda x: x['media']['delivery']['movie']['session'])
         session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0])
 
-        # ping
-        self._download_json(
-            'https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', video_id,
-            query={'t': try_get(api_data, lambda x: x['video']['dmcInfo']['tracking_id'])},
-            headers={
-                'Origin': 'https://www.nicovideo.jp',
-                'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
-                'X-Frontend-Id': '6',
-                'X-Frontend-Version': '0'
-            })
+        def ping():
+            status = try_get(
+                self._download_json(
+                    'https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', video_id,
+                    query={'t': try_get(api_data, lambda x: x['media']['delivery']['trackingId'])},
+                    note='Acquiring permission for downloading video',
+                    headers=self._API_HEADERS),
+                lambda x: x['meta']['status'])
+            if status != 200:
+                self.report_warning('Failed to acquire permission for playing video. The video may not download.')
 
         yesno = lambda x: 'yes' if x else 'no'
 
         # m3u8 (encryption)
-        if 'encryption' in (try_get(api_data, lambda x: x['media']['delivery']['movie']) or {}):
+        if try_get(api_data, lambda x: x['media']['delivery']['encryption']) is not None:
             protocol = 'm3u8'
+            encryption = self._parse_json(session_api_data['token'], video_id)['hls_encryption']
             session_api_http_parameters = {
                 'parameters': {
                     'hls_parameters': {
                         'encryption': {
-                            'hls_encryption_v1': {
-                                'encrypted_key': try_get(api_data, lambda x: x['video']['dmcInfo']['encryption']['hls_encryption_v1']['encrypted_key']),
-                                'key_uri': try_get(api_data, lambda x: x['video']['dmcInfo']['encryption']['hls_encryption_v1']['key_uri'])
+                            encryption: {
+                                'encrypted_key': try_get(api_data, lambda x: x['media']['delivery']['encryption']['encryptedKey']),
+                                'key_uri': try_get(api_data, lambda x: x['media']['delivery']['encryption']['keyUri'])
                             }
                         },
                         'transfer_preset': '',
-                        'use_ssl': yesno(session_api_endpoint['is_ssl']),
-                        'use_well_known_port': yesno(session_api_endpoint['is_well_known_port']),
-                        'segment_duration': 6000
+                        'use_ssl': yesno(session_api_endpoint['isSsl']),
+                        'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
+                        'segment_duration': 6000,
                     }
                 }
             }
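
The data-api-data attribute holds HTML-escaped JSON embedded in the watch page; a self-contained sketch of how such a value can be pulled out (parse_api_data is an illustrative helper, not the extractor's _parse_json/_html_search_regex machinery):

    import html
    import json
    import re

    def parse_api_data(webpage):
        # Same regex as the extractor; the attribute value is HTML-escaped JSON
        m = re.search(r'data-api-data="([^"]+)"', webpage)
        return json.loads(html.unescape(m.group(1))) if m else {}

    page = '<div data-api-data="{&quot;media&quot;:{&quot;delivery&quot;:null}}"></div>'
    assert parse_api_data(page) == {'media': {'delivery': None}}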
@@ -310,7 +317,8 @@ def _get_heartbeat_info(self, info_dict):
             'url': session_api_endpoint['url'] + '/' + session_response['data']['session']['id'] + '?_format=json&_method=PUT',
             'data': json.dumps(session_response['data']),
             # interval, convert milliseconds to seconds, then halve to make a buffer.
-            'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=2000),
+            'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=3000),
+            'ping': ping
         }
 
         return info_dict, heartbeat_info_dict
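
This dict is the whole contract between extractor and downloader, now including the ping callback. An illustrative shape (all values below are placeholders, not real session data):

    heartbeat_info_dict = {
        'url': 'https://api.dmc.nico/api/sessions/<id>?_format=json&_method=PUT',  # placeholder session URL
        'data': '{"session": ...}',   # JSON-serialized session, re-POSTed on each beat
        'interval': 40.0,             # heartbeatLifetime (ms) / 3000
        'ping': (lambda: None),       # permission call, run once by the downloader
    }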
@@ -400,7 +408,7 @@ def get_video_info_xml(items):
         # Get HTML5 videos info
         quality_info = try_get(api_data, lambda x: x['media']['delivery']['movie'])
         if not quality_info:
-            raise ExtractorError('The video can\'t downloaded.', expected=True)
+            raise ExtractorError('The video can\'t be downloaded', expected=True)
 
         for audio_quality in quality_info.get('audios') or {}:
             for video_quality in quality_info.get('videos') or {}:
@@ -412,9 +420,7 @@ def get_video_info_xml(items):
         # Get flv/swf info
         timestamp = None
         video_real_url = try_get(api_data, lambda x: x['video']['smileInfo']['url'])
-        if not video_real_url:
-            self.report_warning('Unable to obtain smile video information')
-        else:
+        if video_real_url:
             is_economy = video_real_url.endswith('low')
 
             if is_economy:
@@ -486,9 +492,6 @@ def get_video_info_xml(items):
                 'filesize': filesize
             })
 
-        if len(formats) == 0:
-            raise ExtractorError('Unable to find video info.')
-
         self._sort_formats(formats)
 
         # Start extracting information
@@ -585,6 +588,7 @@ def get_video_info_xml(items):
         return {
             'id': video_id,
+            '_api_data': api_data,
             'title': title,
             'formats': formats,
             'thumbnail': thumbnail,
@@ -619,24 +623,19 @@ class NiconicoPlaylistIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    _API_HEADERS = {
+        'X-Frontend-ID': '6',
+        'X-Frontend-Version': '0'
+    }
+
     def _real_extract(self, url):
         list_id = self._match_id(url)
-        webpage = self._download_webpage(url, list_id)
-
-        header = self._parse_json(self._html_search_regex(
-            r'data-common-header="([^"]+)"', webpage,
-            'webpage header'), list_id)
-        frontendId = header.get('initConfig').get('frontendId')
-        frontendVersion = header.get('initConfig').get('frontendVersion')
 
         def get_page_data(pagenum, pagesize):
             return self._download_json(
                 'http://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
                 query={'page': 1 + pagenum, 'pageSize': pagesize},
-                headers={
-                    'X-Frontend-Id': frontendId,
-                    'X-Frontend-Version': frontendVersion,
-                }).get('data').get('mylist')
+                headers=self._API_HEADERS).get('data').get('mylist')
 
         data = get_page_data(0, 1)
         title = data.get('name')
@@ -672,20 +671,20 @@ class NiconicoUserIE(InfoExtractor):
         'playlist_mincount': 101,
     }
 
     _API_URL = "https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s"
-    _api_headers = {
-        'X-Frontend-ID': '6',
-        'X-Frontend-Version': '0',
-        'X-Niconico-Language': 'en-us'
-    }
     _PAGE_SIZE = 100
 
+    _API_HEADERS = {
+        'X-Frontend-ID': '6',
+        'X-Frontend-Version': '0'
+    }
+
     def _entries(self, list_id, ):
         total_count = 1
         count = page_num = 0
         while count < total_count:
             json_parsed = self._download_json(
                 self._API_URL % (list_id, self._PAGE_SIZE, page_num + 1), list_id,
-                headers=self._api_headers,
+                headers=self._API_HEADERS,
                 note='Downloading JSON metadata%s' % (' page %d' % page_num if page_num else ''))
             if not page_num:
                 total_count = int_or_none(json_parsed['data'].get('totalCount'))
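
The user-video listing keeps paging until the running count reaches the API-reported totalCount. A hedged sketch of that loop (fetch_page and the 'items' key are illustrative stand-ins for the _download_json call and its payload):

    def iter_user_videos(fetch_page, page_size=100):
        total_count, count, page_num = 1, 0, 0
        while count < total_count:
            data = fetch_page(page_num + 1, page_size)['data']
            if not page_num:
                total_count = data.get('totalCount') or 0
            items = data.get('items') or []
            if not items:
                break  # guard against an empty page to avoid looping forever
            count += len(items)
            page_num += 1
            yield from items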