diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index 634d17d919..cd22df25a2 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -1,6 +1,7 @@
# encoding: utf-8
from __future__ import unicode_literals
+import collections
import re
import json
import sys
@@ -16,7 +17,6 @@
get_element_by_class,
int_or_none,
orderedSet,
- parse_duration,
remove_start,
str_to_int,
unescapeHTML,
@@ -447,6 +447,9 @@ class VKWallPostIE(VKBaseIE):
'skip_download': True,
},
}],
+ 'params': {
+ 'usenetrc': True,
+ },
'skip': 'Requires vk account credentials',
}, {
# single YouTube embed, no leading -
@@ -456,6 +459,9 @@ class VKWallPostIE(VKBaseIE):
'title': 'Sergey Gorbunov - Wall post 85155021_6319',
},
'playlist_count': 1,
+ 'params': {
+ 'usenetrc': True,
+ },
'skip': 'Requires vk account credentials',
}, {
# wall page URL
@@ -483,37 +489,41 @@ def _real_extract(self, url):
raise ExtractorError('VK said: %s' % error, expected=True)
description = clean_html(get_element_by_class('wall_post_text', webpage))
- uploader = clean_html(get_element_by_class(
- 'fw_post_author', webpage)) or self._og_search_description(webpage)
+ uploader = clean_html(get_element_by_class('author', webpage))
thumbnail = self._og_search_thumbnail(webpage)
entries = []
- for audio in re.finditer(r'''(?sx)
- ]+
- id=(?P["\'])audio_info(?P\d+_\d+).*?(?P=q1)[^>]+
- value=(?P["\'])(?Phttp.+?)(?P=q2)
- .+?
- ''', webpage):
- audio_html = audio.group(0)
- audio_id = audio.group('id')
- duration = parse_duration(get_element_by_class('duration', audio_html))
- track = self._html_search_regex(
- r']+id=["\']title%s[^>]*>([^<]+)' % audio_id,
- audio_html, 'title', default=None)
- artist = self._html_search_regex(
- r'>([^<]+)\s*&ndash', audio_html,
- 'artist', default=None)
- entries.append({
- 'id': audio_id,
- 'url': audio.group('url'),
- 'title': '%s - %s' % (artist, track) if artist and track else audio_id,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'uploader': uploader,
- 'artist': artist,
- 'track': track,
- })
+ audio_ids = re.findall(r'data-full-id=["\'](\d+_\d+)', webpage)
+ if audio_ids:
+ al_audio = self._download_webpage(
+ 'https://vk.com/al_audio.php', post_id,
+ note='Downloading audio info', fatal=False,
+ data=urlencode_postdata({
+ 'act': 'reload_audio',
+ 'al': '1',
+ 'ids': ','.join(audio_ids)
+ }))
+ if al_audio:
+ Audio = collections.namedtuple(
+ 'Audio', ['id', 'user_id', 'url', 'track', 'artist', 'duration'])
+ audios = self._parse_json(
+ self._search_regex(
+ r'(.+?)', al_audio, 'audios', default='[]'),
+ post_id, fatal=False, transform_source=unescapeHTML)
+ if isinstance(audios, list):
+ for audio in audios:
+ a = Audio._make(audio[:6])
+ entries.append({
+ 'id': '%s_%s' % (a.user_id, a.id),
+ 'url': a.url,
+ 'title': '%s - %s' % (a.artist, a.track) if a.artist and a.track else a.id,
+ 'thumbnail': thumbnail,
+ 'duration': a.duration,
+ 'uploader': uploader,
+ 'artist': a.artist,
+ 'track': a.track,
+ })
for video in re.finditer(
r']+href=(["\'])(?P/video(?:-?[\d_]+).*?)\1', webpage):