[extractor/WSJArticle] Fix video id extraction (#4268)

Closes #4249
Authored by: sqrtNOT
This commit is contained in:
sqrtNOT 2022-07-17 10:34:33 +00:00 committed by GitHub
parent 3df6a603e4
commit 129dfa5f45
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -116,5 +116,6 @@ def _real_extract(self, url):
article_id = self._match_id(url) article_id = self._match_id(url)
webpage = self._download_webpage(url, article_id) webpage = self._download_webpage(url, article_id)
video_id = self._search_regex( video_id = self._search_regex(
r'data-src=["\']([a-fA-F0-9-]{36})', webpage, 'video id') r'(?:id=["\']video|video-|iframe\.html\?guid=|data-src=["\'])([a-fA-F0-9-]{36})',
webpage, 'video id')
return self.url_result('wsj:%s' % video_id, WSJIE.ie_key(), video_id) return self.url_result('wsj:%s' % video_id, WSJIE.ie_key(), video_id)