[pornhub] Improve title extraction (closes #24184)

This commit is contained in:
Sergey M․ 2020-03-03 06:23:39 +07:00
parent 1e1c1960aa
commit 46cc54ca8f
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D

View file

@ -189,10 +189,10 @@ def dl_webpage(platform):
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
# on that anymore.
title = self._html_search_meta(
'twitter:title', webpage, default=None) or self._search_regex(
(r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)',
r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1',
r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'),
'twitter:title', webpage, default=None) or self._html_search_regex(
(r'(?s)<h1[^>]+class=["\']title["\'][^>]*>(?P<title>.+?)</h1>',
r'<div[^>]+data-video-title=(["\'])(?P<title>(?:(?!\1).)+)\1',
r'shareTitle["\']\s*[=:]\s*(["\'])(?P<title>(?:(?!\1).)+)\1'),
webpage, 'title', group='title')
video_urls = []