[extractor/common] Allow quoteless content attribute in og regexes (Closes #7115)

This commit is contained in:
Sergey M․ 2015-10-10 01:44:33 +06:00
parent ef47b2c15f
commit 4180a3d8b7

View file

@@ -645,7 +645,7 @@ def _get_tfa_info(self, note='two-factor verification code'):
# Helper functions for extracting OpenGraph info
@staticmethod
def _og_regexes(prop):
content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\')'
content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\'|\s*([^\s"\'=<>`]+?))'
property_re = r'(?:name|property)=[\'"]?og:%s[\'"]?' % re.escape(prop)
template = r'<meta[^>]+?%s[^>]+?%s'
return [