[vporn] Make video URL regex more strict

There is garbage instead of a proper URL for some HD videos.
2025-01-05 23:54:24 +00:00 · 2014-09-15 19:19:37 +07:00 · 2014-09-15 19:19:37 +07:00 · 59d284c316
parent b04c8f7358
commit 59d284c316
1 changed files with 43 additions and 17 deletions
--- a/youtube_dl/extractor/vporn.py
+++ b/youtube_dl/extractor/vporn.py
@ -11,22 +11,48 @@

 class VpornIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?vporn\.com/[^/]+/(?P<display_id>[^/]+)/(?P<id>\d+)'
-    _TEST = {
-        'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/',
-        'md5': 'facf37c1b86546fa0208058546842c55',
-        'info_dict': {
-            'id': '497944',
-            'display_id': 'violet-on-her-th-birthday',
-            'ext': 'mp4',
-            'title': 'Violet on her 19th birthday',
-            'description': 'Violet dances in front of the camera which is sure to get you horny.',
-            'thumbnail': 're:^https?://.*\.jpg$',
-            'uploader': 'kileyGrope',
-            'categories': ['Masturbation', 'Teen'],
-            'duration': 393,
-            'age_limit': 18,
-        }
-    }
+    _TESTS = [
+        {
+            'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/',
+            'md5': 'facf37c1b86546fa0208058546842c55',
+            'info_dict': {
+                'id': '497944',
+                'display_id': 'violet-on-her-th-birthday',
+                'ext': 'mp4',
+                'title': 'Violet on her 19th birthday',
+                'description': 'Violet dances in front of the camera which is sure to get you horny.',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'uploader': 'kileyGrope',
+                'categories': ['Masturbation', 'Teen'],
+                'duration': 393,
+                'age_limit': 18,
+                'view_count': int,
+                'like_count': int,
+                'dislike_count': int,
+                'comment_count': int,
+            }
+        },
+        {
+            'url': 'http://www.vporn.com/female/hana-shower/523564/',
+            'md5': 'ced35a4656198a1664cf2cda1575a25f',
+            'info_dict': {
+                'id': '523564',
+                'display_id': 'hana-shower',
+                'ext': 'mp4',
+                'title': 'Hana Shower',
+                'description': 'Hana showers at the bathroom.',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'uploader': 'Hmmmmm',
+                'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female'],
+                'duration': 588,
+                'age_limit': 18,
+                'view_count': int,
+                'like_count': int,
+                'dislike_count': int,
+                'comment_count': int,
+            }
+        },
+    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@ -64,7 +90,7 @@ def _real_extract(self, url):

        formats = []

-        for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"([^"]+)"', webpage):
+        for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"(https?://[^"]+)"', webpage):
            video_url = video[1]
            fmt = {
                'url': video_url,