diff --git a/yt_dlp/extractor/bitchute.py b/yt_dlp/extractor/bitchute.py index c9cbb6d1d..87d04468a 100644 --- a/yt_dlp/extractor/bitchute.py +++ b/yt_dlp/extractor/bitchute.py @@ -4,8 +4,12 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, - GeoRestrictedError, + HEADRequest, + clean_html, + get_element_by_class, + int_or_none, orderedSet, + traverse_obj, unified_strdate, urlencode_postdata, ) @@ -18,7 +22,7 @@ class BitChuteIE(InfoExtractor): 'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/', 'md5': '7e427d7ed7af5a75b5855705ec750e2b', 'info_dict': { - 'id': 'szoMrox2JEI', + 'id': 'UGlrF9o9b-Q', 'ext': 'mp4', 'title': 'This is the first video on #BitChute !', 'description': 'md5:a0337e7b1fe39e32336974af8173a034', @@ -26,6 +30,21 @@ class BitChuteIE(InfoExtractor): 'uploader': 'BitChute', 'upload_date': '20170103', }, + }, { + # video not downloadable in browser, but we can recover it + 'url': 'https://www.bitchute.com/video/2s6B3nZjAk7R/', + 'md5': '05c12397d5354bf24494885b08d24ed1', + 'info_dict': { + 'id': '2s6B3nZjAk7R', + 'ext': 'mp4', + 'filesize': 71537926, + 'title': 'STYXHEXENHAMMER666 - Election Fraud, Clinton 2020, EU Armies, and Gun Control', + 'description': 'md5:228ee93bd840a24938f536aeac9cf749', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'BitChute', + 'upload_date': '20181113', + }, + 'params': {'check_formats': None}, }, { 'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/', 'only_matching': True, @@ -34,67 +53,57 @@ class BitChuteIE(InfoExtractor): 'only_matching': True, }] + _HEADERS = { + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36', + 'Referer': 'https://www.bitchute.com/', + } + + def _check_format(self, video_url, video_id): + urls = orderedSet( + re.sub(r'(^https?://)(seed\d+)(?=\.bitchute\.com)', fr'\g<1>{host}', video_url) + for host in (r'\g<2>', 'seed150', 'seed151', 'seed152', 'seed153')) + for url in urls: + try: + response = self._request_webpage( + HEADRequest(url), video_id=video_id, note=f'Checking {url}', headers=self._HEADERS) + except ExtractorError as e: + self.to_screen(f'{video_id}: URL is invalid, skipping: {e.cause}') + continue + return { + 'url': url, + 'filesize': int_or_none(response.headers.get('Content-Length')) + } + def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage( - 'https://www.bitchute.com/video/%s' % video_id, video_id, headers={ - 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36', - }) + f'https://www.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS) - title = self._html_search_regex( - (r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'