mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-05 23:54:24 +00:00
Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header
This commit is contained in:
parent
bf5b9d859a
commit
773f291dcb
|
@ -56,6 +56,8 @@
|
|||
ExtractorError,
|
||||
format_bytes,
|
||||
formatSeconds,
|
||||
GeoRestrictedError,
|
||||
ISO3166Utils,
|
||||
locked_file,
|
||||
make_HTTPS_handler,
|
||||
MaxDownloadsReached,
|
||||
|
@ -272,6 +274,13 @@ class YoutubeDL(object):
|
|||
If it returns None, the video is downloaded.
|
||||
match_filter_func in utils.py is one example for this.
|
||||
no_color: Do not emit color codes in output.
|
||||
bypass_geo_restriction:
|
||||
Bypass geographic restriction via faking X-Forwarded-For
|
||||
HTTP header (experimental)
|
||||
bypass_geo_restriction_as_country:
|
||||
Two-letter ISO 3166-1 alpha-2 country code that will be used for
|
||||
explicit geographic restriction bypassing via faking
|
||||
X-Forwarded-For HTTP header (experimental)
|
||||
|
||||
The following options determine which downloader is picked:
|
||||
external_downloader: Executable of the external downloader to call.
|
||||
|
@ -707,6 +716,14 @@ def extract_info(self, url, download=True, ie_key=None, extra_info={},
|
|||
return self.process_ie_result(ie_result, download, extra_info)
|
||||
else:
|
||||
return ie_result
|
||||
except GeoRestrictedError as e:
|
||||
msg = e.msg
|
||||
if e.countries:
|
||||
msg += '\nThis video is available in %s.' % ', '.join(
|
||||
map(ISO3166Utils.short2full, e.countries))
|
||||
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
|
||||
self.report_error(msg)
|
||||
break
|
||||
except ExtractorError as e: # An error we somewhat expected
|
||||
self.report_error(compat_str(e), e.format_traceback())
|
||||
break
|
||||
|
|
|
@ -414,6 +414,8 @@ def parse_retries(retries):
|
|||
'cn_verification_proxy': opts.cn_verification_proxy,
|
||||
'geo_verification_proxy': opts.geo_verification_proxy,
|
||||
'config_location': opts.config_location,
|
||||
'bypass_geo_restriction': opts.bypass_geo_restriction,
|
||||
'bypass_geo_restriction_as_country': opts.bypass_geo_restriction_as_country,
|
||||
}
|
||||
|
||||
with YoutubeDL(ydl_opts) as ydl:
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
import json
|
||||
import netrc
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import socket
|
||||
import sys
|
||||
|
@ -39,6 +40,8 @@
|
|||
ExtractorError,
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
GeoRestrictedError,
|
||||
GeoUtils,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_iso8601,
|
||||
|
@ -320,17 +323,25 @@ class InfoExtractor(object):
|
|||
_real_extract() methods and define a _VALID_URL regexp.
|
||||
Probably, they should also be added to the list of extractors.
|
||||
|
||||
_BYPASS_GEO attribute may be set to False in order to disable
|
||||
geo restriction bypass mechanisms for a particular extractor.
|
||||
Though it won't disable explicit geo restriction bypass based on
|
||||
country code provided with bypass_geo_restriction_as_country.
|
||||
|
||||
Finally, the _WORKING attribute should be set to False for broken IEs
|
||||
in order to warn the users and skip the tests.
|
||||
"""
|
||||
|
||||
_ready = False
|
||||
_downloader = None
|
||||
_x_forwarded_for_ip = None
|
||||
_BYPASS_GEO = True
|
||||
_WORKING = True
|
||||
|
||||
def __init__(self, downloader=None):
    """Create the extractor, optionally attaching it to *downloader*.

    The instance starts out not ready and without a fake
    X-Forwarded-For address; *downloader* is handed to
    set_downloader() unchanged.
    """
    self._ready, self._x_forwarded_for_ip = False, None
    self.set_downloader(downloader)
|
||||
|
||||
@classmethod
|
||||
|
@ -359,6 +370,10 @@ def working(cls):
|
|||
|
||||
def initialize(self):
    """Prepare the instance for extraction (authentication, etc).

    If no fake X-Forwarded-For address has been chosen yet and the
    downloader's 'bypass_geo_restriction_as_country' parameter is set,
    an address is requested from GeoUtils.random_ipv4() for that code.
    _real_initialize() is then invoked, but only while the instance is
    not yet marked ready.
    """
    if not self._x_forwarded_for_ip:
        forced_country = self._downloader.params.get(
            'bypass_geo_restriction_as_country')
        if forced_country:
            self._x_forwarded_for_ip = GeoUtils.random_ipv4(forced_country)
    if self._ready:
        return
    self._real_initialize()
    self._ready = True
|
||||
|
@ -366,8 +381,22 @@ def initialize(self):
|
|||
def extract(self, url):
|
||||
"""Extracts URL information and returns it in list of dicts."""
|
||||
try:
|
||||
self.initialize()
|
||||
return self._real_extract(url)
|
||||
for _ in range(2):
|
||||
try:
|
||||
self.initialize()
|
||||
return self._real_extract(url)
|
||||
except GeoRestrictedError as e:
|
||||
if (not self._downloader.params.get('bypass_geo_restriction_as_country', None) and
|
||||
self._BYPASS_GEO and
|
||||
self._downloader.params.get('bypass_geo_restriction', True) and
|
||||
not self._x_forwarded_for_ip and
|
||||
e.countries):
|
||||
self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(e.countries))
|
||||
if self._x_forwarded_for_ip:
|
||||
self.report_warning(
|
||||
'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
|
||||
continue
|
||||
raise
|
||||
except ExtractorError:
|
||||
raise
|
||||
except compat_http_client.IncompleteRead as e:
|
||||
|
@ -434,6 +463,15 @@ def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=
|
|||
if isinstance(url_or_request, (compat_str, str)):
|
||||
url_or_request = url_or_request.partition('#')[0]
|
||||
|
||||
# Some sites check X-Forwarded-For HTTP header in order to figure out
|
||||
# the origin of the client behind proxy. This allows bypassing geo
|
||||
# restriction by faking this header's value to an IP that belongs to some
|
||||
# geo unrestricted country. We will do so once we encounter any
|
||||
# geo restriction error.
|
||||
if self._x_forwarded_for_ip:
|
||||
if 'X-Forwarded-For' not in headers:
|
||||
headers['X-Forwarded-For'] = self._x_forwarded_for_ip
|
||||
|
||||
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query)
|
||||
if urlh is False:
|
||||
assert not fatal
|
||||
|
@ -609,10 +647,8 @@ def raise_login_required(msg='This video is only available for registered users'
|
|||
expected=True)
|
||||
|
||||
@staticmethod
|
||||
def raise_geo_restricted(msg='This video is not available from your location due to geo restriction'):
|
||||
raise ExtractorError(
|
||||
'%s. You might want to use --proxy to workaround.' % msg,
|
||||
expected=True)
|
||||
def raise_geo_restricted(msg='This video is not available from your location due to geo restriction', countries=None):
|
||||
raise GeoRestrictedError(msg, countries=countries)
|
||||
|
||||
# Methods for following #608
|
||||
@staticmethod
|
||||
|
|
|
@ -549,6 +549,18 @@ def _scrub_eq(o):
|
|||
'Upper bound of a range for randomized sleep before each download '
|
||||
'(maximum possible number of seconds to sleep). Must only be used '
|
||||
'along with --min-sleep-interval.'))
|
||||
workarounds.add_option(
|
||||
'--bypass-geo',
|
||||
action='store_true', dest='bypass_geo_restriction', default=True,
|
||||
help='Bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)')
|
||||
workarounds.add_option(
|
||||
'--no-bypass-geo',
|
||||
action='store_false', dest='bypass_geo_restriction', default=True,
|
||||
help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)')
|
||||
workarounds.add_option(
|
||||
'--bypass-geo-as-country', metavar='CODE',
|
||||
dest='bypass_geo_restriction_as_country', default=None,
|
||||
help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code (experimental)')
|
||||
|
||||
verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
|
||||
verbosity.add_option(
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
import os
|
||||
import pipes
|
||||
import platform
|
||||
import random
|
||||
import re
|
||||
import socket
|
||||
import ssl
|
||||
|
@ -747,6 +748,18 @@ class RegexNotFoundError(ExtractorError):
|
|||
pass
|
||||
|
||||
|
||||
class GeoRestrictedError(ExtractorError):
    """Error signalling that a website refused access for geographic reasons.

    Raise it when a video cannot be obtained from the current geographic
    location because of restrictions imposed by the website.

    Attributes:
        msg: The message given to the constructor, kept as passed in.
        countries: The value given for ``countries`` (None by default).
            NOTE(review): presumably an iterable of two-letter country
            codes where the video is available -- confirm with raisers.
    """

    def __init__(self, msg, countries=None):
        # The base class is always told the error is an expected one.
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.msg, self.countries = msg, countries
|
||||
|
||||
|
||||
class DownloadError(YoutubeDLError):
|
||||
"""Download Error exception.
|
||||
|
||||
|
@ -3027,6 +3040,260 @@ def short2full(cls, code):
|
|||
return cls._country_map.get(code.upper())
|
||||
|
||||
|
||||
class GeoUtils(object):
|
||||
# Major IPv4 address blocks per country
|
||||
_country_ip_map = {
|
||||
'AD': '85.94.160.0/19',
|
||||
'AE': '94.200.0.0/13',
|
||||
'AF': '149.54.0.0/17',
|
||||
'AG': '209.59.64.0/18',
|
||||
'AI': '204.14.248.0/21',
|
||||
'AL': '46.99.0.0/16',
|
||||
'AM': '46.70.0.0/15',
|
||||
'AO': '105.168.0.0/13',
|
||||
'AP': '159.117.192.0/21',
|
||||
'AR': '181.0.0.0/12',
|
||||
'AS': '202.70.112.0/20',
|
||||
'AT': '84.112.0.0/13',
|
||||
'AU': '1.128.0.0/11',
|
||||
'AW': '181.41.0.0/18',
|
||||
'AZ': '5.191.0.0/16',
|
||||
'BA': '31.176.128.0/17',
|
||||
'BB': '65.48.128.0/17',
|
||||
'BD': '114.130.0.0/16',
|
||||
'BE': '57.0.0.0/8',
|
||||
'BF': '129.45.128.0/17',
|
||||
'BG': '95.42.0.0/15',
|
||||
'BH': '37.131.0.0/17',
|
||||
'BI': '154.117.192.0/18',
|
||||
'BJ': '137.255.0.0/16',
|
||||
'BL': '192.131.134.0/24',
|
||||
'BM': '196.12.64.0/18',
|
||||
'BN': '156.31.0.0/16',
|
||||
'BO': '161.56.0.0/16',
|
||||
'BQ': '161.0.80.0/20',
|
||||
'BR': '152.240.0.0/12',
|
||||
'BS': '24.51.64.0/18',
|
||||
'BT': '119.2.96.0/19',
|
||||
'BW': '168.167.0.0/16',
|
||||
'BY': '178.120.0.0/13',
|
||||
'BZ': '179.42.192.0/18',
|
||||
'CA': '99.224.0.0/11',
|
||||
'CD': '41.243.0.0/16',
|
||||
'CF': '196.32.200.0/21',
|
||||
'CG': '197.214.128.0/17',
|
||||
'CH': '85.0.0.0/13',
|
||||
'CI': '154.232.0.0/14',
|
||||
'CK': '202.65.32.0/19',
|
||||
'CL': '152.172.0.0/14',
|
||||
'CM': '165.210.0.0/15',
|
||||
'CN': '36.128.0.0/10',
|
||||
'CO': '181.240.0.0/12',
|
||||
'CR': '201.192.0.0/12',
|
||||
'CU': '152.206.0.0/15',
|
||||
'CV': '165.90.96.0/19',
|
||||
'CW': '190.88.128.0/17',
|
||||
'CY': '46.198.0.0/15',
|
||||
'CZ': '88.100.0.0/14',
|
||||
'DE': '53.0.0.0/8',
|
||||
'DJ': '197.241.0.0/17',
|
||||
'DK': '87.48.0.0/12',
|
||||
'DM': '192.243.48.0/20',
|
||||
'DO': '152.166.0.0/15',
|
||||
'DZ': '41.96.0.0/12',
|
||||
'EC': '186.68.0.0/15',
|
||||
'EE': '90.190.0.0/15',
|
||||
'EG': '156.160.0.0/11',
|
||||
'ER': '196.200.96.0/20',
|
||||
'ES': '88.0.0.0/11',
|
||||
'ET': '196.188.0.0/14',
|
||||
'EU': '2.16.0.0/13',
|
||||
'FI': '91.152.0.0/13',
|
||||
'FJ': '144.120.0.0/16',
|
||||
'FM': '119.252.112.0/20',
|
||||
'FO': '88.85.32.0/19',
|
||||
'FR': '90.0.0.0/9',
|
||||
'GA': '41.158.0.0/15',
|
||||
'GB': '25.0.0.0/8',
|
||||
'GD': '74.122.88.0/21',
|
||||
'GE': '31.146.0.0/16',
|
||||
'GF': '161.22.64.0/18',
|
||||
'GG': '62.68.160.0/19',
|
||||
'GH': '45.208.0.0/14',
|
||||
'GI': '85.115.128.0/19',
|
||||
'GL': '88.83.0.0/19',
|
||||
'GM': '160.182.0.0/15',
|
||||
'GN': '197.149.192.0/18',
|
||||
'GP': '104.250.0.0/19',
|
||||
'GQ': '105.235.224.0/20',
|
||||
'GR': '94.64.0.0/13',
|
||||
'GT': '168.234.0.0/16',
|
||||
'GU': '168.123.0.0/16',
|
||||
'GW': '197.214.80.0/20',
|
||||
'GY': '181.41.64.0/18',
|
||||
'HK': '113.252.0.0/14',
|
||||
'HN': '181.210.0.0/16',
|
||||
'HR': '93.136.0.0/13',
|
||||
'HT': '148.102.128.0/17',
|
||||
'HU': '84.0.0.0/14',
|
||||
'ID': '39.192.0.0/10',
|
||||
'IE': '87.32.0.0/12',
|
||||
'IL': '79.176.0.0/13',
|
||||
'IM': '5.62.80.0/20',
|
||||
'IN': '117.192.0.0/10',
|
||||
'IO': '203.83.48.0/21',
|
||||
'IQ': '37.236.0.0/14',
|
||||
'IR': '2.176.0.0/12',
|
||||
'IS': '82.221.0.0/16',
|
||||
'IT': '79.0.0.0/10',
|
||||
'JE': '87.244.64.0/18',
|
||||
'JM': '72.27.0.0/17',
|
||||
'JO': '176.29.0.0/16',
|
||||
'JP': '126.0.0.0/8',
|
||||
'KE': '105.48.0.0/12',
|
||||
'KG': '158.181.128.0/17',
|
||||
'KH': '36.37.128.0/17',
|
||||
'KI': '103.25.140.0/22',
|
||||
'KM': '197.255.224.0/20',
|
||||
'KN': '198.32.32.0/19',
|
||||
'KP': '175.45.176.0/22',
|
||||
'KR': '175.192.0.0/10',
|
||||
'KW': '37.36.0.0/14',
|
||||
'KY': '64.96.0.0/15',
|
||||
'KZ': '2.72.0.0/13',
|
||||
'LA': '115.84.64.0/18',
|
||||
'LB': '178.135.0.0/16',
|
||||
'LC': '192.147.231.0/24',
|
||||
'LI': '82.117.0.0/19',
|
||||
'LK': '112.134.0.0/15',
|
||||
'LR': '41.86.0.0/19',
|
||||
'LS': '129.232.0.0/17',
|
||||
'LT': '78.56.0.0/13',
|
||||
'LU': '188.42.0.0/16',
|
||||
'LV': '46.109.0.0/16',
|
||||
'LY': '41.252.0.0/14',
|
||||
'MA': '105.128.0.0/11',
|
||||
'MC': '88.209.64.0/18',
|
||||
'MD': '37.246.0.0/16',
|
||||
'ME': '178.175.0.0/17',
|
||||
'MF': '74.112.232.0/21',
|
||||
'MG': '154.126.0.0/17',
|
||||
'MH': '117.103.88.0/21',
|
||||
'MK': '77.28.0.0/15',
|
||||
'ML': '154.118.128.0/18',
|
||||
'MM': '37.111.0.0/17',
|
||||
'MN': '49.0.128.0/17',
|
||||
'MO': '60.246.0.0/16',
|
||||
'MP': '202.88.64.0/20',
|
||||
'MQ': '109.203.224.0/19',
|
||||
'MR': '41.188.64.0/18',
|
||||
'MS': '208.90.112.0/22',
|
||||
'MT': '46.11.0.0/16',
|
||||
'MU': '105.16.0.0/12',
|
||||
'MV': '27.114.128.0/18',
|
||||
'MW': '105.234.0.0/16',
|
||||
'MX': '187.192.0.0/11',
|
||||
'MY': '175.136.0.0/13',
|
||||
'MZ': '197.218.0.0/15',
|
||||
'NA': '41.182.0.0/16',
|
||||
'NC': '101.101.0.0/18',
|
||||
'NE': '197.214.0.0/18',
|
||||
'NF': '203.17.240.0/22',
|
||||
'NG': '105.112.0.0/12',
|
||||
'NI': '186.76.0.0/15',
|
||||
'NL': '145.96.0.0/11',
|
||||
'NO': '84.208.0.0/13',
|
||||
'NP': '36.252.0.0/15',
|
||||
'NR': '203.98.224.0/19',
|
||||
'NU': '49.156.48.0/22',
|
||||
'NZ': '49.224.0.0/14',
|
||||
'OM': '5.36.0.0/15',
|
||||
'PA': '186.72.0.0/15',
|
||||
'PE': '186.160.0.0/14',
|
||||
'PF': '123.50.64.0/18',
|
||||
'PG': '124.240.192.0/19',
|
||||
'PH': '49.144.0.0/13',
|
||||
'PK': '39.32.0.0/11',
|
||||
'PL': '83.0.0.0/11',
|
||||
'PM': '70.36.0.0/20',
|
||||
'PR': '66.50.0.0/16',
|
||||
'PS': '188.161.0.0/16',
|
||||
'PT': '85.240.0.0/13',
|
||||
'PW': '202.124.224.0/20',
|
||||
'PY': '181.120.0.0/14',
|
||||
'QA': '37.210.0.0/15',
|
||||
'RE': '139.26.0.0/16',
|
||||
'RO': '79.112.0.0/13',
|
||||
'RS': '178.220.0.0/14',
|
||||
'RU': '5.136.0.0/13',
|
||||
'RW': '105.178.0.0/15',
|
||||
'SA': '188.48.0.0/13',
|
||||
'SB': '202.1.160.0/19',
|
||||
'SC': '154.192.0.0/11',
|
||||
'SD': '154.96.0.0/13',
|
||||
'SE': '78.64.0.0/12',
|
||||
'SG': '152.56.0.0/14',
|
||||
'SI': '188.196.0.0/14',
|
||||
'SK': '78.98.0.0/15',
|
||||
'SL': '197.215.0.0/17',
|
||||
'SM': '89.186.32.0/19',
|
||||
'SN': '41.82.0.0/15',
|
||||
'SO': '197.220.64.0/19',
|
||||
'SR': '186.179.128.0/17',
|
||||
'SS': '105.235.208.0/21',
|
||||
'ST': '197.159.160.0/19',
|
||||
'SV': '168.243.0.0/16',
|
||||
'SX': '190.102.0.0/20',
|
||||
'SY': '5.0.0.0/16',
|
||||
'SZ': '41.84.224.0/19',
|
||||
'TC': '65.255.48.0/20',
|
||||
'TD': '154.68.128.0/19',
|
||||
'TG': '196.168.0.0/14',
|
||||
'TH': '171.96.0.0/13',
|
||||
'TJ': '85.9.128.0/18',
|
||||
'TK': '27.96.24.0/21',
|
||||
'TL': '180.189.160.0/20',
|
||||
'TM': '95.85.96.0/19',
|
||||
'TN': '197.0.0.0/11',
|
||||
'TO': '175.176.144.0/21',
|
||||
'TR': '78.160.0.0/11',
|
||||
'TT': '186.44.0.0/15',
|
||||
'TV': '202.2.96.0/19',
|
||||
'TW': '120.96.0.0/11',
|
||||
'TZ': '156.156.0.0/14',
|
||||
'UA': '93.72.0.0/13',
|
||||
'UG': '154.224.0.0/13',
|
||||
'US': '3.0.0.0/8',
|
||||
'UY': '167.56.0.0/13',
|
||||
'UZ': '82.215.64.0/18',
|
||||
'VA': '212.77.0.0/19',
|
||||
'VC': '24.92.144.0/20',
|
||||
'VE': '186.88.0.0/13',
|
||||
'VG': '172.103.64.0/18',
|
||||
'VI': '146.226.0.0/16',
|
||||
'VN': '14.160.0.0/11',
|
||||
'VU': '202.80.32.0/20',
|
||||
'WF': '117.20.32.0/21',
|
||||
'WS': '202.4.32.0/19',
|
||||
'YE': '134.35.0.0/16',
|
||||
'YT': '41.242.116.0/22',
|
||||
'ZA': '41.0.0.0/11',
|
||||
'ZM': '165.56.0.0/13',
|
||||
'ZW': '41.85.192.0/19',
|
||||
}
|
||||
|
||||
@classmethod
def random_ipv4(cls, code):
    """Return a random dotted-quad IPv4 address for country *code*.

    *code* is upper-cased and looked up in ``_country_ip_map``; the
    address is drawn uniformly from the whole CIDR block found there,
    both ends included. Returns None when no block is mapped to *code*.
    """
    cidr = cls._country_ip_map.get(code.upper())
    if not cidr:
        return None
    network, prefix_bits = cidr.split('/')
    lowest = compat_struct_unpack('!L', socket.inet_aton(network))[0]
    # Setting every host bit gives the highest address of the block.
    host_mask = 0xffffffff >> int(prefix_bits)
    highest = lowest | host_mask
    chosen = random.randint(lowest, highest)
    return socket.inet_ntoa(compat_struct_pack('!I', chosen))
|
||||
|
||||
|
||||
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
|
||||
def __init__(self, proxies=None):
|
||||
# Set default handlers
|
||||
|
|
Loading…
Reference in a new issue