mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-05 23:54:24 +00:00
Improve geo bypass mechanism
* Rename options to preffixly match with --geo-verification-proxy * Introduce _GEO_COUNTRIES for extractors * Implement faking IP right away for sites with known geo restriction
This commit is contained in:
parent
0a840f584c
commit
4248dad92b
|
@ -323,10 +323,15 @@ class InfoExtractor(object):
|
|||
_real_extract() methods and define a _VALID_URL regexp.
|
||||
Probably, they should also be added to the list of extractors.
|
||||
|
||||
_BYPASS_GEO attribute may be set to False in order to disable
|
||||
_GEO_BYPASS attribute may be set to False in order to disable
|
||||
geo restriction bypass mechanisms for a particular extractor.
|
||||
Though it won't disable explicit geo restriction bypass based on
|
||||
country code provided with geo_bypass_country.
|
||||
country code provided with geo_bypass_country. (experimental)
|
||||
|
||||
_GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
|
||||
countries for this extractor. One of these countries will be used by
|
||||
geo restriction bypass mechanism right away in order to bypass
|
||||
geo restriction, of course, if the mechanism is not disabled. (experimental)
|
||||
|
||||
Finally, the _WORKING attribute should be set to False for broken IEs
|
||||
in order to warn the users and skip the tests.
|
||||
|
@ -335,7 +340,8 @@ class InfoExtractor(object):
|
|||
_ready = False
|
||||
_downloader = None
|
||||
_x_forwarded_for_ip = None
|
||||
_BYPASS_GEO = True
|
||||
_GEO_BYPASS = True
|
||||
_GEO_COUNTRIES = None
|
||||
_WORKING = True
|
||||
|
||||
def __init__(self, downloader=None):
|
||||
|
@ -370,14 +376,28 @@ def working(cls):
|
|||
|
||||
def initialize(self):
|
||||
"""Initializes an instance (authentication, etc)."""
|
||||
if not self._x_forwarded_for_ip:
|
||||
country_code = self._downloader.params.get('geo_bypass_country', None)
|
||||
if country_code:
|
||||
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
|
||||
self.__initialize_geo_bypass()
|
||||
if not self._ready:
|
||||
self._real_initialize()
|
||||
self._ready = True
|
||||
|
||||
def __initialize_geo_bypass(self):
|
||||
if not self._x_forwarded_for_ip:
|
||||
country_code = self._downloader.params.get('geo_bypass_country', None)
|
||||
# If there is no explicit country for geo bypass specified and
|
||||
# the extractor is known to be geo restricted let's fake IP
|
||||
# as X-Forwarded-For right away.
|
||||
if (not country_code and
|
||||
self._GEO_BYPASS and
|
||||
self._downloader.params.get('geo_bypass', True) and
|
||||
self._GEO_COUNTRIES):
|
||||
country_code = random.choice(self._GEO_COUNTRIES)
|
||||
if country_code:
|
||||
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
|
||||
if self._downloader.params.get('verbose', False):
|
||||
self._downloader.to_stdout(
|
||||
'[debug] Using fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
|
||||
|
||||
def extract(self, url):
|
||||
"""Extracts URL information and returns it in list of dicts."""
|
||||
try:
|
||||
|
@ -389,16 +409,8 @@ def extract(self, url):
|
|||
ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
|
||||
return ie_result
|
||||
except GeoRestrictedError as e:
|
||||
if (not self._downloader.params.get('geo_bypass_country', None) and
|
||||
self._BYPASS_GEO and
|
||||
self._downloader.params.get('geo_bypass', True) and
|
||||
not self._x_forwarded_for_ip and
|
||||
e.countries):
|
||||
self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(e.countries))
|
||||
if self._x_forwarded_for_ip:
|
||||
self.report_warning(
|
||||
'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
|
||||
continue
|
||||
if self.__maybe_fake_ip_and_retry(e.countries):
|
||||
continue
|
||||
raise
|
||||
except ExtractorError:
|
||||
raise
|
||||
|
@ -407,6 +419,19 @@ def extract(self, url):
|
|||
except (KeyError, StopIteration) as e:
|
||||
raise ExtractorError('An extractor error has occurred.', cause=e)
|
||||
|
||||
def __maybe_fake_ip_and_retry(self, countries):
|
||||
if (not self._downloader.params.get('geo_bypass_country', None) and
|
||||
self._GEO_BYPASS and
|
||||
self._downloader.params.get('geo_bypass', True) and
|
||||
not self._x_forwarded_for_ip and
|
||||
countries):
|
||||
self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(countries))
|
||||
if self._x_forwarded_for_ip:
|
||||
self.report_warning(
|
||||
'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
|
||||
return True
|
||||
return False
|
||||
|
||||
def set_downloader(self, downloader):
|
||||
"""Sets the downloader for this IE."""
|
||||
self._downloader = downloader
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
class DramaFeverBaseIE(AMPIE):
|
||||
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
|
||||
_NETRC_MACHINE = 'dramafever'
|
||||
_GEO_COUNTRIES = ['US', 'CA']
|
||||
|
||||
_CONSUMER_SECRET = 'DA59dtVXYLxajktV'
|
||||
|
||||
|
@ -118,7 +119,7 @@ def _real_extract(self, url):
|
|||
if isinstance(e.cause, compat_HTTPError):
|
||||
self.raise_geo_restricted(
|
||||
msg='Currently unavailable in your country',
|
||||
countries=['US', 'CA'])
|
||||
countries=self._GEO_COUNTRIES)
|
||||
raise
|
||||
|
||||
series_id, episode_number = video_id.split('.')
|
||||
|
|
|
@ -37,6 +37,7 @@ class GoIE(AdobePassIE):
|
|||
}
|
||||
}
|
||||
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
|
||||
_GEO_COUNTRIES = ['US']
|
||||
_TESTS = [{
|
||||
'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
|
||||
'info_dict': {
|
||||
|
@ -104,7 +105,7 @@ def _real_extract(self, url):
|
|||
for error in errors:
|
||||
if error.get('code') == 1002:
|
||||
self.raise_geo_restricted(
|
||||
error['message'], countries=['US'])
|
||||
error['message'], countries=self._GEO_COUNTRIES)
|
||||
error_message = ', '.join([error['message'] for error in errors])
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
|
||||
asset_url += '?' + entitlement['uplynkData']['sessionKey']
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
|
||||
class ITVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
|
||||
_GEO_COUNTRIES = ['GB']
|
||||
_TEST = {
|
||||
'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053',
|
||||
'info_dict': {
|
||||
|
@ -101,7 +102,8 @@ def _add_sub_element(element, name):
|
|||
fault_code = xpath_text(resp_env, './/faultcode')
|
||||
fault_string = xpath_text(resp_env, './/faultstring')
|
||||
if fault_code == 'InvalidGeoRegion':
|
||||
self.raise_geo_restricted(msg=fault_string, countries=['GB'])
|
||||
self.raise_geo_restricted(
|
||||
msg=fault_string, countries=self._GEO_COUNTRIES)
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string))
|
||||
title = xpath_text(playlist, 'EpisodeTitle', fatal=True)
|
||||
video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
|
||||
|
||||
class NRKBaseIE(InfoExtractor):
|
||||
_GEO_COUNTRIES = ['NO']
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
|
@ -93,7 +94,8 @@ def video_id_and_title(idx):
|
|||
# Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
|
||||
if 'IsGeoBlocked' in message_type:
|
||||
self.raise_geo_restricted(
|
||||
msg=MESSAGES.get('ProgramIsGeoBlocked'), countries=['NO'])
|
||||
msg=MESSAGES.get('ProgramIsGeoBlocked'),
|
||||
countries=self._GEO_COUNTRIES)
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, MESSAGES.get(
|
||||
message_type, message_type)),
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
|
||||
class OnDemandKoreaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
|
||||
_GEO_COUNTRIES = ['US', 'CA']
|
||||
_TEST = {
|
||||
'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html',
|
||||
'info_dict': {
|
||||
|
@ -36,7 +37,7 @@ def _real_extract(self, url):
|
|||
if 'msg_block_01.png' in webpage:
|
||||
self.raise_geo_restricted(
|
||||
msg='This content is not available in your region',
|
||||
countries=['US', 'CA'])
|
||||
countries=self._GEO_COUNTRIES)
|
||||
|
||||
if 'This video is only available to ODK PLUS members.' in webpage:
|
||||
raise ExtractorError(
|
||||
|
|
|
@ -193,6 +193,8 @@ class PBSIE(InfoExtractor):
|
|||
)
|
||||
''' % '|'.join(list(zip(*_STATIONS))[0])
|
||||
|
||||
_GEO_COUNTRIES = ['US']
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
|
||||
|
@ -492,7 +494,8 @@ def extract_redirect_urls(info):
|
|||
message = self._ERRORS.get(
|
||||
redirect_info['http_code'], redirect_info['message'])
|
||||
if redirect_info['http_code'] == 403:
|
||||
self.raise_geo_restricted(msg=message, countries=['US'])
|
||||
self.raise_geo_restricted(
|
||||
msg=message, countries=self._GEO_COUNTRIES)
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, message), expected=True)
|
||||
|
||||
|
|
|
@ -14,7 +14,8 @@
|
|||
|
||||
class SRGSSRIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|srgssr):(?P<bu>srf|rts|rsi|rtr|swi):(?:[^:]+:)?(?P<type>video|audio):(?P<id>[0-9a-f\-]{36}|\d+)'
|
||||
_BYPASS_GEO = False
|
||||
_GEO_BYPASS = False
|
||||
_GEO_COUNTRIES = ['CH']
|
||||
|
||||
_ERRORS = {
|
||||
'AGERATING12': 'To protect children under the age of 12, this video is only available between 8 p.m. and 6 a.m.',
|
||||
|
@ -43,7 +44,8 @@ def get_media_data(self, bu, media_type, media_id):
|
|||
if media_data.get('block') and media_data['block'] in self._ERRORS:
|
||||
message = self._ERRORS[media_data['block']]
|
||||
if media_data['block'] == 'GEOBLOCK':
|
||||
self.raise_geo_restricted(msg=message, countries=['CH'])
|
||||
self.raise_geo_restricted(
|
||||
msg=message, countries=self._GEO_COUNTRIES)
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, message), expected=True)
|
||||
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
|
||||
|
||||
class SVTBaseIE(InfoExtractor):
|
||||
_GEO_COUNTRIES = ['SE']
|
||||
def _extract_video(self, video_info, video_id):
|
||||
formats = []
|
||||
for vr in video_info['videoReferences']:
|
||||
|
@ -39,7 +40,8 @@ def _extract_video(self, video_info, video_id):
|
|||
})
|
||||
if not formats and video_info.get('rights', {}).get('geoBlockedSweden'):
|
||||
self.raise_geo_restricted(
|
||||
'This video is only available in Sweden', countries=['SE'])
|
||||
'This video is only available in Sweden',
|
||||
countries=self._GEO_COUNTRIES)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
|
|
|
@ -20,6 +20,7 @@ class Vbox7IE(InfoExtractor):
|
|||
)
|
||||
(?P<id>[\da-fA-F]+)
|
||||
'''
|
||||
_GEO_COUNTRIES = ['BG']
|
||||
_TESTS = [{
|
||||
'url': 'http://vbox7.com/play:0946fff23c',
|
||||
'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
|
||||
|
@ -78,7 +79,7 @@ def _real_extract(self, url):
|
|||
video_url = video['src']
|
||||
|
||||
if '/na.mp4' in video_url:
|
||||
self.raise_geo_restricted(countries=['BG'])
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
|
||||
uploader = video.get('uploader')
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
|
||||
class VGTVIE(XstreamIE):
|
||||
IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet'
|
||||
_BYPASS_GEO = False
|
||||
_GEO_BYPASS = False
|
||||
|
||||
_HOST_TO_APPNAME = {
|
||||
'vgtv.no': 'vgtv',
|
||||
|
@ -218,7 +218,8 @@ def _real_extract(self, url):
|
|||
properties = try_get(
|
||||
data, lambda x: x['streamConfiguration']['properties'], list)
|
||||
if properties and 'geoblocked' in properties:
|
||||
raise self.raise_geo_restricted(countries=['NO'])
|
||||
raise self.raise_geo_restricted(
|
||||
countries=[host.rpartition('.')[-1].partition('/')[0].upper()])
|
||||
|
||||
self._sort_formats(info['formats'])
|
||||
|
||||
|
|
|
@ -27,7 +27,7 @@ class VikiBaseIE(InfoExtractor):
|
|||
_APP_VERSION = '2.2.5.1428709186'
|
||||
_APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)'
|
||||
|
||||
_BYPASS_GEO = False
|
||||
_GEO_BYPASS = False
|
||||
_NETRC_MACHINE = 'viki'
|
||||
|
||||
_token = None
|
||||
|
|
|
@ -3291,7 +3291,7 @@ def random_ipv4(cls, code):
|
|||
addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
|
||||
addr_max = addr_min | (0xffffffff >> int(preflen))
|
||||
return compat_str(socket.inet_ntoa(
|
||||
compat_struct_pack('!I', random.randint(addr_min, addr_max))))
|
||||
compat_struct_pack('!L', random.randint(addr_min, addr_max))))
|
||||
|
||||
|
||||
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
|
||||
|
|
Loading…
Reference in a new issue