harden scraper against unexpected API responses
This commit is contained in:
parent
0f2aa3c938
commit
df5ccf83ad
|
@ -27,7 +27,7 @@ from scraper.management.commands._util import require_lock, InvalidResponseError
|
||||||
# TODO: use the /api/v1/server/followers and /api/v1/server/following endpoints in peertube instances
|
# TODO: use the /api/v1/server/followers and /api/v1/server/following endpoints in peertube instances
|
||||||
|
|
||||||
SEED = 'mastodon.social'
|
SEED = 'mastodon.social'
|
||||||
TIMEOUT = 20
|
TIMEOUT = 1
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(BaseCommand):
|
||||||
|
@ -51,9 +51,10 @@ class Command(BaseCommand):
|
||||||
"""Collect connected instances"""
|
"""Collect connected instances"""
|
||||||
url = 'https://' + instance_name + '/api/v1/instance/peers'
|
url = 'https://' + instance_name + '/api/v1/instance/peers'
|
||||||
response = requests.get(url, timeout=TIMEOUT)
|
response = requests.get(url, timeout=TIMEOUT)
|
||||||
if response.status_code != 200:
|
json = response.json()
|
||||||
|
if response.status_code != 200 or not isinstance(json, list):
|
||||||
raise InvalidResponseError("Could not get peers for {}".format(instance_name))
|
raise InvalidResponseError("Could not get peers for {}".format(instance_name))
|
||||||
return response.json()
|
return json
|
||||||
|
|
||||||
def process_instance(self, instance_name: str):
|
def process_instance(self, instance_name: str):
|
||||||
"""Given an instance, get all the data we're interested in"""
|
"""Given an instance, get all the data we're interested in"""
|
||||||
|
@ -63,6 +64,9 @@ class Command(BaseCommand):
|
||||||
data['instance'] = instance_name
|
data['instance'] = instance_name
|
||||||
data['info'] = self.get_instance_info(instance_name)
|
data['info'] = self.get_instance_info(instance_name)
|
||||||
data['peers'] = [peer for peer in self.get_instance_peers(instance_name) if peer] # get rid of null peers
|
data['peers'] = [peer for peer in self.get_instance_peers(instance_name) if peer] # get rid of null peers
|
||||||
|
if not data['info'] and not data['peers']:
|
||||||
|
# We got a response from the instance, but it didn't have any of the information we were expecting.
|
||||||
|
raise InvalidResponseError
|
||||||
data['status'] = 'success'
|
data['status'] = 'success'
|
||||||
return data
|
return data
|
||||||
except (InvalidResponseError,
|
except (InvalidResponseError,
|
||||||
|
|
|
@ -18,5 +18,5 @@ class InstanceStats(models.Model):
|
||||||
domain_count = models.IntegerField(blank=True, null=True)
|
domain_count = models.IntegerField(blank=True, null=True)
|
||||||
status_count = models.IntegerField(blank=True, null=True)
|
status_count = models.IntegerField(blank=True, null=True)
|
||||||
user_count = models.IntegerField(blank=True, null=True)
|
user_count = models.IntegerField(blank=True, null=True)
|
||||||
version = models.CharField(max_length=1000, blank=True)
|
version = models.CharField(max_length=1000, blank=True) # In Django, a CharField is stored as an empty string rather than NULL in the db
|
||||||
status = models.CharField(max_length=100)
|
status = models.CharField(max_length=100)
|
||||||
|
|
Loading…
Reference in a new issue