improve handling of cancelled scrape
This commit is contained in:
parent
1b11c70430
commit
1c1f193542
|
@ -15,6 +15,7 @@ from django_bulk_update.helper import bulk_update
|
|||
from django.core.management.base import BaseCommand
|
||||
from django import db
|
||||
from django.conf import settings
|
||||
from django.utils import timezone
|
||||
from scraper.models import Instance, PeerRelationship
|
||||
from scraper.management.commands._util import require_lock, InvalidResponseException, get_key, log, validate_int, PersonalInstanceException
|
||||
|
||||
|
@ -24,7 +25,7 @@ SEED = 'mastodon.social'
|
|||
TIMEOUT = 20 # seconds
|
||||
NUM_THREADS = 16 # roughly 40MB each
|
||||
PERSONAL_INSTANCE_THRESHOLD = 5 # instances with < this many users won't be scraped
|
||||
STATUS_SCRAPE_LIMIT = 1000
|
||||
STATUS_SCRAPE_LIMIT = 100
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
|
@ -144,6 +145,7 @@ class Command(BaseCommand):
|
|||
instance.description = get_key(data, ['info', 'description'])
|
||||
instance.version = get_key(data, ['info', 'version'])
|
||||
instance.status = get_key(data, ['status'])
|
||||
instance.last_updated = timezone.now()
|
||||
instance.save()
|
||||
if data['status'] == 'success' and data['peers']:
|
||||
# TODO: handle a peer disappearing
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
# Generated by Django 2.1 on 2018-09-03 14:09
|
||||
# Generated by Django 2.1.7 on 2019-02-21 10:37
|
||||
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
import django.utils.timezone
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
@ -17,7 +18,7 @@ class Migration(migrations.Migration):
|
|||
fields=[
|
||||
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||
('weight', models.FloatField(blank=True, null=True)),
|
||||
('last_updated', models.DateTimeField()),
|
||||
('last_updated', models.DateTimeField(default=django.utils.timezone.now)),
|
||||
],
|
||||
),
|
||||
migrations.CreateModel(
|
||||
|
@ -33,7 +34,7 @@ class Migration(migrations.Migration):
|
|||
('x_coord', models.FloatField(blank=True, null=True)),
|
||||
('y_coord', models.FloatField(blank=True, null=True)),
|
||||
('first_seen', models.DateTimeField(auto_now_add=True)),
|
||||
('last_updated', models.DateTimeField(auto_now=True)),
|
||||
('last_updated', models.DateTimeField(default=django.utils.timezone.now)),
|
||||
],
|
||||
),
|
||||
migrations.CreateModel(
|
||||
|
@ -43,7 +44,7 @@ class Migration(migrations.Migration):
|
|||
('mention_count', models.IntegerField(blank=True, null=True)),
|
||||
('statuses_seen', models.IntegerField(blank=True, null=True)),
|
||||
('first_seen', models.DateTimeField(auto_now_add=True)),
|
||||
('last_updated', models.DateTimeField(auto_now=True)),
|
||||
('last_updated', models.DateTimeField(default=django.utils.timezone.now)),
|
||||
('source', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='following_relationship', to='scraper.Instance')),
|
||||
('target', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='follower_relationships', to='scraper.Instance')),
|
||||
],
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
from django.db import models
|
||||
from django.utils import timezone
|
||||
|
||||
|
||||
class Instance(models.Model):
|
||||
|
@ -28,7 +29,7 @@ class Instance(models.Model):
|
|||
|
||||
# Automatic fields
|
||||
first_seen = models.DateTimeField(auto_now_add=True)
|
||||
last_updated = models.DateTimeField(auto_now=True)
|
||||
last_updated = models.DateTimeField(default=timezone.now)
|
||||
|
||||
|
||||
class PeerRelationship(models.Model):
|
||||
|
@ -41,7 +42,7 @@ class PeerRelationship(models.Model):
|
|||
|
||||
# Metadata
|
||||
first_seen = models.DateTimeField(auto_now_add=True)
|
||||
last_updated = models.DateTimeField(auto_now=True)
|
||||
last_updated = models.DateTimeField(default=timezone.now)
|
||||
|
||||
|
||||
class Edge(models.Model):
|
||||
|
@ -55,4 +56,4 @@ class Edge(models.Model):
|
|||
weight = models.FloatField(blank=True, null=True)
|
||||
|
||||
# Metadata
|
||||
last_updated = models.DateTimeField(blank=False, null=False)
|
||||
last_updated = models.DateTimeField(default=timezone.now)
|
||||
|
|
Loading…
Reference in a new issue