improve handling of cancelled scrape

This commit is contained in:
Tao Bror Bojlén 2019-02-21 10:38:49 +00:00
parent 1b11c70430
commit 1c1f193542
No known key found for this signature in database
GPG key ID: C6EC7AAB905F9E6F
3 changed files with 12 additions and 8 deletions

View file

@ -15,6 +15,7 @@ from django_bulk_update.helper import bulk_update
from django.core.management.base import BaseCommand
from django import db
from django.conf import settings
from django.utils import timezone
from scraper.models import Instance, PeerRelationship
from scraper.management.commands._util import require_lock, InvalidResponseException, get_key, log, validate_int, PersonalInstanceException
@ -24,7 +25,7 @@ SEED = 'mastodon.social'
TIMEOUT = 20 # seconds
NUM_THREADS = 16 # roughly 40MB each
PERSONAL_INSTANCE_THRESHOLD = 5 # instances with < this many users won't be scraped
STATUS_SCRAPE_LIMIT = 1000
STATUS_SCRAPE_LIMIT = 100
class Command(BaseCommand):
@ -144,6 +145,7 @@ class Command(BaseCommand):
instance.description = get_key(data, ['info', 'description'])
instance.version = get_key(data, ['info', 'version'])
instance.status = get_key(data, ['status'])
instance.last_updated = timezone.now()
instance.save()
if data['status'] == 'success' and data['peers']:
# TODO: handle a peer disappeer-ing

View file

@ -1,7 +1,8 @@
# Generated by Django 2.1 on 2018-09-03 14:09
# Generated by Django 2.1.7 on 2019-02-21 10:37
from django.db import migrations, models
import django.db.models.deletion
import django.utils.timezone
class Migration(migrations.Migration):
@ -17,7 +18,7 @@ class Migration(migrations.Migration):
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('weight', models.FloatField(blank=True, null=True)),
('last_updated', models.DateTimeField()),
('last_updated', models.DateTimeField(default=django.utils.timezone.now)),
],
),
migrations.CreateModel(
@ -33,7 +34,7 @@ class Migration(migrations.Migration):
('x_coord', models.FloatField(blank=True, null=True)),
('y_coord', models.FloatField(blank=True, null=True)),
('first_seen', models.DateTimeField(auto_now_add=True)),
('last_updated', models.DateTimeField(auto_now=True)),
('last_updated', models.DateTimeField(default=django.utils.timezone.now)),
],
),
migrations.CreateModel(
@ -43,7 +44,7 @@ class Migration(migrations.Migration):
('mention_count', models.IntegerField(blank=True, null=True)),
('statuses_seen', models.IntegerField(blank=True, null=True)),
('first_seen', models.DateTimeField(auto_now_add=True)),
('last_updated', models.DateTimeField(auto_now=True)),
('last_updated', models.DateTimeField(default=django.utils.timezone.now)),
('source', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='following_relationship', to='scraper.Instance')),
('target', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='follower_relationships', to='scraper.Instance')),
],

View file

@ -1,4 +1,5 @@
from django.db import models
from django.utils import timezone
class Instance(models.Model):
@ -28,7 +29,7 @@ class Instance(models.Model):
# Automatic fields
first_seen = models.DateTimeField(auto_now_add=True)
last_updated = models.DateTimeField(auto_now=True)
last_updated = models.DateTimeField(default=timezone.now)
class PeerRelationship(models.Model):
@ -41,7 +42,7 @@ class PeerRelationship(models.Model):
# Metadata
first_seen = models.DateTimeField(auto_now_add=True)
last_updated = models.DateTimeField(auto_now=True)
last_updated = models.DateTimeField(default=timezone.now)
class Edge(models.Model):
@ -55,4 +56,4 @@ class Edge(models.Model):
weight = models.FloatField(blank=True, null=True)
# Metadata
last_updated = models.DateTimeField(blank=False, null=False)
last_updated = models.DateTimeField(default=timezone.now)