From 1c1f193542155835bcfa67370140913a0258a8b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tao=20Bror=20Bojl=C3=A9n?= Date: Thu, 21 Feb 2019 10:38:49 +0000 Subject: [PATCH] improve handling of cancelled scrape --- backend/scraper/management/commands/scrape.py | 4 +++- backend/scraper/migrations/0001_initial.py | 9 +++++---- backend/scraper/models.py | 7 ++++--- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/backend/scraper/management/commands/scrape.py b/backend/scraper/management/commands/scrape.py index 205e680..e3c0c59 100644 --- a/backend/scraper/management/commands/scrape.py +++ b/backend/scraper/management/commands/scrape.py @@ -15,6 +15,7 @@ from django_bulk_update.helper import bulk_update from django.core.management.base import BaseCommand from django import db from django.conf import settings +from django.utils import timezone from scraper.models import Instance, PeerRelationship from scraper.management.commands._util import require_lock, InvalidResponseException, get_key, log, validate_int, PersonalInstanceException @@ -24,7 +25,7 @@ SEED = 'mastodon.social' TIMEOUT = 20 # seconds NUM_THREADS = 16 # roughly 40MB each PERSONAL_INSTANCE_THRESHOLD = 5 # instances with < this many users won't be scraped -STATUS_SCRAPE_LIMIT = 1000 +STATUS_SCRAPE_LIMIT = 100 class Command(BaseCommand): @@ -144,6 +145,7 @@ class Command(BaseCommand): instance.description = get_key(data, ['info', 'description']) instance.version = get_key(data, ['info', 'version']) instance.status = get_key(data, ['status']) + instance.last_updated = timezone.now() instance.save() if data['status'] == 'success' and data['peers']: # TODO: handle a peer disappeer-ing diff --git a/backend/scraper/migrations/0001_initial.py b/backend/scraper/migrations/0001_initial.py index 6d804bd..88746e7 100644 --- a/backend/scraper/migrations/0001_initial.py +++ b/backend/scraper/migrations/0001_initial.py @@ -1,7 +1,8 @@ -# Generated by Django 2.1 on 2018-09-03 14:09 +# Generated by Django 2.1.7 on 2019-02-21 10:37 from django.db import migrations, models import django.db.models.deletion +import django.utils.timezone class Migration(migrations.Migration): @@ -17,7 +18,7 @@ class Migration(migrations.Migration): fields=[ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('weight', models.FloatField(blank=True, null=True)), - ('last_updated', models.DateTimeField()), + ('last_updated', models.DateTimeField(default=django.utils.timezone.now)), ], ), migrations.CreateModel( @@ -33,7 +34,7 @@ class Migration(migrations.Migration): ('x_coord', models.FloatField(blank=True, null=True)), ('y_coord', models.FloatField(blank=True, null=True)), ('first_seen', models.DateTimeField(auto_now_add=True)), - ('last_updated', models.DateTimeField(auto_now=True)), + ('last_updated', models.DateTimeField(default=django.utils.timezone.now)), ], ), migrations.CreateModel( @@ -43,7 +44,7 @@ class Migration(migrations.Migration): ('mention_count', models.IntegerField(blank=True, null=True)), ('statuses_seen', models.IntegerField(blank=True, null=True)), ('first_seen', models.DateTimeField(auto_now_add=True)), - ('last_updated', models.DateTimeField(auto_now=True)), + ('last_updated', models.DateTimeField(default=django.utils.timezone.now)), ('source', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='following_relationship', to='scraper.Instance')), ('target', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='follower_relationships', to='scraper.Instance')), ], diff --git a/backend/scraper/models.py b/backend/scraper/models.py index 20322f7..9a8f57e 100644 --- a/backend/scraper/models.py +++ b/backend/scraper/models.py @@ -1,4 +1,5 @@ from django.db import models +from django.utils import timezone class Instance(models.Model): @@ -28,7 +29,7 @@ class Instance(models.Model): # Automatic fields first_seen = models.DateTimeField(auto_now_add=True) - last_updated = models.DateTimeField(auto_now=True) + last_updated = models.DateTimeField(default=timezone.now) class PeerRelationship(models.Model): @@ -41,7 +42,7 @@ class PeerRelationship(models.Model): # Metadata first_seen = models.DateTimeField(auto_now_add=True) - last_updated = models.DateTimeField(auto_now=True) + last_updated = models.DateTimeField(default=timezone.now) class Edge(models.Model): @@ -55,4 +56,4 @@ class Edge(models.Model): weight = models.FloatField(blank=True, null=True) # Metadata - last_updated = models.DateTimeField(blank=False, null=False) + last_updated = models.DateTimeField(default=timezone.now)