improve handling of cancelled scrape
This commit is contained in:
parent
1b11c70430
commit
1c1f193542
|
@ -15,6 +15,7 @@ from django_bulk_update.helper import bulk_update
|
||||||
from django.core.management.base import BaseCommand
|
from django.core.management.base import BaseCommand
|
||||||
from django import db
|
from django import db
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
from django.utils import timezone
|
||||||
from scraper.models import Instance, PeerRelationship
|
from scraper.models import Instance, PeerRelationship
|
||||||
from scraper.management.commands._util import require_lock, InvalidResponseException, get_key, log, validate_int, PersonalInstanceException
|
from scraper.management.commands._util import require_lock, InvalidResponseException, get_key, log, validate_int, PersonalInstanceException
|
||||||
|
|
||||||
|
@ -24,7 +25,7 @@ SEED = 'mastodon.social'
|
||||||
TIMEOUT = 20 # seconds
|
TIMEOUT = 20 # seconds
|
||||||
NUM_THREADS = 16 # roughly 40MB each
|
NUM_THREADS = 16 # roughly 40MB each
|
||||||
PERSONAL_INSTANCE_THRESHOLD = 5 # instances with < this many users won't be scraped
|
PERSONAL_INSTANCE_THRESHOLD = 5 # instances with < this many users won't be scraped
|
||||||
STATUS_SCRAPE_LIMIT = 1000
|
STATUS_SCRAPE_LIMIT = 100
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(BaseCommand):
|
||||||
|
@ -144,6 +145,7 @@ class Command(BaseCommand):
|
||||||
instance.description = get_key(data, ['info', 'description'])
|
instance.description = get_key(data, ['info', 'description'])
|
||||||
instance.version = get_key(data, ['info', 'version'])
|
instance.version = get_key(data, ['info', 'version'])
|
||||||
instance.status = get_key(data, ['status'])
|
instance.status = get_key(data, ['status'])
|
||||||
|
instance.last_updated = timezone.now()
|
||||||
instance.save()
|
instance.save()
|
||||||
if data['status'] == 'success' and data['peers']:
|
if data['status'] == 'success' and data['peers']:
|
||||||
# TODO: handle a peer disappeer-ing
|
# TODO: handle a peer disappeer-ing
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
# Generated by Django 2.1 on 2018-09-03 14:09
|
# Generated by Django 2.1.7 on 2019-02-21 10:37
|
||||||
|
|
||||||
from django.db import migrations, models
|
from django.db import migrations, models
|
||||||
import django.db.models.deletion
|
import django.db.models.deletion
|
||||||
|
import django.utils.timezone
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
@ -17,7 +18,7 @@ class Migration(migrations.Migration):
|
||||||
fields=[
|
fields=[
|
||||||
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||||
('weight', models.FloatField(blank=True, null=True)),
|
('weight', models.FloatField(blank=True, null=True)),
|
||||||
('last_updated', models.DateTimeField()),
|
('last_updated', models.DateTimeField(default=django.utils.timezone.now)),
|
||||||
],
|
],
|
||||||
),
|
),
|
||||||
migrations.CreateModel(
|
migrations.CreateModel(
|
||||||
|
@ -33,7 +34,7 @@ class Migration(migrations.Migration):
|
||||||
('x_coord', models.FloatField(blank=True, null=True)),
|
('x_coord', models.FloatField(blank=True, null=True)),
|
||||||
('y_coord', models.FloatField(blank=True, null=True)),
|
('y_coord', models.FloatField(blank=True, null=True)),
|
||||||
('first_seen', models.DateTimeField(auto_now_add=True)),
|
('first_seen', models.DateTimeField(auto_now_add=True)),
|
||||||
('last_updated', models.DateTimeField(auto_now=True)),
|
('last_updated', models.DateTimeField(default=django.utils.timezone.now)),
|
||||||
],
|
],
|
||||||
),
|
),
|
||||||
migrations.CreateModel(
|
migrations.CreateModel(
|
||||||
|
@ -43,7 +44,7 @@ class Migration(migrations.Migration):
|
||||||
('mention_count', models.IntegerField(blank=True, null=True)),
|
('mention_count', models.IntegerField(blank=True, null=True)),
|
||||||
('statuses_seen', models.IntegerField(blank=True, null=True)),
|
('statuses_seen', models.IntegerField(blank=True, null=True)),
|
||||||
('first_seen', models.DateTimeField(auto_now_add=True)),
|
('first_seen', models.DateTimeField(auto_now_add=True)),
|
||||||
('last_updated', models.DateTimeField(auto_now=True)),
|
('last_updated', models.DateTimeField(default=django.utils.timezone.now)),
|
||||||
('source', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='following_relationship', to='scraper.Instance')),
|
('source', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='following_relationship', to='scraper.Instance')),
|
||||||
('target', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='follower_relationships', to='scraper.Instance')),
|
('target', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='follower_relationships', to='scraper.Instance')),
|
||||||
],
|
],
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
from django.db import models
|
from django.db import models
|
||||||
|
from django.utils import timezone
|
||||||
|
|
||||||
|
|
||||||
class Instance(models.Model):
|
class Instance(models.Model):
|
||||||
|
@ -28,7 +29,7 @@ class Instance(models.Model):
|
||||||
|
|
||||||
# Automatic fields
|
# Automatic fields
|
||||||
first_seen = models.DateTimeField(auto_now_add=True)
|
first_seen = models.DateTimeField(auto_now_add=True)
|
||||||
last_updated = models.DateTimeField(auto_now=True)
|
last_updated = models.DateTimeField(default=timezone.now)
|
||||||
|
|
||||||
|
|
||||||
class PeerRelationship(models.Model):
|
class PeerRelationship(models.Model):
|
||||||
|
@ -41,7 +42,7 @@ class PeerRelationship(models.Model):
|
||||||
|
|
||||||
# Metadata
|
# Metadata
|
||||||
first_seen = models.DateTimeField(auto_now_add=True)
|
first_seen = models.DateTimeField(auto_now_add=True)
|
||||||
last_updated = models.DateTimeField(auto_now=True)
|
last_updated = models.DateTimeField(default=timezone.now)
|
||||||
|
|
||||||
|
|
||||||
class Edge(models.Model):
|
class Edge(models.Model):
|
||||||
|
@ -55,4 +56,4 @@ class Edge(models.Model):
|
||||||
weight = models.FloatField(blank=True, null=True)
|
weight = models.FloatField(blank=True, null=True)
|
||||||
|
|
||||||
# Metadata
|
# Metadata
|
||||||
last_updated = models.DateTimeField(blank=False, null=False)
|
last_updated = models.DateTimeField(default=timezone.now)
|
||||||
|
|
Loading…
Reference in a new issue