simplify models

This commit is contained in:
Tao Bojlen 2018-08-29 19:58:06 +02:00
parent df5ccf83ad
commit 6526facdc9
9 changed files with 35 additions and 123 deletions

View file

@ -1,3 +0,0 @@
from django.contrib import admin
# Register your models here.

View file

@ -1,3 +0,0 @@
from django.db import models
# Create your models here.

View file

@ -1,12 +1,6 @@
from rest_framework import serializers from rest_framework import serializers
from collections import OrderedDict from collections import OrderedDict
from scraper.models import Instance, InstanceStats from scraper.models import Instance
class InstanceStatsSerializer(serializers.ModelSerializer):
class Meta:
model = InstanceStats
exclude = ('id', 'instance', 'status')
class InstanceListSerializer(serializers.ModelSerializer): class InstanceListSerializer(serializers.ModelSerializer):
@ -25,8 +19,7 @@ class InstanceListSerializer(serializers.ModelSerializer):
class InstanceDetailSerializer(serializers.ModelSerializer): class InstanceDetailSerializer(serializers.ModelSerializer):
peers = InstanceListSerializer(many=True, read_only=True) peers = InstanceListSerializer(many=True, read_only=True)
stats = InstanceStatsSerializer(many=True, read_only=True)
class Meta: class Meta:
model = Instance model = Instance
fields = ('name', 'stats', 'peers') fields = '__all__'

View file

@ -1,3 +0,0 @@
from django.test import TestCase
# Create your tests here.

View file

@ -10,7 +10,7 @@ import time
from datetime import datetime from datetime import datetime
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from django.db import transaction from django.db import transaction
from scraper.models import Instance, InstanceStats from scraper.models import Instance
from scraper.management.commands._util import require_lock, InvalidResponseError, get_key from scraper.management.commands._util import require_lock, InvalidResponseError, get_key
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
@ -80,29 +80,19 @@ class Command(BaseCommand):
@require_lock(Instance, 'ACCESS EXCLUSIVE') @require_lock(Instance, 'ACCESS EXCLUSIVE')
def save_data(self, data): def save_data(self, data):
"""Save data""" """Save data"""
user_count = get_key(data, ['info', 'stats', 'user_count']) defaults = dict()
if user_count: defaults['domain_count'] = get_key(data, ['info', 'stats', 'domain_count']) or None
instance, _ = Instance.objects.update_or_create( defaults['status_count'] = get_key(data, ['info', 'stats', 'status_count']) or None
name=get_key(data, ['instance']), defaults['user_count'] = get_key(data, ['info', 'stats', 'user_count']) or None
defaults={'user_count': user_count}, defaults['version'] = get_key(data, ['info', 'version'])
) defaults['status'] = get_key(data, ['status'])
else: instance, _ = Instance.objects.update_or_create(
instance, _ = Instance.objects.get_or_create(name=get_key(data, ['instance'])) name=get_key(data, ['instance']),
if data['status'] == 'success': defaults=defaults,
# Save stats )
stats = InstanceStats( if defaults['status'] == 'success' and data['peers']:
instance=instance,
domain_count=get_key(data, ['info', 'stats', 'domain_count']),
status_count=get_key(data, ['info', 'stats', 'status_count']),
user_count=get_key(data, ['info', 'stats', 'user_count']),
version=get_key(data, ['info', 'version']),
status=get_key(data, ['status']),
)
stats.save()
# Save peers # Save peers
# TODO: make this shared amongst threads so the database only needs to be queried once # TODO: make this shared amongst threads so the database only needs to be queried once
if not data['peers']:
return
existing_instance_ids = Instance.objects.values_list('name', flat=True) existing_instance_ids = Instance.objects.values_list('name', flat=True)
existing_peers = Instance.objects.filter(name__in=existing_instance_ids) existing_peers = Instance.objects.filter(name__in=existing_instance_ids)
new_peer_ids = [peer for peer in data['peers'] if peer not in existing_instance_ids] new_peer_ids = [peer for peer in data['peers'] if peer not in existing_instance_ids]
@ -110,12 +100,6 @@ class Command(BaseCommand):
new_peers = Instance.objects.bulk_create([Instance(name=peer) for peer in new_peer_ids]) new_peers = Instance.objects.bulk_create([Instance(name=peer) for peer in new_peer_ids])
instance.peers.set(new_peers) instance.peers.set(new_peers)
instance.peers.set(existing_peers) instance.peers.set(existing_peers)
else:
stats = InstanceStats(
instance=instance,
status=get_key(data, ['status'])
)
stats.save()
self.stdout.write("{} - Saved {}".format(datetime.now().isoformat(), data['instance'])) self.stdout.write("{} - Saved {}".format(datetime.now().isoformat(), data['instance']))
def worker(self, queue: multiprocessing.JoinableQueue): def worker(self, queue: multiprocessing.JoinableQueue):

View file

@ -1,7 +1,6 @@
# Generated by Django 2.1 on 2018-08-26 17:26 # Generated by Django 2.1 on 2018-08-29 17:37
from django.db import migrations, models from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration): class Migration(migrations.Migration):
@ -16,20 +15,14 @@ class Migration(migrations.Migration):
name='Instance', name='Instance',
fields=[ fields=[
('name', models.CharField(max_length=200, primary_key=True, serialize=False)), ('name', models.CharField(max_length=200, primary_key=True, serialize=False)),
('peers', models.ManyToManyField(to='scraper.Instance')), ('domain_count', models.IntegerField(blank=True, null=True)),
], ('status_count', models.IntegerField(blank=True, null=True)),
), ('user_count', models.IntegerField(blank=True, null=True)),
migrations.CreateModel(
name='InstanceStats',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('timestamp', models.DateTimeField(auto_now_add=True)),
('num_peers', models.IntegerField(blank=True, null=True)),
('num_statuses', models.IntegerField(blank=True, null=True)),
('num_users', models.IntegerField(blank=True, null=True)),
('version', models.CharField(blank=True, max_length=1000)), ('version', models.CharField(blank=True, max_length=1000)),
('status', models.CharField(max_length=100)), ('status', models.CharField(max_length=100)),
('instance', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='scraper.Instance')), ('first_seen', models.DateTimeField(auto_now_add=True)),
('last_updated', models.DateTimeField(auto_now=True)),
('peers', models.ManyToManyField(related_name='_instance_peers_+', to='scraper.Instance')),
], ],
), ),
] ]

View file

@ -1,19 +0,0 @@
# Generated by Django 2.1 on 2018-08-26 22:01
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
('scraper', '0001_initial'),
]
operations = [
migrations.AlterField(
model_name='instancestats',
name='instance',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='stats', to='scraper.Instance'),
),
]

View file

@ -1,33 +0,0 @@
# Generated by Django 2.1 on 2018-08-28 22:06
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('scraper', '0002_auto_20180826_2201'),
]
operations = [
migrations.RenameField(
model_name='instancestats',
old_name='num_peers',
new_name='domain_count',
),
migrations.RenameField(
model_name='instancestats',
old_name='num_statuses',
new_name='status_count',
),
migrations.RenameField(
model_name='instancestats',
old_name='num_users',
new_name='user_count',
),
migrations.AddField(
model_name='instance',
name='user_count',
field=models.IntegerField(blank=True, null=True),
),
]

View file

@ -2,21 +2,24 @@ from django.db import models
class Instance(models.Model): class Instance(models.Model):
# Primary key
name = models.CharField(max_length=200, primary_key=True) name = models.CharField(max_length=200, primary_key=True)
peers = models.ManyToManyField('self', symmetrical=False)
user_count = models.IntegerField(blank=True, null=True)
# Details
class InstanceStats(models.Model):
# TODO: collect everything the API exposes
timestamp = models.DateTimeField(auto_now_add=True)
instance = models.ForeignKey(
Instance,
on_delete=models.CASCADE,
related_name='stats',
)
domain_count = models.IntegerField(blank=True, null=True) domain_count = models.IntegerField(blank=True, null=True)
status_count = models.IntegerField(blank=True, null=True) status_count = models.IntegerField(blank=True, null=True)
user_count = models.IntegerField(blank=True, null=True) user_count = models.IntegerField(blank=True, null=True)
version = models.CharField(max_length=1000, blank=True) # In Django CharField is never stored as NULL in the db version = models.CharField(max_length=1000, blank=True) # In Django CharField is never stored as NULL in the db
status = models.CharField(max_length=100) status = models.CharField(max_length=100)
# Foreign keys
# The peers endpoint returns a "list of all domain names known to this instance"
# (https://github.com/tootsuite/mastodon/pull/6125)
# In other words, an asymmetrical relationship doesn't make much sense here. If we can one day get a list of
# instances that an instance actively follows (i.e. knows and has not suspended), it would be worth adding an
# asymmetrical relation.
peers = models.ManyToManyField('self', symmetrical=True)
# Automatic fields
first_seen = models.DateTimeField(auto_now_add=True)
last_updated = models.DateTimeField(auto_now=True)