simplify models

This commit is contained in:
Tao Bojlen 2018-08-29 19:58:06 +02:00
parent df5ccf83ad
commit 6526facdc9
9 changed files with 35 additions and 123 deletions

View file

@ -1,3 +0,0 @@
from django.contrib import admin
# Register your models here.

View file

@ -1,3 +0,0 @@
from django.db import models
# Create your models here.

View file

@ -1,12 +1,6 @@
from rest_framework import serializers
from collections import OrderedDict
from scraper.models import Instance, InstanceStats
# ModelSerializer for InstanceStats that exposes every model field except
# 'id', 'instance' and 'status'.
class InstanceStatsSerializer(serializers.ModelSerializer):
class Meta:
model = InstanceStats
# Fields listed here are left out of the serialized output.
exclude = ('id', 'instance', 'status')
from scraper.models import Instance
class InstanceListSerializer(serializers.ModelSerializer):
@ -25,8 +19,7 @@ class InstanceListSerializer(serializers.ModelSerializer):
class InstanceDetailSerializer(serializers.ModelSerializer):
peers = InstanceListSerializer(many=True, read_only=True)
stats = InstanceStatsSerializer(many=True, read_only=True)
class Meta:
model = Instance
fields = ('name', 'stats', 'peers')
fields = '__all__'

View file

@ -1,3 +0,0 @@
from django.test import TestCase
# Create your tests here.

View file

@ -10,7 +10,7 @@ import time
from datetime import datetime
from django.core.management.base import BaseCommand
from django.db import transaction
from scraper.models import Instance, InstanceStats
from scraper.models import Instance
from scraper.management.commands._util import require_lock, InvalidResponseError, get_key
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
@ -80,29 +80,19 @@ class Command(BaseCommand):
@require_lock(Instance, 'ACCESS EXCLUSIVE')
def save_data(self, data):
"""Save data"""
user_count = get_key(data, ['info', 'stats', 'user_count'])
if user_count:
defaults = dict()
defaults['domain_count'] = get_key(data, ['info', 'stats', 'domain_count']) or None
defaults['status_count'] = get_key(data, ['info', 'stats', 'status_count']) or None
defaults['user_count'] = get_key(data, ['info', 'stats', 'user_count']) or None
defaults['version'] = get_key(data, ['info', 'version'])
defaults['status'] = get_key(data, ['status'])
instance, _ = Instance.objects.update_or_create(
name=get_key(data, ['instance']),
defaults={'user_count': user_count},
defaults=defaults,
)
else:
instance, _ = Instance.objects.get_or_create(name=get_key(data, ['instance']))
if data['status'] == 'success':
# Save stats
stats = InstanceStats(
instance=instance,
domain_count=get_key(data, ['info', 'stats', 'domain_count']),
status_count=get_key(data, ['info', 'stats', 'status_count']),
user_count=get_key(data, ['info', 'stats', 'user_count']),
version=get_key(data, ['info', 'version']),
status=get_key(data, ['status']),
)
stats.save()
if defaults['status'] == 'success' and data['peers']:
# Save peers
# TODO: make this shared amongst threads so the database only needs to be queried once
if not data['peers']:
return
existing_instance_ids = Instance.objects.values_list('name', flat=True)
existing_peers = Instance.objects.filter(name__in=existing_instance_ids)
new_peer_ids = [peer for peer in data['peers'] if peer not in existing_instance_ids]
@ -110,12 +100,6 @@ class Command(BaseCommand):
new_peers = Instance.objects.bulk_create([Instance(name=peer) for peer in new_peer_ids])
instance.peers.set(new_peers)
instance.peers.set(existing_peers)
else:
stats = InstanceStats(
instance=instance,
status=get_key(data, ['status'])
)
stats.save()
self.stdout.write("{} - Saved {}".format(datetime.now().isoformat(), data['instance']))
def worker(self, queue: multiprocessing.JoinableQueue):

View file

@ -1,7 +1,6 @@
# Generated by Django 2.1 on 2018-08-26 17:26
# Generated by Django 2.1 on 2018-08-29 17:37
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
@ -16,20 +15,14 @@ class Migration(migrations.Migration):
name='Instance',
fields=[
('name', models.CharField(max_length=200, primary_key=True, serialize=False)),
('peers', models.ManyToManyField(to='scraper.Instance')),
],
),
migrations.CreateModel(
name='InstanceStats',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('timestamp', models.DateTimeField(auto_now_add=True)),
('num_peers', models.IntegerField(blank=True, null=True)),
('num_statuses', models.IntegerField(blank=True, null=True)),
('num_users', models.IntegerField(blank=True, null=True)),
('domain_count', models.IntegerField(blank=True, null=True)),
('status_count', models.IntegerField(blank=True, null=True)),
('user_count', models.IntegerField(blank=True, null=True)),
('version', models.CharField(blank=True, max_length=1000)),
('status', models.CharField(max_length=100)),
('instance', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='scraper.Instance')),
('first_seen', models.DateTimeField(auto_now_add=True)),
('last_updated', models.DateTimeField(auto_now=True)),
('peers', models.ManyToManyField(related_name='_instance_peers_+', to='scraper.Instance')),
],
),
]

View file

@ -1,19 +0,0 @@
# Generated by Django 2.1 on 2018-08-26 22:01
from django.db import migrations, models
import django.db.models.deletion
# Schema migration for the 'scraper' app: alters the 'instance' foreign key of
# the instancestats model so that it is declared with related_name='stats'.
class Migration(migrations.Migration):
# Must be applied after the app's initial migration.
dependencies = [
('scraper', '0001_initial'),
]
operations = [
# Redefine instancestats.instance as a CASCADE foreign key to scraper.Instance
# with related_name='stats'.
migrations.AlterField(
model_name='instancestats',
name='instance',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='stats', to='scraper.Instance'),
),
]

View file

@ -1,33 +0,0 @@
# Generated by Django 2.1 on 2018-08-28 22:06
from django.db import migrations, models
# Schema migration for the 'scraper' app: renames the three num_* counters on
# instancestats to *_count names and adds a nullable user_count to instance.
class Migration(migrations.Migration):
# Must be applied after migration 0002_auto_20180826_2201.
dependencies = [
('scraper', '0002_auto_20180826_2201'),
]
operations = [
# instancestats.num_peers -> domain_count
migrations.RenameField(
model_name='instancestats',
old_name='num_peers',
new_name='domain_count',
),
# instancestats.num_statuses -> status_count
migrations.RenameField(
model_name='instancestats',
old_name='num_statuses',
new_name='status_count',
),
# instancestats.num_users -> user_count
migrations.RenameField(
model_name='instancestats',
old_name='num_users',
new_name='user_count',
),
# New optional integer field on instance (blank and NULL both allowed).
migrations.AddField(
model_name='instance',
name='user_count',
field=models.IntegerField(blank=True, null=True),
),
]

View file

@ -2,21 +2,24 @@ from django.db import models
class Instance(models.Model):
# Primary key
name = models.CharField(max_length=200, primary_key=True)
peers = models.ManyToManyField('self', symmetrical=False)
user_count = models.IntegerField(blank=True, null=True)
class InstanceStats(models.Model):
# TODO: collect everything the API exposes
timestamp = models.DateTimeField(auto_now_add=True)
instance = models.ForeignKey(
Instance,
on_delete=models.CASCADE,
related_name='stats',
)
# Details
domain_count = models.IntegerField(blank=True, null=True)
status_count = models.IntegerField(blank=True, null=True)
user_count = models.IntegerField(blank=True, null=True)
version = models.CharField(max_length=1000, blank=True) # In Django CharField is never stored as NULL in the db
status = models.CharField(max_length=100)
# Foreign keys
# The peers endpoint returns a "list of all domain names known to this instance"
# (https://github.com/tootsuite/mastodon/pull/6125)
# In other words, an asymmetrical relationship here doesn't make much sense. If one day we can get a list of
# instances that the instance actively follows (i.e. knows and has not suspended), it will be worth adding an
# asymmetrical relation.
peers = models.ManyToManyField('self', symmetrical=True)
# Automatic fields
first_seen = models.DateTimeField(auto_now_add=True)
last_updated = models.DateTimeField(auto_now=True)