simplify models
This commit is contained in:
parent
df5ccf83ad
commit
6526facdc9
|
@ -1,3 +0,0 @@
|
||||||
from django.contrib import admin
|
|
||||||
|
|
||||||
# Register your models here.
|
|
|
@ -1,3 +0,0 @@
|
||||||
from django.db import models
|
|
||||||
|
|
||||||
# Create your models here.
|
|
|
@ -1,12 +1,6 @@
|
||||||
from rest_framework import serializers
|
from rest_framework import serializers
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
from scraper.models import Instance, InstanceStats
|
from scraper.models import Instance
|
||||||
|
|
||||||
|
|
||||||
class InstanceStatsSerializer(serializers.ModelSerializer):
|
|
||||||
class Meta:
|
|
||||||
model = InstanceStats
|
|
||||||
exclude = ('id', 'instance', 'status')
|
|
||||||
|
|
||||||
|
|
||||||
class InstanceListSerializer(serializers.ModelSerializer):
|
class InstanceListSerializer(serializers.ModelSerializer):
|
||||||
|
@ -25,8 +19,7 @@ class InstanceListSerializer(serializers.ModelSerializer):
|
||||||
|
|
||||||
class InstanceDetailSerializer(serializers.ModelSerializer):
|
class InstanceDetailSerializer(serializers.ModelSerializer):
|
||||||
peers = InstanceListSerializer(many=True, read_only=True)
|
peers = InstanceListSerializer(many=True, read_only=True)
|
||||||
stats = InstanceStatsSerializer(many=True, read_only=True)
|
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
model = Instance
|
model = Instance
|
||||||
fields = ('name', 'stats', 'peers')
|
fields = '__all__'
|
||||||
|
|
|
@ -1,3 +0,0 @@
|
||||||
from django.test import TestCase
|
|
||||||
|
|
||||||
# Create your tests here.
|
|
|
@ -10,7 +10,7 @@ import time
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from django.core.management.base import BaseCommand
|
from django.core.management.base import BaseCommand
|
||||||
from django.db import transaction
|
from django.db import transaction
|
||||||
from scraper.models import Instance, InstanceStats
|
from scraper.models import Instance
|
||||||
from scraper.management.commands._util import require_lock, InvalidResponseError, get_key
|
from scraper.management.commands._util import require_lock, InvalidResponseError, get_key
|
||||||
|
|
||||||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
||||||
|
@ -80,29 +80,19 @@ class Command(BaseCommand):
|
||||||
@require_lock(Instance, 'ACCESS EXCLUSIVE')
|
@require_lock(Instance, 'ACCESS EXCLUSIVE')
|
||||||
def save_data(self, data):
|
def save_data(self, data):
|
||||||
"""Save data"""
|
"""Save data"""
|
||||||
user_count = get_key(data, ['info', 'stats', 'user_count'])
|
defaults = dict()
|
||||||
if user_count:
|
defaults['domain_count'] = get_key(data, ['info', 'stats', 'domain_count']) or None
|
||||||
instance, _ = Instance.objects.update_or_create(
|
defaults['status_count'] = get_key(data, ['info', 'stats', 'status_count']) or None
|
||||||
name=get_key(data, ['instance']),
|
defaults['user_count'] = get_key(data, ['info', 'stats', 'user_count']) or None
|
||||||
defaults={'user_count': user_count},
|
defaults['version'] = get_key(data, ['info', 'version'])
|
||||||
)
|
defaults['status'] = get_key(data, ['status'])
|
||||||
else:
|
instance, _ = Instance.objects.update_or_create(
|
||||||
instance, _ = Instance.objects.get_or_create(name=get_key(data, ['instance']))
|
name=get_key(data, ['instance']),
|
||||||
if data['status'] == 'success':
|
defaults=defaults,
|
||||||
# Save stats
|
)
|
||||||
stats = InstanceStats(
|
if defaults['status'] == 'success' and data['peers']:
|
||||||
instance=instance,
|
|
||||||
domain_count=get_key(data, ['info', 'stats', 'domain_count']),
|
|
||||||
status_count=get_key(data, ['info', 'stats', 'status_count']),
|
|
||||||
user_count=get_key(data, ['info', 'stats', 'user_count']),
|
|
||||||
version=get_key(data, ['info', 'version']),
|
|
||||||
status=get_key(data, ['status']),
|
|
||||||
)
|
|
||||||
stats.save()
|
|
||||||
# Save peers
|
# Save peers
|
||||||
# TODO: make this shared amongst threads so the database only needs to be queried once
|
# TODO: make this shared amongst threads so the database only needs to be queried once
|
||||||
if not data['peers']:
|
|
||||||
return
|
|
||||||
existing_instance_ids = Instance.objects.values_list('name', flat=True)
|
existing_instance_ids = Instance.objects.values_list('name', flat=True)
|
||||||
existing_peers = Instance.objects.filter(name__in=existing_instance_ids)
|
existing_peers = Instance.objects.filter(name__in=existing_instance_ids)
|
||||||
new_peer_ids = [peer for peer in data['peers'] if peer not in existing_instance_ids]
|
new_peer_ids = [peer for peer in data['peers'] if peer not in existing_instance_ids]
|
||||||
|
@ -110,12 +100,6 @@ class Command(BaseCommand):
|
||||||
new_peers = Instance.objects.bulk_create([Instance(name=peer) for peer in new_peer_ids])
|
new_peers = Instance.objects.bulk_create([Instance(name=peer) for peer in new_peer_ids])
|
||||||
instance.peers.set(new_peers)
|
instance.peers.set(new_peers)
|
||||||
instance.peers.set(existing_peers)
|
instance.peers.set(existing_peers)
|
||||||
else:
|
|
||||||
stats = InstanceStats(
|
|
||||||
instance=instance,
|
|
||||||
status=get_key(data, ['status'])
|
|
||||||
)
|
|
||||||
stats.save()
|
|
||||||
self.stdout.write("{} - Saved {}".format(datetime.now().isoformat(), data['instance']))
|
self.stdout.write("{} - Saved {}".format(datetime.now().isoformat(), data['instance']))
|
||||||
|
|
||||||
def worker(self, queue: multiprocessing.JoinableQueue):
|
def worker(self, queue: multiprocessing.JoinableQueue):
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
# Generated by Django 2.1 on 2018-08-26 17:26
|
# Generated by Django 2.1 on 2018-08-29 17:37
|
||||||
|
|
||||||
from django.db import migrations, models
|
from django.db import migrations, models
|
||||||
import django.db.models.deletion
|
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
@ -16,20 +15,14 @@ class Migration(migrations.Migration):
|
||||||
name='Instance',
|
name='Instance',
|
||||||
fields=[
|
fields=[
|
||||||
('name', models.CharField(max_length=200, primary_key=True, serialize=False)),
|
('name', models.CharField(max_length=200, primary_key=True, serialize=False)),
|
||||||
('peers', models.ManyToManyField(to='scraper.Instance')),
|
('domain_count', models.IntegerField(blank=True, null=True)),
|
||||||
],
|
('status_count', models.IntegerField(blank=True, null=True)),
|
||||||
),
|
('user_count', models.IntegerField(blank=True, null=True)),
|
||||||
migrations.CreateModel(
|
|
||||||
name='InstanceStats',
|
|
||||||
fields=[
|
|
||||||
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
|
||||||
('timestamp', models.DateTimeField(auto_now_add=True)),
|
|
||||||
('num_peers', models.IntegerField(blank=True, null=True)),
|
|
||||||
('num_statuses', models.IntegerField(blank=True, null=True)),
|
|
||||||
('num_users', models.IntegerField(blank=True, null=True)),
|
|
||||||
('version', models.CharField(blank=True, max_length=1000)),
|
('version', models.CharField(blank=True, max_length=1000)),
|
||||||
('status', models.CharField(max_length=100)),
|
('status', models.CharField(max_length=100)),
|
||||||
('instance', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='scraper.Instance')),
|
('first_seen', models.DateTimeField(auto_now_add=True)),
|
||||||
|
('last_updated', models.DateTimeField(auto_now=True)),
|
||||||
|
('peers', models.ManyToManyField(related_name='_instance_peers_+', to='scraper.Instance')),
|
||||||
],
|
],
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
|
@ -1,19 +0,0 @@
|
||||||
# Generated by Django 2.1 on 2018-08-26 22:01
|
|
||||||
|
|
||||||
from django.db import migrations, models
|
|
||||||
import django.db.models.deletion
|
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
|
|
||||||
|
|
||||||
dependencies = [
|
|
||||||
('scraper', '0001_initial'),
|
|
||||||
]
|
|
||||||
|
|
||||||
operations = [
|
|
||||||
migrations.AlterField(
|
|
||||||
model_name='instancestats',
|
|
||||||
name='instance',
|
|
||||||
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='stats', to='scraper.Instance'),
|
|
||||||
),
|
|
||||||
]
|
|
|
@ -1,33 +0,0 @@
|
||||||
# Generated by Django 2.1 on 2018-08-28 22:06
|
|
||||||
|
|
||||||
from django.db import migrations, models
|
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
|
|
||||||
|
|
||||||
dependencies = [
|
|
||||||
('scraper', '0002_auto_20180826_2201'),
|
|
||||||
]
|
|
||||||
|
|
||||||
operations = [
|
|
||||||
migrations.RenameField(
|
|
||||||
model_name='instancestats',
|
|
||||||
old_name='num_peers',
|
|
||||||
new_name='domain_count',
|
|
||||||
),
|
|
||||||
migrations.RenameField(
|
|
||||||
model_name='instancestats',
|
|
||||||
old_name='num_statuses',
|
|
||||||
new_name='status_count',
|
|
||||||
),
|
|
||||||
migrations.RenameField(
|
|
||||||
model_name='instancestats',
|
|
||||||
old_name='num_users',
|
|
||||||
new_name='user_count',
|
|
||||||
),
|
|
||||||
migrations.AddField(
|
|
||||||
model_name='instance',
|
|
||||||
name='user_count',
|
|
||||||
field=models.IntegerField(blank=True, null=True),
|
|
||||||
),
|
|
||||||
]
|
|
|
@ -2,21 +2,24 @@ from django.db import models
|
||||||
|
|
||||||
|
|
||||||
class Instance(models.Model):
|
class Instance(models.Model):
|
||||||
|
# Primary key
|
||||||
name = models.CharField(max_length=200, primary_key=True)
|
name = models.CharField(max_length=200, primary_key=True)
|
||||||
peers = models.ManyToManyField('self', symmetrical=False)
|
|
||||||
user_count = models.IntegerField(blank=True, null=True)
|
|
||||||
|
|
||||||
|
# Details
|
||||||
class InstanceStats(models.Model):
|
|
||||||
# TODO: collect everything the API exposes
|
|
||||||
timestamp = models.DateTimeField(auto_now_add=True)
|
|
||||||
instance = models.ForeignKey(
|
|
||||||
Instance,
|
|
||||||
on_delete=models.CASCADE,
|
|
||||||
related_name='stats',
|
|
||||||
)
|
|
||||||
domain_count = models.IntegerField(blank=True, null=True)
|
domain_count = models.IntegerField(blank=True, null=True)
|
||||||
status_count = models.IntegerField(blank=True, null=True)
|
status_count = models.IntegerField(blank=True, null=True)
|
||||||
user_count = models.IntegerField(blank=True, null=True)
|
user_count = models.IntegerField(blank=True, null=True)
|
||||||
version = models.CharField(max_length=1000, blank=True) # In Django CharField is never stored as NULL in the db
|
version = models.CharField(max_length=1000, blank=True) # In Django CharField is never stored as NULL in the db
|
||||||
status = models.CharField(max_length=100)
|
status = models.CharField(max_length=100)
|
||||||
|
|
||||||
|
# Foreign keys
|
||||||
|
# The peers endpoint returns a "list of all domain names known to this instance"
|
||||||
|
# (https://github.com/tootsuite/mastodon/pull/6125)
|
||||||
|
# In other words, an asymmetrical relationship here doesn't make much sense. If we one day can get a list of
|
||||||
|
# instances that the instance actively follows (i.e. knows and not suspended), it's worth adding an
|
||||||
|
# asymmetrical relation.
|
||||||
|
peers = models.ManyToManyField('self', symmetrical=True)
|
||||||
|
|
||||||
|
# Automatic fields
|
||||||
|
first_seen = models.DateTimeField(auto_now_add=True)
|
||||||
|
last_updated = models.DateTimeField(auto_now=True)
|
||||||
|
|
Loading…
Reference in a new issue