finish API

This commit is contained in:
Tao Bojlen 2018-08-27 00:12:24 +02:00
parent 074e2e1b88
commit 21af38a1ea
9 changed files with 117 additions and 23 deletions

View file

@ -1,8 +1,23 @@
from rest_framework import serializers
from scraper.models import Instance
from scraper.models import Instance, InstanceStats
class InstanceSerializer(serializers.HyperlinkedModelSerializer):
class InstanceStatsSerializer(serializers.ModelSerializer):
    # Serializes InstanceStats objects, exposing every model field except
    # the ones named in Meta.exclude.

    class Meta:
        model = InstanceStats
        # 'id', 'instance' and 'status' are left out of the serialized output.
        exclude = (
            'id',
            'instance',
            'status',
        )
class InstanceListSerializer(serializers.ModelSerializer):
    # Compact representation of an Instance: only its name is serialized.

    class Meta:
        model = Instance
        # Single `fields` declaration. An earlier ('name', 'peers') assignment
        # was overwritten on the very next line and never took effect, so it
        # has been dropped.
        fields = ('name',)
class InstanceDetailSerializer(serializers.ModelSerializer):
    # Detailed representation of an Instance: its name plus two nested,
    # read-only collections (`stats` and `peers`).

    # Each peer is rendered with the compact list serializer.
    peers = InstanceListSerializer(read_only=True, many=True)
    # Each stats entry is rendered with the stats serializer.
    stats = InstanceStatsSerializer(read_only=True, many=True)

    class Meta:
        model = Instance
        fields = (
            'name',
            'stats',
            'peers',
        )

View file

@ -1,11 +1,19 @@
from django.shortcuts import render
from rest_framework import viewsets
from scraper.models import Instance
from apiv1.serializers import InstanceSerializer
from apiv1.serializers import InstanceListSerializer, InstanceDetailSerializer
class InstanceViewSet(viewsets.ModelViewSet):
    """API endpoint to view instances"""

    # Characters allowed in the URL lookup value: ASCII letters, digits,
    # '_', '.' and '-'. Written as a raw string because the previous non-raw
    # literal contained the invalid escape sequence `\.`; the set of matched
    # characters is unchanged.
    # NOTE(review): presumably needed because instance names contain dots,
    # which the router's default lookup pattern does not match -- confirm.
    lookup_value_regex = r'[a-zA-Z0-9_.-]+'

    queryset = Instance.objects.all()

    # Serializer used for every action other than `retrieve`.
    serializer_class = InstanceListSerializer
    # this serializer also includes stats and a list of peers
    detail_serializer_class = InstanceDetailSerializer

    def get_serializer_class(self):
        """Pick the serializer for the current action.

        Returns ``detail_serializer_class`` when the action is ``retrieve``
        and that attribute exists; otherwise returns ``serializer_class``.
        """
        if self.action == 'retrieve' and hasattr(self, 'detail_serializer_class'):
            return self.detail_serializer_class
        return self.serializer_class

View file

@ -78,8 +78,10 @@ WSGI_APPLICATION = 'backend.wsgi.application'
# PostgreSQL connection settings.
# The user and password can be overridden through environment variables so
# that real credentials do not have to be committed; the fallbacks keep the
# previously hard-coded development values, so existing setups still work.
# The duplicate sqlite3 'ENGINE'/'NAME' keys were dead (overridden by the
# later keys in the same dict literal) and have been removed.
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.postgresql',
        'NAME': 'fediverse',
        'USER': os.environ.get('FEDIVERSE_DB_USER', 'tao'),
        'PASSWORD': os.environ.get('FEDIVERSE_DB_PASSWORD', 'tao'),
    }
}
@ -121,3 +123,12 @@ USE_TZ = True
# https://docs.djangoproject.com/en/2.1/howto/static-files/
STATIC_URL = '/static/'
# When DEBUG is on, append the `silk` middleware and the `silk` app to the
# existing MIDDLEWARE / INSTALLED_APPS settings.
if DEBUG:
    MIDDLEWARE += ('silk.middleware.SilkyMiddleware',)
    INSTALLED_APPS += ('silk',)

View file

@ -16,10 +16,14 @@ Including another URLconf
from django.urls import path, include
from rest_framework import routers
from apiv1 import views
from backend import settings
router = routers.DefaultRouter()
router.register(r'instances', views.InstanceViewSet)
urlpatterns = [
path(r'', include(router.urls))
path(r'api/v1/', include(router.urls))
]
if settings.DEBUG:
urlpatterns += [path(r'silk/', include('silk.urls', namespace='silk'))]

View file

@ -0,0 +1,40 @@
import functools

# Lock modes accepted by require_lock(); these are the mode names that are
# interpolated into the LOCK TABLE statement below.
LOCK_MODES = (
    'ACCESS SHARE',
    'ROW SHARE',
    'ROW EXCLUSIVE',
    'SHARE UPDATE EXCLUSIVE',
    'SHARE',
    'SHARE ROW EXCLUSIVE',
    'EXCLUSIVE',
    'ACCESS EXCLUSIVE',
)


def require_lock(model, lock):
    """
    Decorator for PostgreSQL's table-level lock functionality.

    Each time the decorated function is called, a
    ``LOCK TABLE <table of model> IN <lock> MODE`` statement is executed on
    Django's default database connection before the function itself runs.

    Arguments:
        model: Django model whose table (``model._meta.db_table``) is locked.
        lock: one of the strings in ``LOCK_MODES``.

    Raises:
        ValueError: when the decorated function is called and ``lock`` is not
            one of ``LOCK_MODES``. (The check happens at call time, not at
            decoration time.)

    Example:
        @transaction.atomic
        @require_lock(MyModel, 'ACCESS EXCLUSIVE')
        def myview(request):
            ...

    NOTE(review): the lock is expected to be taken inside a transaction,
    hence the transaction decorator in the example -- see the PostgreSQL
    documentation for the exact semantics.

    PostgreSQL's LOCK documentation:
        https://www.postgresql.org/docs/current/sql-lock.html
    """
    def require_lock_decorator(view_func):
        # Preserve the wrapped function's name and docstring.
        @functools.wraps(view_func)
        def wrapper(*args, **kwargs):
            if lock not in LOCK_MODES:
                # Interpolate the offending value; the message previously
                # contained a literal, unformatted '%s'.
                raise ValueError(
                    '%s is not a PostgreSQL supported lock mode.' % (lock,)
                )
            # Imported lazily so this module can be imported without Django
            # being configured.
            from django.db import connection
            # `lock` was validated against LOCK_MODES above and the table
            # name comes from the model's metadata, not from user input.
            with connection.cursor() as cursor:
                cursor.execute(
                    'LOCK TABLE %s IN %s MODE' % (model._meta.db_table, lock)
                )
            return view_func(*args, **kwargs)
        return wrapper
    return require_lock_decorator

View file

@ -8,7 +8,9 @@ import multiprocessing
import requests
import time
from django.core.management.base import BaseCommand
from django.db import transaction
from scraper.models import Instance, InstanceStats
from scraper.management.commands._util import require_lock
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# Because the script uses the Mastodon API other platforms like #
@ -48,7 +50,6 @@ class Command(BaseCommand):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.done_bag = set()
self.existing_instance_ids = []
@staticmethod
def get_instance_info(instance_name: str):
@ -87,6 +88,8 @@ class Command(BaseCommand):
print("Failed: {}".format(instance_name))
return data
@transaction.atomic
@require_lock(Instance, 'ACCESS EXCLUSIVE')
def save_data(self, data):
"""Save data"""
instance, _ = Instance.objects.get_or_create(name=get_key(data, ['instance']))
@ -102,18 +105,13 @@ class Command(BaseCommand):
)
stats.save()
# Save peers
# Save the list of instances we already have in the database
existing_peers = Instance.objects.filter(name__in=self.existing_instance_ids)
print("setting new_peer_ids")
new_peer_ids = [peer for peer in data['peers'] if peer not in self.existing_instance_ids]
# TODO: make this shared amongst threads so the database only needs to be queried once
existing_instance_ids = Instance.objects.values_list('name', flat=True)
existing_peers = Instance.objects.filter(name__in=existing_instance_ids)
new_peer_ids = [peer for peer in data['peers'] if peer not in existing_instance_ids]
if new_peer_ids:
print("setting new_peers (ids: {})".format(new_peer_ids))
new_peers = Instance.objects.bulk_create([Instance(name=peer) for peer in new_peer_ids])
print("adding to existing_instance_ids")
self.existing_instance_ids.extend(new_peer_ids)
print("adding new peers")
instance.peers.set(new_peers)
print("adding existing peers")
instance.peers.set(existing_peers)
else:
stats = InstanceStats(
@ -141,8 +139,6 @@ class Command(BaseCommand):
def handle(self, *args, **options):
start_time = time.time()
self.existing_instance_ids = Instance.objects.all().values_list('name', flat=True)
print("Existing instances: {}".format(self.existing_instance_ids))
queue = multiprocessing.JoinableQueue()
queue.put(SEED)
# pool = multiprocessing.Pool(1, initializer=self.worker, initargs=(queue, )) # Disable concurrency (debug)

View file

@ -0,0 +1,19 @@
# Generated by Django 2.1 on 2018-08-26 22:01
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    # Alters the `instance` foreign key of the `instancestats` model so that
    # it is declared with related_name='stats'.

    # Must run after the app's initial migration.
    dependencies = [('scraper', '0001_initial')]

    operations = [
        migrations.AlterField(
            model_name='instancestats',
            name='instance',
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                related_name='stats',
                to='scraper.Instance',
            ),
        ),
    ]

View file

@ -11,6 +11,7 @@ class InstanceStats(models.Model):
instance = models.ForeignKey(
Instance,
on_delete=models.CASCADE,
related_name='stats',
)
num_peers = models.IntegerField(blank=True, null=True)
num_statuses = models.IntegerField(blank=True, null=True)