finish API

This commit is contained in:
Tao Bojlen 2018-08-27 00:12:24 +02:00
parent 074e2e1b88
commit 21af38a1ea
9 changed files with 117 additions and 23 deletions

View file

@ -1,8 +1,23 @@
from rest_framework import serializers from rest_framework import serializers
from scraper.models import Instance from scraper.models import Instance, InstanceStats
class InstanceSerializer(serializers.HyperlinkedModelSerializer): class InstanceStatsSerializer(serializers.ModelSerializer):
class Meta:
model = InstanceStats
exclude = ('id', 'instance', 'status')
class InstanceListSerializer(serializers.ModelSerializer):
class Meta: class Meta:
model = Instance model = Instance
fields = ('name', 'peers') fields = ('name', )
class InstanceDetailSerializer(serializers.ModelSerializer):
    """Serialize a single Instance as its name plus nested stats and peers."""

    # Peers are rendered with the list serializer; the field is read-only.
    peers = InstanceListSerializer(many=True, read_only=True)
    # Stats are rendered with the stats serializer; the field is read-only.
    stats = InstanceStatsSerializer(many=True, read_only=True)

    class Meta:
        model = Instance
        fields = ('name', 'stats', 'peers')

View file

@ -1,11 +1,19 @@
from django.shortcuts import render
from rest_framework import viewsets from rest_framework import viewsets
from scraper.models import Instance from scraper.models import Instance
from apiv1.serializers import InstanceSerializer from apiv1.serializers import InstanceListSerializer, InstanceDetailSerializer
class InstanceViewSet(viewsets.ModelViewSet): class InstanceViewSet(viewsets.ModelViewSet):
"""API endpoint to view instance details""" """API endpoint to view instances"""
queryset = Instance.objects.all()
serializer_class = InstanceSerializer
lookup_value_regex = '[a-zA-Z0-9-_\.]+'
queryset = Instance.objects.all()
serializer_class = InstanceListSerializer
detail_serializer_class = InstanceDetailSerializer # this serializer also includes stats and a list of peers
def get_serializer_class(self):
    """Return the detail serializer for ``retrieve``, else the default one.

    The detail serializer is only used when the view actually defines a
    ``detail_serializer_class`` attribute; every other case falls back to
    ``serializer_class``.
    """
    is_retrieve = self.action == 'retrieve'
    if is_retrieve and hasattr(self, 'detail_serializer_class'):
        return self.detail_serializer_class
    return self.serializer_class

View file

@ -78,8 +78,10 @@ WSGI_APPLICATION = 'backend.wsgi.application'
DATABASES = { DATABASES = {
'default': { 'default': {
'ENGINE': 'django.db.backends.sqlite3', 'ENGINE': 'django.db.backends.postgresql',
'NAME': os.path.join(BASE_DIR, 'db.sqlite3'), 'NAME': 'fediverse',
'USER': 'tao',
'PASSWORD': 'tao',
} }
} }
@ -121,3 +123,12 @@ USE_TZ = True
# https://docs.djangoproject.com/en/2.1/howto/static-files/ # https://docs.djangoproject.com/en/2.1/howto/static-files/
STATIC_URL = '/static/' STATIC_URL = '/static/'
# The silk middleware and the silk app are only enabled when DEBUG is on.
if DEBUG:
    MIDDLEWARE += ('silk.middleware.SilkyMiddleware',)
    INSTALLED_APPS += ('silk',)

View file

@ -16,10 +16,14 @@ Including another URLconf
from django.urls import path, include from django.urls import path, include
from rest_framework import routers from rest_framework import routers
from apiv1 import views from apiv1 import views
from backend import settings
router = routers.DefaultRouter() router = routers.DefaultRouter()
router.register(r'instances', views.InstanceViewSet) router.register(r'instances', views.InstanceViewSet)
urlpatterns = [ urlpatterns = [
path(r'', include(router.urls)) path(r'api/v1/', include(router.urls))
] ]
# The silk URLs are only routed when DEBUG is on.
if settings.DEBUG:
    silk_route = path(r'silk/', include('silk.urls', namespace='silk'))
    urlpatterns += [silk_route]

View file

@ -0,0 +1,40 @@
# Table-level lock modes accepted by PostgreSQL's LOCK TABLE statement.
LOCK_MODES = (
    'ACCESS SHARE',
    'ROW SHARE',
    'ROW EXCLUSIVE',
    'SHARE UPDATE EXCLUSIVE',
    'SHARE',
    'SHARE ROW EXCLUSIVE',
    'EXCLUSIVE',
    'ACCESS EXCLUSIVE',
)


def require_lock(model, lock):
    """
    Decorator for PostgreSQL's table-level lock functionality.

    Every call of the decorated function first issues
    ``LOCK TABLE <model's table> IN <lock> MODE`` on the default database
    connection and then runs the function. Per the PostgreSQL documentation
    the lock is held until the surrounding transaction ends, so the
    decorated function should run inside a transaction.

    Example:
        @transaction.atomic
        @require_lock(MyModel, 'ACCESS EXCLUSIVE')
        def myview(request):
            ...

    :param model: Django model class whose table (``model._meta.db_table``)
        is locked.
    :param lock: lock mode; must be one of ``LOCK_MODES``.
    :raises ValueError: when the decorated function is called and ``lock``
        is not one of ``LOCK_MODES``.

    PostgreSQL's LOCK Documentation:
    https://www.postgresql.org/docs/current/sql-lock.html
    """
    from functools import wraps

    def require_lock_decorator(view_func):
        @wraps(view_func)  # keep the wrapped function's name and docstring
        def wrapper(*args, **kwargs):
            if lock not in LOCK_MODES:
                raise ValueError(
                    '%s is not a PostgreSQL supported lock mode.' % (lock,)
                )
            # Imported lazily so this module can be imported (and the lock
            # mode validated) without Django being configured.
            from django.db import connection
            # Identifiers cannot be passed as query parameters; ``lock`` was
            # validated above and the table name comes from model metadata.
            with connection.cursor() as cursor:
                cursor.execute(
                    'LOCK TABLE %s IN %s MODE' % (model._meta.db_table, lock)
                )
            return view_func(*args, **kwargs)
        return wrapper
    return require_lock_decorator

View file

@ -8,7 +8,9 @@ import multiprocessing
import requests import requests
import time import time
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from django.db import transaction
from scraper.models import Instance, InstanceStats from scraper.models import Instance, InstanceStats
from scraper.management.commands._util import require_lock
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# Because the script uses the Mastodon API other platforms like # # Because the script uses the Mastodon API other platforms like #
@ -48,7 +50,6 @@ class Command(BaseCommand):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
self.done_bag = set() self.done_bag = set()
self.existing_instance_ids = []
@staticmethod @staticmethod
def get_instance_info(instance_name: str): def get_instance_info(instance_name: str):
@ -87,6 +88,8 @@ class Command(BaseCommand):
print("Failed: {}".format(instance_name)) print("Failed: {}".format(instance_name))
return data return data
@transaction.atomic
@require_lock(Instance, 'ACCESS EXCLUSIVE')
def save_data(self, data): def save_data(self, data):
"""Save data""" """Save data"""
instance, _ = Instance.objects.get_or_create(name=get_key(data, ['instance'])) instance, _ = Instance.objects.get_or_create(name=get_key(data, ['instance']))
@ -102,18 +105,13 @@ class Command(BaseCommand):
) )
stats.save() stats.save()
# Save peers # Save peers
# Save the list of instances we already have in the database # TODO: make this shared amongst threads so the database only needs to be queried once
existing_peers = Instance.objects.filter(name__in=self.existing_instance_ids) existing_instance_ids = Instance.objects.values_list('name', flat=True)
print("setting new_peer_ids") existing_peers = Instance.objects.filter(name__in=existing_instance_ids)
new_peer_ids = [peer for peer in data['peers'] if peer not in self.existing_instance_ids] new_peer_ids = [peer for peer in data['peers'] if peer not in existing_instance_ids]
if new_peer_ids: if new_peer_ids:
print("setting new_peers (ids: {})".format(new_peer_ids))
new_peers = Instance.objects.bulk_create([Instance(name=peer) for peer in new_peer_ids]) new_peers = Instance.objects.bulk_create([Instance(name=peer) for peer in new_peer_ids])
print("adding to existing_instance_ids")
self.existing_instance_ids.extend(new_peer_ids)
print("adding new peers")
instance.peers.set(new_peers) instance.peers.set(new_peers)
print("adding existing peers")
instance.peers.set(existing_peers) instance.peers.set(existing_peers)
else: else:
stats = InstanceStats( stats = InstanceStats(
@ -141,8 +139,6 @@ class Command(BaseCommand):
def handle(self, *args, **options): def handle(self, *args, **options):
start_time = time.time() start_time = time.time()
self.existing_instance_ids = Instance.objects.all().values_list('name', flat=True)
print("Existing instances: {}".format(self.existing_instance_ids))
queue = multiprocessing.JoinableQueue() queue = multiprocessing.JoinableQueue()
queue.put(SEED) queue.put(SEED)
# pool = multiprocessing.Pool(1, initializer=self.worker, initargs=(queue, )) # Disable concurrency (debug) # pool = multiprocessing.Pool(1, initializer=self.worker, initargs=(queue, )) # Disable concurrency (debug)

View file

@ -0,0 +1,19 @@
# Generated by Django 2.1 on 2018-08-26 22:01
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    """Set ``related_name='stats'`` on the ``InstanceStats.instance`` foreign key."""

    # Depends on the scraper app's initial migration.
    dependencies = [
        ('scraper', '0001_initial'),
    ]

    operations = [
        migrations.AlterField(
            model_name='instancestats',
            name='instance',
            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='stats', to='scraper.Instance'),
        ),
    ]

View file

@ -11,6 +11,7 @@ class InstanceStats(models.Model):
instance = models.ForeignKey( instance = models.ForeignKey(
Instance, Instance,
on_delete=models.CASCADE, on_delete=models.CASCADE,
related_name='stats',
) )
num_peers = models.IntegerField(blank=True, null=True) num_peers = models.IntegerField(blank=True, null=True)
num_statuses = models.IntegerField(blank=True, null=True) num_statuses = models.IntegerField(blank=True, null=True)