Patch summary (free text ahead of the first "diff --git" header):
- apiv1: replaces InstanceSerializer with list/detail/stats serializers; InstanceViewSet.get_serializer_class returns the detail serializer for the 'retrieve' action.
- backend: default database engine becomes PostgreSQL, the router is mounted under api/v1/, and the silk middleware/app/URLs are added when DEBUG is set.
- scraper: adds the require_lock decorator (issues LOCK TABLE ... IN ... MODE) and decorates Command.save_data with transaction.atomic plus an ACCESS EXCLUSIVE lock on Instance; InstanceStats.instance gets related_name='stats'.
NOTE(review): backend/settings.py commits the database USER/PASSWORD in plain text - consider loading them from the environment.
NOTE(review): save_data calls instance.peers.set(new_peers) and then instance.peers.set(existing_peers), and existing_peers is filtered on every stored instance name - confirm this yields the intended peer list.
diff --git a/apiv1/migrations/__init__.py b/apiv1/migrations/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/apiv1/serializers.py b/apiv1/serializers.py index ef4cd69..eb9e747 100644 --- a/apiv1/serializers.py +++ b/apiv1/serializers.py @@ -1,8 +1,23 @@ from rest_framework import serializers -from scraper.models import Instance +from scraper.models import Instance, InstanceStats -class InstanceSerializer(serializers.HyperlinkedModelSerializer): +class InstanceStatsSerializer(serializers.ModelSerializer): + class Meta: + model = InstanceStats + exclude = ('id', 'instance', 'status') + + +class InstanceListSerializer(serializers.ModelSerializer): class Meta: model = Instance - fields = ('name', 'peers') + fields = ('name', ) + + +class InstanceDetailSerializer(serializers.ModelSerializer): + peers = InstanceListSerializer(many=True, read_only=True) + stats = InstanceStatsSerializer(many=True, read_only=True) + + class Meta: + model = Instance + fields = ('name', 'stats', 'peers') diff --git a/apiv1/views.py b/apiv1/views.py index 6a2a8c4..99eed00 100644 --- a/apiv1/views.py +++ b/apiv1/views.py @@ -1,11 +1,19 @@ -from django.shortcuts import render from rest_framework import viewsets from scraper.models import Instance -from apiv1.serializers import InstanceSerializer +from apiv1.serializers import InstanceListSerializer, InstanceDetailSerializer class InstanceViewSet(viewsets.ModelViewSet): - """API endpoint to view instance details""" - queryset = Instance.objects.all() - serializer_class = InstanceSerializer + """API endpoint to view instances""" + lookup_value_regex = r'[a-zA-Z0-9-_\.]+' + + queryset = Instance.objects.all() + serializer_class = InstanceListSerializer + detail_serializer_class = InstanceDetailSerializer # this serializer also includes stats and a list of peers + + def get_serializer_class(self): + if self.action == 'retrieve': + if hasattr(self, 'detail_serializer_class'): + return self.detail_serializer_class + return 
self.serializer_class diff --git a/backend/settings.py b/backend/settings.py index c5d48d3..da4f9b2 100644 --- a/backend/settings.py +++ b/backend/settings.py @@ -78,8 +78,10 @@ WSGI_APPLICATION = 'backend.wsgi.application' DATABASES = { 'default': { - 'ENGINE': 'django.db.backends.sqlite3', - 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'), + 'ENGINE': 'django.db.backends.postgresql', + 'NAME': 'fediverse', + 'USER': 'tao', + 'PASSWORD': 'tao', } } @@ -121,3 +123,12 @@ USE_TZ = True # https://docs.djangoproject.com/en/2.1/howto/static-files/ STATIC_URL = '/static/' + +if DEBUG: + MIDDLEWARE += ( + 'silk.middleware.SilkyMiddleware', + ) + + INSTALLED_APPS += ( + 'silk', + ) diff --git a/backend/urls.py b/backend/urls.py index aa2aac0..78f9c3f 100644 --- a/backend/urls.py +++ b/backend/urls.py @@ -16,10 +16,14 @@ Including another URLconf from django.urls import path, include from rest_framework import routers from apiv1 import views +from django.conf import settings router = routers.DefaultRouter() router.register(r'instances', views.InstanceViewSet) urlpatterns = [ - path(r'', include(router.urls)) + path(r'api/v1/', include(router.urls)) ] + +if settings.DEBUG: + urlpatterns += [path(r'silk/', include('silk.urls', namespace='silk'))] \ No newline at end of file diff --git a/scraper/management/commands/_util.py b/scraper/management/commands/_util.py new file mode 100644 index 0000000..c330dd9 --- /dev/null +++ b/scraper/management/commands/_util.py @@ -0,0 +1,40 @@ +LOCK_MODES = ( + 'ACCESS SHARE', + 'ROW SHARE', + 'ROW EXCLUSIVE', + 'SHARE UPDATE EXCLUSIVE', + 'SHARE', + 'SHARE ROW EXCLUSIVE', + 'EXCLUSIVE', + 'ACCESS EXCLUSIVE', +) + + +def require_lock(model, lock): + """ + Decorator for PostgreSQL's table-level lock functionality + + Example: + @transaction.atomic + @require_lock(MyModel, 'ACCESS EXCLUSIVE') + def myview(request): + ... 
+ + PostgreSQL's LOCK Documentation: + https://www.postgresql.org/docs/current/sql-lock.html + """ + + def require_lock_decorator(view_func): + def wrapper(*args, **kwargs): + if lock not in LOCK_MODES: + raise ValueError('%s is not a PostgreSQL supported lock mode.' % lock) + from django.db import connection + cursor = connection.cursor() + cursor.execute( + 'LOCK TABLE %s IN %s MODE' % (model._meta.db_table, lock) + ) + return view_func(*args, **kwargs) + + return wrapper + + return require_lock_decorator \ No newline at end of file diff --git a/scraper/management/commands/scrape.py b/scraper/management/commands/scrape.py index 6146481..ae90450 100644 --- a/scraper/management/commands/scrape.py +++ b/scraper/management/commands/scrape.py @@ -8,7 +8,9 @@ import multiprocessing import requests import time from django.core.management.base import BaseCommand +from django.db import transaction from scraper.models import Instance, InstanceStats +from scraper.management.commands._util import require_lock # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # Because the script uses the Mastodon API other platforms like # @@ -48,7 +50,6 @@ class Command(BaseCommand): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.done_bag = set() - self.existing_instance_ids = [] @staticmethod def get_instance_info(instance_name: str): @@ -87,6 +88,8 @@ class Command(BaseCommand): print("Failed: {}".format(instance_name)) return data + @transaction.atomic + @require_lock(Instance, 'ACCESS EXCLUSIVE') def save_data(self, data): """Save data""" instance, _ = Instance.objects.get_or_create(name=get_key(data, ['instance'])) @@ -102,18 +105,13 @@ class Command(BaseCommand): ) stats.save() # Save peers - # Save the list of instances we already have in the database - existing_peers = Instance.objects.filter(name__in=self.existing_instance_ids) - print("setting new_peer_ids") - new_peer_ids = [peer for peer in data['peers'] if peer not in 
self.existing_instance_ids] + # TODO: make this shared amongst threads so the database only needs to be queried once + existing_instance_ids = Instance.objects.values_list('name', flat=True) + existing_peers = Instance.objects.filter(name__in=existing_instance_ids) + new_peer_ids = [peer for peer in data['peers'] if peer not in existing_instance_ids] if new_peer_ids: - print("setting new_peers (ids: {})".format(new_peer_ids)) new_peers = Instance.objects.bulk_create([Instance(name=peer) for peer in new_peer_ids]) - print("adding to existing_instance_ids") - self.existing_instance_ids.extend(new_peer_ids) - print("adding new peers") instance.peers.set(new_peers) - print("adding existing peers") instance.peers.set(existing_peers) else: stats = InstanceStats( @@ -141,8 +139,6 @@ class Command(BaseCommand): def handle(self, *args, **options): start_time = time.time() - self.existing_instance_ids = Instance.objects.all().values_list('name', flat=True) - print("Existing instances: {}".format(self.existing_instance_ids)) queue = multiprocessing.JoinableQueue() queue.put(SEED) # pool = multiprocessing.Pool(1, initializer=self.worker, initargs=(queue, )) # Disable concurrency (debug) diff --git a/scraper/migrations/0002_auto_20180826_2201.py b/scraper/migrations/0002_auto_20180826_2201.py new file mode 100644 index 0000000..a84f4b7 --- /dev/null +++ b/scraper/migrations/0002_auto_20180826_2201.py @@ -0,0 +1,19 @@ +# Generated by Django 2.1 on 2018-08-26 22:01 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('scraper', '0001_initial'), + ] + + operations = [ + migrations.AlterField( + model_name='instancestats', + name='instance', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='stats', to='scraper.Instance'), + ), + ] diff --git a/scraper/models.py b/scraper/models.py index 9dfadde..0878879 100644 --- a/scraper/models.py +++ 
b/scraper/models.py @@ -11,6 +11,7 @@ class InstanceStats(models.Model): instance = models.ForeignKey( Instance, on_delete=models.CASCADE, + related_name='stats', ) num_peers = models.IntegerField(blank=True, null=True) num_statuses = models.IntegerField(blank=True, null=True)