finish API
This commit is contained in:
parent
074e2e1b88
commit
21af38a1ea
|
@ -1,8 +1,23 @@
|
|||
from rest_framework import serializers
|
||||
from scraper.models import Instance
|
||||
from scraper.models import Instance, InstanceStats
|
||||
|
||||
|
||||
class InstanceSerializer(serializers.HyperlinkedModelSerializer):
|
||||
class InstanceStatsSerializer(serializers.ModelSerializer):
|
||||
class Meta:
|
||||
model = InstanceStats
|
||||
exclude = ('id', 'instance', 'status')
|
||||
|
||||
|
||||
class InstanceListSerializer(serializers.ModelSerializer):
|
||||
class Meta:
|
||||
model = Instance
|
||||
fields = ('name', 'peers')
|
||||
fields = ('name', )
|
||||
|
||||
|
||||
class InstanceDetailSerializer(serializers.ModelSerializer):
|
||||
peers = InstanceListSerializer(many=True, read_only=True)
|
||||
stats = InstanceStatsSerializer(many=True, read_only=True)
|
||||
|
||||
class Meta:
|
||||
model = Instance
|
||||
fields = ('name', 'stats', 'peers')
|
||||
|
|
|
@ -1,11 +1,19 @@
|
|||
from django.shortcuts import render
|
||||
from rest_framework import viewsets
|
||||
from scraper.models import Instance
|
||||
from apiv1.serializers import InstanceSerializer
|
||||
from apiv1.serializers import InstanceListSerializer, InstanceDetailSerializer
|
||||
|
||||
|
||||
class InstanceViewSet(viewsets.ModelViewSet):
|
||||
"""API endpoint to view instance details"""
|
||||
queryset = Instance.objects.all()
|
||||
serializer_class = InstanceSerializer
|
||||
"""API endpoint to view instances"""
|
||||
|
||||
lookup_value_regex = '[a-zA-Z0-9-_\.]+'
|
||||
|
||||
queryset = Instance.objects.all()
|
||||
serializer_class = InstanceListSerializer
|
||||
detail_serializer_class = InstanceDetailSerializer # this serializer also includes stats and a list of peers
|
||||
|
||||
def get_serializer_class(self):
    """Return the serializer class to use for the current action.

    The ``retrieve`` action uses ``detail_serializer_class`` when the view
    defines one (and it is not ``None``); every other action, and any view
    without a detail serializer, falls back to ``serializer_class``.
    """
    # getattr with a default covers both "attribute missing" and
    # "attribute explicitly set to None" with a single check.
    detail_class = getattr(self, 'detail_serializer_class', None)
    if self.action == 'retrieve' and detail_class is not None:
        return detail_class
    return self.serializer_class
|
||||
|
|
|
@ -78,8 +78,10 @@ WSGI_APPLICATION = 'backend.wsgi.application'
|
|||
|
||||
DATABASES = {
|
||||
'default': {
|
||||
'ENGINE': 'django.db.backends.sqlite3',
|
||||
'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
|
||||
'ENGINE': 'django.db.backends.postgresql',
|
||||
'NAME': 'fediverse',
|
||||
'USER': 'tao',
|
||||
'PASSWORD': 'tao',
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -121,3 +123,12 @@ USE_TZ = True
|
|||
# https://docs.djangoproject.com/en/2.1/howto/static-files/
|
||||
|
||||
STATIC_URL = '/static/'
|
||||
|
||||
if DEBUG:
|
||||
MIDDLEWARE += (
|
||||
'silk.middleware.SilkyMiddleware',
|
||||
)
|
||||
|
||||
INSTALLED_APPS += (
|
||||
'silk',
|
||||
)
|
||||
|
|
|
@ -16,10 +16,14 @@ Including another URLconf
|
|||
from django.urls import path, include
|
||||
from rest_framework import routers
|
||||
from apiv1 import views
|
||||
from backend import settings
|
||||
|
||||
router = routers.DefaultRouter()
|
||||
router.register(r'instances', views.InstanceViewSet)
|
||||
|
||||
urlpatterns = [
|
||||
path(r'', include(router.urls))
|
||||
path(r'api/v1/', include(router.urls))
|
||||
]
|
||||
|
||||
if settings.DEBUG:
|
||||
urlpatterns += [path(r'silk/', include('silk.urls', namespace='silk'))]
|
40
scraper/management/commands/_util.py
Normal file
40
scraper/management/commands/_util.py
Normal file
|
@ -0,0 +1,40 @@
|
|||
# Table-level lock modes accepted by PostgreSQL's LOCK TABLE statement.
LOCK_MODES = (
    'ACCESS SHARE',
    'ROW SHARE',
    'ROW EXCLUSIVE',
    'SHARE UPDATE EXCLUSIVE',
    'SHARE',
    'SHARE ROW EXCLUSIVE',
    'EXCLUSIVE',
    'ACCESS EXCLUSIVE',
)


def require_lock(model, lock):
    """
    Decorator for PostgreSQL's table-level lock functionality.

    Each call of the decorated function first issues
    ``LOCK TABLE <model's db_table> IN <lock> MODE`` on Django's default
    connection and then runs the function. PostgreSQL keeps a table lock
    until the surrounding transaction ends, so the decorated function
    should run inside a transaction.

    Example:
        @transaction.atomic
        @require_lock(MyModel, 'ACCESS EXCLUSIVE')
        def myview(request)
            ...

    Arguments:
        model: Django model class whose table (``model._meta.db_table``)
            is locked.
        lock: one of the strings in ``LOCK_MODES``.

    Raises:
        ValueError: when the decorated function is called and ``lock`` is
            not one of ``LOCK_MODES``. No SQL is executed in that case.

    PostgreSQL's LOCK Documentation:
    https://www.postgresql.org/docs/current/sql-lock.html
    """
    from functools import wraps

    def require_lock_decorator(view_func):
        @wraps(view_func)  # keep the wrapped function's name and docstring
        def wrapper(*args, **kwargs):
            # Validate before touching the database: the lock mode is
            # interpolated into the SQL text, so only whitelisted values
            # may ever reach the statement.
            if lock not in LOCK_MODES:
                raise ValueError(
                    '%s is not a PostgreSQL supported lock mode.' % (lock,)
                )
            # Imported lazily so this module can be imported without
            # configured Django settings.
            from django.db import connection
            # Identifiers and lock modes cannot be passed as query
            # parameters, hence the string formatting. Closing the cursor
            # does not release the lock.
            with connection.cursor() as cursor:
                cursor.execute(
                    'LOCK TABLE %s IN %s MODE' % (model._meta.db_table, lock)
                )
            return view_func(*args, **kwargs)

        return wrapper

    return require_lock_decorator
|
|
@ -8,7 +8,9 @@ import multiprocessing
|
|||
import requests
|
||||
import time
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.db import transaction
|
||||
from scraper.models import Instance, InstanceStats
|
||||
from scraper.management.commands._util import require_lock
|
||||
|
||||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
||||
# Because the script uses the Mastodon API other platforms like #
|
||||
|
@ -48,7 +50,6 @@ class Command(BaseCommand):
|
|||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.done_bag = set()
|
||||
self.existing_instance_ids = []
|
||||
|
||||
@staticmethod
|
||||
def get_instance_info(instance_name: str):
|
||||
|
@ -87,6 +88,8 @@ class Command(BaseCommand):
|
|||
print("Failed: {}".format(instance_name))
|
||||
return data
|
||||
|
||||
@transaction.atomic
|
||||
@require_lock(Instance, 'ACCESS EXCLUSIVE')
|
||||
def save_data(self, data):
|
||||
"""Save data"""
|
||||
instance, _ = Instance.objects.get_or_create(name=get_key(data, ['instance']))
|
||||
|
@ -102,18 +105,13 @@ class Command(BaseCommand):
|
|||
)
|
||||
stats.save()
|
||||
# Save peers
|
||||
# Save the list of instances we already have in the database
|
||||
existing_peers = Instance.objects.filter(name__in=self.existing_instance_ids)
|
||||
print("setting new_peer_ids")
|
||||
new_peer_ids = [peer for peer in data['peers'] if peer not in self.existing_instance_ids]
|
||||
# TODO: make this shared amongst threads so the database only needs to be queried once
|
||||
existing_instance_ids = Instance.objects.values_list('name', flat=True)
|
||||
existing_peers = Instance.objects.filter(name__in=existing_instance_ids)
|
||||
new_peer_ids = [peer for peer in data['peers'] if peer not in existing_instance_ids]
|
||||
if new_peer_ids:
|
||||
print("setting new_peers (ids: {})".format(new_peer_ids))
|
||||
new_peers = Instance.objects.bulk_create([Instance(name=peer) for peer in new_peer_ids])
|
||||
print("adding to existing_instance_ids")
|
||||
self.existing_instance_ids.extend(new_peer_ids)
|
||||
print("adding new peers")
|
||||
instance.peers.set(new_peers)
|
||||
print("adding existing peers")
|
||||
instance.peers.set(existing_peers)
|
||||
else:
|
||||
stats = InstanceStats(
|
||||
|
@ -141,8 +139,6 @@ class Command(BaseCommand):
|
|||
|
||||
def handle(self, *args, **options):
|
||||
start_time = time.time()
|
||||
self.existing_instance_ids = Instance.objects.all().values_list('name', flat=True)
|
||||
print("Existing instances: {}".format(self.existing_instance_ids))
|
||||
queue = multiprocessing.JoinableQueue()
|
||||
queue.put(SEED)
|
||||
# pool = multiprocessing.Pool(1, initializer=self.worker, initargs=(queue, )) # Disable concurrency (debug)
|
||||
|
|
19
scraper/migrations/0002_auto_20180826_2201.py
Normal file
19
scraper/migrations/0002_auto_20180826_2201.py
Normal file
|
@ -0,0 +1,19 @@
|
|||
# Generated by Django 2.1 on 2018-08-26 22:01
|
||||
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('scraper', '0001_initial'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='instancestats',
|
||||
name='instance',
|
||||
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='stats', to='scraper.Instance'),
|
||||
),
|
||||
]
|
|
@ -11,6 +11,7 @@ class InstanceStats(models.Model):
|
|||
instance = models.ForeignKey(
|
||||
Instance,
|
||||
on_delete=models.CASCADE,
|
||||
related_name='stats',
|
||||
)
|
||||
num_peers = models.IntegerField(blank=True, null=True)
|
||||
num_statuses = models.IntegerField(blank=True, null=True)
|
||||
|
|
Loading…
Reference in a new issue