finish API
This commit is contained in:
parent
074e2e1b88
commit
21af38a1ea
|
@ -1,8 +1,23 @@
|
||||||
from rest_framework import serializers
|
from rest_framework import serializers
|
||||||
from scraper.models import Instance
|
from scraper.models import Instance, InstanceStats
|
||||||
|
|
||||||
|
|
||||||
class InstanceSerializer(serializers.HyperlinkedModelSerializer):
|
class InstanceStatsSerializer(serializers.ModelSerializer):
    """Serialize ``InstanceStats`` rows, leaving out id, instance and status."""

    class Meta:
        model = InstanceStats
        # Everything on the model is exposed except these three fields.
        exclude = (
            'id',
            'instance',
            'status',
        )
|
||||||
|
|
||||||
|
|
||||||
|
class InstanceListSerializer(serializers.ModelSerializer):
    """Compact serializer for ``Instance`` that exposes only its name."""

    class Meta:
        model = Instance
        fields = ('name',)
|
||||||
|
|
||||||
|
|
||||||
|
class InstanceDetailSerializer(serializers.ModelSerializer):
    """Detailed serializer for ``Instance``: name plus nested stats and peers."""

    # Both relations are rendered as read-only nested lists.
    peers = InstanceListSerializer(read_only=True, many=True)
    stats = InstanceStatsSerializer(read_only=True, many=True)

    class Meta:
        model = Instance
        fields = (
            'name',
            'stats',
            'peers',
        )
|
||||||
|
|
|
@ -1,11 +1,19 @@
|
||||||
from django.shortcuts import render
|
|
||||||
from rest_framework import viewsets
|
from rest_framework import viewsets
|
||||||
from scraper.models import Instance
|
from scraper.models import Instance
|
||||||
from apiv1.serializers import InstanceSerializer
|
from apiv1.serializers import InstanceListSerializer, InstanceDetailSerializer
|
||||||
|
|
||||||
|
|
||||||
class InstanceViewSet(viewsets.ModelViewSet):
    """API endpoint to view instances"""

    # Raw string: ``\.`` is not a valid Python string escape, so the non-raw
    # form triggers an invalid-escape warning. The regex value is unchanged.
    # NOTE(review): presumably widened so lookup values may contain dots
    # (e.g. domain-style instance names) -- confirm against the router setup.
    lookup_value_regex = r'[a-zA-Z0-9-_\.]+'

    queryset = Instance.objects.all()
    serializer_class = InstanceListSerializer
    # this serializer also includes stats and a list of peers
    detail_serializer_class = InstanceDetailSerializer

    def get_serializer_class(self):
        """Pick the serializer for the current action.

        Returns ``detail_serializer_class`` for the ``retrieve`` action when
        that attribute is defined, and ``serializer_class`` in every other
        case.
        """
        if self.action == 'retrieve' and hasattr(self, 'detail_serializer_class'):
            return self.detail_serializer_class
        return self.serializer_class
|
||||||
|
|
|
@ -78,8 +78,10 @@ WSGI_APPLICATION = 'backend.wsgi.application'
|
||||||
|
|
||||||
DATABASES = {
|
DATABASES = {
|
||||||
'default': {
|
'default': {
|
||||||
'ENGINE': 'django.db.backends.sqlite3',
|
'ENGINE': 'django.db.backends.postgresql',
|
||||||
'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
|
'NAME': 'fediverse',
|
||||||
|
'USER': 'tao',
|
||||||
|
'PASSWORD': 'tao',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -121,3 +123,12 @@ USE_TZ = True
|
||||||
# https://docs.djangoproject.com/en/2.1/howto/static-files/
|
# https://docs.djangoproject.com/en/2.1/howto/static-files/
|
||||||
|
|
||||||
STATIC_URL = '/static/'
|
STATIC_URL = '/static/'
|
||||||
|
|
||||||
|
if DEBUG:
|
||||||
|
MIDDLEWARE += (
|
||||||
|
'silk.middleware.SilkyMiddleware',
|
||||||
|
)
|
||||||
|
|
||||||
|
INSTALLED_APPS += (
|
||||||
|
'silk',
|
||||||
|
)
|
||||||
|
|
|
@ -16,10 +16,14 @@ Including another URLconf
|
||||||
from django.urls import path, include
|
from django.urls import path, include
|
||||||
from rest_framework import routers
|
from rest_framework import routers
|
||||||
from apiv1 import views
|
from apiv1 import views
|
||||||
|
from backend import settings
|
||||||
|
|
||||||
router = routers.DefaultRouter()
|
router = routers.DefaultRouter()
|
||||||
router.register(r'instances', views.InstanceViewSet)
|
router.register(r'instances', views.InstanceViewSet)
|
||||||
|
|
||||||
urlpatterns = [
|
urlpatterns = [
|
||||||
path(r'', include(router.urls))
|
path(r'api/v1/', include(router.urls))
|
||||||
]
|
]
|
||||||
|
|
||||||
|
if settings.DEBUG:
|
||||||
|
urlpatterns += [path(r'silk/', include('silk.urls', namespace='silk'))]
|
40
scraper/management/commands/_util.py
Normal file
40
scraper/management/commands/_util.py
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
import functools

# Table-level lock modes accepted by require_lock().
LOCK_MODES = (
    'ACCESS SHARE',
    'ROW SHARE',
    'ROW EXCLUSIVE',
    'SHARE UPDATE EXCLUSIVE',
    'SHARE',
    'SHARE ROW EXCLUSIVE',
    'EXCLUSIVE',
    'ACCESS EXCLUSIVE',
)


def require_lock(model, lock):
    """
    Decorator for PostgreSQL's table-level lock functionality

    Each time the decorated function is called, ``LOCK TABLE <table> IN
    <lock> MODE`` is executed for ``model``'s table on the default database
    connection, and then the function itself is run. Per the PostgreSQL
    documentation, LOCK TABLE only makes sense inside a transaction block,
    so the decorated function should itself run inside a transaction.

    Args:
        model: model class whose ``_meta.db_table`` names the table to lock.
        lock: one of the strings in ``LOCK_MODES``.

    Returns:
        A decorator that wraps a function with the locking behaviour.

    Raises:
        ValueError: when the wrapped function is called and ``lock`` is not
            one of ``LOCK_MODES``.

    Example:
        @transaction.atomic
        @require_lock(MyModel, 'ACCESS EXCLUSIVE')
        def myview(request)
            ...

    PostgreSQL's LOCK Documentation:
    https://www.postgresql.org/docs/current/sql-lock.html
    """

    def require_lock_decorator(view_func):
        @functools.wraps(view_func)  # keep the wrapped function's name/docstring
        def wrapper(*args, **kwargs):
            if lock not in LOCK_MODES:
                # Interpolate the offending value; the message previously
                # contained a literal, unformatted '%s'.
                raise ValueError(
                    '%s is not a PostgreSQL supported lock mode.' % (lock,)
                )
            # Imported lazily so this module can be imported without Django
            # settings being configured.
            from django.db import connection

            # ``lock`` was checked against LOCK_MODES above and the table name
            # comes from model metadata, so neither is user-supplied input.
            with connection.cursor() as cursor:
                cursor.execute(
                    'LOCK TABLE %s IN %s MODE' % (model._meta.db_table, lock)
                )
            return view_func(*args, **kwargs)

        return wrapper

    return require_lock_decorator
|
|
@ -8,7 +8,9 @@ import multiprocessing
|
||||||
import requests
|
import requests
|
||||||
import time
|
import time
|
||||||
from django.core.management.base import BaseCommand
|
from django.core.management.base import BaseCommand
|
||||||
|
from django.db import transaction
|
||||||
from scraper.models import Instance, InstanceStats
|
from scraper.models import Instance, InstanceStats
|
||||||
|
from scraper.management.commands._util import require_lock
|
||||||
|
|
||||||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
||||||
# Because the script uses the Mastodon API other platforms like #
|
# Because the script uses the Mastodon API other platforms like #
|
||||||
|
@ -48,7 +50,6 @@ class Command(BaseCommand):
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
self.done_bag = set()
|
self.done_bag = set()
|
||||||
self.existing_instance_ids = []
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_instance_info(instance_name: str):
|
def get_instance_info(instance_name: str):
|
||||||
|
@ -87,6 +88,8 @@ class Command(BaseCommand):
|
||||||
print("Failed: {}".format(instance_name))
|
print("Failed: {}".format(instance_name))
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
@transaction.atomic
|
||||||
|
@require_lock(Instance, 'ACCESS EXCLUSIVE')
|
||||||
def save_data(self, data):
|
def save_data(self, data):
|
||||||
"""Save data"""
|
"""Save data"""
|
||||||
instance, _ = Instance.objects.get_or_create(name=get_key(data, ['instance']))
|
instance, _ = Instance.objects.get_or_create(name=get_key(data, ['instance']))
|
||||||
|
@ -102,18 +105,13 @@ class Command(BaseCommand):
|
||||||
)
|
)
|
||||||
stats.save()
|
stats.save()
|
||||||
# Save peers
|
# Save peers
|
||||||
# Save the list of instances we already have in the database
|
# TODO: make this shared amongst threads so the database only needs to be queried once
|
||||||
existing_peers = Instance.objects.filter(name__in=self.existing_instance_ids)
|
existing_instance_ids = Instance.objects.values_list('name', flat=True)
|
||||||
print("setting new_peer_ids")
|
existing_peers = Instance.objects.filter(name__in=existing_instance_ids)
|
||||||
new_peer_ids = [peer for peer in data['peers'] if peer not in self.existing_instance_ids]
|
new_peer_ids = [peer for peer in data['peers'] if peer not in existing_instance_ids]
|
||||||
if new_peer_ids:
|
if new_peer_ids:
|
||||||
print("setting new_peers (ids: {})".format(new_peer_ids))
|
|
||||||
new_peers = Instance.objects.bulk_create([Instance(name=peer) for peer in new_peer_ids])
|
new_peers = Instance.objects.bulk_create([Instance(name=peer) for peer in new_peer_ids])
|
||||||
print("adding to existing_instance_ids")
|
|
||||||
self.existing_instance_ids.extend(new_peer_ids)
|
|
||||||
print("adding new peers")
|
|
||||||
instance.peers.set(new_peers)
|
instance.peers.set(new_peers)
|
||||||
print("adding existing peers")
|
|
||||||
instance.peers.set(existing_peers)
|
instance.peers.set(existing_peers)
|
||||||
else:
|
else:
|
||||||
stats = InstanceStats(
|
stats = InstanceStats(
|
||||||
|
@ -141,8 +139,6 @@ class Command(BaseCommand):
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
self.existing_instance_ids = Instance.objects.all().values_list('name', flat=True)
|
|
||||||
print("Existing instances: {}".format(self.existing_instance_ids))
|
|
||||||
queue = multiprocessing.JoinableQueue()
|
queue = multiprocessing.JoinableQueue()
|
||||||
queue.put(SEED)
|
queue.put(SEED)
|
||||||
# pool = multiprocessing.Pool(1, initializer=self.worker, initargs=(queue, )) # Disable concurrency (debug)
|
# pool = multiprocessing.Pool(1, initializer=self.worker, initargs=(queue, )) # Disable concurrency (debug)
|
||||||
|
|
19
scraper/migrations/0002_auto_20180826_2201.py
Normal file
19
scraper/migrations/0002_auto_20180826_2201.py
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
# Generated by Django 2.1 on 2018-08-26 22:01
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
import django.db.models.deletion
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Alter ``InstanceStats.instance`` so its reverse accessor is ``stats``."""

    dependencies = [
        ('scraper', '0001_initial'),
    ]

    operations = [
        migrations.AlterField(
            model_name='instancestats',
            name='instance',
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                related_name='stats',
                to='scraper.Instance',
            ),
        ),
    ]
|
|
@ -11,6 +11,7 @@ class InstanceStats(models.Model):
|
||||||
instance = models.ForeignKey(
|
instance = models.ForeignKey(
|
||||||
Instance,
|
Instance,
|
||||||
on_delete=models.CASCADE,
|
on_delete=models.CASCADE,
|
||||||
|
related_name='stats',
|
||||||
)
|
)
|
||||||
num_peers = models.IntegerField(blank=True, null=True)
|
num_peers = models.IntegerField(blank=True, null=True)
|
||||||
num_statuses = models.IntegerField(blank=True, null=True)
|
num_statuses = models.IntegerField(blank=True, null=True)
|
||||||
|
|
Loading…
Reference in a new issue