index.community/backend/scraper/management/commands/build_edges.py

39 lines
1.7 KiB
Python
Raw Normal View History

import subprocess
from django.core.management.base import BaseCommand
from django.conf import settings
2018-09-03 14:10:44 +00:00
from scraper.models import PeerRelationship, Edge
class Command(BaseCommand):
    """Rebuild the ``Edge`` table from the stored ``PeerRelationship`` rows.

    Each pair of opposite-direction PeerRelationships is collapsed into a
    single Edge whose weight is the pair's combined ``mention_count`` divided
    by its combined ``statuses_seen``.
    """

    # NOTE(review): no Gephi invocation is visible in this command; it only
    # rebuilds Edge rows. Confirm whether the help text is still accurate.
    help = "Takes what's in the database and calls Gephi to create and layout a graph"

    def handle(self, *args, **options):
        """Replace every Edge with freshly computed ones.

        Only relationships whose source and target both have status
        ``'success'`` are considered. Pairs with no mentions or no statuses
        seen are skipped, so no zero-weight edge is stored.
        """
        # Imported here so this block is self-contained and does not depend
        # on changes to the module's import header.
        from django.db import transaction

        self.stdout.write("Creating Edges from PeerRelationships...")

        # Turn symmetrical PeerRelationships into symmetrical Edges.
        relationships = PeerRelationship.objects.filter(
            source__status='success',
            target__status='success',
        )
        # Loop over once and index by (source, target) for fast lookup of the
        # reverse direction.
        pending = {(r.source_id, r.target_id): r for r in relationships}

        edges = []
        while pending:
            (source_id, target_id), outgoing = pending.popitem()
            total_statuses = outgoing.statuses_seen
            mention_count = outgoing.mention_count
            oldest_data = outgoing.last_updated

            # Consume the opposite direction (if present) so that each pair
            # yields exactly one edge.
            incoming = pending.pop((target_id, source_id), None)
            if incoming is not None:
                total_statuses += incoming.statuses_seen
                mention_count += incoming.mention_count
                # The edge is only as fresh as its stalest half.
                oldest_data = min(oldest_data, incoming.last_updated)

            if mention_count == 0 or total_statuses == 0:
                # Don't add edges with weight 0 (this also avoids dividing
                # by zero below).
                continue

            ratio = float(mention_count) / total_statuses
            edges.append(
                Edge(
                    source_id=source_id,
                    target_id=target_id,
                    weight=ratio,
                    last_updated=oldest_data,
                )
            )

        # Swap the old edges for the new ones atomically: if the insert
        # fails, the delete is rolled back instead of leaving the table empty.
        with transaction.atomic():
            Edge.objects.all().delete()
            Edge.objects.bulk_create(edges)