diff --git a/apiv1/serializers.py b/apiv1/serializers.py index 06ce009..e508f3d 100644 --- a/apiv1/serializers.py +++ b/apiv1/serializers.py @@ -57,10 +57,12 @@ class NodeSerializer(serializers.ModelSerializer): id = serializers.SerializerMethodField('get_name') label = serializers.SerializerMethodField('get_name') size = serializers.SerializerMethodField() + x = serializers.SerializerMethodField() + y = serializers.SerializerMethodField() class Meta: model = Instance - fields = ('id', 'label', 'size') + fields = ('id', 'label', 'size', 'x', 'y') def get_name(self, obj): return obj.name @@ -68,6 +70,12 @@ class NodeSerializer(serializers.ModelSerializer): def get_size(self, obj): return obj.user_count or 1 + def get_x(self, obj): + return obj.x_coord + + def get_y(self, obj): + return obj.y_coord + def to_representation(self, instance): """ Object instance -> Dict of primitive datatypes. diff --git a/apiv1/views.py b/apiv1/views.py index 128a755..9dff54a 100644 --- a/apiv1/views.py +++ b/apiv1/views.py @@ -1,5 +1,5 @@ from rest_framework import viewsets -from scraper.models import Instance, PeerRelationship +from scraper.models import Instance, Edge from apiv1.serializers import InstanceListSerializer, InstanceDetailSerializer, NodeSerializer, EdgeSerializer @@ -24,7 +24,7 @@ class EdgeView(viewsets.ReadOnlyModelViewSet): """ Endpoint to get a list of the graph's edges in a SigmaJS-friendly format. """ - queryset = PeerRelationship.objects.filter(source__status='success', target__status='success') + queryset = Edge.objects.all() serializer_class = EdgeSerializer diff --git a/frontend/public/favicon.ico b/frontend/public/favicon.ico index a11777c..9a8a279 100644 Binary files a/frontend/public/favicon.ico and b/frontend/public/favicon.ico differ diff --git a/frontend/src/components/Graph.jsx b/frontend/src/components/Graph.jsx index 871012c..e2b0e3d 100644 --- a/frontend/src/components/Graph.jsx +++ b/frontend/src/components/Graph.jsx @@ -36,9 +36,7 @@ class GraphImpl extends React.Component { onClickNode={(e) => this.props.selectAndLoadInstance(e.data.node.label)} onClickStage={(e) => this.props.selectAndLoadInstance(null)} > - - ) } diff --git a/frontend/src/components/Nav.tsx b/frontend/src/components/Nav.tsx index e966f3e..838bb9b 100644 --- a/frontend/src/components/Nav.tsx +++ b/frontend/src/components/Nav.tsx @@ -79,7 +79,7 @@ export class Nav extends React.Component<{}, INavState> {

How do you calculate the strength of relationships between instances?

- fediverse.space scrapes the last 2000 statuses from within the last month on the public + fediverse.space scrapes the last 5000 statuses from within the last month on the public timeline of each instance. It looks at the ratio of mentions of an instance / total statuses. It uses a ratio rather than an absolute number of mentions to reflect that smaller instances diff --git a/frontend/src/components/Sidebar.tsx b/frontend/src/components/Sidebar.tsx index 0e57e60..f58f42e 100644 --- a/frontend/src/components/Sidebar.tsx +++ b/frontend/src/components/Sidebar.tsx @@ -19,7 +19,7 @@ interface ISidebarProps { class SidebarImpl extends React.Component { public render() { return ( - + {this.renderSidebarContents()} ) diff --git a/frontend/src/index.css b/frontend/src/index.css index 16011bc..47090b4 100644 --- a/frontend/src/index.css +++ b/frontend/src/index.css @@ -2,7 +2,8 @@ html, body { margin: 0; padding: 50px 0 0 0; font-family: sans-serif; - background-color: #30404D; + /*background-color: #30404D;*/ + background-color: #293742; height: 100%; font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Oxygen,Ubuntu,Cantarell,Open Sans,Helvetica Neue,Icons16,sans-serif; } diff --git a/frontend/src/redux/types.ts b/frontend/src/redux/types.ts index eecd2ce..03151f4 100644 --- a/frontend/src/redux/types.ts +++ b/frontend/src/redux/types.ts @@ -32,6 +32,8 @@ export interface IInstanceDetails { interface IGraphNode { id: string; label: string; + x: number; + y: number; size?: number; color?: string; } diff --git a/gephi/src/main/java/space/fediverse/graph/GraphBuilder.java b/gephi/src/main/java/space/fediverse/graph/GraphBuilder.java index 6997320..13d298f 100644 --- a/gephi/src/main/java/space/fediverse/graph/GraphBuilder.java +++ b/gephi/src/main/java/space/fediverse/graph/GraphBuilder.java @@ -2,7 +2,10 @@ package space.fediverse.graph; import org.gephi.graph.api.GraphController; import org.gephi.graph.api.GraphModel; +import org.gephi.graph.api.Node; +import org.gephi.graph.api.UndirectedGraph; import org.gephi.io.database.drivers.PostgreSQLDriver; +import org.gephi.io.database.drivers.SQLUtils; import org.gephi.io.exporter.api.ExportController; import org.gephi.io.importer.api.Container; import org.gephi.io.importer.api.EdgeDirectionDefault; @@ -20,6 +23,10 @@ import java.io.File; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.sql.Statement; import java.util.concurrent.TimeUnit; public class GraphBuilder { @@ -34,9 +41,9 @@ public class GraphBuilder { private static final String edgeQuery = String.join("" , "SELECT" - , " scraper_instance_peers.from_instance_id AS source," - , " scraper_instance_peers.to_instance_id AS target" - , " FROM scraper_instance_peers" + , " scraper_edge.source_id AS source," + , " scraper_edge.target_id AS target" + , " FROM scraper_edge" ); @@ -55,7 +62,6 @@ public class GraphBuilder { // AttributeModel? // Import from database - EdgeListDatabaseImpl db = new EdgeListDatabaseImpl(); db.setSQLDriver(new PostgreSQLDriver()); db.setHost("localhost"); @@ -77,18 +83,57 @@ public class GraphBuilder { importController.process(container, new DefaultProcessor(), workspace); // Layout - AutoLayout autoLayout = new AutoLayout(2, TimeUnit.MINUTES); + AutoLayout autoLayout = new AutoLayout(1, TimeUnit.MINUTES); autoLayout.setGraphModel(graphModel); // YifanHuLayout firstLayout = new YifanHuLayout(null, new StepDisplacement(1f)); - ForceAtlas2 secondLayout = new ForceAtlas2(null); -// AutoLayout.DynamicProperty adjustBySizeProperty = AutoLayout.createDynamicProperty("forceAtlas.adjustSizes.name", Boolean.TRUE, 0.1f); -// AutoLayout.DynamicProperty repulsionProperty = AutoLayout.createDynamicProperty("forceAtlas.repulsionStrength.name", 500., 0f); -// autoLayout.addLayout(firstLayout, 0.5f); -// autoLayout.addLayout(secondLayout, 0.5f, new AutoLayout.DynamicProperty[]{adjustBySizeProperty, repulsionProperty}); - autoLayout.addLayout(secondLayout, 1f); + ForceAtlas2 forceAtlas2Layout = new ForceAtlas2(null); + forceAtlas2Layout.setLinLogMode(true); + autoLayout.addLayout(forceAtlas2Layout, 1f); autoLayout.execute(); - // Export + // Update coordinates in database + // First, connect + String dbUrl = SQLUtils.getUrl(db.getSQLDriver(), db.getHost(), db.getPort(), db.getDBName()); + Connection conn = null; + try { + conn = db.getSQLDriver().getConnection(dbUrl, db.getUsername(), db.getPasswd()); + } catch (SQLException e) { + if (conn != null) { + try { + conn.close(); + } catch (Exception e2) { + // Closing failed; ah well + } + } + throw new RuntimeException(e); + } + // Update + UndirectedGraph graph = graphModel.getUndirectedGraph(); + for (Node node: graph.getNodes()) { + String id = node.getId().toString(); + float x = node.x(); + float y = node.y(); + + try { + PreparedStatement statement = conn.prepareStatement( + "UPDATE scraper_instance SET x_coord=?, y_coord=? WHERE name=?"); + statement.setFloat(1, x); + statement.setFloat(2, y); + statement.setString(3, id); + statement.executeUpdate(); + } catch (SQLException e) { + throw new RuntimeException(e); + } + } + // Close connection + try { + conn.close(); + } catch (SQLException e) { + // Closing failed; ah well + } + + + // Also export to gexf ExportController exportController = Lookup.getDefault().lookup(ExportController.class); try { exportController.exportFile(new File("fediverse.gexf")); @@ -96,7 +141,7 @@ public class GraphBuilder { throw new RuntimeException(e); } - // Gephi doesn't seem to provide a good way to close the postgres connection, so we have to force close the + // Gephi doesn't seem to provide a good way to close its postgres connection, so we have to force close the // program. This'll leave a hanging connection for some period ¯\_(ツ)_/¯ System.exit(0); } diff --git a/requirements.txt b/requirements.txt index c2acce3..a5dc1b3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,4 +23,5 @@ requests==2.19.1 six==1.10.0 sqlparse==0.2.4 tabulate==0.7.7 +tqdm==4.25.0 urllib3==1.23 diff --git a/scraper/management/commands/build_graph.py b/scraper/management/commands/build_graph.py index 3821be0..f863665 100644 --- a/scraper/management/commands/build_graph.py +++ b/scraper/management/commands/build_graph.py @@ -1,6 +1,7 @@ import subprocess from django.core.management.base import BaseCommand from django.conf import settings +from scraper.models import PeerRelationship, Edge class Command(BaseCommand): @@ -10,10 +11,36 @@ class Command(BaseCommand): super().__init__(*args, **kwargs) def handle(self, *args, **options): + self.stdout.write("Creating Edges from PeerRelationships...") + # Turn symmetrical PeerRelationships into symmetrical Edges + relationships = PeerRelationship.objects.filter(source__status='success', target__status='success') + # Loop over once and put 'em into a dict for fast access + relationships = {(r.source_id, r.target_id): r for r in relationships} + + edges = [] + while relationships: + (source_id, target_id), outgoing = relationships.popitem() + total_statuses = outgoing.statuses_seen or 0 + mention_count = outgoing.mention_count or 0 + incoming = relationships.pop((target_id, source_id), None) + oldest_data = outgoing.last_updated + if incoming: + total_statuses += (incoming.statuses_seen or 0) + mention_count += (incoming.mention_count or 0) + oldest_data = min(oldest_data, incoming.last_updated) + if mention_count == 0 or total_statuses == 0: + continue + ratio = float(mention_count)/total_statuses + edges.append(Edge(source_id=source_id, target_id=target_id, weight=ratio, last_updated=oldest_data)) + + Edge.objects.all().delete() + Edge.objects.bulk_create(edges) + + self.stdout.write("Creating layout...") database_config = settings.DATABASES['default'] subprocess.call([ 'java', - '-Xmx4g', + '-Xmx1g', '-jar', 'gephi/build/libs/graphBuilder.jar', database_config['NAME'], diff --git a/scraper/management/commands/scrape.py b/scraper/management/commands/scrape.py index df69dd6..70c06fa 100644 --- a/scraper/management/commands/scrape.py +++ b/scraper/management/commands/scrape.py @@ -33,6 +33,7 @@ SEED = 'mastodon.social' TIMEOUT = 20 # seconds NUM_THREADS = 64 PERSONAL_INSTANCE_THRESHOLD = 5 # instances with <= this many users won't be scraped +STATUS_SCRAPE_LIMIT = 5000 class Command(BaseCommand): @@ -69,7 +70,7 @@ class Command(BaseCommand): def get_statuses(instance_name: str): """Collect all statuses that mention users on other instances""" mentions = [] - datetime_threshold = datetime.now(timezone.utc) - timedelta(months=1) + datetime_threshold = datetime.now(timezone.utc) - timedelta(days=31) statuses_seen = 0 # We'll ask for 1000 statuses, but Mastodon never returns more than 40. Some Pleroma instances will ignore # the limit and return 20. @@ -91,7 +92,7 @@ class Command(BaseCommand): earliest_time_seen = datetime_parser(earliest_status['created_at']) statuses_seen += len(statuses) # Mastodon returns max 40 statuses; if we ever see less than that we know there aren't any more - if earliest_time_seen < datetime_threshold or statuses_seen >= 2000: + if earliest_time_seen < datetime_threshold or statuses_seen >= STATUS_SCRAPE_LIMIT: break # Continuing, so get url for next page min_id = earliest_status['id'] diff --git a/scraper/migrations/0001_initial.py b/scraper/migrations/0001_initial.py index 04fdea1..6d804bd 100644 --- a/scraper/migrations/0001_initial.py +++ b/scraper/migrations/0001_initial.py @@ -1,4 +1,4 @@ -# Generated by Django 2.1 on 2018-09-01 22:28 +# Generated by Django 2.1 on 2018-09-03 14:09 from django.db import migrations, models import django.db.models.deletion @@ -12,6 +12,14 @@ class Migration(migrations.Migration): ] operations = [ + migrations.CreateModel( + name='Edge', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('weight', models.FloatField(blank=True, null=True)), + ('last_updated', models.DateTimeField()), + ], + ), migrations.CreateModel( name='Instance', fields=[ @@ -22,6 +30,8 @@ class Migration(migrations.Migration): ('user_count', models.IntegerField(blank=True, null=True)), ('version', models.CharField(blank=True, max_length=1000)), ('status', models.CharField(max_length=100)), + ('x_coord', models.FloatField(blank=True, null=True)), + ('y_coord', models.FloatField(blank=True, null=True)), ('first_seen', models.DateTimeField(auto_now_add=True)), ('last_updated', models.DateTimeField(auto_now=True)), ], @@ -43,4 +53,14 @@ class Migration(migrations.Migration): name='peers', field=models.ManyToManyField(through='scraper.PeerRelationship', to='scraper.Instance'), ), + migrations.AddField( + model_name='edge', + name='source', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='+', to='scraper.Instance'), + ), + migrations.AddField( + model_name='edge', + name='target', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='+', to='scraper.Instance'), + ), ] diff --git a/scraper/models.py b/scraper/models.py index 004bce0..20322f7 100644 --- a/scraper/models.py +++ b/scraper/models.py @@ -22,6 +22,10 @@ class Instance(models.Model): # Foreign keys peers = models.ManyToManyField('self', symmetrical=False, through='PeerRelationship') + # Graph + x_coord = models.FloatField(blank=True, null=True) + y_coord = models.FloatField(blank=True, null=True) + # Automatic fields first_seen = models.DateTimeField(auto_now_add=True) last_updated = models.DateTimeField(auto_now=True) @@ -38,3 +42,17 @@ class PeerRelationship(models.Model): # Metadata first_seen = models.DateTimeField(auto_now_add=True) last_updated = models.DateTimeField(auto_now=True) + + +class Edge(models.Model): + """ + This class is automatically generated from PeerRelationship using the build_graph command. + It aggregates stats from the asymmetrical PeerRelationship to a symmetrical one that's suitable for serving + to the front-end. + """ + source = models.ForeignKey(Instance, related_name='+', on_delete=models.CASCADE) + target = models.ForeignKey(Instance, related_name='+', on_delete=models.CASCADE) + weight = models.FloatField(blank=True, null=True) + + # Metadata + last_updated = models.DateTimeField(blank=False, null=False)