diff --git a/apiv1/serializers.py b/apiv1/serializers.py
index 06ce009..e508f3d 100644
--- a/apiv1/serializers.py
+++ b/apiv1/serializers.py
@@ -57,10 +57,12 @@ class NodeSerializer(serializers.ModelSerializer):
id = serializers.SerializerMethodField('get_name')
label = serializers.SerializerMethodField('get_name')
size = serializers.SerializerMethodField()
+ x = serializers.SerializerMethodField()
+ y = serializers.SerializerMethodField()
class Meta:
model = Instance
- fields = ('id', 'label', 'size')
+ fields = ('id', 'label', 'size', 'x', 'y')
def get_name(self, obj):
return obj.name
@@ -68,6 +70,12 @@ class NodeSerializer(serializers.ModelSerializer):
def get_size(self, obj):
return obj.user_count or 1
+ def get_x(self, obj):
+ return obj.x_coord
+
+ def get_y(self, obj):
+ return obj.y_coord
+
def to_representation(self, instance):
"""
Object instance -> Dict of primitive datatypes.
diff --git a/apiv1/views.py b/apiv1/views.py
index 128a755..9dff54a 100644
--- a/apiv1/views.py
+++ b/apiv1/views.py
@@ -1,5 +1,5 @@
from rest_framework import viewsets
-from scraper.models import Instance, PeerRelationship
+from scraper.models import Instance, Edge
from apiv1.serializers import InstanceListSerializer, InstanceDetailSerializer, NodeSerializer, EdgeSerializer
@@ -24,7 +24,7 @@ class EdgeView(viewsets.ReadOnlyModelViewSet):
"""
Endpoint to get a list of the graph's edges in a SigmaJS-friendly format.
"""
- queryset = PeerRelationship.objects.filter(source__status='success', target__status='success')
+ queryset = Edge.objects.all()
serializer_class = EdgeSerializer
diff --git a/frontend/public/favicon.ico b/frontend/public/favicon.ico
index a11777c..9a8a279 100644
Binary files a/frontend/public/favicon.ico and b/frontend/public/favicon.ico differ
diff --git a/frontend/src/components/Graph.jsx b/frontend/src/components/Graph.jsx
index 871012c..e2b0e3d 100644
--- a/frontend/src/components/Graph.jsx
+++ b/frontend/src/components/Graph.jsx
@@ -36,9 +36,7 @@ class GraphImpl extends React.Component {
onClickNode={(e) => this.props.selectAndLoadInstance(e.data.node.label)}
onClickStage={(e) => this.props.selectAndLoadInstance(null)}
>
-
-
)
}
diff --git a/frontend/src/components/Nav.tsx b/frontend/src/components/Nav.tsx
index e966f3e..838bb9b 100644
--- a/frontend/src/components/Nav.tsx
+++ b/frontend/src/components/Nav.tsx
@@ -79,7 +79,7 @@ export class Nav extends React.Component<{}, INavState> {
How do you calculate the strength of relationships between instances?
- fediverse.space scrapes the last 2000 statuses from within the last month on the public
+ fediverse.space scrapes the last 5000 statuses from within the last month on the public
timeline of each instance. It looks at the ratio of
mentions of an instance / total statuses
.
It uses a ratio rather than an absolute number of mentions to reflect that smaller instances
diff --git a/frontend/src/components/Sidebar.tsx b/frontend/src/components/Sidebar.tsx
index 0e57e60..f58f42e 100644
--- a/frontend/src/components/Sidebar.tsx
+++ b/frontend/src/components/Sidebar.tsx
@@ -19,7 +19,7 @@ interface ISidebarProps {
class SidebarImpl extends React.Component {
public render() {
return (
-
+
{this.renderSidebarContents()}
)
diff --git a/frontend/src/index.css b/frontend/src/index.css
index 16011bc..47090b4 100644
--- a/frontend/src/index.css
+++ b/frontend/src/index.css
@@ -2,7 +2,8 @@ html, body {
margin: 0;
padding: 50px 0 0 0;
font-family: sans-serif;
- background-color: #30404D;
+ /*background-color: #30404D;*/
+ background-color: #293742;
height: 100%;
font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Oxygen,Ubuntu,Cantarell,Open Sans,Helvetica Neue,Icons16,sans-serif;
}
diff --git a/frontend/src/redux/types.ts b/frontend/src/redux/types.ts
index eecd2ce..03151f4 100644
--- a/frontend/src/redux/types.ts
+++ b/frontend/src/redux/types.ts
@@ -32,6 +32,8 @@ export interface IInstanceDetails {
interface IGraphNode {
id: string;
label: string;
+ x: number;
+ y: number;
size?: number;
color?: string;
}
diff --git a/gephi/src/main/java/space/fediverse/graph/GraphBuilder.java b/gephi/src/main/java/space/fediverse/graph/GraphBuilder.java
index 6997320..13d298f 100644
--- a/gephi/src/main/java/space/fediverse/graph/GraphBuilder.java
+++ b/gephi/src/main/java/space/fediverse/graph/GraphBuilder.java
@@ -2,7 +2,10 @@ package space.fediverse.graph;
import org.gephi.graph.api.GraphController;
import org.gephi.graph.api.GraphModel;
+import org.gephi.graph.api.Node;
+import org.gephi.graph.api.UndirectedGraph;
import org.gephi.io.database.drivers.PostgreSQLDriver;
+import org.gephi.io.database.drivers.SQLUtils;
import org.gephi.io.exporter.api.ExportController;
import org.gephi.io.importer.api.Container;
import org.gephi.io.importer.api.EdgeDirectionDefault;
@@ -20,6 +23,10 @@ import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Statement;
import java.util.concurrent.TimeUnit;
public class GraphBuilder {
@@ -34,9 +41,9 @@ public class GraphBuilder {
private static final String edgeQuery = String.join(""
, "SELECT"
- , " scraper_instance_peers.from_instance_id AS source,"
- , " scraper_instance_peers.to_instance_id AS target"
- , " FROM scraper_instance_peers"
+ , " scraper_edge.source_id AS source,"
+ , " scraper_edge.target_id AS target"
+ , " FROM scraper_edge"
);
@@ -55,7 +62,6 @@ public class GraphBuilder {
// AttributeModel?
// Import from database
-
EdgeListDatabaseImpl db = new EdgeListDatabaseImpl();
db.setSQLDriver(new PostgreSQLDriver());
db.setHost("localhost");
@@ -77,18 +83,57 @@ public class GraphBuilder {
importController.process(container, new DefaultProcessor(), workspace);
// Layout
- AutoLayout autoLayout = new AutoLayout(2, TimeUnit.MINUTES);
+ AutoLayout autoLayout = new AutoLayout(1, TimeUnit.MINUTES);
autoLayout.setGraphModel(graphModel);
// YifanHuLayout firstLayout = new YifanHuLayout(null, new StepDisplacement(1f));
- ForceAtlas2 secondLayout = new ForceAtlas2(null);
-// AutoLayout.DynamicProperty adjustBySizeProperty = AutoLayout.createDynamicProperty("forceAtlas.adjustSizes.name", Boolean.TRUE, 0.1f);
-// AutoLayout.DynamicProperty repulsionProperty = AutoLayout.createDynamicProperty("forceAtlas.repulsionStrength.name", 500., 0f);
-// autoLayout.addLayout(firstLayout, 0.5f);
-// autoLayout.addLayout(secondLayout, 0.5f, new AutoLayout.DynamicProperty[]{adjustBySizeProperty, repulsionProperty});
- autoLayout.addLayout(secondLayout, 1f);
+ ForceAtlas2 forceAtlas2Layout = new ForceAtlas2(null);
+ forceAtlas2Layout.setLinLogMode(true);
+ autoLayout.addLayout(forceAtlas2Layout, 1f);
autoLayout.execute();
- // Export
+ // Update coordinates in database
+ // First, connect
+ String dbUrl = SQLUtils.getUrl(db.getSQLDriver(), db.getHost(), db.getPort(), db.getDBName());
+ Connection conn = null;
+ try {
+ conn = db.getSQLDriver().getConnection(dbUrl, db.getUsername(), db.getPasswd());
+ } catch (SQLException e) {
+ if (conn != null) {
+ try {
+ conn.close();
+ } catch (Exception e2) {
+ // Closing failed; ah well
+ }
+ }
+ throw new RuntimeException(e);
+ }
+ // Update
+ UndirectedGraph graph = graphModel.getUndirectedGraph();
+ for (Node node: graph.getNodes()) {
+ String id = node.getId().toString();
+ float x = node.x();
+ float y = node.y();
+
+ try {
+ PreparedStatement statement = conn.prepareStatement(
+ "UPDATE scraper_instance SET x_coord=?, y_coord=? WHERE name=?");
+ statement.setFloat(1, x);
+ statement.setFloat(2, y);
+ statement.setString(3, id);
+ statement.executeUpdate();
+ } catch (SQLException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ // Close connection
+ try {
+ conn.close();
+ } catch (SQLException e) {
+ // Closing failed; ah well
+ }
+
+
+ // Also export to gexf
ExportController exportController = Lookup.getDefault().lookup(ExportController.class);
try {
exportController.exportFile(new File("fediverse.gexf"));
@@ -96,7 +141,7 @@ public class GraphBuilder {
throw new RuntimeException(e);
}
- // Gephi doesn't seem to provide a good way to close the postgres connection, so we have to force close the
+ // Gephi doesn't seem to provide a good way to close its postgres connection, so we have to force close the
// program. This'll leave a hanging connection for some period ¯\_(ツ)_/¯
System.exit(0);
}
diff --git a/requirements.txt b/requirements.txt
index c2acce3..a5dc1b3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,4 +23,5 @@ requests==2.19.1
six==1.10.0
sqlparse==0.2.4
tabulate==0.7.7
+tqdm==4.25.0
urllib3==1.23
diff --git a/scraper/management/commands/build_graph.py b/scraper/management/commands/build_graph.py
index 3821be0..f863665 100644
--- a/scraper/management/commands/build_graph.py
+++ b/scraper/management/commands/build_graph.py
@@ -1,6 +1,7 @@
import subprocess
from django.core.management.base import BaseCommand
from django.conf import settings
+from scraper.models import PeerRelationship, Edge
class Command(BaseCommand):
@@ -10,10 +11,36 @@ class Command(BaseCommand):
super().__init__(*args, **kwargs)
def handle(self, *args, **options):
+ self.stdout.write("Creating Edges from PeerRelationships...")
+ # Turn symmetrical PeerRelationships into symmetrical Edges
+ relationships = PeerRelationship.objects.filter(source__status='success', target__status='success')
+        # Loop over the relationships once and put them into a dict for fast lookup
+ relationships = {(r.source_id, r.target_id): r for r in relationships}
+
+ edges = []
+ while relationships:
+ (source_id, target_id), outgoing = relationships.popitem()
+ total_statuses = outgoing.statuses_seen or 0
+ mention_count = outgoing.mention_count or 0
+ incoming = relationships.pop((target_id, source_id), None)
+ oldest_data = outgoing.last_updated
+ if incoming:
+ total_statuses += (incoming.statuses_seen or 0)
+ mention_count += (incoming.mention_count or 0)
+ oldest_data = min(oldest_data, incoming.last_updated)
+ if mention_count == 0 or total_statuses == 0:
+ continue
+ ratio = float(mention_count)/total_statuses
+ edges.append(Edge(source_id=source_id, target_id=target_id, weight=ratio, last_updated=oldest_data))
+
+ Edge.objects.all().delete()
+ Edge.objects.bulk_create(edges)
+
+ self.stdout.write("Creating layout...")
database_config = settings.DATABASES['default']
subprocess.call([
'java',
- '-Xmx4g',
+ '-Xmx1g',
'-jar',
'gephi/build/libs/graphBuilder.jar',
database_config['NAME'],
diff --git a/scraper/management/commands/scrape.py b/scraper/management/commands/scrape.py
index df69dd6..70c06fa 100644
--- a/scraper/management/commands/scrape.py
+++ b/scraper/management/commands/scrape.py
@@ -33,6 +33,7 @@ SEED = 'mastodon.social'
TIMEOUT = 20 # seconds
NUM_THREADS = 64
PERSONAL_INSTANCE_THRESHOLD = 5 # instances with <= this many users won't be scraped
+STATUS_SCRAPE_LIMIT = 5000
class Command(BaseCommand):
@@ -69,7 +70,7 @@ class Command(BaseCommand):
def get_statuses(instance_name: str):
"""Collect all statuses that mention users on other instances"""
mentions = []
- datetime_threshold = datetime.now(timezone.utc) - timedelta(months=1)
+ datetime_threshold = datetime.now(timezone.utc) - timedelta(days=31)
statuses_seen = 0
# We'll ask for 1000 statuses, but Mastodon never returns more than 40. Some Pleroma instances will ignore
# the limit and return 20.
@@ -91,7 +92,7 @@ class Command(BaseCommand):
earliest_time_seen = datetime_parser(earliest_status['created_at'])
statuses_seen += len(statuses)
# Mastodon returns max 40 statuses; if we ever see less than that we know there aren't any more
- if earliest_time_seen < datetime_threshold or statuses_seen >= 2000:
+ if earliest_time_seen < datetime_threshold or statuses_seen >= STATUS_SCRAPE_LIMIT:
break
# Continuing, so get url for next page
min_id = earliest_status['id']
diff --git a/scraper/migrations/0001_initial.py b/scraper/migrations/0001_initial.py
index 04fdea1..6d804bd 100644
--- a/scraper/migrations/0001_initial.py
+++ b/scraper/migrations/0001_initial.py
@@ -1,4 +1,4 @@
-# Generated by Django 2.1 on 2018-09-01 22:28
+# Generated by Django 2.1 on 2018-09-03 14:09
from django.db import migrations, models
import django.db.models.deletion
@@ -12,6 +12,14 @@ class Migration(migrations.Migration):
]
operations = [
+ migrations.CreateModel(
+ name='Edge',
+ fields=[
+ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('weight', models.FloatField(blank=True, null=True)),
+ ('last_updated', models.DateTimeField()),
+ ],
+ ),
migrations.CreateModel(
name='Instance',
fields=[
@@ -22,6 +30,8 @@ class Migration(migrations.Migration):
('user_count', models.IntegerField(blank=True, null=True)),
('version', models.CharField(blank=True, max_length=1000)),
('status', models.CharField(max_length=100)),
+ ('x_coord', models.FloatField(blank=True, null=True)),
+ ('y_coord', models.FloatField(blank=True, null=True)),
('first_seen', models.DateTimeField(auto_now_add=True)),
('last_updated', models.DateTimeField(auto_now=True)),
],
@@ -43,4 +53,14 @@ class Migration(migrations.Migration):
name='peers',
field=models.ManyToManyField(through='scraper.PeerRelationship', to='scraper.Instance'),
),
+ migrations.AddField(
+ model_name='edge',
+ name='source',
+ field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='+', to='scraper.Instance'),
+ ),
+ migrations.AddField(
+ model_name='edge',
+ name='target',
+ field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='+', to='scraper.Instance'),
+ ),
]
diff --git a/scraper/models.py b/scraper/models.py
index 004bce0..20322f7 100644
--- a/scraper/models.py
+++ b/scraper/models.py
@@ -22,6 +22,10 @@ class Instance(models.Model):
# Foreign keys
peers = models.ManyToManyField('self', symmetrical=False, through='PeerRelationship')
+ # Graph
+ x_coord = models.FloatField(blank=True, null=True)
+ y_coord = models.FloatField(blank=True, null=True)
+
# Automatic fields
first_seen = models.DateTimeField(auto_now_add=True)
last_updated = models.DateTimeField(auto_now=True)
@@ -38,3 +42,17 @@ class PeerRelationship(models.Model):
# Metadata
first_seen = models.DateTimeField(auto_now_add=True)
last_updated = models.DateTimeField(auto_now=True)
+
+
+class Edge(models.Model):
+ """
+    Edge rows are generated automatically from PeerRelationship by the build_graph command.
+    Each Edge aggregates the stats of the two asymmetrical PeerRelationships between a pair of instances
+    into one symmetrical relationship suitable for serving to the front-end.
+ """
+ source = models.ForeignKey(Instance, related_name='+', on_delete=models.CASCADE)
+ target = models.ForeignKey(Instance, related_name='+', on_delete=models.CASCADE)
+ weight = models.FloatField(blank=True, null=True)
+
+ # Metadata
+ last_updated = models.DateTimeField(blank=False, null=False)