add graph w/ proper layout

This commit is contained in:
Tao Bojlen 2018-09-03 16:10:44 +02:00
parent 4a60a3f9b0
commit 3cf584cc96
14 changed files with 146 additions and 25 deletions

View file

@ -57,10 +57,12 @@ class NodeSerializer(serializers.ModelSerializer):
id = serializers.SerializerMethodField('get_name')
label = serializers.SerializerMethodField('get_name')
size = serializers.SerializerMethodField()
x = serializers.SerializerMethodField()
y = serializers.SerializerMethodField()
class Meta:
model = Instance
fields = ('id', 'label', 'size')
fields = ('id', 'label', 'size', 'x', 'y')
def get_name(self, obj):
return obj.name
@ -68,6 +70,12 @@ class NodeSerializer(serializers.ModelSerializer):
def get_size(self, obj):
return obj.user_count or 1
def get_x(self, obj):
"""Return the instance's stored ``x_coord`` as the node's ``x`` value (None if no coordinate is stored)."""
return obj.x_coord
def get_y(self, obj):
"""Return the instance's stored ``y_coord`` as the node's ``y`` value (None if no coordinate is stored)."""
return obj.y_coord
def to_representation(self, instance):
"""
Object instance -> Dict of primitive datatypes.

View file

@ -1,5 +1,5 @@
from rest_framework import viewsets
from scraper.models import Instance, PeerRelationship
from scraper.models import Instance, Edge
from apiv1.serializers import InstanceListSerializer, InstanceDetailSerializer, NodeSerializer, EdgeSerializer
@ -24,7 +24,7 @@ class EdgeView(viewsets.ReadOnlyModelViewSet):
"""
Endpoint to get a list of the graph's edges in a SigmaJS-friendly format.
"""
queryset = PeerRelationship.objects.filter(source__status='success', target__status='success')
queryset = Edge.objects.all()
serializer_class = EdgeSerializer

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.8 KiB

After

Width:  |  Height:  |  Size: 1.4 KiB

View file

@ -36,9 +36,7 @@ class GraphImpl extends React.Component {
onClickNode={(e) => this.props.selectAndLoadInstance(e.data.node.label)}
onClickStage={(e) => this.props.selectAndLoadInstance(null)}
>
<RandomizeNodePositions />
<Filter neighborsOf={this.props.currentInstanceName} />
<RelativeSize initialSize={15} />
</Sigma>
)
}

View file

@ -79,7 +79,7 @@ export class Nav extends React.Component<{}, INavState> {
</p>
<h4>How do you calculate the strength of relationships between instances?</h4>
<p className={Classes.RUNNING_TEXT}>
fediverse.space scrapes the last 2000 statuses from within the last month on the public
fediverse.space scrapes the last 5000 statuses from within the last month on the public
timeline of each instance. It looks at the ratio of
<code>mentions of an instance / total statuses</code>.
It uses a ratio rather than an absolute number of mentions to reflect that smaller instances

View file

@ -19,7 +19,7 @@ interface ISidebarProps {
class SidebarImpl extends React.Component<ISidebarProps> {
public render() {
return (
<Card className="fediverse-sidebar" elevation={Elevation.TWO}>
<Card className="fediverse-sidebar" elevation={Elevation.THREE}>
{this.renderSidebarContents()}
</Card>
)

View file

@ -2,7 +2,8 @@ html, body {
margin: 0;
padding: 50px 0 0 0;
font-family: sans-serif;
background-color: #30404D;
/*background-color: #30404D;*/
background-color: #293742;
height: 100%;
font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Oxygen,Ubuntu,Cantarell,Open Sans,Helvetica Neue,Icons16,sans-serif;
}

View file

@ -32,6 +32,8 @@ export interface IInstanceDetails {
interface IGraphNode {
id: string;
label: string;
x: number;
y: number;
size?: number;
color?: string;
}

View file

@ -2,7 +2,10 @@ package space.fediverse.graph;
import org.gephi.graph.api.GraphController;
import org.gephi.graph.api.GraphModel;
import org.gephi.graph.api.Node;
import org.gephi.graph.api.UndirectedGraph;
import org.gephi.io.database.drivers.PostgreSQLDriver;
import org.gephi.io.database.drivers.SQLUtils;
import org.gephi.io.exporter.api.ExportController;
import org.gephi.io.importer.api.Container;
import org.gephi.io.importer.api.EdgeDirectionDefault;
@ -20,6 +23,10 @@ import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.concurrent.TimeUnit;
public class GraphBuilder {
@ -34,9 +41,9 @@ public class GraphBuilder {
private static final String edgeQuery = String.join(""
, "SELECT"
, " scraper_instance_peers.from_instance_id AS source,"
, " scraper_instance_peers.to_instance_id AS target"
, " FROM scraper_instance_peers"
, " scraper_edge.source_id AS source,"
, " scraper_edge.target_id AS target"
, " FROM scraper_edge"
);
@ -55,7 +62,6 @@ public class GraphBuilder {
// AttributeModel?
// Import from database
EdgeListDatabaseImpl db = new EdgeListDatabaseImpl();
db.setSQLDriver(new PostgreSQLDriver());
db.setHost("localhost");
@ -77,18 +83,57 @@ public class GraphBuilder {
importController.process(container, new DefaultProcessor(), workspace);
// Layout
AutoLayout autoLayout = new AutoLayout(2, TimeUnit.MINUTES);
AutoLayout autoLayout = new AutoLayout(1, TimeUnit.MINUTES);
autoLayout.setGraphModel(graphModel);
// YifanHuLayout firstLayout = new YifanHuLayout(null, new StepDisplacement(1f));
ForceAtlas2 secondLayout = new ForceAtlas2(null);
// AutoLayout.DynamicProperty adjustBySizeProperty = AutoLayout.createDynamicProperty("forceAtlas.adjustSizes.name", Boolean.TRUE, 0.1f);
// AutoLayout.DynamicProperty repulsionProperty = AutoLayout.createDynamicProperty("forceAtlas.repulsionStrength.name", 500., 0f);
// autoLayout.addLayout(firstLayout, 0.5f);
// autoLayout.addLayout(secondLayout, 0.5f, new AutoLayout.DynamicProperty[]{adjustBySizeProperty, repulsionProperty});
autoLayout.addLayout(secondLayout, 1f);
ForceAtlas2 forceAtlas2Layout = new ForceAtlas2(null);
forceAtlas2Layout.setLinLogMode(true);
autoLayout.addLayout(forceAtlas2Layout, 1f);
autoLayout.execute();
// Export
// Update coordinates in database
UndirectedGraph graph = graphModel.getUndirectedGraph();
// First, connect, using the same driver/host/credentials that were configured on the importer
String dbUrl = SQLUtils.getUrl(db.getSQLDriver(), db.getHost(), db.getPort(), db.getDBName());
Connection conn;
try {
    conn = db.getSQLDriver().getConnection(dbUrl, db.getUsername(), db.getPasswd());
} catch (SQLException e) {
    // Nothing to clean up: if getConnection threw, no connection was ever assigned
    throw new RuntimeException(e);
}
try {
    // Prepare the statement once and reuse it for every node; try-with-resources closes it afterwards
    try (PreparedStatement statement = conn.prepareStatement(
            "UPDATE scraper_instance SET x_coord=?, y_coord=? WHERE name=?")) {
        for (Node node : graph.getNodes()) {
            statement.setFloat(1, node.x());
            statement.setFloat(2, node.y());
            // The node id is matched against scraper_instance.name
            statement.setString(3, node.getId().toString());
            statement.executeUpdate();
        }
    } catch (SQLException e) {
        throw new RuntimeException(e);
    }
} finally {
    // Close connection, even when an update failed part-way through
    try {
        conn.close();
    } catch (SQLException ignored) {
        // Closing failed; ah well
    }
}
// Also export to gexf
ExportController exportController = Lookup.getDefault().lookup(ExportController.class);
try {
exportController.exportFile(new File("fediverse.gexf"));
@ -96,7 +141,7 @@ public class GraphBuilder {
throw new RuntimeException(e);
}
// Gephi doesn't seem to provide a good way to close the postgres connection, so we have to force close the
// Gephi doesn't seem to provide a good way to close its postgres connection, so we have to force close the
// program. This'll leave a hanging connection for some period ¯\_(ツ)_/¯
System.exit(0);
}

View file

@ -23,4 +23,5 @@ requests==2.19.1
six==1.10.0
sqlparse==0.2.4
tabulate==0.7.7
tqdm==4.25.0
urllib3==1.23

View file

@ -1,6 +1,7 @@
import subprocess
from django.core.management.base import BaseCommand
from django.conf import settings
from scraper.models import PeerRelationship, Edge
class Command(BaseCommand):
@ -10,10 +11,36 @@ class Command(BaseCommand):
super().__init__(*args, **kwargs)
def handle(self, *args, **options):
self.stdout.write("Creating Edges from PeerRelationships...")
# Turn symmetrical PeerRelationships into symmetrical Edges
relationships = PeerRelationship.objects.filter(source__status='success', target__status='success')
# Loop over once and put 'em into a dict for fast access
relationships = {(r.source_id, r.target_id): r for r in relationships}
edges = []
while relationships:
(source_id, target_id), outgoing = relationships.popitem()
total_statuses = outgoing.statuses_seen or 0
mention_count = outgoing.mention_count or 0
incoming = relationships.pop((target_id, source_id), None)
oldest_data = outgoing.last_updated
if incoming:
total_statuses += (incoming.statuses_seen or 0)
mention_count += (incoming.mention_count or 0)
oldest_data = min(oldest_data, incoming.last_updated)
if mention_count == 0 or total_statuses == 0:
continue
ratio = float(mention_count)/total_statuses
edges.append(Edge(source_id=source_id, target_id=target_id, weight=ratio, last_updated=oldest_data))
Edge.objects.all().delete()
Edge.objects.bulk_create(edges)
self.stdout.write("Creating layout...")
database_config = settings.DATABASES['default']
subprocess.call([
'java',
'-Xmx4g',
'-Xmx1g',
'-jar',
'gephi/build/libs/graphBuilder.jar',
database_config['NAME'],

View file

@ -33,6 +33,7 @@ SEED = 'mastodon.social'
TIMEOUT = 20 # seconds
NUM_THREADS = 64
PERSONAL_INSTANCE_THRESHOLD = 5 # instances with <= this many users won't be scraped
STATUS_SCRAPE_LIMIT = 5000
class Command(BaseCommand):
@ -69,7 +70,7 @@ class Command(BaseCommand):
def get_statuses(instance_name: str):
"""Collect all statuses that mention users on other instances"""
mentions = []
datetime_threshold = datetime.now(timezone.utc) - timedelta(months=1)
datetime_threshold = datetime.now(timezone.utc) - timedelta(days=31)
statuses_seen = 0
# We'll ask for 1000 statuses, but Mastodon never returns more than 40. Some Pleroma instances will ignore
# the limit and return 20.
@ -91,7 +92,7 @@ class Command(BaseCommand):
earliest_time_seen = datetime_parser(earliest_status['created_at'])
statuses_seen += len(statuses)
# Mastodon returns max 40 statuses; if we ever see less than that we know there aren't any more
if earliest_time_seen < datetime_threshold or statuses_seen >= 2000:
if earliest_time_seen < datetime_threshold or statuses_seen >= STATUS_SCRAPE_LIMIT:
break
# Continuing, so get url for next page
min_id = earliest_status['id']

View file

@ -1,4 +1,4 @@
# Generated by Django 2.1 on 2018-09-01 22:28
# Generated by Django 2.1 on 2018-09-03 14:09
from django.db import migrations, models
import django.db.models.deletion
@ -12,6 +12,14 @@ class Migration(migrations.Migration):
]
operations = [
migrations.CreateModel(
name='Edge',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('weight', models.FloatField(blank=True, null=True)),
('last_updated', models.DateTimeField()),
],
),
migrations.CreateModel(
name='Instance',
fields=[
@ -22,6 +30,8 @@ class Migration(migrations.Migration):
('user_count', models.IntegerField(blank=True, null=True)),
('version', models.CharField(blank=True, max_length=1000)),
('status', models.CharField(max_length=100)),
('x_coord', models.FloatField(blank=True, null=True)),
('y_coord', models.FloatField(blank=True, null=True)),
('first_seen', models.DateTimeField(auto_now_add=True)),
('last_updated', models.DateTimeField(auto_now=True)),
],
@ -43,4 +53,14 @@ class Migration(migrations.Migration):
name='peers',
field=models.ManyToManyField(through='scraper.PeerRelationship', to='scraper.Instance'),
),
migrations.AddField(
model_name='edge',
name='source',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='+', to='scraper.Instance'),
),
migrations.AddField(
model_name='edge',
name='target',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='+', to='scraper.Instance'),
),
]

View file

@ -22,6 +22,10 @@ class Instance(models.Model):
# Foreign keys
peers = models.ManyToManyField('self', symmetrical=False, through='PeerRelationship')
# Graph
x_coord = models.FloatField(blank=True, null=True)
y_coord = models.FloatField(blank=True, null=True)
# Automatic fields
first_seen = models.DateTimeField(auto_now_add=True)
last_updated = models.DateTimeField(auto_now=True)
@ -38,3 +42,17 @@ class PeerRelationship(models.Model):
# Metadata
first_seen = models.DateTimeField(auto_now_add=True)
last_updated = models.DateTimeField(auto_now=True)
class Edge(models.Model):
"""
Rows of this model are automatically generated from PeerRelationship using the build_graph command.
It aggregates stats from the asymmetrical PeerRelationship to a symmetrical one that's suitable for serving
to the front-end.
"""
# The two endpoints of the edge. related_name='+' asks Django not to create a reverse accessor on Instance.
source = models.ForeignKey(Instance, related_name='+', on_delete=models.CASCADE)
target = models.ForeignKey(Instance, related_name='+', on_delete=models.CASCADE)
# Ratio of mentions to statuses seen, summed over both directions of the relationship by build_graph.
weight = models.FloatField(blank=True, null=True)
# Metadata
# Set explicitly by build_graph (no auto_now): the older last_updated of the PeerRelationships merged into this edge.
last_updated = models.DateTimeField(blank=False, null=False)