add graph w/ proper layout
parent 4a60a3f9b0
commit 3cf584cc96
@@ -57,10 +57,12 @@ class NodeSerializer(serializers.ModelSerializer):
     id = serializers.SerializerMethodField('get_name')
     label = serializers.SerializerMethodField('get_name')
     size = serializers.SerializerMethodField()
+    x = serializers.SerializerMethodField()
+    y = serializers.SerializerMethodField()
 
     class Meta:
         model = Instance
-        fields = ('id', 'label', 'size')
+        fields = ('id', 'label', 'size', 'x', 'y')
 
     def get_name(self, obj):
         return obj.name
@@ -68,6 +70,12 @@ class NodeSerializer(serializers.ModelSerializer):
     def get_size(self, obj):
         return obj.user_count or 1
 
+    def get_x(self, obj):
+        return obj.x_coord
+
+    def get_y(self, obj):
+        return obj.y_coord
+
     def to_representation(self, instance):
         """
         Object instance -> Dict of primitive datatypes.
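For illustration, a hypothetical node payload this serializer now produces for the SigmaJS graph (the field names come from the serializer above; the values are invented):

    # Hypothetical output of NodeSerializer for one instance.
    node = {
        "id": "mastodon.social",     # get_name() -> the instance's name
        "label": "mastodon.social",  # same value, used as the display label
        "size": 120,                 # user_count, falling back to 1
        "x": -3.7,                   # x_coord computed by the Gephi layout
        "y": 12.4,                   # y_coord computed by the Gephi layout
    }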
@@ -1,5 +1,5 @@
 from rest_framework import viewsets
-from scraper.models import Instance, PeerRelationship
+from scraper.models import Instance, Edge
 from apiv1.serializers import InstanceListSerializer, InstanceDetailSerializer, NodeSerializer, EdgeSerializer
 
 
@@ -24,7 +24,7 @@ class EdgeView(viewsets.ReadOnlyModelViewSet):
     """
     Endpoint to get a list of the graph's edges in a SigmaJS-friendly format.
     """
-    queryset = PeerRelationship.objects.filter(source__status='success', target__status='success')
+    queryset = Edge.objects.all()
     serializer_class = EdgeSerializer
 
 
Binary file not shown. Before: 3.8 KiB | After: 1.4 KiB
@@ -36,9 +36,7 @@ class GraphImpl extends React.Component {
                 onClickNode={(e) => this.props.selectAndLoadInstance(e.data.node.label)}
                 onClickStage={(e) => this.props.selectAndLoadInstance(null)}
             >
-                <RandomizeNodePositions />
                 <Filter neighborsOf={this.props.currentInstanceName} />
-                <RelativeSize initialSize={15} />
             </Sigma>
         )
     }
@@ -79,7 +79,7 @@ export class Nav extends React.Component<{}, INavState> {
                 </p>
                 <h4>How do you calculate the strength of relationships between instances?</h4>
                 <p className={Classes.RUNNING_TEXT}>
-                    fediverse.space scrapes the last 2000 statuses from within the last month on the public
+                    fediverse.space scrapes the last 5000 statuses from within the last month on the public
                     timeline of each instance. It looks at the ratio of
                     <code>mentions of an instance / total statuses</code>.
                     It uses a ratio rather than an absolute number of mentions to reflect that smaller instances
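For illustration, the ratio described above amounts to something like this (hypothetical numbers; the actual calculation lives in the build_graph command further down in this commit):

    # Hypothetical numbers for instance A's public timeline.
    statuses_seen = 5000      # statuses scraped from A
    mentions_of_b = 125       # of those, statuses mentioning users on instance B
    weight = mentions_of_b / statuses_seen   # 0.025 -> strength of the A-B edge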
@@ -19,7 +19,7 @@ interface ISidebarProps {
 class SidebarImpl extends React.Component<ISidebarProps> {
     public render() {
         return (
-            <Card className="fediverse-sidebar" elevation={Elevation.TWO}>
+            <Card className="fediverse-sidebar" elevation={Elevation.THREE}>
                 {this.renderSidebarContents()}
             </Card>
         )
@@ -2,7 +2,8 @@ html, body {
     margin: 0;
     padding: 50px 0 0 0;
     font-family: sans-serif;
-    background-color: #30404D;
+    /*background-color: #30404D;*/
+    background-color: #293742;
     height: 100%;
     font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Oxygen,Ubuntu,Cantarell,Open Sans,Helvetica Neue,Icons16,sans-serif;
 }
@@ -32,6 +32,8 @@ export interface IInstanceDetails {
 interface IGraphNode {
     id: string;
     label: string;
+    x: number;
+    y: number;
     size?: number;
     color?: string;
 }
@@ -2,7 +2,10 @@ package space.fediverse.graph;
 
 import org.gephi.graph.api.GraphController;
 import org.gephi.graph.api.GraphModel;
+import org.gephi.graph.api.Node;
+import org.gephi.graph.api.UndirectedGraph;
 import org.gephi.io.database.drivers.PostgreSQLDriver;
+import org.gephi.io.database.drivers.SQLUtils;
 import org.gephi.io.exporter.api.ExportController;
 import org.gephi.io.importer.api.Container;
 import org.gephi.io.importer.api.EdgeDirectionDefault;
@@ -20,6 +23,10 @@ import java.io.File;
 import java.io.IOException;
 import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Statement;
 import java.util.concurrent.TimeUnit;
 
 public class GraphBuilder {
@@ -34,9 +41,9 @@ public class GraphBuilder {
 
     private static final String edgeQuery = String.join(""
             , "SELECT"
-            , " scraper_instance_peers.from_instance_id AS source,"
-            , " scraper_instance_peers.to_instance_id AS target"
-            , " FROM scraper_instance_peers"
+            , " scraper_edge.source_id AS source,"
+            , " scraper_edge.target_id AS target"
+            , " FROM scraper_edge"
     );
 
 
@@ -55,7 +62,6 @@ public class GraphBuilder {
         // AttributeModel?
 
         // Import from database
-
         EdgeListDatabaseImpl db = new EdgeListDatabaseImpl();
         db.setSQLDriver(new PostgreSQLDriver());
         db.setHost("localhost");
@@ -77,18 +83,57 @@ public class GraphBuilder {
         importController.process(container, new DefaultProcessor(), workspace);
 
         // Layout
-        AutoLayout autoLayout = new AutoLayout(2, TimeUnit.MINUTES);
+        AutoLayout autoLayout = new AutoLayout(1, TimeUnit.MINUTES);
         autoLayout.setGraphModel(graphModel);
-        // YifanHuLayout firstLayout = new YifanHuLayout(null, new StepDisplacement(1f));
-        ForceAtlas2 secondLayout = new ForceAtlas2(null);
-        // AutoLayout.DynamicProperty adjustBySizeProperty = AutoLayout.createDynamicProperty("forceAtlas.adjustSizes.name", Boolean.TRUE, 0.1f);
-        // AutoLayout.DynamicProperty repulsionProperty = AutoLayout.createDynamicProperty("forceAtlas.repulsionStrength.name", 500., 0f);
-        // autoLayout.addLayout(firstLayout, 0.5f);
-        // autoLayout.addLayout(secondLayout, 0.5f, new AutoLayout.DynamicProperty[]{adjustBySizeProperty, repulsionProperty});
-        autoLayout.addLayout(secondLayout, 1f);
+        ForceAtlas2 forceAtlas2Layout = new ForceAtlas2(null);
+        forceAtlas2Layout.setLinLogMode(true);
+        autoLayout.addLayout(forceAtlas2Layout, 1f);
         autoLayout.execute();
 
-        // Export
+        // Update coordinates in database
+        // First, connect
+        String dbUrl = SQLUtils.getUrl(db.getSQLDriver(), db.getHost(), db.getPort(), db.getDBName());
+        Connection conn = null;
+        try {
+            conn = db.getSQLDriver().getConnection(dbUrl, db.getUsername(), db.getPasswd());
+        } catch (SQLException e) {
+            if (conn != null) {
+                try {
+                    conn.close();
+                } catch (Exception e2) {
+                    // Closing failed; ah well
+                }
+            }
+            throw new RuntimeException(e);
+        }
+        // Update
+        UndirectedGraph graph = graphModel.getUndirectedGraph();
+        for (Node node: graph.getNodes()) {
+            String id = node.getId().toString();
+            float x = node.x();
+            float y = node.y();
+
+            try {
+                PreparedStatement statement = conn.prepareStatement(
+                        "UPDATE scraper_instance SET x_coord=?, y_coord=? WHERE name=?");
+                statement.setFloat(1, x);
+                statement.setFloat(2, y);
+                statement.setString(3, id);
+                statement.executeUpdate();
+            } catch (SQLException e) {
+                throw new RuntimeException(e);
+            }
+        }
+        // Close connection
+        try {
+            conn.close();
+        } catch (SQLException e) {
+            // Closing failed; ah well
+        }
+
+
+        // Also export to gexf
         ExportController exportController = Lookup.getDefault().lookup(ExportController.class);
         try {
             exportController.exportFile(new File("fediverse.gexf"));
@@ -96,7 +141,7 @@ public class GraphBuilder {
             throw new RuntimeException(e);
         }
 
-        // Gephi doesn't seem to provide a good way to close the postgres connection, so we have to force close the
+        // Gephi doesn't seem to provide a good way to close its postgres connection, so we have to force close the
         // program. This'll leave a hanging connection for some period ¯\_(ツ)_/¯
         System.exit(0);
     }
@@ -23,4 +23,5 @@ requests==2.19.1
 six==1.10.0
 sqlparse==0.2.4
 tabulate==0.7.7
 tqdm==4.25.0
 urllib3==1.23
@@ -1,6 +1,7 @@
 import subprocess
 from django.core.management.base import BaseCommand
 from django.conf import settings
+from scraper.models import PeerRelationship, Edge
 
 
 class Command(BaseCommand):
@@ -10,10 +11,36 @@ class Command(BaseCommand):
         super().__init__(*args, **kwargs)
 
     def handle(self, *args, **options):
+        self.stdout.write("Creating Edges from PeerRelationships...")
+        # Turn asymmetrical PeerRelationships into symmetrical Edges
+        relationships = PeerRelationship.objects.filter(source__status='success', target__status='success')
+        # Loop over once and put 'em into a dict for fast access
+        relationships = {(r.source_id, r.target_id): r for r in relationships}
+
+        edges = []
+        while relationships:
+            (source_id, target_id), outgoing = relationships.popitem()
+            total_statuses = outgoing.statuses_seen or 0
+            mention_count = outgoing.mention_count or 0
+            incoming = relationships.pop((target_id, source_id), None)
+            oldest_data = outgoing.last_updated
+            if incoming:
+                total_statuses += (incoming.statuses_seen or 0)
+                mention_count += (incoming.mention_count or 0)
+                oldest_data = min(oldest_data, incoming.last_updated)
+            if mention_count == 0 or total_statuses == 0:
+                continue
+            ratio = float(mention_count)/total_statuses
+            edges.append(Edge(source_id=source_id, target_id=target_id, weight=ratio, last_updated=oldest_data))
+
+        Edge.objects.all().delete()
+        Edge.objects.bulk_create(edges)
+
+        self.stdout.write("Creating layout...")
         database_config = settings.DATABASES['default']
         subprocess.call([
             'java',
-            '-Xmx4g',
+            '-Xmx1g',
             '-jar',
             'gephi/build/libs/graphBuilder.jar',
             database_config['NAME'],
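For illustration, the loop above folds the two directed PeerRelationships between a pair of instances into a single symmetrical Edge roughly like this (hypothetical numbers):

    # Hypothetical counts for the pair (A, B).
    outgoing_mentions, outgoing_statuses = 3, 100    # from the A -> B relationship
    incoming_mentions, incoming_statuses = 2, 150    # from the B -> A relationship

    mention_count = outgoing_mentions + incoming_mentions     # 5
    total_statuses = outgoing_statuses + incoming_statuses    # 250
    weight = mention_count / total_statuses                   # 0.02, stored as Edge.weight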
@@ -33,6 +33,7 @@ SEED = 'mastodon.social'
 TIMEOUT = 20 # seconds
 NUM_THREADS = 64
 PERSONAL_INSTANCE_THRESHOLD = 5 # instances with <= this many users won't be scraped
+STATUS_SCRAPE_LIMIT = 5000
 
 
 class Command(BaseCommand):
@@ -69,7 +70,7 @@ class Command(BaseCommand):
     def get_statuses(instance_name: str):
         """Collect all statuses that mention users on other instances"""
         mentions = []
-        datetime_threshold = datetime.now(timezone.utc) - timedelta(months=1)
+        datetime_threshold = datetime.now(timezone.utc) - timedelta(days=31)
         statuses_seen = 0
         # We'll ask for 1000 statuses, but Mastodon never returns more than 40. Some Pleroma instances will ignore
         # the limit and return 20.
@@ -91,7 +92,7 @@ class Command(BaseCommand):
             earliest_time_seen = datetime_parser(earliest_status['created_at'])
             statuses_seen += len(statuses)
             # Mastodon returns max 40 statuses; if we ever see less than that we know there aren't any more
-            if earliest_time_seen < datetime_threshold or statuses_seen >= 2000:
+            if earliest_time_seen < datetime_threshold or statuses_seen >= STATUS_SCRAPE_LIMIT:
                 break
             # Continuing, so get url for next page
             min_id = earliest_status['id']
@@ -1,4 +1,4 @@
-# Generated by Django 2.1 on 2018-09-01 22:28
+# Generated by Django 2.1 on 2018-09-03 14:09
 
 from django.db import migrations, models
 import django.db.models.deletion
@@ -12,6 +12,14 @@ class Migration(migrations.Migration):
     ]
 
     operations = [
+        migrations.CreateModel(
+            name='Edge',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('weight', models.FloatField(blank=True, null=True)),
+                ('last_updated', models.DateTimeField()),
+            ],
+        ),
         migrations.CreateModel(
             name='Instance',
             fields=[
@@ -22,6 +30,8 @@ class Migration(migrations.Migration):
                 ('user_count', models.IntegerField(blank=True, null=True)),
                 ('version', models.CharField(blank=True, max_length=1000)),
                 ('status', models.CharField(max_length=100)),
+                ('x_coord', models.FloatField(blank=True, null=True)),
+                ('y_coord', models.FloatField(blank=True, null=True)),
                 ('first_seen', models.DateTimeField(auto_now_add=True)),
                 ('last_updated', models.DateTimeField(auto_now=True)),
             ],
@@ -43,4 +53,14 @@ class Migration(migrations.Migration):
             name='peers',
             field=models.ManyToManyField(through='scraper.PeerRelationship', to='scraper.Instance'),
         ),
+        migrations.AddField(
+            model_name='edge',
+            name='source',
+            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='+', to='scraper.Instance'),
+        ),
+        migrations.AddField(
+            model_name='edge',
+            name='target',
+            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='+', to='scraper.Instance'),
+        ),
     ]
@@ -22,6 +22,10 @@ class Instance(models.Model):
     # Foreign keys
     peers = models.ManyToManyField('self', symmetrical=False, through='PeerRelationship')
 
+    # Graph
+    x_coord = models.FloatField(blank=True, null=True)
+    y_coord = models.FloatField(blank=True, null=True)
+
     # Automatic fields
     first_seen = models.DateTimeField(auto_now_add=True)
     last_updated = models.DateTimeField(auto_now=True)
@@ -38,3 +42,17 @@ class PeerRelationship(models.Model):
     # Metadata
     first_seen = models.DateTimeField(auto_now_add=True)
     last_updated = models.DateTimeField(auto_now=True)
+
+
+class Edge(models.Model):
+    """
+    This class is automatically generated from PeerRelationship using the build_graph command.
+    It aggregates stats from the asymmetrical PeerRelationship into a symmetrical one that's suitable for serving
+    to the front-end.
+    """
+    source = models.ForeignKey(Instance, related_name='+', on_delete=models.CASCADE)
+    target = models.ForeignKey(Instance, related_name='+', on_delete=models.CASCADE)
+    weight = models.FloatField(blank=True, null=True)
+
+    # Metadata
+    last_updated = models.DateTimeField(blank=False, null=False)
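As a usage note, the docstring above refers to the build_graph command; assuming that is the registered command name, the Edge table and the layout coordinates can be rebuilt through Django's management-command machinery:

    # Assumes the management command in this commit is registered as "build_graph".
    from django.core.management import call_command
    call_command("build_graph")   # recreates Edges, then runs the Gephi layout jar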