dockerize gephi

This commit is contained in:
Tao Bror Bojlén 2019-02-21 12:32:50 +00:00
parent 1c1f193542
commit 795ad67e8b
No known key found for this signature in database
GPG key ID: C6EC7AAB905F9E6F
17 changed files with 54 additions and 28 deletions

View file

@ -16,7 +16,8 @@ The map of the fediverse that you always wanted.
### Backend ### Backend
- `cp example.env .env` and modify environment variables as required - `cp example.env .env` and modify environment variables as required
- `docker-compose build` - `docker-compose build`
- `docker-compose up -d` - `docker-compose up -d django`
- if you don't specify `django`, it'll also start `gephi`, which is not a long-running service and should only be run as a scheduled one-off job (e.g. from cron)
### Frontend ### Frontend
- `cd frontend && yarn install` - `cd frontend && yarn install`
- `yarn start` - `yarn start`
@ -27,7 +28,8 @@ The map of the fediverse that you always wanted.
After running the backend in Docker: After running the backend in Docker:
- `docker-compose exec web python manage.py scrape` scrapes the entire fediverse - `docker-compose exec web python manage.py scrape` scrapes the entire fediverse
- `docker-compose exec web python manage.py build_graph` uses this information to lay out a graph - `docker-compose exec web python manage.py build_edges` aggregates this information into edges with weights
- `docker-compose run gephi java -Xmx1g -jar build/libs/graphBuilder.jar` lays out the graph
To run in production, use `docker-compose -f docker-compose.yml -f docker-compose.production.yml` instead of just `docker-compose`. To run in production, use `docker-compose -f docker-compose.yml -f docker-compose.production.yml` instead of just `docker-compose`.

View file

@ -20,30 +20,19 @@ class Command(BaseCommand):
edges = [] edges = []
while relationships: while relationships:
(source_id, target_id), outgoing = relationships.popitem() (source_id, target_id), outgoing = relationships.popitem()
total_statuses = outgoing.statuses_seen or 0 total_statuses = outgoing.statuses_seen
mention_count = outgoing.mention_count or 0 mention_count = outgoing.mention_count
incoming = relationships.pop((target_id, source_id), None) incoming = relationships.pop((target_id, source_id), None)
oldest_data = outgoing.last_updated oldest_data = outgoing.last_updated
if incoming: if incoming:
total_statuses += (incoming.statuses_seen or 0) total_statuses += (incoming.statuses_seen)
mention_count += (incoming.mention_count or 0) mention_count += (incoming.mention_count)
oldest_data = min(oldest_data, incoming.last_updated) oldest_data = min(oldest_data, incoming.last_updated)
if mention_count == 0 or total_statuses == 0: if mention_count == 0 or total_statuses == 0:
# don't add edges with weight 0
continue continue
ratio = float(mention_count)/total_statuses ratio = float(mention_count)/total_statuses
edges.append(Edge(source_id=source_id, target_id=target_id, weight=ratio, last_updated=oldest_data)) edges.append(Edge(source_id=source_id, target_id=target_id, weight=ratio, last_updated=oldest_data))
Edge.objects.all().delete() Edge.objects.all().delete()
Edge.objects.bulk_create(edges) Edge.objects.bulk_create(edges)
self.stdout.write("Creating layout...")
database_config = settings.DATABASES['default']
subprocess.call([
'java',
'-Xmx1g',
'-jar',
'gephi/build/libs/graphBuilder.jar',
database_config['NAME'],
database_config['USER'],
database_config['PASSWORD'],
])

View file

@ -1,4 +1,4 @@
# Generated by Django 2.1.7 on 2019-02-21 10:37 # Generated by Django 2.1.7 on 2019-02-21 12:27
from django.db import migrations, models from django.db import migrations, models
import django.db.models.deletion import django.db.models.deletion
@ -41,8 +41,8 @@ class Migration(migrations.Migration):
name='PeerRelationship', name='PeerRelationship',
fields=[ fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('mention_count', models.IntegerField(blank=True, null=True)), ('mention_count', models.IntegerField(default=0)),
('statuses_seen', models.IntegerField(blank=True, null=True)), ('statuses_seen', models.IntegerField(default=0)),
('first_seen', models.DateTimeField(auto_now_add=True)), ('first_seen', models.DateTimeField(auto_now_add=True)),
('last_updated', models.DateTimeField(default=django.utils.timezone.now)), ('last_updated', models.DateTimeField(default=django.utils.timezone.now)),
('source', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='following_relationship', to='scraper.Instance')), ('source', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='following_relationship', to='scraper.Instance')),

View file

@ -37,8 +37,8 @@ class PeerRelationship(models.Model):
target = models.ForeignKey(Instance, related_name="follower_relationships", on_delete=models.CASCADE) target = models.ForeignKey(Instance, related_name="follower_relationships", on_delete=models.CASCADE)
# Interaction stats # Interaction stats
mention_count = models.IntegerField(blank=True, null=True) mention_count = models.IntegerField(default=0)
statuses_seen = models.IntegerField(blank=True, null=True) # in case we want mention_count as a ratio statuses_seen = models.IntegerField(default=0) # because we want mention_count as a ratio
# Metadata # Metadata
first_seen = models.DateTimeField(auto_now_add=True) first_seen = models.DateTimeField(auto_now_add=True)
@ -47,7 +47,7 @@ class PeerRelationship(models.Model):
class Edge(models.Model): class Edge(models.Model):
""" """
This class is automatically generated from PeerRelationship using the build_graph command. This class is automatically generated from PeerRelationship using the build_edges command.
It aggregates stats from the asymmetrical PeerRelationship to a symmetrical one that's suitable for serving It aggregates stats from the asymmetrical PeerRelationship to a symmetrical one that's suitable for serving
to the front-end. to the front-end.
""" """

View file

@ -28,5 +28,18 @@ services:
- "8000:8000" - "8000:8000"
depends_on: depends_on:
- db - db
# This is for running the occasional graph layout task. It's in docker-compose.yml so that it's built at the same time
# as everything else, but it should be run regularly with a cron job or similar.
gephi:
environment:
- POSTGRES_USER
- POSTGRES_PASSWORD
- POSTGRES_DB
build: ./gephi
volumes:
- gradle-cache:/code/.gradle
depends_on:
- db
volumes: volumes:
pgdata: pgdata:
gradle-cache:

13
gephi/Dockerfile Normal file
View file

@ -0,0 +1,13 @@
FROM openjdk:11.0.2-jdk-slim
RUN mkdir /code
WORKDIR /code
COPY build.gradle gradlew /code/
COPY gradle /code/gradle
COPY lib /code/lib
RUN ./gradlew tasks
COPY src /code/src
RUN ./gradlew shadowJar

View file

@ -61,14 +61,23 @@ public class GraphBuilder {
GraphModel graphModel = Lookup.getDefault().lookup(GraphController.class).getGraphModel(); GraphModel graphModel = Lookup.getDefault().lookup(GraphController.class).getGraphModel();
// AttributeModel? // AttributeModel?
// Get config variables
String postgresDb = System.getenv("POSTGRES_DB");
String postgresUser = System.getenv("POSTGRES_USER");
String postgresPassword = System.getenv("POSTGRES_PASSWORD");
if (postgresDb == null || postgresUser == null || postgresPassword == null) {
throw new RuntimeException(String.format("Incomplete config, canceling. DB: %s, user: %s, pass: %s",
postgresDb, postgresUser, postgresPassword));
}
// Import from database // Import from database
EdgeListDatabaseImpl db = new EdgeListDatabaseImpl(); EdgeListDatabaseImpl db = new EdgeListDatabaseImpl();
db.setSQLDriver(new PostgreSQLDriver()); db.setSQLDriver(new PostgreSQLDriver());
db.setHost("localhost"); db.setHost("db");
db.setPort(5432); db.setPort(5432);
db.setDBName(args[0]); db.setDBName(postgresDb);
db.setUsername(args[1]); db.setUsername(postgresUser);
db.setPasswd(args[2]); db.setPasswd(postgresPassword);
db.setNodeQuery(nodeQuery); db.setNodeQuery(nodeQuery);
db.setEdgeQuery(edgeQuery); db.setEdgeQuery(edgeQuery);