diff --git a/README.md b/README.md index f90d8a5..7a01da6 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,8 @@ The map of the fediverse that you always wanted. ### Backend - `cp example.env .env` and modify environment variables as required - `docker-compose build` -- `docker-compose up -d` +- `docker-compose up -d django` + - if you don't specify `django`, it'll also start `gephi`, which should only be run as a one-off job on a regular schedule ### Frontend - `cd frontend && yarn install` - `yarn start` @@ -27,7 +28,8 @@ The map of the fediverse that you always wanted. After running the backend in Docker: - `docker-compose exec web python manage.py scrape` scrapes the entire fediverse -- `docker-compose exec web python manage.py build_graph` uses this information to lay out a graph +- `docker-compose exec web python manage.py build_edges` aggregates this information into edges with weights +- `docker-compose run gephi java -Xmx1g -jar build/libs/graphBuilder.jar` lays out the graph To run in production, use `docker-compose -f docker-compose.yml -f docker-compose.production.yml` instead of just `docker-compose`. 
diff --git a/backend/scraper/management/commands/build_graph.py b/backend/scraper/management/commands/build_edges.py similarity index 70% rename from backend/scraper/management/commands/build_graph.py rename to backend/scraper/management/commands/build_edges.py index f863665..0b311eb 100644 --- a/backend/scraper/management/commands/build_graph.py +++ b/backend/scraper/management/commands/build_edges.py @@ -20,30 +20,19 @@ class Command(BaseCommand): edges = [] while relationships: (source_id, target_id), outgoing = relationships.popitem() - total_statuses = outgoing.statuses_seen or 0 - mention_count = outgoing.mention_count or 0 + total_statuses = outgoing.statuses_seen + mention_count = outgoing.mention_count incoming = relationships.pop((target_id, source_id), None) oldest_data = outgoing.last_updated if incoming: - total_statuses += (incoming.statuses_seen or 0) - mention_count += (incoming.mention_count or 0) + total_statuses += (incoming.statuses_seen) + mention_count += (incoming.mention_count) oldest_data = min(oldest_data, incoming.last_updated) if mention_count == 0 or total_statuses == 0: + # don't add edges with weight 0 continue ratio = float(mention_count)/total_statuses edges.append(Edge(source_id=source_id, target_id=target_id, weight=ratio, last_updated=oldest_data)) Edge.objects.all().delete() Edge.objects.bulk_create(edges) - - self.stdout.write("Creating layout...") - database_config = settings.DATABASES['default'] - subprocess.call([ - 'java', - '-Xmx1g', - '-jar', - 'gephi/build/libs/graphBuilder.jar', - database_config['NAME'], - database_config['USER'], - database_config['PASSWORD'], - ]) diff --git a/backend/scraper/migrations/0001_initial.py b/backend/scraper/migrations/0001_initial.py index 88746e7..3733cb5 100644 --- a/backend/scraper/migrations/0001_initial.py +++ b/backend/scraper/migrations/0001_initial.py @@ -1,4 +1,4 @@ -# Generated by Django 2.1.7 on 2019-02-21 10:37 +# Generated by Django 2.1.7 on 2019-02-21 12:27 from django.db 
import migrations, models import django.db.models.deletion @@ -41,8 +41,8 @@ class Migration(migrations.Migration): name='PeerRelationship', fields=[ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('mention_count', models.IntegerField(blank=True, null=True)), - ('statuses_seen', models.IntegerField(blank=True, null=True)), + ('mention_count', models.IntegerField(default=0)), + ('statuses_seen', models.IntegerField(default=0)), ('first_seen', models.DateTimeField(auto_now_add=True)), ('last_updated', models.DateTimeField(default=django.utils.timezone.now)), ('source', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='following_relationship', to='scraper.Instance')), diff --git a/backend/scraper/models.py b/backend/scraper/models.py index 9a8f57e..aeb4757 100644 --- a/backend/scraper/models.py +++ b/backend/scraper/models.py @@ -37,8 +37,8 @@ class PeerRelationship(models.Model): target = models.ForeignKey(Instance, related_name="follower_relationships", on_delete=models.CASCADE) # Interaction stats - mention_count = models.IntegerField(blank=True, null=True) - statuses_seen = models.IntegerField(blank=True, null=True) # in case we want mention_count as a ratio + mention_count = models.IntegerField(default=0) + statuses_seen = models.IntegerField(default=0) # because we want mention_count as a ratio # Metadata first_seen = models.DateTimeField(auto_now_add=True) @@ -47,7 +47,7 @@ class PeerRelationship(models.Model): class Edge(models.Model): """ - This class is automatically generated from PeerRelationship using the build_graph command. + This class is automatically generated from PeerRelationship using the build_edges command. It aggregates stats from the asymmetrical PeerRelationship to a symmetrical one that's suitable for serving to the front-end. 
""" diff --git a/docker-compose.yml b/docker-compose.yml index beddb8c..a1ebb7e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -28,5 +28,18 @@ services: - "8000:8000" depends_on: - db + # This is for running the occasional graph layout task. It's in docker-compose.yml so that it's built at the same time + # as everything else, but it should be run regularly with a cron job or similar. + gephi: + environment: + - POSTGRES_USER + - POSTGRES_PASSWORD + - POSTGRES_DB + build: ./gephi + volumes: + - gradle-cache:/code/.gradle + depends_on: + - db volumes: pgdata: + gradle-cache: diff --git a/backend/gephi/.gitignore b/gephi/.gitignore similarity index 100% rename from backend/gephi/.gitignore rename to gephi/.gitignore diff --git a/gephi/Dockerfile b/gephi/Dockerfile new file mode 100644 index 0000000..84e470a --- /dev/null +++ b/gephi/Dockerfile @@ -0,0 +1,13 @@ +FROM openjdk:11.0.2-jdk-slim + +RUN mkdir /code +WORKDIR /code + +COPY build.gradle gradlew /code/ +COPY gradle /code/gradle +COPY lib /code/lib +RUN ./gradlew tasks + +COPY src /code/src + +RUN ./gradlew shadowJar diff --git a/backend/gephi/README.md b/gephi/README.md similarity index 100% rename from backend/gephi/README.md rename to gephi/README.md diff --git a/backend/gephi/build.gradle b/gephi/build.gradle similarity index 100% rename from backend/gephi/build.gradle rename to gephi/build.gradle diff --git a/backend/gephi/gradle/wrapper/gradle-wrapper.jar b/gephi/gradle/wrapper/gradle-wrapper.jar similarity index 100% rename from backend/gephi/gradle/wrapper/gradle-wrapper.jar rename to gephi/gradle/wrapper/gradle-wrapper.jar diff --git a/backend/gephi/gradle/wrapper/gradle-wrapper.properties b/gephi/gradle/wrapper/gradle-wrapper.properties similarity index 100% rename from backend/gephi/gradle/wrapper/gradle-wrapper.properties rename to gephi/gradle/wrapper/gradle-wrapper.properties diff --git a/backend/gephi/gradlew b/gephi/gradlew similarity index 100% rename from backend/gephi/gradlew 
rename to gephi/gradlew diff --git a/backend/gephi/gradlew.bat b/gephi/gradlew.bat similarity index 100% rename from backend/gephi/gradlew.bat rename to gephi/gradlew.bat diff --git a/backend/gephi/lib/.gitkeep b/gephi/lib/.gitkeep similarity index 100% rename from backend/gephi/lib/.gitkeep rename to gephi/lib/.gitkeep diff --git a/backend/gephi/lib/gephi-toolkit-0.9.2.jar b/gephi/lib/gephi-toolkit-0.9.2.jar similarity index 100% rename from backend/gephi/lib/gephi-toolkit-0.9.2.jar rename to gephi/lib/gephi-toolkit-0.9.2.jar diff --git a/backend/gephi/settings.gradle b/gephi/settings.gradle similarity index 100% rename from backend/gephi/settings.gradle rename to gephi/settings.gradle diff --git a/backend/gephi/src/main/java/space/fediverse/graph/GraphBuilder.java b/gephi/src/main/java/space/fediverse/graph/GraphBuilder.java similarity index 89% rename from backend/gephi/src/main/java/space/fediverse/graph/GraphBuilder.java rename to gephi/src/main/java/space/fediverse/graph/GraphBuilder.java index 79550e8..964617a 100644 --- a/backend/gephi/src/main/java/space/fediverse/graph/GraphBuilder.java +++ b/gephi/src/main/java/space/fediverse/graph/GraphBuilder.java @@ -61,14 +61,23 @@ public class GraphBuilder { GraphModel graphModel = Lookup.getDefault().lookup(GraphController.class).getGraphModel(); // AttributeModel? + // Get config variables + String postgresDb = System.getenv("POSTGRES_DB"); + String postgresUser = System.getenv("POSTGRES_USER"); + String postgresPassword = System.getenv("POSTGRES_PASSWORD"); + if (postgresDb == null || postgresUser == null || postgresPassword == null) { + throw new RuntimeException(String.format("Incomplete config, canceling. 
DB: %s, user: %s, pass: %s", + postgresDb, postgresUser, postgresPassword)); + } + // Import from database EdgeListDatabaseImpl db = new EdgeListDatabaseImpl(); db.setSQLDriver(new PostgreSQLDriver()); - db.setHost("localhost"); + db.setHost("db"); db.setPort(5432); - db.setDBName(args[0]); - db.setUsername(args[1]); - db.setPasswd(args[2]); + db.setDBName(postgresDb); + db.setUsername(postgresUser); + db.setPasswd(postgresPassword); db.setNodeQuery(nodeQuery); db.setEdgeQuery(edgeQuery);