add graph (#1)

* add initial graph

* add graph
This commit is contained in:
Tao Bror Bojlén 2018-09-01 15:32:04 +02:00 committed by GitHub
parent 3cbc4dd1b6
commit 93932c5196
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
29 changed files with 815 additions and 109 deletions

1
.gitignore vendored
View File

@ -2,6 +2,7 @@
.idea/
config.json
backend/static/
*.gexf
# Byte-compiled / optimized / DLL files
__pycache__/

View File

@ -2,7 +2,11 @@
fediverse.space is a tool to explore instances in the fediverse.
## Running it
* `cp config.json.template config.json` and enter your configuration details.
* `cp config.json.template config.json` and enter your configuration details. I've used a postgres database for development.
* Set the environment variable `FEDIVERSE_CONFIG` to point to the path of this file.
* `pip install -r requirements.txt`
* `yarn install`
* Make sure you have the Java 8 JRE (to run) or JDK (to develop) installed, and gradle
* For development, run `python manage.py runserver --settings=backend.settings.dev`
* In production, set the environment variable `DJANGO_SETTINGS_MODULE=backend.settings.production`

8
apiv1/_util.py Normal file
View File

@ -0,0 +1,8 @@
def to_representation(self, instance):
    """Serialize *instance* to a dict of primitive datatypes, dropping empty fields.

    Fields whose value is falsy are filtered out so they never appear in
    the serialized JSON.

    NOTE(review): this hard-codes InstanceListSerializer in the super()
    call — confirm this helper is only ever used from that class.
    """
    representation = super(InstanceListSerializer, self).to_representation(instance)
    return OrderedDict(
        (field, value) for field, value in representation.items() if value
    )

View File

@ -1,6 +1,6 @@
from rest_framework import serializers
from collections import OrderedDict
from scraper.models import Instance
from scraper.models import Instance, PeerRelationship
class InstanceListSerializer(serializers.ModelSerializer):
@ -11,6 +11,7 @@ class InstanceListSerializer(serializers.ModelSerializer):
def to_representation(self, instance):
"""
Object instance -> Dict of primitive datatypes.
We use a custom to_representation function to exclude empty fields in the serialized JSON.
"""
ret = super(InstanceListSerializer, self).to_representation(instance)
ret = OrderedDict(list(filter(lambda x: x[1], ret.items())))
@ -23,3 +24,39 @@ class InstanceDetailSerializer(serializers.ModelSerializer):
class Meta:
model = Instance
fields = '__all__'
class EdgeSerializer(serializers.ModelSerializer):
    """Serializes a PeerRelationship as a SigmaJS-style graph edge."""

    # SigmaJS expects every edge to carry a unique 'id'; expose the pk as it.
    id = serializers.SerializerMethodField('get_pk')

    class Meta:
        model = PeerRelationship
        fields = ('source', 'target', 'id')

    def get_pk(self, relationship):
        """Return the relationship's primary key, used as the edge id."""
        return relationship.pk
class NodeSerializer(serializers.ModelSerializer):
    """Serializes an Instance as a SigmaJS-style graph node."""

    # SigmaJS nodes need an 'id' and a 'label'; both map to the instance name.
    id = serializers.SerializerMethodField('get_name')
    label = serializers.SerializerMethodField('get_name')
    size = serializers.SerializerMethodField()

    class Meta:
        model = Instance
        fields = ('id', 'label', 'size')

    def get_name(self, instance):
        """Return the instance's domain name."""
        return instance.name

    def get_size(self, instance):
        """Return the node size; instances without a user count default to 1."""
        return instance.user_count or 1

    def to_representation(self, instance):
        """Serialize *instance*, dropping falsy fields from the JSON output."""
        representation = super(NodeSerializer, self).to_representation(instance)
        return OrderedDict(
            (field, value) for field, value in representation.items() if value
        )

View File

@ -1,6 +1,6 @@
from rest_framework import viewsets
from scraper.models import Instance
from apiv1.serializers import InstanceListSerializer, InstanceDetailSerializer
from scraper.models import Instance, PeerRelationship
from apiv1.serializers import InstanceListSerializer, InstanceDetailSerializer, NodeSerializer, EdgeSerializer
class InstanceViewSet(viewsets.ReadOnlyModelViewSet):
@ -18,3 +18,20 @@ class InstanceViewSet(viewsets.ReadOnlyModelViewSet):
if hasattr(self, 'detail_serializer_class'):
return self.detail_serializer_class
return self.serializer_class
class EdgeView(viewsets.ReadOnlyModelViewSet):
    """Read-only endpoint listing the graph's edges in a SigmaJS-friendly format."""

    serializer_class = EdgeSerializer
    # Capped at 1000 relationships to keep the payload manageable.
    # NOTE(review): a sliced queryset cannot be filtered further — confirm
    # no additional filtering is ever applied to this view.
    queryset = PeerRelationship.objects.all()[:1000]
class NodeView(viewsets.ReadOnlyModelViewSet):
    """Read-only endpoint listing the graph's nodes in a SigmaJS-friendly format."""

    serializer_class = NodeSerializer
    # Every known instance is exposed, including ones whose crawl failed.
    queryset = Instance.objects.all()

View File

@ -135,7 +135,7 @@ USE_I18N = True
USE_L10N = True
USE_TZ = True
USE_TZ = False
# Static files (CSS, JavaScript, Images)

View File

@ -28,10 +28,11 @@ class OptionalTrailingSlashRouter(routers.DefaultRouter):
router = OptionalTrailingSlashRouter()
router.register(r'instances', views.InstanceViewSet)
router.register(r'graph/nodes', views.NodeView)
router.register(r'graph/edges', views.EdgeView)
urlpatterns = [
path('api/v1/', include(router.urls)),
path('silk/', include('silk.urls', namespace='silk')),
path('', TemplateView.as_view(template_name='index.html')),
]

View File

@ -13,6 +13,7 @@
"react-dom": "^16.4.2",
"react-redux": "^5.0.7",
"react-scripts-ts": "2.17.0",
"react-sigma": "^1.2.30",
"react-virtualized": "^9.20.1",
"redux": "^4.0.0",
"redux-thunk": "^2.3.0"

View File

@ -5,25 +5,30 @@ import { Dispatch } from 'redux';
import { Button, Intent, NonIdealState, Spinner } from '@blueprintjs/core';
import { IconNames } from '@blueprintjs/icons';
import { Graph } from './components/Graph';
import { Nav } from './components/Nav';
import { fetchInstances } from './redux/actions';
import { IAppState, IInstance } from './redux/types';
import { fetchGraph, fetchInstances } from './redux/actions';
import { IAppState, IGraph, IInstance } from './redux/types';
interface IAppProps {
currentInstanceName?: string | null;
graph?: IGraph;
instances?: IInstance[],
isLoadingGraph: boolean;
isLoadingInstances: boolean,
fetchInstances: () => void;
fetchGraph: () => void;
}
class AppImpl extends React.Component<IAppProps> {
public render() {
let body = this.welcomeState();
if (this.props.isLoadingInstances) {
body = this.loadingState();
} else if (!!this.props.instances) {
body = this.renderGraph()
body = this.loadingState("Loading instances...");
} else if (this.props.isLoadingGraph) {
body = this.loadingState("Loading graph...");
} else if (!!this.props.graph) {
body = <Graph />;
}
// TODO: show the number of instances up front
return (
<div className="App bp3-dark">
<Nav />
@ -32,49 +37,44 @@ class AppImpl extends React.Component<IAppProps> {
);
}
public componentDidMount() {
this.props.fetchInstances();
}
private welcomeState = () => {
const numInstances = this.props.instances ? this.props.instances.length : "lots of";
const description = `There are ${numInstances} known instances, so loading the graph might take a little while. Ready?`
return (
<NonIdealState
className="fediverse-welcome"
icon={IconNames.GLOBE_NETWORK}
title="Welcome to fediverse.space!"
description="There are currently $MANY known instances, so loading them might take a little while. Ready?"
action={<Button intent={Intent.PRIMARY} text={"Let's go"} onClick={this.props.fetchInstances} />}
description={description}
action={<Button intent={Intent.PRIMARY} text={"Let's go"} onClick={this.props.fetchGraph} />}
/>
)
}
private loadingState = () => {
private loadingState = (title?: string) => {
return (
<NonIdealState
className="fediverse-welcome"
icon={<Spinner />}
title="Loading..."
title={title || "Loading..."}
/>
)
}
private renderGraph = () => {
return (
<div>
<NonIdealState
className="fediverse-welcome"
icon={IconNames.SEARCH_AROUND}
title="Graph. TODO"
description={"Selected " + (this.props.currentInstanceName || "nothing")}
/>
</div>
);
}
}
const mapStateToProps = (state: IAppState) => ({
currentInstanceName: state.currentInstanceName,
graph: state.data.graph,
instances: state.data.instances,
isLoadingGraph: state.data.isLoadingGraph,
isLoadingInstances: state.data.isLoadingInstances,
})
const mapDispatchToProps = (dispatch: Dispatch) => ({
fetchInstances: () => dispatch(fetchInstances() as any)
fetchGraph: () => dispatch(fetchGraph() as any),
fetchInstances: () => dispatch(fetchInstances() as any),
})
export const App = connect(mapStateToProps, mapDispatchToProps)(AppImpl)

View File

@ -0,0 +1,60 @@
import * as React from 'react';
import { connect } from 'react-redux';
import { NodeShapes, RandomizeNodePositions, RelativeSize, Sigma, SigmaEnableWebGL, LoadGEXF, Filter } from 'react-sigma';
import { selectInstance } from '../redux/actions';
const STYLE = {
bottom: "0",
left: "0",
position: "absolute",
right: "0",
top: "50px",
}
const SETTINGS = {
defaultEdgeColor: "#5C7080",
defaultNodeColor: "#CED9E0",
drawEdges: true,
drawLabels: true,
edgeColor: "default",
}
class GraphImpl extends React.Component {
render() {
if (!this.props.graph) {
return null;
}
return (
<Sigma
graph={this.props.graph}
renderer="webgl"
settings={SETTINGS}
style={STYLE}
onClickNode={(e) => this.props.selectInstance(e.data.node.label)}
onClickStage={(e) => this.props.selectInstance(null)}
>
<RandomizeNodePositions />
<Filter neighborsOf={this.props.currentInstanceName} />
<RelativeSize initialSize={15} />
</Sigma>
)
}
// onClickNode = (e) => {
// this.props.selectInstance(e.data.node.label);
// }
// zoomToNode = (camera, node) => {
// s
// }
}
const mapStateToProps = (state) => ({
currentInstanceName: state.currentInstanceName,
graph: state.data.graph,
})
const mapDispatchToProps = (dispatch) => ({
selectInstance: (instanceName) => dispatch(selectInstance(instanceName)),
})
export const Graph = connect(mapStateToProps, mapDispatchToProps)(GraphImpl)

View File

@ -22,7 +22,7 @@ FocusStyleManager.onlyShowFocusOnTabs();
// Initialize redux
// @ts-ignore
const composeEnhancers = window.__REDUX_DEVTOOLS_EXTENSION_COMPOSE__ || compose;
const store = createStore(rootReducer, /* preloadedState, */ composeEnhancers(
const store = createStore(rootReducer, composeEnhancers(
applyMiddleware(thunk)
));

View File

@ -1,7 +1,7 @@
import { Dispatch } from 'redux';
import { getFromApi } from '../util';
import { ActionType, IInstance } from './types';
import { ActionType, IGraph, IInstance } from './types';
export const selectInstance = (instanceName: string) => {
return {
@ -23,6 +23,19 @@ export const receiveInstances = (instances: IInstance[]) => {
}
}
/** Action: the graph has started loading. */
export const requestGraph = () => ({
    type: ActionType.REQUEST_GRAPH,
});
/** Action: the graph finished loading; carries the fetched graph. */
export const receiveGraph = (graph: IGraph) => ({
    payload: graph,
    type: ActionType.RECEIVE_GRAPH,
});
/** Async actions: https://redux.js.org/advanced/asyncactions */
export const fetchInstances = () => {
@ -30,10 +43,22 @@ export const fetchInstances = () => {
return (dispatch: Dispatch) => {
dispatch(requestInstances());
return getFromApi("instances")
.then(response => {
return response.json();
})
.then(instances => dispatch(receiveInstances(instances))
);
}
}
/**
 * Async action: fetch the graph's edges and nodes in parallel, then
 * dispatch them as a single graph object.
 */
// TODO: handle errors
export const fetchGraph = () => {
    return (dispatch: Dispatch) => {
        dispatch(requestGraph());
        const edgesPromise = getFromApi("graph/edges");
        const nodesPromise = getFromApi("graph/nodes");
        return Promise.all([edgesPromise, nodesPromise])
            .then(([edges, nodes]) => dispatch(receiveGraph({ edges, nodes })));
    };
};

View File

@ -3,6 +3,7 @@ import { combineReducers } from 'redux';
import { ActionType, IAction, IDataState } from './types';
const initialDataState = {
isLoadingGraph: false,
isLoadingInstances: false,
}
const data = (state: IDataState = initialDataState, action: IAction) => {
@ -19,6 +20,17 @@ const data = (state: IDataState = initialDataState, action: IAction) => {
instances: action.payload,
isLoadingInstances: false,
};
case ActionType.REQUEST_GRAPH:
return {
...state,
isLoadingGraph: true,
};
case ActionType.RECEIVE_GRAPH:
return {
...state,
graph: action.payload,
isLoadingGraph: false,
};
default:
return state;
}

View File

@ -2,6 +2,8 @@ export enum ActionType {
SELECT_INSTANCE = 'SELECT_INSTANCE',
REQUEST_INSTANCES = 'REQUEST_INSTANCES',
RECEIVE_INSTANCES = 'RECEIVE_INSTANCES',
REQUEST_GRAPH = 'REQUEST_GRAPH',
RECEIVE_GRAPH = 'RECEIVE_GRAPH',
}
export interface IAction {
@ -14,12 +16,34 @@ export interface IInstance {
numUsers?: number,
}
interface IGraphNode {
id: string;
label: string;
size?: number;
color?: string;
}
interface IGraphEdge {
source: string;
target: string;
id?: string;
}
export interface IGraph {
nodes: IGraphNode[];
edges: IGraphEdge[];
}
// Redux state
export interface IDataState {
instances?: IInstance[],
graph?: IGraph,
isLoadingInstances: boolean,
isLoadingGraph: boolean,
}
export interface IAppState {
currentInstanceName: string | null,
data: IDataState,
}
}

View File

@ -1,11 +1,11 @@
import fetch from 'cross-fetch';
const API_ROOT = "https://fediverse.space/api/v1/"
// const API_ROOT = "http://localhost:8000/api/v1/"
// const API_ROOT = "https://fediverse.space/api/v1/"
const API_ROOT = "http://localhost:8000/api/v1/"
export const getFromApi = (path: string): Promise<any> => {
const domain = API_ROOT.endsWith("/") ? API_ROOT : API_ROOT + "/";
path = path.endsWith("/") ? path : path + "/";
path += "?format=json"
return fetch(domain + path);
return fetch(domain + path).then(response => response.json());
}

View File

@ -5809,6 +5809,10 @@ react-scripts-ts@2.17.0:
optionalDependencies:
fsevents "^1.1.3"
react-sigma@^1.2.30:
version "1.2.30"
resolved "https://registry.yarnpkg.com/react-sigma/-/react-sigma-1.2.30.tgz#794f88e796c4f763158afe404d10d9635f848846"
react-transition-group@^2.2.1:
version "2.4.0"
resolved "https://registry.yarnpkg.com/react-transition-group/-/react-transition-group-2.4.0.tgz#1d9391fabfd82e016f26fabd1eec329dbd922b5a"

18
gephi/.gitignore vendored Normal file
View File

@ -0,0 +1,18 @@
.gradle/
gradle/
build/
lib/*
!lib/.gitkeep
.idea/
# Ignore Gradle GUI config
gradle-app.setting
# Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored)
!gradle-wrapper.jar
# Cache of project
.gradletasknamecache
*.javac

23
gephi/README.md Normal file
View File

@ -0,0 +1,23 @@
# Gephi
This subproject uses Gephi to layout a graph that'll then be served to people on the front-end.
Always make sure to run `./gradlew shadowJar` to compile your changes.
Note that it won't compile using the repositories listed in `build.gradle`; dependency resolution fails with:
```
> Could not resolve all files for configuration ':compile'.
> Could not find net.java.dev:stax-utils:snapshot-20100402.
Searched in the following locations:
- https://repo.maven.apache.org/maven2/net/java/dev/stax-utils/snapshot-20100402/stax-utils-snapshot-20100402.pom
- https://repo.maven.apache.org/maven2/net/java/dev/stax-utils/snapshot-20100402/stax-utils-snapshot-20100402.jar
- https://jcenter.bintray.com/net/java/dev/stax-utils/snapshot-20100402/stax-utils-snapshot-20100402.pom
- https://jcenter.bintray.com/net/java/dev/stax-utils/snapshot-20100402/stax-utils-snapshot-20100402.jar
- https://dl.google.com/dl/android/maven2/net/java/dev/stax-utils/snapshot-20100402/stax-utils-snapshot-20100402.pom
- https://dl.google.com/dl/android/maven2/net/java/dev/stax-utils/snapshot-20100402/stax-utils-snapshot-20100402.jar
- http://bits.netbeans.org/nexus/content/groups/netbeans/net/java/dev/stax-utils/snapshot-20100402/stax-utils-snapshot-20100402.pom
- http://bits.netbeans.org/nexus/content/groups/netbeans/net/java/dev/stax-utils/snapshot-20100402/stax-utils-snapshot-20100402.jar
Required by:
project : > org.gephi:gephi-toolkit:0.9.2 > org.gephi:core-library-wrapper:0.9.2
```
As a workaround, download version 0.9.2 of gephi-toolkit from its GitHub releases page and manually add the jar to `fediverse.space/gephi/lib`. ¯\_(ツ)_/¯

47
gephi/build.gradle Normal file
View File

@ -0,0 +1,47 @@
buildscript {
// repositories {
// jcenter()
// }
dependencies {
classpath 'com.github.jengelman.gradle.plugins:shadow:2.0.4'
}
}
plugins {
id "base"
id "java"
id "com.github.johnrengelman.shadow" version "2.0.4"
}
repositories {
flatDir {
dirs 'lib'
}
// mavenCentral()
// jcenter()
// google()
// maven {
// url "http://bits.netbeans.org/nexus/content/groups/netbeans/"
// }
}
dependencies {
compile group: 'org.gephi', name: 'gephi-toolkit', version: '0.9.2'
}
jar {
manifest {
attributes (
'Class-Path': configurations.compile.collect { it.getName() }.join(' '),
'Main-Class': 'space.fediverse.graph.GraphBuilder'
)
}
}
shadowJar {
baseName = 'graphBuilder'
classifier = null
version = null
}

172
gephi/gradlew vendored Executable file
View File

@ -0,0 +1,172 @@
#!/usr/bin/env sh
##############################################################################
##
## Gradle start up script for UN*X
##
##############################################################################
# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
ls=`ls -ld "$PRG"`
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '/.*' > /dev/null; then
PRG="$link"
else
PRG=`dirname "$PRG"`"/$link"
fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null
APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS=""
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"
warn () {
echo "$*"
}
die () {
echo
echo "$*"
echo
exit 1
}
# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
CYGWIN* )
cygwin=true
;;
Darwin* )
darwin=true
;;
MINGW* )
msys=true
;;
NONSTOP* )
nonstop=true
;;
esac
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD="$JAVA_HOME/jre/sh/java"
else
JAVACMD="$JAVA_HOME/bin/java"
fi
if [ ! -x "$JAVACMD" ] ; then
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
else
JAVACMD="java"
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
MAX_FD_LIMIT=`ulimit -H -n`
if [ $? -eq 0 ] ; then
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
MAX_FD="$MAX_FD_LIMIT"
fi
ulimit -n $MAX_FD
if [ $? -ne 0 ] ; then
warn "Could not set maximum file descriptor limit: $MAX_FD"
fi
else
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
fi
fi
# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi
# For Cygwin, switch paths to Windows format before running java
if $cygwin ; then
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
JAVACMD=`cygpath --unix "$JAVACMD"`
# We build the pattern for arguments to be converted via cygpath
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
SEP=""
for dir in $ROOTDIRSRAW ; do
ROOTDIRS="$ROOTDIRS$SEP$dir"
SEP="|"
done
OURCYGPATTERN="(^($ROOTDIRS))"
# Add a user-defined pattern to the cygpath arguments
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
fi
# Now convert the arguments - kludge to limit ourselves to /bin/sh
i=0
for arg in "$@" ; do
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
else
eval `echo args$i`="\"$arg\""
fi
i=$((i+1))
done
case $i in
(0) set -- ;;
(1) set -- "$args0" ;;
(2) set -- "$args0" "$args1" ;;
(3) set -- "$args0" "$args1" "$args2" ;;
(4) set -- "$args0" "$args1" "$args2" "$args3" ;;
(5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
(6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
(7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
(8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
(9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
esac
fi
# Escape application args
save () {
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
echo " "
}
APP_ARGS=$(save "$@")
# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
cd "$(dirname "$0")"
fi
exec "$JAVACMD" "$@"

84
gephi/gradlew.bat vendored Normal file
View File

@ -0,0 +1,84 @@
@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem Gradle startup script for Windows
@rem
@rem ##########################################################################
@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal
set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS=
@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto init
echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
if exist "%JAVA_EXE%" goto init
echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:init
@rem Get command-line arguments, handling Windows variants
if not "%OS%" == "Windows_NT" goto win9xME_args
:win9xME_args
@rem Slurp the command line arguments.
set CMD_LINE_ARGS=
set _SKIP=2
:win9xME_args_slurp
if "x%~1" == "x" goto execute
set CMD_LINE_ARGS=%*
:execute
@rem Setup the command line
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd
:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1
:mainEnd
if "%OS%"=="Windows_NT" endlocal
:omega

10
gephi/settings.gradle Normal file
View File

@ -0,0 +1,10 @@
/*
* This file was generated by the Gradle 'init' task.
*
* The settings file is used to specify which projects to include in your build.
*
* Detailed information about configuring a multi-project build in Gradle can be found
* in the user guide at https://docs.gradle.org/4.10/userguide/multi_project_builds.html
*/
rootProject.name = 'gephi'

View File

@ -0,0 +1,103 @@
package space.fediverse.graph;
import org.gephi.graph.api.GraphController;
import org.gephi.graph.api.GraphModel;
import org.gephi.io.database.drivers.PostgreSQLDriver;
import org.gephi.io.exporter.api.ExportController;
import org.gephi.io.importer.api.Container;
import org.gephi.io.importer.api.EdgeDirectionDefault;
import org.gephi.io.importer.api.ImportController;
import org.gephi.io.importer.plugin.database.EdgeListDatabaseImpl;
import org.gephi.io.importer.plugin.database.ImporterEdgeList;
import org.gephi.io.processor.plugin.DefaultProcessor;
import org.gephi.layout.plugin.AutoLayout;
import org.gephi.layout.plugin.forceAtlas2.ForceAtlas2;
import org.gephi.project.api.ProjectController;
import org.gephi.project.api.Workspace;
import org.openide.util.Lookup;
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.concurrent.TimeUnit;
/**
 * Standalone entry point that imports the instance graph from Postgres via
 * the Gephi toolkit, runs a ForceAtlas2 layout for two minutes, and exports
 * the laid-out graph to fediverse.gexf in the working directory.
 *
 * Expects three CLI arguments: database name, username, password.
 * The Postgres server is assumed to be on localhost:5432.
 */
public class GraphBuilder {

    // Node query for Gephi's database importer: one node per successfully
    // crawled instance; the instance name doubles as both id and label.
    private static final String nodeQuery = String.join(""
            , "SELECT"
            , " scraper_instance.name AS id,"
            , " scraper_instance.name AS label,"
            , " scraper_instance.user_count"
            , " FROM scraper_instance WHERE status = 'success'"
    );

    // Edge query: one edge per peer relationship between instances.
    private static final String edgeQuery = String.join(""
            , "SELECT"
            , " scraper_instance_peers.from_instance_id AS source,"
            , " scraper_instance_peers.to_instance_id AS target"
            , " FROM scraper_instance_peers"
    );

    public static void main(String[] args) {
        // NOTE(review): unused — presumably left over from earlier debugging.
        Path currentRelativePath = Paths.get(".");

        // Init project & workspace; required to do things w/ gephi
        ProjectController pc = Lookup.getDefault().lookup(ProjectController.class);
        pc.newProject();
        Workspace workspace = pc.getCurrentWorkspace();

        // Get controllers and models
        ImportController importController = Lookup.getDefault().lookup(ImportController.class);
        GraphModel graphModel = Lookup.getDefault().lookup(GraphController.class).getGraphModel();
        // AttributeModel?

        // Import from database. args: [0] db name, [1] username, [2] password.
        EdgeListDatabaseImpl db = new EdgeListDatabaseImpl();
        db.setSQLDriver(new PostgreSQLDriver());
        db.setHost("localhost");
        db.setPort(5432);
        db.setDBName(args[0]);
        db.setUsername(args[1]);
        db.setPasswd(args[2]);
        db.setNodeQuery(nodeQuery);
        db.setEdgeQuery(edgeQuery);

        ImporterEdgeList edgeListImporter = new ImporterEdgeList();
        Container container = importController.importDatabase(db, edgeListImporter);
        // If a node is in the edge list, but not node list, we don't want to create it automatically
        container.getLoader().setAllowAutoNode(false);
        container.getLoader().setAllowSelfLoop(false);
        container.getLoader().setEdgeDefault(EdgeDirectionDefault.UNDIRECTED); // This is an undirected graph

        // Add imported data to graph
        importController.process(container, new DefaultProcessor(), workspace);

        // Layout: run ForceAtlas2 alone for the full two-minute budget.
        AutoLayout autoLayout = new AutoLayout(2, TimeUnit.MINUTES);
        autoLayout.setGraphModel(graphModel);
        // YifanHuLayout firstLayout = new YifanHuLayout(null, new StepDisplacement(1f));
        ForceAtlas2 secondLayout = new ForceAtlas2(null);
        // AutoLayout.DynamicProperty adjustBySizeProperty = AutoLayout.createDynamicProperty("forceAtlas.adjustSizes.name", Boolean.TRUE, 0.1f);
        // AutoLayout.DynamicProperty repulsionProperty = AutoLayout.createDynamicProperty("forceAtlas.repulsionStrength.name", 500., 0f);
        // autoLayout.addLayout(firstLayout, 0.5f);
        // autoLayout.addLayout(secondLayout, 0.5f, new AutoLayout.DynamicProperty[]{adjustBySizeProperty, repulsionProperty});
        autoLayout.addLayout(secondLayout, 1f);
        autoLayout.execute();

        // Export the laid-out graph as GEXF for the front-end to consume.
        ExportController exportController = Lookup.getDefault().lookup(ExportController.class);
        try {
            exportController.exportFile(new File("fediverse.gexf"));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }

        // Gephi doesn't seem to provide a good way to close the postgres connection, so we have to force close the
        // program. This'll leave a hanging connection for some period ¯\_()_/¯
        System.exit(0);
    }
}

View File

@ -1,3 +1,5 @@
from datetime import datetime
LOCK_MODES = (
'ACCESS SHARE',
'ROW SHARE',
@ -53,3 +55,11 @@ def get_key(data, keys: list):
return val
except KeyError:
return ''
def validate_int(integer):
    """Return *integer* if it is a plausible database int, else None.

    A value is valid when it is an int in the range [0, 2147483647) — i.e.
    it fits a Postgres 4-byte integer column. Some servers that appear to
    be fake instances report e.g. negative numbers, which are rejected.

    Bug fix: the previous expression
    ``isinstance(integer, int) and 0 <= integer < 2147483647 or None``
    evaluated to True/None rather than the integer itself, so every stat
    column that callers assigned from this function was stored as 1.
    """
    # bool is a subclass of int; exclude it so True can't slip through as 1.
    if isinstance(integer, int) and not isinstance(integer, bool) \
            and 0 <= integer < 2147483647:
        return integer
    return None
def log(text):
    """Return *text* prefixed with the current ISO-8601 timestamp."""
    timestamp = datetime.now().isoformat()
    return "{} - {}".format(timestamp, text)

View File

@ -0,0 +1,22 @@
import subprocess
from django.core.management.base import BaseCommand
from django.conf import settings
class Command(BaseCommand):
    """Management command that shells out to the Gephi graph builder jar.

    Removed: a no-op ``__init__`` override that only called ``super()`` —
    the inherited constructor does exactly the same thing.
    """

    help = "Takes what's in the database and calls Gephi to create and layout a graph"

    def handle(self, *args, **options):
        """Run the bundled graphBuilder jar with the default DB credentials."""
        database_config = settings.DATABASES['default']
        # The jar opens its own Postgres connection, so the credentials are
        # passed through argv.
        # NOTE(review): the password is visible in the process list while the
        # jar runs — confirm this is acceptable for the deployment environment.
        subprocess.call([
            'java',
            '-Xmx4g',  # graph layout is memory-hungry on large graphs
            '-jar',
            'gephi/build/libs/graphBuilder.jar',
            database_config['NAME'],
            database_config['USER'],
            database_config['PASSWORD'],
        ])

View File

@ -7,11 +7,11 @@ import json
import multiprocessing
import requests
import time
from datetime import datetime
from datetime import datetime, timedelta
from django.core.management.base import BaseCommand
from django.db import transaction
from scraper.models import Instance
from scraper.management.commands._util import require_lock, InvalidResponseError, get_key
from django import db
from scraper.models import Instance, PeerRelationship
from scraper.management.commands._util import require_lock, InvalidResponseError, get_key, log, validate_int
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# Because the script uses the Mastodon API other platforms like #
@ -27,7 +27,8 @@ from scraper.management.commands._util import require_lock, InvalidResponseError
# TODO: use the /api/v1/server/followers and /api/v1/server/following endpoints in peertube instances
SEED = 'mastodon.social'
TIMEOUT = 1
TIMEOUT = 10
NUM_THREADS = 4
class Command(BaseCommand):
@ -49,6 +50,8 @@ class Command(BaseCommand):
@staticmethod
def get_instance_peers(instance_name: str):
"""Collect connected instances"""
# The peers endpoint returns a "list of all domain names known to this instance"
# (https://github.com/tootsuite/mastodon/pull/6125)
url = 'https://' + instance_name + '/api/v1/instance/peers'
response = requests.get(url, timeout=TIMEOUT)
json = response.json()
@ -56,14 +59,14 @@ class Command(BaseCommand):
raise InvalidResponseError("Could not get peers for {}".format(instance_name))
return json
def process_instance(self, instance_name: str):
def process_instance(self, instance: Instance):
"""Given an instance, get all the data we're interested in"""
self.stdout.write("{} - Processing {}".format(datetime.now().isoformat(), instance_name))
data = dict()
try:
data['instance'] = instance_name
data['info'] = self.get_instance_info(instance_name)
data['peers'] = [peer for peer in self.get_instance_peers(instance_name) if peer] # get rid of null peers
data['instance_name'] = instance.name
data['info'] = self.get_instance_info(instance.name)
# Get rid of peers that just say "null" and the instance itself
data['peers'] = [peer for peer in self.get_instance_peers(instance.name) if peer and peer != instance.name]
if not data['info'] and not data['peers']:
# We got a response from the instance, but it didn't have any of the information we were expecting.
raise InvalidResponseError
@ -72,60 +75,74 @@ class Command(BaseCommand):
except (InvalidResponseError,
requests.exceptions.RequestException,
json.decoder.JSONDecodeError) as e:
data['instance'] = instance_name
data['instance_name'] = instance.name
data['status'] = type(e).__name__
return data
@db.transaction.atomic
@require_lock(Instance, 'ACCESS EXCLUSIVE')
def save_data(self, instance, data, queue):
    """
    Persist the scrape results for `instance` and enqueue newly-seen peers.

    - Updates the instance's stats, description, version, and status from `data`.
    - On a successful scrape, creates Instance rows for peers we have never
      seen before, puts them on the work queue, and records new directed
      PeerRelationship edges from `instance` to each of its peers.

    Runs in one transaction under an exclusive table lock so concurrent
    workers can't bulk_create duplicate instances or relationships.
    """
    # Validate the ints. Some servers that appear to be fake instances have e.g. negative numbers here.
    # TODO: these always return 1!
    instance.domain_count = validate_int(get_key(data, ['info', 'stats', 'domain_count']))
    instance.status_count = validate_int(get_key(data, ['info', 'stats', 'status_count']))
    instance.user_count = validate_int(get_key(data, ['info', 'stats', 'user_count']))
    instance.description = get_key(data, ['info', 'description'])
    instance.version = get_key(data, ['info', 'version'])
    instance.status = get_key(data, ['status'])
    instance.save()
    if data['status'] == 'success' and data['peers']:
        # TODO: handle a peer disappeer-ing
        # Create instances for the peers we haven't seen before and add them to the queue
        # TODO: share this among all threads so we only have to call it once at the start
        existing_instance_ids = Instance.objects.values_list('name', flat=True)
        new_instance_ids = [peer_id for peer_id in data['peers'] if peer_id not in existing_instance_ids]
        # bulk_create doesn't call save(), so the auto_now_add field won't get set automatically
        new_instances = [Instance(name=id, first_seen=datetime.now(), last_updated=datetime.now())
                         for id in new_instance_ids]
        Instance.objects.bulk_create(new_instances)
        for new_instance in new_instances:
            queue.put(new_instance)
        # Create relationships we haven't seen before
        existing_peer_ids = PeerRelationship.objects.filter(source=instance).values_list('target', flat=True)
        new_peer_ids = [peer_id for peer_id in data['peers'] if peer_id not in existing_peer_ids]
        if new_peer_ids:
            new_peers = Instance.objects.filter(name__in=new_peer_ids)
            new_relationships = [PeerRelationship(source=instance, target=new_peer, first_seen=datetime.now())
                                 for new_peer in new_peers]
            PeerRelationship.objects.bulk_create(new_relationships)
    self.stdout.write(log("Saved {}".format(data['instance_name'])))
def worker(self, queue: multiprocessing.JoinableQueue):
    """
    Main worker loop: pull Instance objects off the queue, scrape them, and
    save the results (which may enqueue newly-discovered peers).

    Runs forever; the pool processes are reaped when the parent exits after
    queue.join() returns.
    """
    # https://stackoverflow.com/a/38356519/3697202
    # Forked workers must not share the parent's database connections.
    db.connections.close_all()
    while True:
        # Get an item from the queue. Block if the queue is empty.
        instance = queue.get()
        if instance in self.done_bag:
            self.stderr.write(log("Skipping {}, already done. This should not have been added to the queue!".format(instance)))
            queue.task_done()
        else:
            # Fetch data on instance
            self.stdout.write(log("Processing {}".format(instance.name)))
            data = self.process_instance(instance)
            # save_data() enqueues new peers itself, so no requeue loop here.
            self.save_data(instance, data, queue)
            self.done_bag.add(instance)
            queue.task_done()
def handle(self, *args, **options):
    """
    Entry point for the scrape command.

    Seeds the work queue with instances not updated for a week, or — on a
    fresh database — with the SEED instance, then lets a pool of NUM_THREADS
    workers drain it.
    """
    start_time = time.time()
    stale_instances = Instance.objects.filter(last_updated__lte=datetime.now() - timedelta(weeks=1))
    queue = multiprocessing.JoinableQueue()
    if stale_instances:
        # Enqueue each stale instance individually. Putting the whole list on
        # the queue as a single item would hand workers an unprocessable
        # (and unhashable) object.
        for stale_instance in stale_instances:
            queue.put(stale_instance)
    elif not Instance.objects.exists():
        instance, _ = Instance.objects.get_or_create(name=SEED)
        queue.put(instance)
    # pool = multiprocessing.Pool(1, initializer=self.worker, initargs=(queue, ))  # Disable concurrency (debug)
    pool = multiprocessing.Pool(NUM_THREADS, initializer=self.worker, initargs=(queue, ))
    # Block until every queued instance has been marked task_done().
    queue.join()
    end_time = time.time()
    self.stdout.write(self.style.SUCCESS(log("Successfully scraped the fediverse in {:.0f}s".format(end_time - start_time))))

View File

@ -1,6 +1,7 @@
# Generated by Django 2.1 on 2018-08-29 17:37
# Generated by Django 2.1 on 2018-08-30 19:57
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
# NOTE(review): rendered-diff fragment. The hunk headers below hide parts of
# this migration (its dependencies and the start of CreateModel('Instance')).
@ -15,6 +16,7 @@ class Migration(migrations.Migration):
            name='Instance',
            fields=[
                ('name', models.CharField(max_length=200, primary_key=True, serialize=False)),
                ('description', models.TextField(blank=True)),
                ('domain_count', models.IntegerField(blank=True, null=True)),
                ('status_count', models.IntegerField(blank=True, null=True)),
                ('user_count', models.IntegerField(blank=True, null=True)),
@ -22,7 +24,20 @@ class Migration(migrations.Migration):
                ('status', models.CharField(max_length=100)),
                ('first_seen', models.DateTimeField(auto_now_add=True)),
                ('last_updated', models.DateTimeField(auto_now=True)),
                # NOTE(review): presumably the removed (old) side of the diff --
                # replaced by the 'following' through-model field added below.
                ('peers', models.ManyToManyField(related_name='_instance_peers_+', to='scraper.Instance')),
            ],
        ),
        # Directed edge between two instances, used as the through model for
        # Instance.following.
        migrations.CreateModel(
            name='PeerRelationship',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('first_seen', models.DateTimeField(auto_now_add=True)),
                ('source', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='following_relationship', to='scraper.Instance')),
                ('target', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='follower_relationships', to='scraper.Instance')),
            ],
        ),
        migrations.AddField(
            model_name='instance',
            name='following',
            field=models.ManyToManyField(related_name='followers', through='scraper.PeerRelationship', to='scraper.Instance'),
        ),
    ]

View File

@ -1,18 +0,0 @@
# Generated by Django 2.1 on 2018-08-29 18:01
from django.db import migrations, models
class Migration(migrations.Migration):
    # NOTE(review): this fragment comes from a file the diff deletes outright
    # (hunk header "@ -1,18 +0,0" above); the 'description' field it added is
    # folded into the regenerated 0001_initial migration instead.

    dependencies = [
        ('scraper', '0001_initial'),
    ]

    operations = [
        # Adds the free-text instance description column.
        migrations.AddField(
            model_name='instance',
            name='description',
            field=models.TextField(blank=True),
        ),
    ]

View File

@ -2,6 +2,12 @@ from django.db import models
class Instance(models.Model):
    """
    The main model that saves details of an instance and the links between
    instances.

    Don't change the schema without verifying that the gephi script can still read the data.
    """
    # Primary key
    name = models.CharField(max_length=200, primary_key=True)
    # NOTE(review): rendered-diff hunk header; it hides the stats fields
    # (description, domain_count, status_count, user_count, version -- see the
    # 0001_initial migration for the full list).
@ -14,13 +20,16 @@
    status = models.CharField(max_length=100)
    # Foreign keys
    # The peers endpoint returns a "list of all domain names known to this instance"
    # (https://github.com/tootsuite/mastodon/pull/6125)
    # In other words, an asymmetrical relationship here doesn't make much sense. If we one day can get a list of
    # instances that the instance actively follows (i.e. knows and not suspended), it's worth adding an
    # asymmetrical relation.
    # NOTE(review): diff residue -- the symmetrical 'peers' field below is the
    # old definition; the diff replaces it with the 'following' through-model
    # field on the next line.
    peers = models.ManyToManyField('self', symmetrical=True)
    following = models.ManyToManyField('self', symmetrical=False, through='PeerRelationship', related_name="followers")
    # Automatic fields
    first_seen = models.DateTimeField(auto_now_add=True)
    last_updated = models.DateTimeField(auto_now=True)
class PeerRelationship(models.Model):
source = models.ForeignKey(Instance, related_name="following_relationship", on_delete=models.CASCADE)
target = models.ForeignKey(Instance, related_name="follower_relationships", on_delete=models.CASCADE)
# Metadata
first_seen = models.DateTimeField(auto_now_add=True)