add graph (#1)

* add initial graph

* add graph
This commit is contained in:
Tao Bror Bojlén 2018-09-01 15:32:04 +02:00 committed by GitHub
parent 3cbc4dd1b6
commit 93932c5196
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
29 changed files with 815 additions and 109 deletions

1
.gitignore vendored
View File

@ -2,6 +2,7 @@
.idea/
config.json
backend/static/
*.gexf
# Byte-compiled / optimized / DLL files
__pycache__/

View File

@ -2,7 +2,11 @@
fediverse.space is a tool to explore instances in the fediverse.
## Running it
* `cp config.json.template config.json` and enter your configuration details.
* `cp config.json.template config.json` and enter your configuration details. I've used a postgres database for development.
* Set the environment variable `FEDIVERSE_CONFIG` to point to the path of this file.
* `pip install -r requirements.txt`
* `yarn install`
* Make sure you have the Java 8 JRE (to run) or JDK (to develop) installed, and gradle
* For development, run `python manage.py runserver --settings=backend.settings.dev`
* In production, set the environment variable `DJANGO_SETTINGS_MODULE=backend.settings.production`

8
apiv1/_util.py Normal file
View File

@ -0,0 +1,8 @@
def to_representation(self, instance):
    """Serialize *instance* to a dict of primitive datatypes, dropping empty fields.

    Fields whose value is falsy are filtered out so they never appear in
    the serialized JSON.

    NOTE(review): this hard-codes InstanceListSerializer in the super()
    call — confirm this helper is only ever used from that class.
    """
    representation = super(InstanceListSerializer, self).to_representation(instance)
    return OrderedDict(
        (field, value) for field, value in representation.items() if value
    )

View File

@ -1,6 +1,6 @@
from rest_framework import serializers
from collections import OrderedDict
from scraper.models import Instance
from scraper.models import Instance, PeerRelationship
class InstanceListSerializer(serializers.ModelSerializer):
@ -11,6 +11,7 @@ class InstanceListSerializer(serializers.ModelSerializer):
def to_representation(self, instance):
"""
Object instance -> Dict of primitive datatypes.
We use a custom to_representation function to exclude empty fields in the serialized JSON.
"""
ret = super(InstanceListSerializer, self).to_representation(instance)
ret = OrderedDict(list(filter(lambda x: x[1], ret.items())))
@ -23,3 +24,39 @@ class InstanceDetailSerializer(serializers.ModelSerializer):
class Meta:
model = Instance
fields = '__all__'
class EdgeSerializer(serializers.ModelSerializer):
    """Serializes a PeerRelationship as a SigmaJS-style graph edge."""

    # SigmaJS expects every edge to carry a unique 'id'; expose the pk as it.
    id = serializers.SerializerMethodField('get_pk')

    class Meta:
        model = PeerRelationship
        fields = ('source', 'target', 'id')

    def get_pk(self, relationship):
        """Return the relationship's primary key, used as the edge id."""
        return relationship.pk
class NodeSerializer(serializers.ModelSerializer):
    """Serializes an Instance as a SigmaJS-style graph node."""

    # SigmaJS nodes need an 'id' and a 'label'; both map to the instance name.
    id = serializers.SerializerMethodField('get_name')
    label = serializers.SerializerMethodField('get_name')
    size = serializers.SerializerMethodField()

    class Meta:
        model = Instance
        fields = ('id', 'label', 'size')

    def get_name(self, instance):
        """Return the instance's domain name."""
        return instance.name

    def get_size(self, instance):
        """Return the node size; instances without a user count default to 1."""
        return instance.user_count or 1

    def to_representation(self, instance):
        """Serialize *instance*, dropping falsy fields from the JSON output."""
        representation = super(NodeSerializer, self).to_representation(instance)
        return OrderedDict(
            (field, value) for field, value in representation.items() if value
        )

View File

@ -1,6 +1,6 @@
from rest_framework import viewsets
from scraper.models import Instance
from apiv1.serializers import InstanceListSerializer, InstanceDetailSerializer
from scraper.models import Instance, PeerRelationship
from apiv1.serializers import InstanceListSerializer, InstanceDetailSerializer, NodeSerializer, EdgeSerializer
class InstanceViewSet(viewsets.ReadOnlyModelViewSet):
@ -18,3 +18,20 @@ class InstanceViewSet(viewsets.ReadOnlyModelViewSet):
if hasattr(self, 'detail_serializer_class'):
return self.detail_serializer_class
return self.serializer_class
class EdgeView(viewsets.ReadOnlyModelViewSet):
    """Read-only endpoint listing the graph's edges in a SigmaJS-friendly format."""

    serializer_class = EdgeSerializer
    # Capped at 1000 relationships to keep the payload manageable.
    # NOTE(review): a sliced queryset cannot be filtered further — confirm
    # no additional filtering is ever applied to this view.
    queryset = PeerRelationship.objects.all()[:1000]
class NodeView(viewsets.ReadOnlyModelViewSet):
    """Read-only endpoint listing the graph's nodes in a SigmaJS-friendly format."""

    serializer_class = NodeSerializer
    # Every known instance is exposed, including ones whose crawl failed.
    queryset = Instance.objects.all()

View File

@ -135,7 +135,7 @@ USE_I18N = True
USE_L10N = True
USE_TZ = True
USE_TZ = False
# Static files (CSS, JavaScript, Images)

View File

@ -28,10 +28,11 @@ class OptionalTrailingSlashRouter(routers.DefaultRouter):
router = OptionalTrailingSlashRouter()
router.register(r'instances', views.InstanceViewSet)
router.register(r'graph/nodes', views.NodeView)
router.register(r'graph/edges', views.EdgeView)
urlpatterns = [
path('api/v1/', include(router.urls)),
path('silk/', include('silk.urls', namespace='silk')),
path('', TemplateView.as_view(template_name='index.html')),
]

View File

@ -13,6 +13,7 @@
"react-dom": "^16.4.2",
"react-redux": "^5.0.7",
"react-scripts-ts": "2.17.0",
"react-sigma": "^1.2.30",
"react-virtualized": "^9.20.1",
"redux": "^4.0.0",
"redux-thunk": "^2.3.0"

View File

@ -5,25 +5,30 @@ import { Dispatch } from 'redux';
import { Button, Intent, NonIdealState, Spinner } from '@blueprintjs/core';
import { IconNames } from '@blueprintjs/icons';
import { Graph } from './components/Graph';
import { Nav } from './components/Nav';
import { fetchInstances } from './redux/actions';
import { IAppState, IInstance } from './redux/types';
import { fetchGraph, fetchInstances } from './redux/actions';
import { IAppState, IGraph, IInstance } from './redux/types';
interface IAppProps {
currentInstanceName?: string | null;
graph?: IGraph;
instances?: IInstance[],
isLoadingGraph: boolean;
isLoadingInstances: boolean,
fetchInstances: () => void;
fetchGraph: () => void;
}
class AppImpl extends React.Component<IAppProps> {
public render() {
let body = this.welcomeState();
if (this.props.isLoadingInstances) {
body = this.loadingState();
} else if (!!this.props.instances) {
body = this.renderGraph()
body = this.loadingState("Loading instances...");
} else if (this.props.isLoadingGraph) {
body = this.loadingState("Loading graph...");
} else if (!!this.props.graph) {
body = <Graph />;
}
// TODO: show the number of instances up front
return (
<div className="App bp3-dark">
<Nav />
@ -32,49 +37,44 @@ class AppImpl extends React.Component<IAppProps> {
);
}
public componentDidMount() {
this.props.fetchInstances();
}
private welcomeState = () => {
const numInstances = this.props.instances ? this.props.instances.length : "lots of";
const description = `There are ${numInstances} known instances, so loading the graph might take a little while. Ready?`
return (
<NonIdealState
className="fediverse-welcome"
icon={IconNames.GLOBE_NETWORK}
title="Welcome to fediverse.space!"
description="There are currently $MANY known instances, so loading them might take a little while. Ready?"
action={<Button intent={Intent.PRIMARY} text={"Let's go"} onClick={this.props.fetchInstances} />}
description={description}
action={<Button intent={Intent.PRIMARY} text={"Let's go"} onClick={this.props.fetchGraph} />}
/>
)
}
private loadingState = () => {
private loadingState = (title?: string) => {
return (
<NonIdealState
className="fediverse-welcome"
icon={<Spinner />}
title="Loading..."
title={title || "Loading..."}
/>
)
}
private renderGraph = () => {
return (
<div>
<NonIdealState
className="fediverse-welcome"
icon={IconNames.SEARCH_AROUND}
title="Graph. TODO"
description={"Selected " + (this.props.currentInstanceName || "nothing")}
/>
</div>
);
}
}
const mapStateToProps = (state: IAppState) => ({
currentInstanceName: state.currentInstanceName,
graph: state.data.graph,
instances: state.data.instances,
isLoadingGraph: state.data.isLoadingGraph,
isLoadingInstances: state.data.isLoadingInstances,
})
const mapDispatchToProps = (dispatch: Dispatch) => ({
fetchInstances: () => dispatch(fetchInstances() as any)
fetchGraph: () => dispatch(fetchGraph() as any),
fetchInstances: () => dispatch(fetchInstances() as any),
})
export const App = connect(mapStateToProps, mapDispatchToProps)(AppImpl)

View File

@ -0,0 +1,60 @@
import * as React from 'react';
import { connect } from 'react-redux';
import { NodeShapes, RandomizeNodePositions, RelativeSize, Sigma, SigmaEnableWebGL, LoadGEXF, Filter } from 'react-sigma';
import { selectInstance } from '../redux/actions';
const STYLE = {
bottom: "0",
left: "0",
position: "absolute",
right: "0",
top: "50px",
}
const SETTINGS = {
defaultEdgeColor: "#5C7080",
defaultNodeColor: "#CED9E0",
drawEdges: true,
drawLabels: true,
edgeColor: "default",
}
class GraphImpl extends React.Component {
render() {
if (!this.props.graph) {
return null;
}
return (
<Sigma
graph={this.props.graph}
renderer="webgl"
settings={SETTINGS}
style={STYLE}
onClickNode={(e) => this.props.selectInstance(e.data.node.label)}
onClickStage={(e) => this.props.selectInstance(null)}
>
<RandomizeNodePositions />
<Filter neighborsOf={this.props.currentInstanceName} />
<RelativeSize initialSize={15} />
</Sigma>
)
}
// onClickNode = (e) => {
// this.props.selectInstance(e.data.node.label);
// }
// zoomToNode = (camera, node) => {
// s
// }
}
const mapStateToProps = (state) => ({
currentInstanceName: state.currentInstanceName,
graph: state.data.graph,
})
const mapDispatchToProps = (dispatch) => ({
selectInstance: (instanceName) => dispatch(selectInstance(instanceName)),
})
export const Graph = connect(mapStateToProps, mapDispatchToProps)(GraphImpl)

View File

@ -22,7 +22,7 @@ FocusStyleManager.onlyShowFocusOnTabs();
// Initialize redux
// @ts-ignore
const composeEnhancers = window.__REDUX_DEVTOOLS_EXTENSION_COMPOSE__ || compose;
const store = createStore(rootReducer, /* preloadedState, */ composeEnhancers(
const store = createStore(rootReducer, composeEnhancers(
applyMiddleware(thunk)
));

View File

@ -1,7 +1,7 @@
import { Dispatch } from 'redux';
import { getFromApi } from '../util';
import { ActionType, IInstance } from './types';
import { ActionType, IGraph, IInstance } from './types';
export const selectInstance = (instanceName: string) => {
return {
@ -23,6 +23,19 @@ export const receiveInstances = (instances: IInstance[]) => {
}
}
/** Action: the graph has started loading. */
export const requestGraph = () => ({
    type: ActionType.REQUEST_GRAPH,
});
/** Action: the graph finished loading; carries the fetched graph. */
export const receiveGraph = (graph: IGraph) => ({
    payload: graph,
    type: ActionType.RECEIVE_GRAPH,
});
/** Async actions: https://redux.js.org/advanced/asyncactions */
export const fetchInstances = () => {
@ -30,10 +43,22 @@ export const fetchInstances = () => {
return (dispatch: Dispatch) => {
dispatch(requestInstances());
return getFromApi("instances")
.then(response => {
return response.json();
})
.then(instances => dispatch(receiveInstances(instances))
);
}
}
/**
 * Async action: fetch the graph's edges and nodes in parallel, then
 * dispatch them as a single graph object.
 */
// TODO: handle errors
export const fetchGraph = () => {
    return (dispatch: Dispatch) => {
        dispatch(requestGraph());
        const edgesPromise = getFromApi("graph/edges");
        const nodesPromise = getFromApi("graph/nodes");
        return Promise.all([edgesPromise, nodesPromise])
            .then(([edges, nodes]) => dispatch(receiveGraph({ edges, nodes })));
    };
};

View File

@ -3,6 +3,7 @@ import { combineReducers } from 'redux';
import { ActionType, IAction, IDataState } from './types';
const initialDataState = {
isLoadingGraph: false,
isLoadingInstances: false,
}
const data = (state: IDataState = initialDataState, action: IAction) => {
@ -19,6 +20,17 @@ const data = (state: IDataState = initialDataState, action: IAction) => {
instances: action.payload,
isLoadingInstances: false,
};
case ActionType.REQUEST_GRAPH:
return {
...state,
isLoadingGraph: true,
};
case ActionType.RECEIVE_GRAPH:
return {
...state,
graph: action.payload,
isLoadingGraph: false,
};
default:
return state;
}

View File

@ -2,6 +2,8 @@ export enum ActionType {
SELECT_INSTANCE = 'SELECT_INSTANCE',
REQUEST_INSTANCES = 'REQUEST_INSTANCES',
RECEIVE_INSTANCES = 'RECEIVE_INSTANCES',
REQUEST_GRAPH = 'REQUEST_GRAPH',
RECEIVE_GRAPH = 'RECEIVE_GRAPH',
}
export interface IAction {
@ -14,12 +16,34 @@ export interface IInstance {
numUsers?: number,
}
interface IGraphNode {
id: string;
label: string;
size?: number;
color?: string;
}
interface IGraphEdge {
source: string;
target: string;
id?: string;
}
export interface IGraph {
nodes: IGraphNode[];
edges: IGraphEdge[];
}
// Redux state
export interface IDataState {
instances?: IInstance[],
graph?: IGraph,
isLoadingInstances: boolean,
isLoadingGraph: boolean,
}
export interface IAppState {
currentInstanceName: string | null,
data: IDataState,
}
}

View File

@ -1,11 +1,11 @@
import fetch from 'cross-fetch';
const API_ROOT = "https://fediverse.space/api/v1/"
// const API_ROOT = "http://localhost:8000/api/v1/"
// const API_ROOT = "https://fediverse.space/api/v1/"
const API_ROOT = "http://localhost:8000/api/v1/"
export const getFromApi = (path: string): Promise<any> => {
const domain = API_ROOT.endsWith("/") ? API_ROOT : API_ROOT + "/";
path = path.endsWith("/") ? path : path + "/";
path += "?format=json"
return fetch(domain + path);
return fetch(domain + path).then(response => response.json());
}

View File

@ -5809,6 +5809,10 @@ react-scripts-ts@2.17.0:
optionalDependencies:
fsevents "^1.1.3"
react-sigma@^1.2.30:
version "1.2.30"
resolved "https://registry.yarnpkg.com/react-sigma/-/react-sigma-1.2.30.tgz#794f88e796c4f763158afe404d10d9635f848846"
react-transition-group@^2.2.1:
version "2.4.0"
resolved "https://registry.yarnpkg.com/react-transition-group/-/react-transition-group-2.4.0.tgz#1d9391fabfd82e016f26fabd1eec329dbd922b5a"

18
gephi/.gitignore vendored Normal file
View File

@ -0,0 +1,18 @@
.gradle/
gradle/
build/
lib/*
!lib/.gitkeep
.idea/
# Ignore Gradle GUI config
gradle-app.setting
# Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored)
!gradle-wrapper.jar
# Cache of project
.gradletasknamecache
*.javac

23
gephi/README.md Normal file
View File

@ -0,0 +1,23 @@
# Gephi
This subproject uses Gephi to layout a graph that'll then be served to people on the front-end.
Always make sure to run `./gradlew shadowJar` to compile your changes.
Note that it won't compile using the repositories listed in `build.gradle`; dependency resolution fails with:
```
> Could not resolve all files for configuration ':compile'.
> Could not find net.java.dev:stax-utils:snapshot-20100402.
Searched in the following locations:
- https://repo.maven.apache.org/maven2/net/java/dev/stax-utils/snapshot-20100402/stax-utils-snapshot-20100402.pom
- https://repo.maven.apache.org/maven2/net/java/dev/stax-utils/snapshot-20100402/stax-utils-snapshot-20100402.jar
- https://jcenter.bintray.com/net/java/dev/stax-utils/snapshot-20100402/stax-utils-snapshot-20100402.pom
- https://jcenter.bintray.com/net/java/dev/stax-utils/snapshot-20100402/stax-utils-snapshot-20100402.jar
- https://dl.google.com/dl/android/maven2/net/java/dev/stax-utils/snapshot-20100402/stax-utils-snapshot-20100402.pom
- https://dl.google.com/dl/android/maven2/net/java/dev/stax-utils/snapshot-20100402/stax-utils-snapshot-20100402.jar
- http://bits.netbeans.org/nexus/content/groups/netbeans/net/java/dev/stax-utils/snapshot-20100402/stax-utils-snapshot-20100402.pom
- http://bits.netbeans.org/nexus/content/groups/netbeans/net/java/dev/stax-utils/snapshot-20100402/stax-utils-snapshot-20100402.jar
Required by:
project : > org.gephi:gephi-toolkit:0.9.2 > org.gephi:core-library-wrapper:0.9.2
```
As a workaround, download version 0.9.2 of gephi-toolkit from its GitHub releases page and manually add the jar to `fediverse.space/gephi/lib`. ¯\_(ツ)_/¯

47
gephi/build.gradle Normal file
View File

@ -0,0 +1,47 @@
buildscript {
// repositories {
// jcenter()
// }
dependencies {
classpath 'com.github.jengelman.gradle.plugins:shadow:2.0.4'
}
}
plugins {
id "base"
id "java"
id "com.github.johnrengelman.shadow" version "2.0.4"
}
repositories {
flatDir {
dirs 'lib'
}
// mavenCentral()
// jcenter()
// google()
// maven {
// url "http://bits.netbeans.org/nexus/content/groups/netbeans/"
// }
}
dependencies {
compile group: 'org.gephi', name: 'gephi-toolkit', version: '0.9.2'
}
jar {
manifest {
attributes (
'Class-Path': configurations.compile.collect { it.getName() }.join(' '),
'Main-Class': 'space.fediverse.graph.GraphBuilder'
)
}
}
shadowJar {
baseName = 'graphBuilder'
classifier = null
version = null
}

172
gephi/gradlew vendored Executable file
View File

@ -0,0 +1,172 @@
#!/usr/bin/env sh
##############################################################################
##
## Gradle start up script for UN*X
##
##############################################################################
# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
ls=`ls -ld "$PRG"`
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '/.*' > /dev/null; then
PRG="$link"
else
PRG=`dirname "$PRG"`"/$link"
fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null
APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS=""
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"
warn () {
echo "$*"
}
die () {
echo
echo "$*"
echo
exit 1
}
# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
CYGWIN* )
cygwin=true
;;
Darwin* )
darwin=true
;;
MINGW* )
msys=true
;;
NONSTOP* )
nonstop=true
;;
esac
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD="$JAVA_HOME/jre/sh/java"
else
JAVACMD="$JAVA_HOME/bin/java"
fi
if [ ! -x "$JAVACMD" ] ; then
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
else
JAVACMD="java"
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
MAX_FD_LIMIT=`ulimit -H -n`
if [ $? -eq 0 ] ; then
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
MAX_FD="$MAX_FD_LIMIT"
fi
ulimit -n $MAX_FD
if [ $? -ne 0 ] ; then
warn "Could not set maximum file descriptor limit: $MAX_FD"
fi
else
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
fi
fi
# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi
# For Cygwin, switch paths to Windows format before running java
if $cygwin ; then
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
JAVACMD=`cygpath --unix "$JAVACMD"`
# We build the pattern for arguments to be converted via cygpath
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
SEP=""
for dir in $ROOTDIRSRAW ; do
ROOTDIRS="$ROOTDIRS$SEP$dir"
SEP="|"
done
OURCYGPATTERN="(^($ROOTDIRS))"
# Add a user-defined pattern to the cygpath arguments
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
fi
# Now convert the arguments - kludge to limit ourselves to /bin/sh
i=0
for arg in "$@" ; do
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
else
eval `echo args$i`="\"$arg\""
fi
i=$((i+1))
done
case $i in
(0) set -- ;;
(1) set -- "$args0" ;;
(2) set -- "$args0" "$args1" ;;
(3) set -- "$args0" "$args1" "$args2" ;;
(4) set -- "$args0" "$args1" "$args2" "$args3" ;;
(5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
(6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
(7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
(8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
(9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
esac
fi
# Escape application args
save () {
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
echo " "
}
APP_ARGS=$(save "$@")
# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
cd "$(dirname "$0")"
fi
exec "$JAVACMD" "$@"

84
gephi/gradlew.bat vendored Normal file
View File

@ -0,0 +1,84 @@
@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem Gradle startup script for Windows
@rem
@rem ##########################################################################
@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal
set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS=
@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto init
echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
if exist "%JAVA_EXE%" goto init
echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:init
@rem Get command-line arguments, handling Windows variants
if not "%OS%" == "Windows_NT" goto win9xME_args
:win9xME_args
@rem Slurp the command line arguments.
set CMD_LINE_ARGS=
set _SKIP=2
:win9xME_args_slurp
if "x%~1" == "x" goto execute
set CMD_LINE_ARGS=%*
:execute
@rem Setup the command line
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd
:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1
:mainEnd
if "%OS%"=="Windows_NT" endlocal
:omega

10
gephi/settings.gradle Normal file
View File

@ -0,0 +1,10 @@
/*
* This file was generated by the Gradle 'init' task.
*
* The settings file is used to specify which projects to include in your build.
*
* Detailed information about configuring a multi-project build in Gradle can be found
* in the user guide at https://docs.gradle.org/4.10/userguide/multi_project_builds.html
*/
rootProject.name = 'gephi'

View File

@ -0,0 +1,103 @@
package space.fediverse.graph;
import org.gephi.graph.api.GraphController;
import org.gephi.graph.api.GraphModel;
import org.gephi.io.database.drivers.PostgreSQLDriver;
import org.gephi.io.exporter.api.ExportController;
import org.gephi.io.importer.api.Container;
import org.gephi.io.importer.api.EdgeDirectionDefault;
import org.gephi.io.importer.api.ImportController;
import org.gephi.io.importer.plugin.database.EdgeListDatabaseImpl;
import org.gephi.io.importer.plugin.database.ImporterEdgeList;
import org.gephi.io.processor.plugin.DefaultProcessor;
import org.gephi.layout.plugin.AutoLayout;
import org.gephi.layout.plugin.forceAtlas2.ForceAtlas2;
import org.gephi.project.api.ProjectController;
import org.gephi.project.api.Workspace;
import org.openide.util.Lookup;
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.concurrent.TimeUnit;
/**
 * Standalone entry point that imports the instance graph from Postgres via
 * the Gephi toolkit, runs a ForceAtlas2 layout for two minutes, and exports
 * the laid-out graph to fediverse.gexf in the working directory.
 *
 * Expects three CLI arguments: database name, username, password.
 * The Postgres server is assumed to be on localhost:5432.
 */
public class GraphBuilder {

    // Node query for Gephi's database importer: one node per successfully
    // crawled instance; the instance name doubles as both id and label.
    private static final String nodeQuery = String.join(""
            , "SELECT"
            , " scraper_instance.name AS id,"
            , " scraper_instance.name AS label,"
            , " scraper_instance.user_count"
            , " FROM scraper_instance WHERE status = 'success'"
    );

    // Edge query: one edge per peer relationship between instances.
    private static final String edgeQuery = String.join(""
            , "SELECT"
            , " scraper_instance_peers.from_instance_id AS source,"
            , " scraper_instance_peers.to_instance_id AS target"
            , " FROM scraper_instance_peers"
    );

    public static void main(String[] args) {
        // NOTE(review): unused — presumably left over from earlier debugging.
        Path currentRelativePath = Paths.get(".");

        // Init project & workspace; required to do things w/ gephi
        ProjectController pc = Lookup.getDefault().lookup(ProjectController.class);
        pc.newProject();
        Workspace workspace = pc.getCurrentWorkspace();

        // Get controllers and models
        ImportController importController = Lookup.getDefault().lookup(ImportController.class);
        GraphModel graphModel = Lookup.getDefault().lookup(GraphController.class).getGraphModel();
        // AttributeModel?

        // Import from database. args: [0] db name, [1] username, [2] password.
        EdgeListDatabaseImpl db = new EdgeListDatabaseImpl();
        db.setSQLDriver(new PostgreSQLDriver());
        db.setHost("localhost");
        db.setPort(5432);
        db.setDBName(args[0]);
        db.setUsername(args[1]);
        db.setPasswd(args[2]);
        db.setNodeQuery(nodeQuery);
        db.setEdgeQuery(edgeQuery);

        ImporterEdgeList edgeListImporter = new ImporterEdgeList();
        Container container = importController.importDatabase(db, edgeListImporter);
        // If a node is in the edge list, but not node list, we don't want to create it automatically
        container.getLoader().setAllowAutoNode(false);
        container.getLoader().setAllowSelfLoop(false);
        container.getLoader().setEdgeDefault(EdgeDirectionDefault.UNDIRECTED); // This is an undirected graph

        // Add imported data to graph
        importController.process(container, new DefaultProcessor(), workspace);

        // Layout: run ForceAtlas2 alone for the full two-minute budget.
        AutoLayout autoLayout = new AutoLayout(2, TimeUnit.MINUTES);
        autoLayout.setGraphModel(graphModel);
        // YifanHuLayout firstLayout = new YifanHuLayout(null, new StepDisplacement(1f));
        ForceAtlas2 secondLayout = new ForceAtlas2(null);
        // AutoLayout.DynamicProperty adjustBySizeProperty = AutoLayout.createDynamicProperty("forceAtlas.adjustSizes.name", Boolean.TRUE, 0.1f);
        // AutoLayout.DynamicProperty repulsionProperty = AutoLayout.createDynamicProperty("forceAtlas.repulsionStrength.name", 500., 0f);
        // autoLayout.addLayout(firstLayout, 0.5f);
        // autoLayout.addLayout(secondLayout, 0.5f, new AutoLayout.DynamicProperty[]{adjustBySizeProperty, repulsionProperty});
        autoLayout.addLayout(secondLayout, 1f);
        autoLayout.execute();

        // Export the laid-out graph as GEXF for the front-end to consume.
        ExportController exportController = Lookup.getDefault().lookup(ExportController.class);
        try {
            exportController.exportFile(new File("fediverse.gexf"));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }

        // Gephi doesn't seem to provide a good way to close the postgres connection, so we have to force close the
        // program. This'll leave a hanging connection for some period ¯\_()_/¯
        System.exit(0);
    }
}

View File

@ -1,3 +1,5 @@
from datetime import datetime
LOCK_MODES = (
'ACCESS SHARE',
'ROW SHARE',
@ -53,3 +55,11 @@ def get_key(data, keys: list):
return val
except KeyError:
return ''
def validate_int(integer):
    """Return *integer* if it is a plausible database int, else None.

    A value is valid when it is an int in the range [0, 2147483647) — i.e.
    it fits a Postgres 4-byte integer column. Some servers that appear to
    be fake instances report e.g. negative numbers, which are rejected.

    Bug fix: the previous expression
    ``isinstance(integer, int) and 0 <= integer < 2147483647 or None``
    evaluated to True/None rather than the integer itself, so every stat
    column that callers assigned from this function was stored as 1.
    """
    # bool is a subclass of int; exclude it so True can't slip through as 1.
    if isinstance(integer, int) and not isinstance(integer, bool) \
            and 0 <= integer < 2147483647:
        return integer
    return None
def log(text):
    """Return *text* prefixed with the current ISO-8601 timestamp."""
    timestamp = datetime.now().isoformat()
    return "{} - {}".format(timestamp, text)

View File

@ -0,0 +1,22 @@
import subprocess
from django.core.management.base import BaseCommand
from django.conf import settings
class Command(BaseCommand):
    """Management command that shells out to the Gephi graph builder jar.

    Removed: a no-op ``__init__`` override that only called ``super()`` —
    the inherited constructor does exactly the same thing.
    """

    help = "Takes what's in the database and calls Gephi to create and layout a graph"

    def handle(self, *args, **options):
        """Run the bundled graphBuilder jar with the default DB credentials."""
        database_config = settings.DATABASES['default']
        # The jar opens its own Postgres connection, so the credentials are
        # passed through argv.
        # NOTE(review): the password is visible in the process list while the
        # jar runs — confirm this is acceptable for the deployment environment.
        subprocess.call([
            'java',
            '-Xmx4g',  # graph layout is memory-hungry on large graphs
            '-jar',
            'gephi/build/libs/graphBuilder.jar',
            database_config['NAME'],
            database_config['USER'],
            database_config['PASSWORD'],
        ])

View File

@ -7,11 +7,11 @@ import json
import multiprocessing
import requests
import time
from datetime import datetime
from datetime import datetime, timedelta
from django.core.management.base import BaseCommand
from django.db import transaction
from scraper.models import Instance
from scraper.management.commands._util import require_lock, InvalidResponseError, get_key
from django import db
from scraper.models import Instance, PeerRelationship
from scraper.management.commands._util import require_lock, InvalidResponseError, get_key, log, validate_int
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# Because the script uses the Mastodon API other platforms like #
@ -27,7 +27,8 @@ from scraper.management.commands._util import require_lock, InvalidResponseError
# TODO: use the /api/v1/server/followers and /api/v1/server/following endpoints in peertube instances
SEED = 'mastodon.social'
TIMEOUT = 1
TIMEOUT = 10
NUM_THREADS = 4
class Command(BaseCommand):
@ -49,6 +50,8 @@ class Command(BaseCommand):
@staticmethod
def get_instance_peers(instance_name: str):
"""Collect connected instances"""
# The peers endpoint returns a "list of all domain names known to this instance"
# (https://github.com/tootsuite/mastodon/pull/6125)
url = 'https://' + instance_name + '/api/v1/instance/peers'
response = requests.get(url, timeout=TIMEOUT)
json = response.json()
@ -56,14 +59,14 @@ class Command(BaseCommand):
raise InvalidResponseError("Could not get peers for {}".format(instance_name))
return json
def process_instance(self, instance_name: str):
def process_instance(self, instance: Instance):
"""Given an instance, get all the data we're interested in"""
self.stdout.write("{} - Processing {}".format(datetime.now().isoformat(), instance_name))
data = dict()
try:
data['instance'] = instance_name
data['info'] = self.get_instance_info(instance_name)
data['peers'] = [peer for peer in self.get_instance_peers(instance_name) if peer] # get rid of null peers
data['instance_name'] = instance.name
data['info'] = self.get_instance_info(instance.name)
# Get rid of peers that just say "null" and the instance itself
data['peers'] = [peer for peer in self.get_instance_peers(instance.name) if peer and peer != instance.name]
if not data['info'] and not data['peers']:
# We got a response from the instance, but it didn't have any of the information we were expecting.
raise InvalidResponseError
@ -72,60 +75,74 @@ class Command(BaseCommand):
except (InvalidResponseError,
requests.exceptions.RequestException,
json.decoder.JSONDecodeError) as e:
data['instance'] = instance_name
data['instance_name'] = instance.name
data['status'] = type(e).__name__
return data
@db.transaction.atomic
@require_lock(Instance, 'ACCESS EXCLUSIVE')
def save_data(self, instance, data, queue):
    """
    Persist the scrape results for `instance` and enqueue newly-seen peers.

    - Updates the instance's stats, description, version, and status from `data`.
    - On a successful scrape, creates Instance rows for peers we have never
      seen before, puts them on the work queue, and records new directed
      PeerRelationship edges from `instance` to each of its peers.

    Runs in one transaction under an exclusive table lock so concurrent
    workers can't bulk_create duplicate instances or relationships.
    """
    # Validate the ints. Some servers that appear to be fake instances have e.g. negative numbers here.
    # TODO: these always return 1!
    instance.domain_count = validate_int(get_key(data, ['info', 'stats', 'domain_count']))
    instance.status_count = validate_int(get_key(data, ['info', 'stats', 'status_count']))
    instance.user_count = validate_int(get_key(data, ['info', 'stats', 'user_count']))
    instance.description = get_key(data, ['info', 'description'])
    instance.version = get_key(data, ['info', 'version'])
    instance.status = get_key(data, ['status'])
    instance.save()
    if data['status'] == 'success' and data['peers']:
        # TODO: handle a peer disappeer-ing
        # Create instances for the peers we haven't seen before and add them to the queue
        # TODO: share this among all threads so we only have to call it once at the start
        existing_instance_ids = Instance.objects.values_list('name', flat=True)
        new_instance_ids = [peer_id for peer_id in data['peers'] if peer_id not in existing_instance_ids]
        # bulk_create doesn't call save(), so the auto_now_add field won't get set automatically
        new_instances = [Instance(name=id, first_seen=datetime.now(), last_updated=datetime.now())
                         for id in new_instance_ids]
        Instance.objects.bulk_create(new_instances)
        for new_instance in new_instances:
            queue.put(new_instance)
        # Create relationships we haven't seen before
        existing_peer_ids = PeerRelationship.objects.filter(source=instance).values_list('target', flat=True)
        new_peer_ids = [peer_id for peer_id in data['peers'] if peer_id not in existing_peer_ids]
        if new_peer_ids:
            new_peers = Instance.objects.filter(name__in=new_peer_ids)
            new_relationships = [PeerRelationship(source=instance, target=new_peer, first_seen=datetime.now())
                                 for new_peer in new_peers]
            PeerRelationship.objects.bulk_create(new_relationships)
    self.stdout.write(log("Saved {}".format(data['instance_name'])))
def worker(self, queue: multiprocessing.JoinableQueue):
    """
    Main worker loop: pull Instance objects off the queue, scrape them, and
    save the results (which may enqueue newly-discovered peers).

    Runs forever; the pool processes are reaped when the parent exits after
    queue.join() returns.
    """
    # https://stackoverflow.com/a/38356519/3697202
    # Forked workers must not share the parent's database connections.
    db.connections.close_all()
    while True:
        # Get an item from the queue. Block if the queue is empty.
        instance = queue.get()
        if instance in self.done_bag:
            self.stderr.write(log("Skipping {}, already done. This should not have been added to the queue!".format(instance)))
            queue.task_done()
        else:
            # Fetch data on instance
            self.stdout.write(log("Processing {}".format(instance.name)))
            data = self.process_instance(instance)
            # save_data() enqueues new peers itself, so no requeue loop here.
            self.save_data(instance, data, queue)
            self.done_bag.add(instance)
            queue.task_done()
def handle(self, *args, **options):
    """
    Entry point for the scrape command.

    Seeds the work queue with instances not updated for a week, or — on a
    fresh database — with the SEED instance, then lets a pool of NUM_THREADS
    workers drain it.
    """
    start_time = time.time()
    stale_instances = Instance.objects.filter(last_updated__lte=datetime.now() - timedelta(weeks=1))
    queue = multiprocessing.JoinableQueue()
    if stale_instances:
        # Enqueue each stale instance individually. Putting the whole list on
        # the queue as a single item would hand workers an unprocessable
        # (and unhashable) object.
        for stale_instance in stale_instances:
            queue.put(stale_instance)
    elif not Instance.objects.exists():
        instance, _ = Instance.objects.get_or_create(name=SEED)
        queue.put(instance)
    # pool = multiprocessing.Pool(1, initializer=self.worker, initargs=(queue, ))  # Disable concurrency (debug)
    pool = multiprocessing.Pool(NUM_THREADS, initializer=self.worker, initargs=(queue, ))
    # Block until every queued instance has been marked task_done().
    queue.join()
    end_time = time.time()
    self.stdout.write(self.style.SUCCESS(log("Successfully scraped the fediverse in {:.0f}s".format(end_time - start_time))))

View File

@ -1,6 +1,7 @@
# Generated by Django 2.1 on 2018-08-29 17:37
# Generated by Django 2.1 on 2018-08-30 19:57
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
# NOTE(review): rendered-diff fragment. The hunk headers below hide parts of
# this migration (its dependencies and the start of CreateModel('Instance')).
@ -15,6 +16,7 @@ class Migration(migrations.Migration):
            name='Instance',
            fields=[
                ('name', models.CharField(max_length=200, primary_key=True, serialize=False)),
                ('description', models.TextField(blank=True)),
                ('domain_count', models.IntegerField(blank=True, null=True)),
                ('status_count', models.IntegerField(blank=True, null=True)),
                ('user_count', models.IntegerField(blank=True, null=True)),
@ -22,7 +24,20 @@ class Migration(migrations.Migration):
                ('status', models.CharField(max_length=100)),
                ('first_seen', models.DateTimeField(auto_now_add=True)),
                ('last_updated', models.DateTimeField(auto_now=True)),
                # NOTE(review): presumably the removed (old) side of the diff --
                # replaced by the 'following' through-model field added below.
                ('peers', models.ManyToManyField(related_name='_instance_peers_+', to='scraper.Instance')),
            ],
        ),
        # Directed edge between two instances, used as the through model for
        # Instance.following.
        migrations.CreateModel(
            name='PeerRelationship',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('first_seen', models.DateTimeField(auto_now_add=True)),
                ('source', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='following_relationship', to='scraper.Instance')),
                ('target', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='follower_relationships', to='scraper.Instance')),
            ],
        ),
        migrations.AddField(
            model_name='instance',
            name='following',
            field=models.ManyToManyField(related_name='followers', through='scraper.PeerRelationship', to='scraper.Instance'),
        ),
    ]

View File

@ -1,18 +0,0 @@
# Generated by Django 2.1 on 2018-08-29 18:01
from django.db import migrations, models
class Migration(migrations.Migration):
    # NOTE(review): this fragment comes from a file the diff deletes outright
    # (hunk header "@ -1,18 +0,0" above); the 'description' field it added is
    # folded into the regenerated 0001_initial migration instead.

    dependencies = [
        ('scraper', '0001_initial'),
    ]

    operations = [
        # Adds the free-text instance description column.
        migrations.AddField(
            model_name='instance',
            name='description',
            field=models.TextField(blank=True),
        ),
    ]

View File

@ -2,6 +2,12 @@ from django.db import models
class Instance(models.Model):
    """
    The main model that saves details of an instance and the links between
    instances.

    Don't change the schema without verifying that the gephi script can still read the data.
    """
    # Primary key
    name = models.CharField(max_length=200, primary_key=True)
    # NOTE(review): rendered-diff hunk header; it hides the stats fields
    # (description, domain_count, status_count, user_count, version -- see the
    # 0001_initial migration for the full list).
@ -14,13 +20,16 @@
    status = models.CharField(max_length=100)
    # Foreign keys
    # The peers endpoint returns a "list of all domain names known to this instance"
    # (https://github.com/tootsuite/mastodon/pull/6125)
    # In other words, an asymmetrical relationship here doesn't make much sense. If we one day can get a list of
    # instances that the instance actively follows (i.e. knows and not suspended), it's worth adding an
    # asymmetrical relation.
    # NOTE(review): diff residue -- the symmetrical 'peers' field below is the
    # old definition; the diff replaces it with the 'following' through-model
    # field on the next line.
    peers = models.ManyToManyField('self', symmetrical=True)
    following = models.ManyToManyField('self', symmetrical=False, through='PeerRelationship', related_name="followers")
    # Automatic fields
    first_seen = models.DateTimeField(auto_now_add=True)
    last_updated = models.DateTimeField(auto_now=True)
class PeerRelationship(models.Model):
source = models.ForeignKey(Instance, related_name="following_relationship", on_delete=models.CASCADE)
target = models.ForeignKey(Instance, related_name="follower_relationships", on_delete=models.CASCADE)
# Metadata
first_seen = models.DateTimeField(auto_now_add=True)