parent
3cbc4dd1b6
commit
93932c5196
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -2,6 +2,7 @@
|
|||
.idea/
|
||||
config.json
|
||||
backend/static/
|
||||
*.gexf
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
|
|
|
@ -2,7 +2,11 @@
|
|||
fediverse.space is a tool to explore instances in the fediverse.
|
||||
|
||||
## Running it
|
||||
* `cp config.json.template config.json` and enter your configuration details.
|
||||
* `cp config.json.template config.json` and enter your configuration details. I've used a postgres database for development.
|
||||
* Set the environment variable `FEDIVERSE_CONFIG` to point to the path of this file.
|
||||
* `pip install -r requirements.txt`
|
||||
* `yarn install`
|
||||
* Make sure you have the Java 8 JRE (to run) or JDK (to develop) installed, and gradle
|
||||
* For development, run `python manage.py runserver --settings=backend.settings.dev`
|
||||
* In production, set the environment variable `DJANGO_SETTINGS_MODULE=backend.settings.production`
|
||||
|
||||
|
|
8
apiv1/_util.py
Normal file
8
apiv1/_util.py
Normal file
|
@ -0,0 +1,8 @@
|
|||
def to_representation(self, instance):
|
||||
"""
|
||||
Object instance -> Dict of primitive datatypes.
|
||||
We use a custom to_representation function to exclude empty fields in the serialized JSON.
|
||||
"""
|
||||
ret = super(InstanceListSerializer, self).to_representation(instance)
|
||||
ret = OrderedDict(list(filter(lambda x: x[1], ret.items())))
|
||||
return ret
|
|
@ -1,6 +1,6 @@
|
|||
from rest_framework import serializers
|
||||
from collections import OrderedDict
|
||||
from scraper.models import Instance
|
||||
from scraper.models import Instance, PeerRelationship
|
||||
|
||||
|
||||
class InstanceListSerializer(serializers.ModelSerializer):
|
||||
|
@ -11,6 +11,7 @@ class InstanceListSerializer(serializers.ModelSerializer):
|
|||
def to_representation(self, instance):
|
||||
"""
|
||||
Object instance -> Dict of primitive datatypes.
|
||||
We use a custom to_representation function to exclude empty fields in the serialized JSON.
|
||||
"""
|
||||
ret = super(InstanceListSerializer, self).to_representation(instance)
|
||||
ret = OrderedDict(list(filter(lambda x: x[1], ret.items())))
|
||||
|
@ -23,3 +24,39 @@ class InstanceDetailSerializer(serializers.ModelSerializer):
|
|||
class Meta:
|
||||
model = Instance
|
||||
fields = '__all__'
|
||||
|
||||
|
||||
class EdgeSerializer(serializers.ModelSerializer):
|
||||
id = serializers.SerializerMethodField('get_pk')
|
||||
|
||||
class Meta:
|
||||
model = PeerRelationship
|
||||
fields = ('source', 'target', 'id')
|
||||
|
||||
def get_pk(self, obj):
|
||||
return obj.pk
|
||||
|
||||
|
||||
class NodeSerializer(serializers.ModelSerializer):
|
||||
id = serializers.SerializerMethodField('get_name')
|
||||
label = serializers.SerializerMethodField('get_name')
|
||||
size = serializers.SerializerMethodField()
|
||||
|
||||
class Meta:
|
||||
model = Instance
|
||||
fields = ('id', 'label', 'size')
|
||||
|
||||
def get_name(self, obj):
|
||||
return obj.name
|
||||
|
||||
def get_size(self, obj):
|
||||
return obj.user_count or 1
|
||||
|
||||
def to_representation(self, instance):
|
||||
"""
|
||||
Object instance -> Dict of primitive datatypes.
|
||||
We use a custom to_representation function to exclude empty fields in the serialized JSON.
|
||||
"""
|
||||
ret = super(NodeSerializer, self).to_representation(instance)
|
||||
ret = OrderedDict(list(filter(lambda x: x[1], ret.items())))
|
||||
return ret
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
from rest_framework import viewsets
|
||||
from scraper.models import Instance
|
||||
from apiv1.serializers import InstanceListSerializer, InstanceDetailSerializer
|
||||
from scraper.models import Instance, PeerRelationship
|
||||
from apiv1.serializers import InstanceListSerializer, InstanceDetailSerializer, NodeSerializer, EdgeSerializer
|
||||
|
||||
|
||||
class InstanceViewSet(viewsets.ReadOnlyModelViewSet):
|
||||
|
@ -18,3 +18,20 @@ class InstanceViewSet(viewsets.ReadOnlyModelViewSet):
|
|||
if hasattr(self, 'detail_serializer_class'):
|
||||
return self.detail_serializer_class
|
||||
return self.serializer_class
|
||||
|
||||
|
||||
class EdgeView(viewsets.ReadOnlyModelViewSet):
|
||||
"""
|
||||
Endpoint to get a list of the graph's edges in a SigmaJS-friendly format.
|
||||
"""
|
||||
queryset = PeerRelationship.objects.all()[:1000]
|
||||
serializer_class = EdgeSerializer
|
||||
|
||||
|
||||
class NodeView(viewsets.ReadOnlyModelViewSet):
|
||||
"""
|
||||
Endpoint to get a list of the graph's nodes in a SigmaJS-friendly format.
|
||||
"""
|
||||
# queryset = Instance.objects.filter(status='success')
|
||||
queryset = Instance.objects.all()
|
||||
serializer_class = NodeSerializer
|
||||
|
|
|
@ -135,7 +135,7 @@ USE_I18N = True
|
|||
|
||||
USE_L10N = True
|
||||
|
||||
USE_TZ = True
|
||||
USE_TZ = False
|
||||
|
||||
|
||||
# Static files (CSS, JavaScript, Images)
|
||||
|
|
|
@ -28,10 +28,11 @@ class OptionalTrailingSlashRouter(routers.DefaultRouter):
|
|||
|
||||
router = OptionalTrailingSlashRouter()
|
||||
router.register(r'instances', views.InstanceViewSet)
|
||||
router.register(r'graph/nodes', views.NodeView)
|
||||
router.register(r'graph/edges', views.EdgeView)
|
||||
|
||||
urlpatterns = [
|
||||
path('api/v1/', include(router.urls)),
|
||||
path('silk/', include('silk.urls', namespace='silk')),
|
||||
path('', TemplateView.as_view(template_name='index.html')),
|
||||
]
|
||||
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
"react-dom": "^16.4.2",
|
||||
"react-redux": "^5.0.7",
|
||||
"react-scripts-ts": "2.17.0",
|
||||
"react-sigma": "^1.2.30",
|
||||
"react-virtualized": "^9.20.1",
|
||||
"redux": "^4.0.0",
|
||||
"redux-thunk": "^2.3.0"
|
||||
|
|
|
@ -5,25 +5,30 @@ import { Dispatch } from 'redux';
|
|||
import { Button, Intent, NonIdealState, Spinner } from '@blueprintjs/core';
|
||||
import { IconNames } from '@blueprintjs/icons';
|
||||
|
||||
import { Graph } from './components/Graph';
|
||||
import { Nav } from './components/Nav';
|
||||
import { fetchInstances } from './redux/actions';
|
||||
import { IAppState, IInstance } from './redux/types';
|
||||
import { fetchGraph, fetchInstances } from './redux/actions';
|
||||
import { IAppState, IGraph, IInstance } from './redux/types';
|
||||
|
||||
interface IAppProps {
|
||||
currentInstanceName?: string | null;
|
||||
graph?: IGraph;
|
||||
instances?: IInstance[],
|
||||
isLoadingGraph: boolean;
|
||||
isLoadingInstances: boolean,
|
||||
fetchInstances: () => void;
|
||||
fetchGraph: () => void;
|
||||
}
|
||||
class AppImpl extends React.Component<IAppProps> {
|
||||
public render() {
|
||||
let body = this.welcomeState();
|
||||
if (this.props.isLoadingInstances) {
|
||||
body = this.loadingState();
|
||||
} else if (!!this.props.instances) {
|
||||
body = this.renderGraph()
|
||||
body = this.loadingState("Loading instances...");
|
||||
} else if (this.props.isLoadingGraph) {
|
||||
body = this.loadingState("Loading graph...");
|
||||
} else if (!!this.props.graph) {
|
||||
body = <Graph />;
|
||||
}
|
||||
// TODO: show the number of instances up front
|
||||
return (
|
||||
<div className="App bp3-dark">
|
||||
<Nav />
|
||||
|
@ -32,49 +37,44 @@ class AppImpl extends React.Component<IAppProps> {
|
|||
);
|
||||
}
|
||||
|
||||
public componentDidMount() {
|
||||
this.props.fetchInstances();
|
||||
}
|
||||
|
||||
private welcomeState = () => {
|
||||
const numInstances = this.props.instances ? this.props.instances.length : "lots of";
|
||||
const description = `There are ${numInstances} known instances, so loading the graph might take a little while. Ready?`
|
||||
return (
|
||||
<NonIdealState
|
||||
className="fediverse-welcome"
|
||||
icon={IconNames.GLOBE_NETWORK}
|
||||
title="Welcome to fediverse.space!"
|
||||
description="There are currently $MANY known instances, so loading them might take a little while. Ready?"
|
||||
action={<Button intent={Intent.PRIMARY} text={"Let's go"} onClick={this.props.fetchInstances} />}
|
||||
description={description}
|
||||
action={<Button intent={Intent.PRIMARY} text={"Let's go"} onClick={this.props.fetchGraph} />}
|
||||
/>
|
||||
)
|
||||
}
|
||||
|
||||
private loadingState = () => {
|
||||
private loadingState = (title?: string) => {
|
||||
return (
|
||||
<NonIdealState
|
||||
className="fediverse-welcome"
|
||||
icon={<Spinner />}
|
||||
title="Loading..."
|
||||
title={title || "Loading..."}
|
||||
/>
|
||||
)
|
||||
}
|
||||
|
||||
private renderGraph = () => {
|
||||
return (
|
||||
<div>
|
||||
<NonIdealState
|
||||
className="fediverse-welcome"
|
||||
icon={IconNames.SEARCH_AROUND}
|
||||
title="Graph. TODO"
|
||||
description={"Selected " + (this.props.currentInstanceName || "nothing")}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
const mapStateToProps = (state: IAppState) => ({
|
||||
currentInstanceName: state.currentInstanceName,
|
||||
graph: state.data.graph,
|
||||
instances: state.data.instances,
|
||||
isLoadingGraph: state.data.isLoadingGraph,
|
||||
isLoadingInstances: state.data.isLoadingInstances,
|
||||
})
|
||||
const mapDispatchToProps = (dispatch: Dispatch) => ({
|
||||
fetchInstances: () => dispatch(fetchInstances() as any)
|
||||
fetchGraph: () => dispatch(fetchGraph() as any),
|
||||
fetchInstances: () => dispatch(fetchInstances() as any),
|
||||
})
|
||||
export const App = connect(mapStateToProps, mapDispatchToProps)(AppImpl)
|
||||
|
|
60
frontend/src/components/Graph.jsx
Normal file
60
frontend/src/components/Graph.jsx
Normal file
|
@ -0,0 +1,60 @@
|
|||
import * as React from 'react';
|
||||
import { connect } from 'react-redux';
|
||||
import { NodeShapes, RandomizeNodePositions, RelativeSize, Sigma, SigmaEnableWebGL, LoadGEXF, Filter } from 'react-sigma';
|
||||
|
||||
import { selectInstance } from '../redux/actions';
|
||||
|
||||
const STYLE = {
|
||||
bottom: "0",
|
||||
left: "0",
|
||||
position: "absolute",
|
||||
right: "0",
|
||||
top: "50px",
|
||||
}
|
||||
const SETTINGS = {
|
||||
defaultEdgeColor: "#5C7080",
|
||||
defaultNodeColor: "#CED9E0",
|
||||
drawEdges: true,
|
||||
drawLabels: true,
|
||||
edgeColor: "default",
|
||||
}
|
||||
|
||||
class GraphImpl extends React.Component {
|
||||
|
||||
render() {
|
||||
if (!this.props.graph) {
|
||||
return null;
|
||||
}
|
||||
return (
|
||||
<Sigma
|
||||
graph={this.props.graph}
|
||||
renderer="webgl"
|
||||
settings={SETTINGS}
|
||||
style={STYLE}
|
||||
onClickNode={(e) => this.props.selectInstance(e.data.node.label)}
|
||||
onClickStage={(e) => this.props.selectInstance(null)}
|
||||
>
|
||||
<RandomizeNodePositions />
|
||||
<Filter neighborsOf={this.props.currentInstanceName} />
|
||||
<RelativeSize initialSize={15} />
|
||||
</Sigma>
|
||||
)
|
||||
}
|
||||
|
||||
// onClickNode = (e) => {
|
||||
// this.props.selectInstance(e.data.node.label);
|
||||
// }
|
||||
|
||||
// zoomToNode = (camera, node) => {
|
||||
// s
|
||||
// }
|
||||
}
|
||||
|
||||
const mapStateToProps = (state) => ({
|
||||
currentInstanceName: state.currentInstanceName,
|
||||
graph: state.data.graph,
|
||||
})
|
||||
const mapDispatchToProps = (dispatch) => ({
|
||||
selectInstance: (instanceName) => dispatch(selectInstance(instanceName)),
|
||||
})
|
||||
export const Graph = connect(mapStateToProps, mapDispatchToProps)(GraphImpl)
|
|
@ -22,7 +22,7 @@ FocusStyleManager.onlyShowFocusOnTabs();
|
|||
// Initialize redux
|
||||
// @ts-ignore
|
||||
const composeEnhancers = window.__REDUX_DEVTOOLS_EXTENSION_COMPOSE__ || compose;
|
||||
const store = createStore(rootReducer, /* preloadedState, */ composeEnhancers(
|
||||
const store = createStore(rootReducer, composeEnhancers(
|
||||
applyMiddleware(thunk)
|
||||
));
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import { Dispatch } from 'redux';
|
||||
|
||||
import { getFromApi } from '../util';
|
||||
import { ActionType, IInstance } from './types';
|
||||
import { ActionType, IGraph, IInstance } from './types';
|
||||
|
||||
export const selectInstance = (instanceName: string) => {
|
||||
return {
|
||||
|
@ -23,6 +23,19 @@ export const receiveInstances = (instances: IInstance[]) => {
|
|||
}
|
||||
}
|
||||
|
||||
export const requestGraph = () => {
|
||||
return {
|
||||
type: ActionType.REQUEST_GRAPH,
|
||||
}
|
||||
}
|
||||
|
||||
export const receiveGraph = (graph: IGraph) => {
|
||||
return {
|
||||
payload: graph,
|
||||
type: ActionType.RECEIVE_GRAPH,
|
||||
}
|
||||
}
|
||||
|
||||
/** Async actions: https://redux.js.org/advanced/asyncactions */
|
||||
|
||||
export const fetchInstances = () => {
|
||||
|
@ -30,10 +43,22 @@ export const fetchInstances = () => {
|
|||
return (dispatch: Dispatch) => {
|
||||
dispatch(requestInstances());
|
||||
return getFromApi("instances")
|
||||
.then(response => {
|
||||
return response.json();
|
||||
})
|
||||
.then(instances => dispatch(receiveInstances(instances))
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
export const fetchGraph = () => {
|
||||
// TODO: handle errors
|
||||
return (dispatch: Dispatch) => {
|
||||
dispatch(requestGraph());
|
||||
return Promise.all([getFromApi("graph/edges"), getFromApi("graph/nodes")])
|
||||
.then(responses => {
|
||||
return {
|
||||
edges: responses[0],
|
||||
nodes: responses[1],
|
||||
};
|
||||
})
|
||||
.then(graph => dispatch(receiveGraph(graph)))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@ import { combineReducers } from 'redux';
|
|||
import { ActionType, IAction, IDataState } from './types';
|
||||
|
||||
const initialDataState = {
|
||||
isLoadingGraph: false,
|
||||
isLoadingInstances: false,
|
||||
}
|
||||
const data = (state: IDataState = initialDataState, action: IAction) => {
|
||||
|
@ -19,6 +20,17 @@ const data = (state: IDataState = initialDataState, action: IAction) => {
|
|||
instances: action.payload,
|
||||
isLoadingInstances: false,
|
||||
};
|
||||
case ActionType.REQUEST_GRAPH:
|
||||
return {
|
||||
...state,
|
||||
isLoadingGraph: true,
|
||||
};
|
||||
case ActionType.RECEIVE_GRAPH:
|
||||
return {
|
||||
...state,
|
||||
graph: action.payload,
|
||||
isLoadingGraph: false,
|
||||
};
|
||||
default:
|
||||
return state;
|
||||
}
|
||||
|
|
|
@ -2,6 +2,8 @@ export enum ActionType {
|
|||
SELECT_INSTANCE = 'SELECT_INSTANCE',
|
||||
REQUEST_INSTANCES = 'REQUEST_INSTANCES',
|
||||
RECEIVE_INSTANCES = 'RECEIVE_INSTANCES',
|
||||
REQUEST_GRAPH = 'REQUEST_GRAPH',
|
||||
RECEIVE_GRAPH = 'RECEIVE_GRAPH',
|
||||
}
|
||||
|
||||
export interface IAction {
|
||||
|
@ -14,12 +16,34 @@ export interface IInstance {
|
|||
numUsers?: number,
|
||||
}
|
||||
|
||||
interface IGraphNode {
|
||||
id: string;
|
||||
label: string;
|
||||
size?: number;
|
||||
color?: string;
|
||||
}
|
||||
|
||||
interface IGraphEdge {
|
||||
source: string;
|
||||
target: string;
|
||||
id?: string;
|
||||
}
|
||||
|
||||
export interface IGraph {
|
||||
nodes: IGraphNode[];
|
||||
edges: IGraphEdge[];
|
||||
}
|
||||
|
||||
// Redux state
|
||||
|
||||
export interface IDataState {
|
||||
instances?: IInstance[],
|
||||
graph?: IGraph,
|
||||
isLoadingInstances: boolean,
|
||||
isLoadingGraph: boolean,
|
||||
}
|
||||
|
||||
export interface IAppState {
|
||||
currentInstanceName: string | null,
|
||||
data: IDataState,
|
||||
}
|
||||
}
|
|
@ -1,11 +1,11 @@
|
|||
import fetch from 'cross-fetch';
|
||||
|
||||
const API_ROOT = "https://fediverse.space/api/v1/"
|
||||
// const API_ROOT = "http://localhost:8000/api/v1/"
|
||||
// const API_ROOT = "https://fediverse.space/api/v1/"
|
||||
const API_ROOT = "http://localhost:8000/api/v1/"
|
||||
|
||||
export const getFromApi = (path: string): Promise<any> => {
|
||||
const domain = API_ROOT.endsWith("/") ? API_ROOT : API_ROOT + "/";
|
||||
path = path.endsWith("/") ? path : path + "/";
|
||||
path += "?format=json"
|
||||
return fetch(domain + path);
|
||||
return fetch(domain + path).then(response => response.json());
|
||||
}
|
||||
|
|
|
@ -5809,6 +5809,10 @@ react-scripts-ts@2.17.0:
|
|||
optionalDependencies:
|
||||
fsevents "^1.1.3"
|
||||
|
||||
react-sigma@^1.2.30:
|
||||
version "1.2.30"
|
||||
resolved "https://registry.yarnpkg.com/react-sigma/-/react-sigma-1.2.30.tgz#794f88e796c4f763158afe404d10d9635f848846"
|
||||
|
||||
react-transition-group@^2.2.1:
|
||||
version "2.4.0"
|
||||
resolved "https://registry.yarnpkg.com/react-transition-group/-/react-transition-group-2.4.0.tgz#1d9391fabfd82e016f26fabd1eec329dbd922b5a"
|
||||
|
|
18
gephi/.gitignore
vendored
Normal file
18
gephi/.gitignore
vendored
Normal file
|
@ -0,0 +1,18 @@
|
|||
.gradle/
|
||||
gradle/
|
||||
build/
|
||||
lib/*
|
||||
!lib/.gitkeep
|
||||
|
||||
.idea/
|
||||
|
||||
# Ignore Gradle GUI config
|
||||
gradle-app.setting
|
||||
|
||||
# Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored)
|
||||
!gradle-wrapper.jar
|
||||
|
||||
# Cache of project
|
||||
.gradletasknamecache
|
||||
|
||||
*.javac
|
23
gephi/README.md
Normal file
23
gephi/README.md
Normal file
|
@ -0,0 +1,23 @@
|
|||
# Gephi
|
||||
|
||||
This subproject uses Gephi to layout a graph that'll then be served to people on the front-end.
|
||||
Always make sure to run `./gradlew shadowJar` to compile your changes.
|
||||
|
||||
Note that it won't compile with the given repos:
|
||||
```
|
||||
> Could not resolve all files for configuration ':compile'.
|
||||
> Could not find net.java.dev:stax-utils:snapshot-20100402.
|
||||
Searched in the following locations:
|
||||
- https://repo.maven.apache.org/maven2/net/java/dev/stax-utils/snapshot-20100402/stax-utils-snapshot-20100402.pom
|
||||
- https://repo.maven.apache.org/maven2/net/java/dev/stax-utils/snapshot-20100402/stax-utils-snapshot-20100402.jar
|
||||
- https://jcenter.bintray.com/net/java/dev/stax-utils/snapshot-20100402/stax-utils-snapshot-20100402.pom
|
||||
- https://jcenter.bintray.com/net/java/dev/stax-utils/snapshot-20100402/stax-utils-snapshot-20100402.jar
|
||||
- https://dl.google.com/dl/android/maven2/net/java/dev/stax-utils/snapshot-20100402/stax-utils-snapshot-20100402.pom
|
||||
- https://dl.google.com/dl/android/maven2/net/java/dev/stax-utils/snapshot-20100402/stax-utils-snapshot-20100402.jar
|
||||
- http://bits.netbeans.org/nexus/content/groups/netbeans/net/java/dev/stax-utils/snapshot-20100402/stax-utils-snapshot-20100402.pom
|
||||
- http://bits.netbeans.org/nexus/content/groups/netbeans/net/java/dev/stax-utils/snapshot-20100402/stax-utils-snapshot-20100402.jar
|
||||
Required by:
|
||||
project : > org.gephi:gephi-toolkit:0.9.2 > org.gephi:core-library-wrapper:0.9.2
|
||||
```
|
||||
|
||||
I just downloaded version 0.9.2 from the gephi-toolkit GitHub and manually added it to fediverse.space/gephi/lib... ¯\_(ツ)_/¯
|
47
gephi/build.gradle
Normal file
47
gephi/build.gradle
Normal file
|
@ -0,0 +1,47 @@
|
|||
buildscript {
|
||||
// repositories {
|
||||
// jcenter()
|
||||
// }
|
||||
dependencies {
|
||||
classpath 'com.github.jengelman.gradle.plugins:shadow:2.0.4'
|
||||
}
|
||||
}
|
||||
|
||||
plugins {
|
||||
id "base"
|
||||
id "java"
|
||||
id "com.github.johnrengelman.shadow" version "2.0.4"
|
||||
}
|
||||
|
||||
|
||||
repositories {
|
||||
flatDir {
|
||||
dirs 'lib'
|
||||
}
|
||||
// mavenCentral()
|
||||
// jcenter()
|
||||
// google()
|
||||
// maven {
|
||||
// url "http://bits.netbeans.org/nexus/content/groups/netbeans/"
|
||||
// }
|
||||
}
|
||||
|
||||
dependencies {
|
||||
compile group: 'org.gephi', name: 'gephi-toolkit', version: '0.9.2'
|
||||
}
|
||||
|
||||
jar {
|
||||
manifest {
|
||||
attributes (
|
||||
'Class-Path': configurations.compile.collect { it.getName() }.join(' '),
|
||||
'Main-Class': 'space.fediverse.graph.GraphBuilder'
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
shadowJar {
|
||||
baseName = 'graphBuilder'
|
||||
classifier = null
|
||||
version = null
|
||||
}
|
||||
|
172
gephi/gradlew
vendored
Executable file
172
gephi/gradlew
vendored
Executable file
|
@ -0,0 +1,172 @@
|
|||
#!/usr/bin/env sh
|
||||
|
||||
##############################################################################
|
||||
##
|
||||
## Gradle start up script for UN*X
|
||||
##
|
||||
##############################################################################
|
||||
|
||||
# Attempt to set APP_HOME
|
||||
# Resolve links: $0 may be a link
|
||||
PRG="$0"
|
||||
# Need this for relative symlinks.
|
||||
while [ -h "$PRG" ] ; do
|
||||
ls=`ls -ld "$PRG"`
|
||||
link=`expr "$ls" : '.*-> \(.*\)$'`
|
||||
if expr "$link" : '/.*' > /dev/null; then
|
||||
PRG="$link"
|
||||
else
|
||||
PRG=`dirname "$PRG"`"/$link"
|
||||
fi
|
||||
done
|
||||
SAVED="`pwd`"
|
||||
cd "`dirname \"$PRG\"`/" >/dev/null
|
||||
APP_HOME="`pwd -P`"
|
||||
cd "$SAVED" >/dev/null
|
||||
|
||||
APP_NAME="Gradle"
|
||||
APP_BASE_NAME=`basename "$0"`
|
||||
|
||||
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||
DEFAULT_JVM_OPTS=""
|
||||
|
||||
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
||||
MAX_FD="maximum"
|
||||
|
||||
warn () {
|
||||
echo "$*"
|
||||
}
|
||||
|
||||
die () {
|
||||
echo
|
||||
echo "$*"
|
||||
echo
|
||||
exit 1
|
||||
}
|
||||
|
||||
# OS specific support (must be 'true' or 'false').
|
||||
cygwin=false
|
||||
msys=false
|
||||
darwin=false
|
||||
nonstop=false
|
||||
case "`uname`" in
|
||||
CYGWIN* )
|
||||
cygwin=true
|
||||
;;
|
||||
Darwin* )
|
||||
darwin=true
|
||||
;;
|
||||
MINGW* )
|
||||
msys=true
|
||||
;;
|
||||
NONSTOP* )
|
||||
nonstop=true
|
||||
;;
|
||||
esac
|
||||
|
||||
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
||||
|
||||
# Determine the Java command to use to start the JVM.
|
||||
if [ -n "$JAVA_HOME" ] ; then
|
||||
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
|
||||
# IBM's JDK on AIX uses strange locations for the executables
|
||||
JAVACMD="$JAVA_HOME/jre/sh/java"
|
||||
else
|
||||
JAVACMD="$JAVA_HOME/bin/java"
|
||||
fi
|
||||
if [ ! -x "$JAVACMD" ] ; then
|
||||
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
|
||||
|
||||
Please set the JAVA_HOME variable in your environment to match the
|
||||
location of your Java installation."
|
||||
fi
|
||||
else
|
||||
JAVACMD="java"
|
||||
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
||||
|
||||
Please set the JAVA_HOME variable in your environment to match the
|
||||
location of your Java installation."
|
||||
fi
|
||||
|
||||
# Increase the maximum file descriptors if we can.
|
||||
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
|
||||
MAX_FD_LIMIT=`ulimit -H -n`
|
||||
if [ $? -eq 0 ] ; then
|
||||
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
|
||||
MAX_FD="$MAX_FD_LIMIT"
|
||||
fi
|
||||
ulimit -n $MAX_FD
|
||||
if [ $? -ne 0 ] ; then
|
||||
warn "Could not set maximum file descriptor limit: $MAX_FD"
|
||||
fi
|
||||
else
|
||||
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
|
||||
fi
|
||||
fi
|
||||
|
||||
# For Darwin, add options to specify how the application appears in the dock
|
||||
if $darwin; then
|
||||
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
|
||||
fi
|
||||
|
||||
# For Cygwin, switch paths to Windows format before running java
|
||||
if $cygwin ; then
|
||||
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
|
||||
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
|
||||
JAVACMD=`cygpath --unix "$JAVACMD"`
|
||||
|
||||
# We build the pattern for arguments to be converted via cygpath
|
||||
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
|
||||
SEP=""
|
||||
for dir in $ROOTDIRSRAW ; do
|
||||
ROOTDIRS="$ROOTDIRS$SEP$dir"
|
||||
SEP="|"
|
||||
done
|
||||
OURCYGPATTERN="(^($ROOTDIRS))"
|
||||
# Add a user-defined pattern to the cygpath arguments
|
||||
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
|
||||
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
|
||||
fi
|
||||
# Now convert the arguments - kludge to limit ourselves to /bin/sh
|
||||
i=0
|
||||
for arg in "$@" ; do
|
||||
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
|
||||
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
|
||||
|
||||
if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
|
||||
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
|
||||
else
|
||||
eval `echo args$i`="\"$arg\""
|
||||
fi
|
||||
i=$((i+1))
|
||||
done
|
||||
case $i in
|
||||
(0) set -- ;;
|
||||
(1) set -- "$args0" ;;
|
||||
(2) set -- "$args0" "$args1" ;;
|
||||
(3) set -- "$args0" "$args1" "$args2" ;;
|
||||
(4) set -- "$args0" "$args1" "$args2" "$args3" ;;
|
||||
(5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
|
||||
(6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
|
||||
(7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
|
||||
(8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
|
||||
(9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
|
||||
esac
|
||||
fi
|
||||
|
||||
# Escape application args
|
||||
save () {
|
||||
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
|
||||
echo " "
|
||||
}
|
||||
APP_ARGS=$(save "$@")
|
||||
|
||||
# Collect all arguments for the java command, following the shell quoting and substitution rules
|
||||
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
|
||||
|
||||
# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
|
||||
if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
|
||||
cd "$(dirname "$0")"
|
||||
fi
|
||||
|
||||
exec "$JAVACMD" "$@"
|
84
gephi/gradlew.bat
vendored
Normal file
84
gephi/gradlew.bat
vendored
Normal file
|
@ -0,0 +1,84 @@
|
|||
@if "%DEBUG%" == "" @echo off
|
||||
@rem ##########################################################################
|
||||
@rem
|
||||
@rem Gradle startup script for Windows
|
||||
@rem
|
||||
@rem ##########################################################################
|
||||
|
||||
@rem Set local scope for the variables with windows NT shell
|
||||
if "%OS%"=="Windows_NT" setlocal
|
||||
|
||||
set DIRNAME=%~dp0
|
||||
if "%DIRNAME%" == "" set DIRNAME=.
|
||||
set APP_BASE_NAME=%~n0
|
||||
set APP_HOME=%DIRNAME%
|
||||
|
||||
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||
set DEFAULT_JVM_OPTS=
|
||||
|
||||
@rem Find java.exe
|
||||
if defined JAVA_HOME goto findJavaFromJavaHome
|
||||
|
||||
set JAVA_EXE=java.exe
|
||||
%JAVA_EXE% -version >NUL 2>&1
|
||||
if "%ERRORLEVEL%" == "0" goto init
|
||||
|
||||
echo.
|
||||
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
||||
echo.
|
||||
echo Please set the JAVA_HOME variable in your environment to match the
|
||||
echo location of your Java installation.
|
||||
|
||||
goto fail
|
||||
|
||||
:findJavaFromJavaHome
|
||||
set JAVA_HOME=%JAVA_HOME:"=%
|
||||
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
|
||||
|
||||
if exist "%JAVA_EXE%" goto init
|
||||
|
||||
echo.
|
||||
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
|
||||
echo.
|
||||
echo Please set the JAVA_HOME variable in your environment to match the
|
||||
echo location of your Java installation.
|
||||
|
||||
goto fail
|
||||
|
||||
:init
|
||||
@rem Get command-line arguments, handling Windows variants
|
||||
|
||||
if not "%OS%" == "Windows_NT" goto win9xME_args
|
||||
|
||||
:win9xME_args
|
||||
@rem Slurp the command line arguments.
|
||||
set CMD_LINE_ARGS=
|
||||
set _SKIP=2
|
||||
|
||||
:win9xME_args_slurp
|
||||
if "x%~1" == "x" goto execute
|
||||
|
||||
set CMD_LINE_ARGS=%*
|
||||
|
||||
:execute
|
||||
@rem Setup the command line
|
||||
|
||||
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
|
||||
|
||||
@rem Execute Gradle
|
||||
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
|
||||
|
||||
:end
|
||||
@rem End local scope for the variables with windows NT shell
|
||||
if "%ERRORLEVEL%"=="0" goto mainEnd
|
||||
|
||||
:fail
|
||||
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
|
||||
rem the _cmd.exe /c_ return code!
|
||||
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
|
||||
exit /b 1
|
||||
|
||||
:mainEnd
|
||||
if "%OS%"=="Windows_NT" endlocal
|
||||
|
||||
:omega
|
10
gephi/settings.gradle
Normal file
10
gephi/settings.gradle
Normal file
|
@ -0,0 +1,10 @@
|
|||
/*
|
||||
* This file was generated by the Gradle 'init' task.
|
||||
*
|
||||
* The settings file is used to specify which projects to include in your build.
|
||||
*
|
||||
* Detailed information about configuring a multi-project build in Gradle can be found
|
||||
* in the user guide at https://docs.gradle.org/4.10/userguide/multi_project_builds.html
|
||||
*/
|
||||
|
||||
rootProject.name = 'gephi'
|
103
gephi/src/main/java/space/fediverse/graph/GraphBuilder.java
Normal file
103
gephi/src/main/java/space/fediverse/graph/GraphBuilder.java
Normal file
|
@ -0,0 +1,103 @@
|
|||
package space.fediverse.graph;
|
||||
|
||||
import org.gephi.graph.api.GraphController;
|
||||
import org.gephi.graph.api.GraphModel;
|
||||
import org.gephi.io.database.drivers.PostgreSQLDriver;
|
||||
import org.gephi.io.exporter.api.ExportController;
|
||||
import org.gephi.io.importer.api.Container;
|
||||
import org.gephi.io.importer.api.EdgeDirectionDefault;
|
||||
import org.gephi.io.importer.api.ImportController;
|
||||
import org.gephi.io.importer.plugin.database.EdgeListDatabaseImpl;
|
||||
import org.gephi.io.importer.plugin.database.ImporterEdgeList;
|
||||
import org.gephi.io.processor.plugin.DefaultProcessor;
|
||||
import org.gephi.layout.plugin.AutoLayout;
|
||||
import org.gephi.layout.plugin.forceAtlas2.ForceAtlas2;
|
||||
import org.gephi.project.api.ProjectController;
|
||||
import org.gephi.project.api.Workspace;
|
||||
import org.openide.util.Lookup;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
public class GraphBuilder {
|
||||
|
||||
private static final String nodeQuery = String.join(""
|
||||
, "SELECT"
|
||||
, " scraper_instance.name AS id,"
|
||||
, " scraper_instance.name AS label,"
|
||||
, " scraper_instance.user_count"
|
||||
, " FROM scraper_instance WHERE status = 'success'"
|
||||
);
|
||||
|
||||
private static final String edgeQuery = String.join(""
|
||||
, "SELECT"
|
||||
, " scraper_instance_peers.from_instance_id AS source,"
|
||||
, " scraper_instance_peers.to_instance_id AS target"
|
||||
, " FROM scraper_instance_peers"
|
||||
);
|
||||
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
Path currentRelativePath = Paths.get(".");
|
||||
|
||||
// Init project & workspace; required to do things w/ gephi
|
||||
ProjectController pc = Lookup.getDefault().lookup(ProjectController.class);
|
||||
pc.newProject();
|
||||
Workspace workspace = pc.getCurrentWorkspace();
|
||||
|
||||
// Get controllers and models
|
||||
ImportController importController = Lookup.getDefault().lookup(ImportController.class);
|
||||
GraphModel graphModel = Lookup.getDefault().lookup(GraphController.class).getGraphModel();
|
||||
// AttributeModel?
|
||||
|
||||
// Import from database
|
||||
|
||||
EdgeListDatabaseImpl db = new EdgeListDatabaseImpl();
|
||||
db.setSQLDriver(new PostgreSQLDriver());
|
||||
db.setHost("localhost");
|
||||
db.setPort(5432);
|
||||
db.setDBName(args[0]);
|
||||
db.setUsername(args[1]);
|
||||
db.setPasswd(args[2]);
|
||||
db.setNodeQuery(nodeQuery);
|
||||
db.setEdgeQuery(edgeQuery);
|
||||
|
||||
ImporterEdgeList edgeListImporter = new ImporterEdgeList();
|
||||
Container container = importController.importDatabase(db, edgeListImporter);
|
||||
// If a node is in the edge list, but not node list, we don't want to create it automatically
|
||||
container.getLoader().setAllowAutoNode(false);
|
||||
container.getLoader().setAllowSelfLoop(false);
|
||||
container.getLoader().setEdgeDefault(EdgeDirectionDefault.UNDIRECTED); // This is an undirected graph
|
||||
|
||||
// Add imported data to graph
|
||||
importController.process(container, new DefaultProcessor(), workspace);
|
||||
|
||||
// Layout
|
||||
AutoLayout autoLayout = new AutoLayout(2, TimeUnit.MINUTES);
|
||||
autoLayout.setGraphModel(graphModel);
|
||||
// YifanHuLayout firstLayout = new YifanHuLayout(null, new StepDisplacement(1f));
|
||||
ForceAtlas2 secondLayout = new ForceAtlas2(null);
|
||||
// AutoLayout.DynamicProperty adjustBySizeProperty = AutoLayout.createDynamicProperty("forceAtlas.adjustSizes.name", Boolean.TRUE, 0.1f);
|
||||
// AutoLayout.DynamicProperty repulsionProperty = AutoLayout.createDynamicProperty("forceAtlas.repulsionStrength.name", 500., 0f);
|
||||
// autoLayout.addLayout(firstLayout, 0.5f);
|
||||
// autoLayout.addLayout(secondLayout, 0.5f, new AutoLayout.DynamicProperty[]{adjustBySizeProperty, repulsionProperty});
|
||||
autoLayout.addLayout(secondLayout, 1f);
|
||||
autoLayout.execute();
|
||||
|
||||
// Export
|
||||
ExportController exportController = Lookup.getDefault().lookup(ExportController.class);
|
||||
try {
|
||||
exportController.exportFile(new File("fediverse.gexf"));
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
// Gephi doesn't seem to provide a good way to close the postgres connection, so we have to force close the
|
||||
// program. This'll leave a hanging connection for some period ¯\_(ツ)_/¯
|
||||
System.exit(0);
|
||||
}
|
||||
}
|
|
@ -1,3 +1,5 @@
|
|||
from datetime import datetime
|
||||
|
||||
LOCK_MODES = (
|
||||
'ACCESS SHARE',
|
||||
'ROW SHARE',
|
||||
|
@ -53,3 +55,11 @@ def get_key(data, keys: list):
|
|||
return val
|
||||
except KeyError:
|
||||
return ''
|
||||
|
||||
|
||||
def validate_int(integer):
    """Validate a value destined for a postgres ``IntegerField``.

    Returns the integer itself when it is a non-negative int that fits in a
    signed 32-bit column, and ``None`` otherwise (so invalid/fake values are
    stored as NULL). The previous ``x and y or None`` form returned the
    *boolean* ``True`` instead of the value, which is why the count columns
    always ended up as 1.
    """
    if isinstance(integer, int) and 0 <= integer < 2147483647:
        return integer
    return None
|
||||
|
||||
|
||||
def log(text):
    """Return *text* prefixed with the current ISO-8601 timestamp."""
    return f"{datetime.now().isoformat()} - {text}"
|
||||
|
|
22
scraper/management/commands/build_graph.py
Normal file
22
scraper/management/commands/build_graph.py
Normal file
|
@ -0,0 +1,22 @@
|
|||
import subprocess
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.conf import settings
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Management command that shells out to the bundled Gephi jar.

    The jar connects directly to postgres (it does not go through Django), so
    it is handed the same database name/user/password Django itself uses.
    """
    help = "Takes what's in the database and calls Gephi to create and layout a graph"

    def handle(self, *args, **options):
        """Invoke graphBuilder.jar against the default database.

        Reports a non-zero exit status on stderr instead of ignoring it, so a
        failed layout run is visible to the operator. (The redundant
        ``__init__`` that only delegated to ``super()`` has been removed.)
        """
        database_config = settings.DATABASES['default']
        # -Xmx4g: graph layout over the whole fediverse is memory-hungry.
        return_code = subprocess.call([
            'java',
            '-Xmx4g',
            '-jar',
            'gephi/build/libs/graphBuilder.jar',
            database_config['NAME'],
            database_config['USER'],
            database_config['PASSWORD'],
        ])
        if return_code != 0:
            self.stderr.write("graphBuilder.jar exited with status {}".format(return_code))
|
|
@ -7,11 +7,11 @@ import json
|
|||
import multiprocessing
|
||||
import requests
|
||||
import time
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timedelta
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.db import transaction
|
||||
from scraper.models import Instance
|
||||
from scraper.management.commands._util import require_lock, InvalidResponseError, get_key
|
||||
from django import db
|
||||
from scraper.models import Instance, PeerRelationship
|
||||
from scraper.management.commands._util import require_lock, InvalidResponseError, get_key, log, validate_int
|
||||
|
||||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
||||
# Because the script uses the Mastodon API other platforms like #
|
||||
|
@ -27,7 +27,8 @@ from scraper.management.commands._util import require_lock, InvalidResponseError
|
|||
# TODO: use the /api/v1/server/followers and /api/v1/server/following endpoints in peertube instances
|
||||
|
||||
SEED = 'mastodon.social'
|
||||
TIMEOUT = 1
|
||||
TIMEOUT = 10
|
||||
NUM_THREADS = 4
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
|
@ -49,6 +50,8 @@ class Command(BaseCommand):
|
|||
@staticmethod
|
||||
def get_instance_peers(instance_name: str):
|
||||
"""Collect connected instances"""
|
||||
# The peers endpoint returns a "list of all domain names known to this instance"
|
||||
# (https://github.com/tootsuite/mastodon/pull/6125)
|
||||
url = 'https://' + instance_name + '/api/v1/instance/peers'
|
||||
response = requests.get(url, timeout=TIMEOUT)
|
||||
json = response.json()
|
||||
|
@ -56,14 +59,14 @@ class Command(BaseCommand):
|
|||
raise InvalidResponseError("Could not get peers for {}".format(instance_name))
|
||||
return json
|
||||
|
||||
def process_instance(self, instance_name: str):
|
||||
def process_instance(self, instance: Instance):
|
||||
"""Given an instance, get all the data we're interested in"""
|
||||
self.stdout.write("{} - Processing {}".format(datetime.now().isoformat(), instance_name))
|
||||
data = dict()
|
||||
try:
|
||||
data['instance'] = instance_name
|
||||
data['info'] = self.get_instance_info(instance_name)
|
||||
data['peers'] = [peer for peer in self.get_instance_peers(instance_name) if peer] # get rid of null peers
|
||||
data['instance_name'] = instance.name
|
||||
data['info'] = self.get_instance_info(instance.name)
|
||||
# Get rid of peers that just say "null" and the instance itself
|
||||
data['peers'] = [peer for peer in self.get_instance_peers(instance.name) if peer and peer != instance.name]
|
||||
if not data['info'] and not data['peers']:
|
||||
# We got a response from the instance, but it didn't have any of the information we were expecting.
|
||||
raise InvalidResponseError
|
||||
|
@ -72,60 +75,74 @@ class Command(BaseCommand):
|
|||
except (InvalidResponseError,
|
||||
requests.exceptions.RequestException,
|
||||
json.decoder.JSONDecodeError) as e:
|
||||
data['instance'] = instance_name
|
||||
data['instance_name'] = instance.name
|
||||
data['status'] = type(e).__name__
|
||||
return data
|
||||
|
||||
@transaction.atomic
|
||||
@db.transaction.atomic
|
||||
@require_lock(Instance, 'ACCESS EXCLUSIVE')
|
||||
def save_data(self, data):
|
||||
def save_data(self, instance, data, queue):
|
||||
"""Save data"""
|
||||
defaults = dict()
|
||||
defaults['domain_count'] = get_key(data, ['info', 'stats', 'domain_count']) or None
|
||||
defaults['status_count'] = get_key(data, ['info', 'stats', 'status_count']) or None
|
||||
defaults['user_count'] = get_key(data, ['info', 'stats', 'user_count']) or None
|
||||
defaults['description'] = get_key(data, ['info', 'description'])
|
||||
defaults['version'] = get_key(data, ['info', 'version'])
|
||||
defaults['status'] = get_key(data, ['status'])
|
||||
instance, _ = Instance.objects.update_or_create(
|
||||
name=get_key(data, ['instance']),
|
||||
defaults=defaults,
|
||||
)
|
||||
if defaults['status'] == 'success' and data['peers']:
|
||||
# Save peers
|
||||
# TODO: make this shared amongst threads so the database only needs to be queried once
|
||||
# Validate the ints. Some servers that appear to be fake instances have e.g. negative numbers here.
|
||||
# TODO: these always return 1!
|
||||
instance.domain_count = validate_int(get_key(data, ['info', 'stats', 'domain_count']))
|
||||
instance.status_count = validate_int(get_key(data, ['info', 'stats', 'status_count']))
|
||||
instance.user_count = validate_int(get_key(data, ['info', 'stats', 'user_count']))
|
||||
instance.description = get_key(data, ['info', 'description'])
|
||||
instance.version = get_key(data, ['info', 'version'])
|
||||
instance.status = get_key(data, ['status'])
|
||||
instance.save()
|
||||
if data['status'] == 'success' and data['peers']:
|
||||
# TODO: handle a peer disappearing
|
||||
# Create instances for the peers we haven't seen before and add them to the queue
|
||||
# TODO: share this among all threads so we only have to call it once at the start
|
||||
existing_instance_ids = Instance.objects.values_list('name', flat=True)
|
||||
existing_peers = Instance.objects.filter(name__in=existing_instance_ids)
|
||||
new_peer_ids = [peer for peer in data['peers'] if peer not in existing_instance_ids]
|
||||
new_instance_ids = [peer_id for peer_id in data['peers'] if peer_id not in existing_instance_ids]
|
||||
# bulk_create doesn't call save(), so the auto_now_add field won't get set automatically
|
||||
new_instances = [Instance(name=id, first_seen=datetime.now(), last_updated=datetime.now())
|
||||
for id in new_instance_ids]
|
||||
Instance.objects.bulk_create(new_instances)
|
||||
for new_instance in new_instances:
|
||||
queue.put(new_instance)
|
||||
|
||||
# Create relationships we haven't seen before
|
||||
existing_peer_ids = PeerRelationship.objects.filter(source=instance).values_list('target', flat=True)
|
||||
new_peer_ids = [peer_id for peer_id in data['peers'] if peer_id not in existing_peer_ids]
|
||||
if new_peer_ids:
|
||||
new_peers = Instance.objects.bulk_create([Instance(name=peer) for peer in new_peer_ids])
|
||||
instance.peers.set(new_peers)
|
||||
instance.peers.set(existing_peers)
|
||||
self.stdout.write("{} - Saved {}".format(datetime.now().isoformat(), data['instance']))
|
||||
new_peers = Instance.objects.filter(name__in=new_peer_ids)
|
||||
new_relationships = [PeerRelationship(source=instance, target=new_peer, first_seen=datetime.now())
|
||||
for new_peer in new_peers]
|
||||
PeerRelationship.objects.bulk_create(new_relationships)
|
||||
self.stdout.write(log("Saved {}".format(data['instance_name'])))
|
||||
|
||||
def worker(self, queue: multiprocessing.JoinableQueue):
|
||||
"""The main worker that processes URLs"""
|
||||
# https://stackoverflow.com/a/38356519/3697202
|
||||
db.connections.close_all()
|
||||
while True:
|
||||
# Get an item from the queue. Block if the queue is empty.
|
||||
instance = queue.get()
|
||||
if instance in self.done_bag:
|
||||
print("Skipping {}, already done".format(instance))
|
||||
self.stderr.write(log("Skipping {}, already done. This should not have been added to the queue!".format(instance)))
|
||||
queue.task_done()
|
||||
else:
|
||||
# Fetch data on instance
|
||||
self.stdout.write(log("Processing {}".format(instance.name)))
|
||||
data = self.process_instance(instance)
|
||||
if 'peers' in data:
|
||||
for peer in [p for p in data['peers'] if p not in self.done_bag]:
|
||||
queue.put(peer)
|
||||
self.save_data(data)
|
||||
self.save_data(instance, data, queue)
|
||||
self.done_bag.add(instance)
|
||||
queue.task_done()
|
||||
|
||||
def handle(self, *args, **options):
|
||||
start_time = time.time()
|
||||
stale_instances = Instance.objects.filter(last_updated__lte=datetime.now()-timedelta(weeks=1))
|
||||
queue = multiprocessing.JoinableQueue()
|
||||
queue.put(SEED)
|
||||
# pool = multiprocessing.Pool(1, initializer=self.worker, initargs=(queue, )) # Disable concurrency (debug)
|
||||
pool = multiprocessing.Pool(initializer=self.worker, initargs=(queue, ))
|
||||
if stale_instances:
|
||||
queue.put(list(stale_instances))
|
||||
elif not Instance.objects.exists():
|
||||
instance, _ = Instance.objects.get_or_create(name=SEED)
|
||||
queue.put(instance)
|
||||
|
||||
pool = multiprocessing.Pool(NUM_THREADS, initializer=self.worker, initargs=(queue, ))
|
||||
queue.join()
|
||||
end_time = time.time()
|
||||
self.stdout.write(self.style.SUCCESS("Successfully scraped the fediverse in {:.0f}s".format(end_time-start_time)))
|
||||
self.stdout.write(self.style.SUCCESS(log("Successfully scraped the fediverse in {:.0f}s".format(end_time-start_time))))
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# Generated by Django 2.1 on 2018-08-29 17:37
|
||||
# Generated by Django 2.1 on 2018-08-30 19:57
|
||||
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
@ -15,6 +16,7 @@ class Migration(migrations.Migration):
|
|||
name='Instance',
|
||||
fields=[
|
||||
('name', models.CharField(max_length=200, primary_key=True, serialize=False)),
|
||||
('description', models.TextField(blank=True)),
|
||||
('domain_count', models.IntegerField(blank=True, null=True)),
|
||||
('status_count', models.IntegerField(blank=True, null=True)),
|
||||
('user_count', models.IntegerField(blank=True, null=True)),
|
||||
|
@ -22,7 +24,20 @@ class Migration(migrations.Migration):
|
|||
('status', models.CharField(max_length=100)),
|
||||
('first_seen', models.DateTimeField(auto_now_add=True)),
|
||||
('last_updated', models.DateTimeField(auto_now=True)),
|
||||
('peers', models.ManyToManyField(related_name='_instance_peers_+', to='scraper.Instance')),
|
||||
],
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name='PeerRelationship',
|
||||
fields=[
|
||||
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||
('first_seen', models.DateTimeField(auto_now_add=True)),
|
||||
('source', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='following_relationship', to='scraper.Instance')),
|
||||
('target', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='follower_relationships', to='scraper.Instance')),
|
||||
],
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='instance',
|
||||
name='following',
|
||||
field=models.ManyToManyField(related_name='followers', through='scraper.PeerRelationship', to='scraper.Instance'),
|
||||
),
|
||||
]
|
||||
|
|
|
@ -1,18 +0,0 @@
|
|||
# Generated by Django 2.1 on 2018-08-29 18:01
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Auto-generated migration: add the optional ``description`` text field
    to the ``Instance`` model, on top of the initial schema."""

    dependencies = [
        ('scraper', '0001_initial'),
    ]

    operations = [
        migrations.AddField(
            model_name='instance',
            name='description',
            # blank=True: the field may be empty — not every instance
            # publishes a description.
            field=models.TextField(blank=True),
        ),
    ]
|
|
@ -2,6 +2,12 @@ from django.db import models
|
|||
|
||||
|
||||
class Instance(models.Model):
|
||||
"""
|
||||
The main model that saves details of an instance and links between them in the peers
|
||||
property.
|
||||
|
||||
Don't change the schema without verifying that the gephi script can still read the data.
|
||||
"""
|
||||
# Primary key
|
||||
name = models.CharField(max_length=200, primary_key=True)
|
||||
|
||||
|
@ -14,13 +20,16 @@ class Instance(models.Model):
|
|||
status = models.CharField(max_length=100)
|
||||
|
||||
# Foreign keys
|
||||
# The peers endpoint returns a "list of all domain names known to this instance"
|
||||
# (https://github.com/tootsuite/mastodon/pull/6125)
|
||||
# In other words, an asymmetrical relationship here doesn't make much sense. If we one day can get a list of
|
||||
# instances that the instance actively follows (i.e. knows and not suspended), it's worth adding an
|
||||
# asymmetrical relation.
|
||||
peers = models.ManyToManyField('self', symmetrical=True)
|
||||
following = models.ManyToManyField('self', symmetrical=False, through='PeerRelationship', related_name="followers")
|
||||
|
||||
# Automatic fields
|
||||
first_seen = models.DateTimeField(auto_now_add=True)
|
||||
last_updated = models.DateTimeField(auto_now=True)
|
||||
|
||||
|
||||
class PeerRelationship(models.Model):
    """Directed edge between two instances.

    Serves as the ``through`` model for ``Instance.following``; rows are
    created by the scraper when an instance reports a peer.
    """
    # NOTE(review): related_name is singular on one side and plural on the
    # other ("following_relationship" vs "follower_relationships") —
    # presumably unintentional; confirm before relying on reverse accessors.
    source = models.ForeignKey(Instance, related_name="following_relationship", on_delete=models.CASCADE)
    target = models.ForeignKey(Instance, related_name="follower_relationships", on_delete=models.CASCADE)

    # Metadata
    # When this source->target link was first observed by the scraper.
    first_seen = models.DateTimeField(auto_now_add=True)
|
||||
|
|
Loading…
Reference in a new issue