fix edge generation, serve graph in cytoscape format

This commit is contained in:
Tao Bror Bojlén 2019-07-18 13:21:12 +03:00
parent 82677fcd32
commit 9478017eb0
No known key found for this signature in database
GPG key ID: C6EC7AAB905F9E6F
13 changed files with 166 additions and 130 deletions

View file

@ -80,7 +80,7 @@ defmodule Backend.Crawler do
# Save the state (after crawling) to the database.
defp save(%Crawler{domain: domain, result: result, found_api?: true, error: nil}) do
now = NaiveDateTime.truncate(NaiveDateTime.utc_now(), :second)
now = get_now()
## Update the instance we crawled ##
Repo.insert!(

View file

@ -13,6 +13,8 @@ defmodule Backend.Edge do
type: :string,
foreign_key: :target_domain
field :weight, :float
timestamps()
end

View file

@ -13,8 +13,6 @@ defmodule Backend.InstancePeer do
type: :string,
foreign_key: :target_domain
field :weight, :float, default: 0.0
timestamps()
end

View file

@ -5,7 +5,8 @@ defmodule Backend.Scheduler do
use Quantum.Scheduler, otp_app: :backend
alias Backend.{Crawl, Edge, Interaction, Instance, Repo}
alias Backend.{Crawl, Edge, CrawlInteraction, Instance, Repo}
import Backend.Util
import Ecto.Query
require Logger
@ -29,88 +30,136 @@ defmodule Backend.Scheduler do
Logger.info("Pruned #{deleted_num} old crawls.")
end
@doc """
Calculates every instance's "insularity score" -- that is, the number of mentions whose source and
target are the same instance, divided by the total number of interactions seen while crawling it.

The scores are upserted into `Instance` keyed on `domain`; on conflict only `insularity` and
`updated_at` are replaced. Instances without a positive interaction total from error-free crawls
are skipped, since no ratio can be computed for them.

Returns the result of `Repo.insert_all/3`.
"""
def generate_insularity_scores() do
  now = get_now()

  # Total interactions seen per instance, counting only crawls that recorded no error.
  crawls_subquery =
    Crawl
    |> select([c], %{
      instance_domain: c.instance_domain,
      interactions_seen: sum(c.interactions_seen)
    })
    |> where([c], is_nil(c.error))
    |> group_by([c], c.instance_domain)

  # Self-mentions per instance (source == target), paired with that instance's interaction total.
  rows =
    CrawlInteraction
    |> join(:left, [ci], c in subquery(crawls_subquery),
      on: ci.source_domain == c.instance_domain
    )
    |> where([ci], ci.source_domain == ci.target_domain)
    |> group_by([ci], ci.source_domain)
    |> select([ci, c], %{
      domain: ci.source_domain,
      mentions: sum(ci.mentions),
      # we can take min() because every row is the same
      interactions: min(c.interactions_seen)
    })
    |> Repo.all()

  Logger.info(inspect(rows))

  # The left join yields a nil total for instances that have no error-free crawl, and a total of 0
  # would divide by zero. Either case would raise ArithmeticError and abort the whole run, so such
  # rows are dropped. is_number/1 must be checked first: `nil > 0` is true in Erlang term ordering.
  scores =
    for %{domain: domain, mentions: mentions, interactions: interactions} <- rows,
        is_number(mentions) and is_number(interactions) and interactions > 0 do
      %{
        domain: domain,
        insularity: mentions / interactions,
        inserted_at: now,
        updated_at: now
      }
    end

  Instance
  |> Repo.insert_all(scores,
    on_conflict: {:replace, [:insularity, :updated_at]},
    conflict_target: :domain
  )
end
@doc """
This function aggregates statistics from the interactions in the database.
It calculates the strength of edges between nodes.
TODO: generate edge weights. The weight of an edge between two instances will be
(number of mentions of each other) / (total number of statuses crawled).
This requires us to keep track of how many statuses we've seen.
"""
def generate_edges() do
now = get_now()
crawls_subquery =
Crawl
|> select([c], %{
instance_domain: c.instance_domain,
statuses_seen: sum(c.statuses_seen)
})
|> where([c], is_nil(c.error))
|> group_by([c], c.instance_domain)
interactions =
Interaction
|> select([inter], {inter.source_domain, inter.target_domain})
|> join(:left, [inter], i_source in Instance, on: inter.source_domain == i_source.domain)
|> join(:left, [inter], i_target in Instance, on: inter.target_domain == i_target.domain)
|> where(
[inter, i_source, i_target],
not is_nil(i_source.last_crawl_timestamp) and not is_nil(i_target.last_crawl_timestamp)
CrawlInteraction
|> join(:left, [ci], c_source in subquery(crawls_subquery),
on: ci.source_domain == c_source.instance_domain
)
# Repo.all() returns a tuple like {"mastodon.social", "cursed.technology"}
|> join(:left, [ci], c_target in subquery(crawls_subquery),
on: ci.target_domain == c_target.instance_domain
)
|> group_by([ci], [ci.source_domain, ci.target_domain])
|> select([ci, c_source, c_target], %{
source_domain: ci.source_domain,
target_domain: ci.target_domain,
mentions: sum(ci.mentions),
# we can take min() because every row is the same
source_statuses_seen: min(c_source.statuses_seen),
target_statuses_seen: min(c_target.statuses_seen)
})
|> Repo.all()
# Create a map of %{source_domain => [target_domains]}
|> Enum.group_by(fn tuple -> Kernel.elem(tuple, 0) end, fn tuple ->
Kernel.elem(tuple, 1)
end)
# Calculate insularity score
# Get edges and their weights
Repo.transaction(fn ->
interactions
|> Enum.each(fn {source, targets} ->
total_mentions = length(targets)
self_mentions = Enum.count(targets, fn t -> t == source end)
insularity = self_mentions / total_mentions
Repo.insert!(
%Instance{
domain: source,
insularity: insularity
},
on_conflict: [set: [insularity: insularity]],
conflict_target: :domain
)
end)
# Get edges
edges = MapSet.new()
interactions
|> Enum.each(fn {source, targets} ->
targets
|> Enum.each(fn target ->
[key_a, key_b] = Enum.sort([source, target])
edge = %Edge{
source_domain: key_a,
target_domain: key_b
}
MapSet.put(edges, edge)
Logger.debug(inspect(edges))
end)
end)
Logger.debug(inspect(edges))
now = NaiveDateTime.truncate(NaiveDateTime.utc_now(), :second)
Repo.delete_all(Edge)
Edge
|> Repo.delete_all()
edges =
edges
|> MapSet.to_list()
|> Enum.map(fn %{source_domain: source_domain, target_domain: target_domain} ->
%Edge{
interactions
# Get a map of %{{source, target} => {total_mention_count, total_statuses_seen}}
|> Enum.reduce(%{}, fn
%{
source_domain: source_domain,
target_domain: target_domain,
updated_at: now,
inserted_at: now
mentions: mentions,
source_statuses_seen: source_statuses_seen,
target_statuses_seen: target_statuses_seen
} = x,
acc ->
Logger.info(inspect(x))
key = get_interaction_key(source_domain, target_domain)
# target_statuses_seen might be nil if that instance was never crawled. default to 0.
target_statuses_seen =
case target_statuses_seen do
nil -> 0
_ -> target_statuses_seen
end
statuses_seen = source_statuses_seen + target_statuses_seen
Map.update(acc, key, {mentions, statuses_seen}, fn {curr_mentions, curr_statuses_seen} ->
{curr_mentions + mentions, curr_statuses_seen}
end)
end)
|> Enum.map(fn {{source_domain, target_domain}, {mention_count, statuses_seen}} ->
%{
source_domain: source_domain,
target_domain: target_domain,
weight: mention_count / statuses_seen,
inserted_at: now,
updated_at: now
}
end)
Repo.insert_all(Edge, edges)
Edge
|> Repo.insert_all(edges)
end)
end
end

View file

@ -16,21 +16,28 @@ defmodule BackendWeb.GraphView do
false -> 1
end
# This is the format that cytoscape.js expects.
%{
id: node.domain,
label: node.domain,
size: size,
x: node.x,
y: node.y
data: %{
id: node.domain,
label: node.domain,
size: size
},
position: %{
x: node.x,
y: node.y
}
}
end
def render("edge.json", %{graph: edge}) do
%{
id: edge.id,
source: edge.source_domain,
target: edge.target_domain,
size: edge.weight
data: %{
id: edge.id,
source: edge.source_domain,
target: edge.target_domain,
weight: edge.weight
}
}
end
end

View file

@ -10,6 +10,9 @@ defmodule Backend.Repo.Migrations.CreateInstances do
add :version, :string
add :insularity, :float
add :x, :float
add :y, :float
timestamps()
end
@ -19,8 +22,6 @@ defmodule Backend.Repo.Migrations.CreateInstances do
add :source_domain, references(:instances, column: :domain, type: :string)
add :target_domain, references(:instances, column: :domain, type: :string)
add :weight, :float
timestamps()
end

View file

@ -6,6 +6,8 @@ defmodule Backend.Repo.Migrations.CreateEdges do
add :source_domain, references(:instances, column: :domain, type: :string), null: false
add :target_domain, references(:instances, column: :domain, type: :string), null: false
add :weight, :float, null: false
timestamps()
end

View file

@ -1,10 +0,0 @@
defmodule Backend.Repo.Migrations.AddInstanceCoords do
  # Migration that adds two float columns, `x` and `y`, to the `instances` table.
  use Ecto.Migration

  def change do
    alter table(:instances) do
      # Same two columns, same order and type; only the spelling differs.
      for column <- [:x, :y] do
        add(column, :float)
      end
    end
  end
end

View file

@ -69,11 +69,12 @@ class GraphImpl extends React.Component<IGraphProps, IGraphState> {
// Check that all nodes have size & coordinates; otherwise the graph will look messed up
const lengthBeforeFilter = graph.nodes.length;
graph = { ...graph, nodes: graph.nodes.filter(n => n.size && n.x && n.y) };
graph = { ...graph, nodes: graph.nodes.filter(n => n.data.size && n.position.x && n.position.y) };
if (graph.nodes.length !== lengthBeforeFilter) {
// tslint:disable-next-line:no-console
console.error(
"Some nodes were missing details: " + graph.nodes.filter(n => !n.size || !n.x || !n.y).map(n => n.label)
"Some nodes were missing details: " +
graph.nodes.filter(n => !n.data.size || !n.position.x || !n.position.y).map(n => n.data.label)
);
this.setState({ didError: true });
}
@ -125,29 +126,9 @@ class GraphImpl extends React.Component<IGraphProps, IGraphState> {
return;
}
this.cy = cytoscape({
autoungrabify: true,
autoungrabify: false,
container: this.cytoscapeDiv.current,
elements: {
edges: graph.edges.map(edge => ({
data: {
id: edge.id || `${edge.source}${edge.target}`,
source: edge.source,
target: edge.target,
weight: edge.size
},
group: "edges" as "edges"
})),
nodes: graph.nodes.map(node => ({
data: {
id: node.id
},
group: "nodes" as "nodes",
position: {
x: node.x,
y: node.y
}
}))
},
elements: graph,
layout: {
name: "preset"
},

View file

@ -83,7 +83,7 @@ class SidebarImpl extends React.Component<ISidebarProps, ISidebarState> {
} else if (
this.props.graph &&
this.props.instanceName &&
this.props.graph.nodes.map(n => n.id).indexOf(this.props.instanceName) < 0
this.props.graph.nodes.map(n => n.data.id).indexOf(this.props.instanceName) < 0
) {
return this.renderQuietInstanceState();
}
@ -177,13 +177,15 @@ class SidebarImpl extends React.Component<ISidebarProps, ISidebarState> {
if (!this.props.graph || !this.props.instanceName) {
return;
}
const edges = this.props.graph.edges.filter(e => [e.source, e.target].indexOf(this.props.instanceName!) > -1);
const edges = this.props.graph.edges.filter(
e => [e.data.source, e.data.target].indexOf(this.props.instanceName!) > -1
);
const neighbors: any[] = [];
edges.forEach(e => {
if (e.source === this.props.instanceName) {
neighbors.push({ neighbor: e.target, weight: e.size });
if (e.data.source === this.props.instanceName) {
neighbors.push({ neighbor: e.data.target, weight: e.data.weight });
} else {
neighbors.push({ neighbor: e.source, weight: e.size });
neighbors.push({ neighbor: e.data.source, weight: e.data.weight });
}
});
const neighborRows = orderBy(neighbors, ["weight"], ["desc"]).map((neighborDetails: any, idx: number) => (

View file

@ -33,19 +33,24 @@ export interface IInstanceDetails {
}
interface IGraphNode {
id: string;
label: string;
x: number;
y: number;
size?: number;
color?: string;
data: {
id: string;
label: string;
size: number;
};
position: {
x: number;
y: number;
};
}
interface IGraphEdge {
source: string;
target: string;
id?: string;
size?: number;
data: {
source: string;
target: string;
id: string;
weight: number;
};
}
export interface IGraph {

View file

@ -24,7 +24,6 @@ import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.Arrays;
import java.util.concurrent.TimeUnit;
public class GraphBuilder {