fix edge generation, serve graph in cytoscape format

This commit is contained in:
Tao Bror Bojlén 2019-07-18 13:21:12 +03:00
parent 82677fcd32
commit 9478017eb0
No known key found for this signature in database
GPG key ID: C6EC7AAB905F9E6F
13 changed files with 166 additions and 130 deletions

View file

@ -80,7 +80,7 @@ defmodule Backend.Crawler do
# Save the state (after crawling) to the database. # Save the state (after crawling) to the database.
defp save(%Crawler{domain: domain, result: result, found_api?: true, error: nil}) do defp save(%Crawler{domain: domain, result: result, found_api?: true, error: nil}) do
now = NaiveDateTime.truncate(NaiveDateTime.utc_now(), :second) now = get_now()
## Update the instance we crawled ## ## Update the instance we crawled ##
Repo.insert!( Repo.insert!(

View file

@ -13,6 +13,8 @@ defmodule Backend.Edge do
type: :string, type: :string,
foreign_key: :target_domain foreign_key: :target_domain
field :weight, :float
timestamps() timestamps()
end end

View file

@ -13,8 +13,6 @@ defmodule Backend.InstancePeer do
type: :string, type: :string,
foreign_key: :target_domain foreign_key: :target_domain
field :weight, :float, default: 0.0
timestamps() timestamps()
end end

View file

@ -5,7 +5,8 @@ defmodule Backend.Scheduler do
use Quantum.Scheduler, otp_app: :backend use Quantum.Scheduler, otp_app: :backend
alias Backend.{Crawl, Edge, Interaction, Instance, Repo} alias Backend.{Crawl, Edge, CrawlInteraction, Instance, Repo}
import Backend.Util
import Ecto.Query import Ecto.Query
require Logger require Logger
@ -30,87 +31,135 @@ defmodule Backend.Scheduler do
end end
@doc """ @doc """
This function aggregates statistics from the interactions in the database. Calculates every instance's "insularity score" -- that is, the percentage of mentions that are among users on the
It calculates the strength of edges between nodes. instance, rather than at other instances.
TODO: generate edge weights. The weight of an edge between two instances will be
(number of mentions of each other) / (total number of statuses crawled).
This requires us to keep track of how many statuses we've seen.
""" """
def generate_edges() do def generate_insularity_scores() do
interactions = now = get_now()
Interaction
|> select([inter], {inter.source_domain, inter.target_domain}) crawls_subquery =
|> join(:left, [inter], i_source in Instance, on: inter.source_domain == i_source.domain) Crawl
|> join(:left, [inter], i_target in Instance, on: inter.target_domain == i_target.domain) |> select([c], %{
|> where( instance_domain: c.instance_domain,
[inter, i_source, i_target], interactions_seen: sum(c.interactions_seen)
not is_nil(i_source.last_crawl_timestamp) and not is_nil(i_target.last_crawl_timestamp) })
|> where([c], is_nil(c.error))
|> group_by([c], c.instance_domain)
scores =
CrawlInteraction
|> join(:left, [ci], c in subquery(crawls_subquery),
on: ci.source_domain == c.instance_domain
) )
# Repo.all() returns a tuple like {"mastodon.social", "cursed.technology"} |> where([ci], ci.source_domain == ci.target_domain)
|> group_by([ci], ci.source_domain)
|> select([ci, c], %{
domain: ci.source_domain,
mentions: sum(ci.mentions),
# we can take min() because every row is the same
interactions: min(c.interactions_seen)
})
|> Repo.all() |> Repo.all()
# Create a map of %{source_domain => [target_domains]} |> (fn o ->
|> Enum.group_by(fn tuple -> Kernel.elem(tuple, 0) end, fn tuple -> Logger.info(inspect(o))
Kernel.elem(tuple, 1) o
end).()
|> Enum.map(fn %{domain: domain, mentions: mentions, interactions: interactions} ->
%{
domain: domain,
insularity: mentions / interactions,
inserted_at: now,
updated_at: now
}
end) end)
# Calculate insularity score Instance
Repo.transaction(fn -> |> Repo.insert_all(scores,
interactions on_conflict: {:replace, [:insularity, :updated_at]},
|> Enum.each(fn {source, targets} ->
total_mentions = length(targets)
self_mentions = Enum.count(targets, fn t -> t == source end)
insularity = self_mentions / total_mentions
Repo.insert!(
%Instance{
domain: source,
insularity: insularity
},
on_conflict: [set: [insularity: insularity]],
conflict_target: :domain conflict_target: :domain
) )
end) end
# Get edges @doc """
edges = MapSet.new() This function aggregates statistics from the interactions in the database.
It calculates the strength of edges between nodes.
"""
def generate_edges() do
now = get_now()
interactions crawls_subquery =
|> Enum.each(fn {source, targets} -> Crawl
targets |> select([c], %{
|> Enum.each(fn target -> instance_domain: c.instance_domain,
[key_a, key_b] = Enum.sort([source, target]) statuses_seen: sum(c.statuses_seen)
})
|> where([c], is_nil(c.error))
|> group_by([c], c.instance_domain)
edge = %Edge{ interactions =
source_domain: key_a, CrawlInteraction
target_domain: key_b |> join(:left, [ci], c_source in subquery(crawls_subquery),
} on: ci.source_domain == c_source.instance_domain
)
|> join(:left, [ci], c_target in subquery(crawls_subquery),
on: ci.target_domain == c_target.instance_domain
)
|> group_by([ci], [ci.source_domain, ci.target_domain])
|> select([ci, c_source, c_target], %{
source_domain: ci.source_domain,
target_domain: ci.target_domain,
mentions: sum(ci.mentions),
# we can take min() because every row is the same
source_statuses_seen: min(c_source.statuses_seen),
target_statuses_seen: min(c_target.statuses_seen)
})
|> Repo.all()
MapSet.put(edges, edge) # Get edges and their weights
Logger.debug(inspect(edges)) Repo.transaction(fn ->
end) Edge
end) |> Repo.delete_all()
Logger.debug(inspect(edges))
now = NaiveDateTime.truncate(NaiveDateTime.utc_now(), :second)
Repo.delete_all(Edge)
edges = edges =
edges interactions
|> MapSet.to_list() # Get a map of %{{source, target} => {total_mention_count, total_statuses_seen}}
|> Enum.map(fn %{source_domain: source_domain, target_domain: target_domain} -> |> Enum.reduce(%{}, fn
%Edge{ %{
source_domain: source_domain, source_domain: source_domain,
target_domain: target_domain, target_domain: target_domain,
updated_at: now, mentions: mentions,
inserted_at: now source_statuses_seen: source_statuses_seen,
target_statuses_seen: target_statuses_seen
} = x,
acc ->
Logger.info(inspect(x))
key = get_interaction_key(source_domain, target_domain)
# target_statuses_seen might be nil if that instance was never crawled. default to 0.
target_statuses_seen =
case target_statuses_seen do
nil -> 0
_ -> target_statuses_seen
end
statuses_seen = source_statuses_seen + target_statuses_seen
Map.update(acc, key, {mentions, statuses_seen}, fn {curr_mentions, curr_statuses_seen} ->
{curr_mentions + mentions, curr_statuses_seen}
end)
end)
|> Enum.map(fn {{source_domain, target_domain}, {mention_count, statuses_seen}} ->
%{
source_domain: source_domain,
target_domain: target_domain,
weight: mention_count / statuses_seen,
inserted_at: now,
updated_at: now
} }
end) end)
Repo.insert_all(Edge, edges) Edge
|> Repo.insert_all(edges)
end) end)
end end
end end

View file

@ -16,21 +16,28 @@ defmodule BackendWeb.GraphView do
false -> 1 false -> 1
end end
# This is the format that cytoscape.js expects.
%{ %{
data: %{
id: node.domain, id: node.domain,
label: node.domain, label: node.domain,
size: size, size: size
},
position: %{
x: node.x, x: node.x,
y: node.y y: node.y
} }
}
end end
def render("edge.json", %{graph: edge}) do def render("edge.json", %{graph: edge}) do
%{ %{
data: %{
id: edge.id, id: edge.id,
source: edge.source_domain, source: edge.source_domain,
target: edge.target_domain, target: edge.target_domain,
size: edge.weight weight: edge.weight
}
} }
end end
end end

View file

@ -10,6 +10,9 @@ defmodule Backend.Repo.Migrations.CreateInstances do
add :version, :string add :version, :string
add :insularity, :float add :insularity, :float
add :x, :float
add :y, :float
timestamps() timestamps()
end end
@ -19,8 +22,6 @@ defmodule Backend.Repo.Migrations.CreateInstances do
add :source_domain, references(:instances, column: :domain, type: :string) add :source_domain, references(:instances, column: :domain, type: :string)
add :target_domain, references(:instances, column: :domain, type: :string) add :target_domain, references(:instances, column: :domain, type: :string)
add :weight, :float
timestamps() timestamps()
end end

View file

@ -6,6 +6,8 @@ defmodule Backend.Repo.Migrations.CreateEdges do
add :source_domain, references(:instances, column: :domain, type: :string), null: false add :source_domain, references(:instances, column: :domain, type: :string), null: false
add :target_domain, references(:instances, column: :domain, type: :string), null: false add :target_domain, references(:instances, column: :domain, type: :string), null: false
add :weight, :float, null: false
timestamps() timestamps()
end end

View file

@ -1,10 +0,0 @@
defmodule Backend.Repo.Migrations.AddInstanceCoords do
use Ecto.Migration
def change do
alter table(:instances) do
add :x, :float
add :y, :float
end
end
end

View file

@ -69,11 +69,12 @@ class GraphImpl extends React.Component<IGraphProps, IGraphState> {
// Check that all nodes have size & coordinates; otherwise the graph will look messed up // Check that all nodes have size & coordinates; otherwise the graph will look messed up
const lengthBeforeFilter = graph.nodes.length; const lengthBeforeFilter = graph.nodes.length;
graph = { ...graph, nodes: graph.nodes.filter(n => n.size && n.x && n.y) }; graph = { ...graph, nodes: graph.nodes.filter(n => n.data.size && n.position.x && n.position.y) };
if (graph.nodes.length !== lengthBeforeFilter) { if (graph.nodes.length !== lengthBeforeFilter) {
// tslint:disable-next-line:no-console // tslint:disable-next-line:no-console
console.error( console.error(
"Some nodes were missing details: " + graph.nodes.filter(n => !n.size || !n.x || !n.y).map(n => n.label) "Some nodes were missing details: " +
graph.nodes.filter(n => !n.data.size || !n.position.x || !n.position.y).map(n => n.data.label)
); );
this.setState({ didError: true }); this.setState({ didError: true });
} }
@ -125,29 +126,9 @@ class GraphImpl extends React.Component<IGraphProps, IGraphState> {
return; return;
} }
this.cy = cytoscape({ this.cy = cytoscape({
autoungrabify: true, autoungrabify: false,
container: this.cytoscapeDiv.current, container: this.cytoscapeDiv.current,
elements: { elements: graph,
edges: graph.edges.map(edge => ({
data: {
id: edge.id || `${edge.source}${edge.target}`,
source: edge.source,
target: edge.target,
weight: edge.size
},
group: "edges" as "edges"
})),
nodes: graph.nodes.map(node => ({
data: {
id: node.id
},
group: "nodes" as "nodes",
position: {
x: node.x,
y: node.y
}
}))
},
layout: { layout: {
name: "preset" name: "preset"
}, },

View file

@ -83,7 +83,7 @@ class SidebarImpl extends React.Component<ISidebarProps, ISidebarState> {
} else if ( } else if (
this.props.graph && this.props.graph &&
this.props.instanceName && this.props.instanceName &&
this.props.graph.nodes.map(n => n.id).indexOf(this.props.instanceName) < 0 this.props.graph.nodes.map(n => n.data.id).indexOf(this.props.instanceName) < 0
) { ) {
return this.renderQuietInstanceState(); return this.renderQuietInstanceState();
} }
@ -177,13 +177,15 @@ class SidebarImpl extends React.Component<ISidebarProps, ISidebarState> {
if (!this.props.graph || !this.props.instanceName) { if (!this.props.graph || !this.props.instanceName) {
return; return;
} }
const edges = this.props.graph.edges.filter(e => [e.source, e.target].indexOf(this.props.instanceName!) > -1); const edges = this.props.graph.edges.filter(
e => [e.data.source, e.data.target].indexOf(this.props.instanceName!) > -1
);
const neighbors: any[] = []; const neighbors: any[] = [];
edges.forEach(e => { edges.forEach(e => {
if (e.source === this.props.instanceName) { if (e.data.source === this.props.instanceName) {
neighbors.push({ neighbor: e.target, weight: e.size }); neighbors.push({ neighbor: e.data.target, weight: e.data.weight });
} else { } else {
neighbors.push({ neighbor: e.source, weight: e.size }); neighbors.push({ neighbor: e.data.source, weight: e.data.weight });
} }
}); });
const neighborRows = orderBy(neighbors, ["weight"], ["desc"]).map((neighborDetails: any, idx: number) => ( const neighborRows = orderBy(neighbors, ["weight"], ["desc"]).map((neighborDetails: any, idx: number) => (

View file

@ -33,19 +33,24 @@ export interface IInstanceDetails {
} }
interface IGraphNode { interface IGraphNode {
data: {
id: string; id: string;
label: string; label: string;
size: number;
};
position: {
x: number; x: number;
y: number; y: number;
size?: number; };
color?: string;
} }
interface IGraphEdge { interface IGraphEdge {
data: {
source: string; source: string;
target: string; target: string;
id?: string; id: string;
size?: number; weight: number;
};
} }
export interface IGraph { export interface IGraph {

View file

@ -24,7 +24,6 @@ import java.io.IOException;
import java.sql.Connection; import java.sql.Connection;
import java.sql.PreparedStatement; import java.sql.PreparedStatement;
import java.sql.SQLException; import java.sql.SQLException;
import java.util.Arrays;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
public class GraphBuilder { public class GraphBuilder {