fix edge generation, serve graph in cytoscape format
This commit is contained in:
parent 82677fcd32
commit 9478017eb0
@@ -80,7 +80,7 @@ defmodule Backend.Crawler do

   # Save the state (after crawling) to the database.
   defp save(%Crawler{domain: domain, result: result, found_api?: true, error: nil}) do
-    now = NaiveDateTime.truncate(NaiveDateTime.utc_now(), :second)
+    now = get_now()

     ## Update the instance we crawled ##
     Repo.insert!(
@@ -13,6 +13,8 @@ defmodule Backend.Edge do
       type: :string,
       foreign_key: :target_domain

+    field :weight, :float
+
     timestamps()
   end
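For context, after this change the `Backend.Edge` schema presumably looks something like the sketch below (the association names and the `source_domain` side are inferred from the `foreign_key: :target_domain` context above, so treat them as assumptions). One detail worth noting: `Repo.insert_all/2` does not autofill the `timestamps()` columns, which is why the scheduler change further down writes `inserted_at`/`updated_at` into every row it inserts.

```elixir
defmodule Backend.Edge do
  use Ecto.Schema

  schema "edges" do
    # Inferred from the surrounding diff context; association names are assumptions.
    belongs_to :source, Backend.Instance,
      references: :domain,
      type: :string,
      foreign_key: :source_domain

    belongs_to :target, Backend.Instance,
      references: :domain,
      type: :string,
      foreign_key: :target_domain

    field :weight, :float

    timestamps()
  end
end
```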
@@ -13,8 +13,6 @@ defmodule Backend.InstancePeer do
       type: :string,
       foreign_key: :target_domain

-    field :weight, :float, default: 0.0
-
     timestamps()
   end
@@ -5,7 +5,8 @@ defmodule Backend.Scheduler do

   use Quantum.Scheduler, otp_app: :backend

-  alias Backend.{Crawl, Edge, Interaction, Instance, Repo}
+  alias Backend.{Crawl, Edge, CrawlInteraction, Instance, Repo}
+  import Backend.Util
   import Ecto.Query
   require Logger
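The scheduler below relies on two helpers pulled in by the new `import Backend.Util`: `get_now/0` (which replaces the `NaiveDateTime.truncate(NaiveDateTime.utc_now(), :second)` call removed in the crawler hunk above) and `get_interaction_key/2`. Their implementations are not part of this diff; a minimal sketch consistent with how they are used would be:

```elixir
defmodule Backend.Util do
  # Second-precision timestamp, suitable for inserted_at/updated_at columns.
  def get_now do
    NaiveDateTime.truncate(NaiveDateTime.utc_now(), :second)
  end

  # Order-independent key for a pair of domains, so that a->b and b->a
  # interactions accumulate onto the same undirected edge (the same idea as
  # the Enum.sort([source, target]) in the removed generate_edges/0 below).
  def get_interaction_key(domain_one, domain_two) do
    [first, second] = Enum.sort([domain_one, domain_two])
    {first, second}
  end
end
```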
@@ -30,87 +31,135 @@ defmodule Backend.Scheduler do
   end

   @doc """
-  This function aggregates statistics from the interactions in the database.
-  It calculates the strength of edges between nodes.
-
-  TODO: generate edge weights. The weight of an edge between two instances will be
-  (number of mentions of each other) / (total number of statuses crawled).
-  This requires us to keep track of how many statuses we've seen.
+  Calculates every instance's "insularity score" -- that is, the percentage of mentions that are among users on the
+  instance, rather than at other instances.
   """
-  def generate_edges() do
-    interactions =
-      Interaction
-      |> select([inter], {inter.source_domain, inter.target_domain})
-      |> join(:left, [inter], i_source in Instance, on: inter.source_domain == i_source.domain)
-      |> join(:left, [inter], i_target in Instance, on: inter.target_domain == i_target.domain)
-      |> where(
-        [inter, i_source, i_target],
-        not is_nil(i_source.last_crawl_timestamp) and not is_nil(i_target.last_crawl_timestamp)
+  def generate_insularity_scores() do
+    now = get_now()
+
+    crawls_subquery =
+      Crawl
+      |> select([c], %{
+        instance_domain: c.instance_domain,
+        interactions_seen: sum(c.interactions_seen)
+      })
+      |> where([c], is_nil(c.error))
+      |> group_by([c], c.instance_domain)
+
+    scores =
+      CrawlInteraction
+      |> join(:left, [ci], c in subquery(crawls_subquery),
+        on: ci.source_domain == c.instance_domain
       )
-      # Repo.all() returns a tuple like {"mastodon.social", "cursed.technology"}
+      |> where([ci], ci.source_domain == ci.target_domain)
+      |> group_by([ci], ci.source_domain)
+      |> select([ci, c], %{
+        domain: ci.source_domain,
+        mentions: sum(ci.mentions),
+        # we can take min() because every row is the same
+        interactions: min(c.interactions_seen)
+      })
       |> Repo.all()
-      # Create a map of %{source_domain => [target_domains]}
-      |> Enum.group_by(fn tuple -> Kernel.elem(tuple, 0) end, fn tuple ->
-        Kernel.elem(tuple, 1)
+      |> (fn o ->
+            Logger.info(inspect(o))
+            o
+          end).()
+      |> Enum.map(fn %{domain: domain, mentions: mentions, interactions: interactions} ->
+        %{
+          domain: domain,
+          insularity: mentions / interactions,
+          inserted_at: now,
+          updated_at: now
+        }
       end)

-    # Calculate insularity score
-    Repo.transaction(fn ->
-      interactions
-      |> Enum.each(fn {source, targets} ->
-        total_mentions = length(targets)
-        self_mentions = Enum.count(targets, fn t -> t == source end)
-
-        insularity = self_mentions / total_mentions
-
-        Repo.insert!(
-          %Instance{
-            domain: source,
-            insularity: insularity
-          },
-          on_conflict: [set: [insularity: insularity]],
+    Instance
+    |> Repo.insert_all(scores,
+      on_conflict: {:replace, [:insularity, :updated_at]},
       conflict_target: :domain
     )
-      end)
-    end)
+  end

-    # Get edges
-    edges = MapSet.new()
+  @doc """
+  This function aggregates statistics from the interactions in the database.
+  It calculates the strength of edges between nodes.
+  """
+  def generate_edges() do
+    now = get_now()

-    interactions
-    |> Enum.each(fn {source, targets} ->
-      targets
-      |> Enum.each(fn target ->
-        [key_a, key_b] = Enum.sort([source, target])
+    crawls_subquery =
+      Crawl
+      |> select([c], %{
+        instance_domain: c.instance_domain,
+        statuses_seen: sum(c.statuses_seen)
+      })
+      |> where([c], is_nil(c.error))
+      |> group_by([c], c.instance_domain)

-        edge = %Edge{
-          source_domain: key_a,
-          target_domain: key_b
-        }
+    interactions =
+      CrawlInteraction
+      |> join(:left, [ci], c_source in subquery(crawls_subquery),
+        on: ci.source_domain == c_source.instance_domain
+      )
+      |> join(:left, [ci], c_target in subquery(crawls_subquery),
+        on: ci.target_domain == c_target.instance_domain
+      )
+      |> group_by([ci], [ci.source_domain, ci.target_domain])
+      |> select([ci, c_source, c_target], %{
+        source_domain: ci.source_domain,
+        target_domain: ci.target_domain,
+        mentions: sum(ci.mentions),
+        # we can take min() because every row is the same
+        source_statuses_seen: min(c_source.statuses_seen),
+        target_statuses_seen: min(c_target.statuses_seen)
+      })
+      |> Repo.all()

-        MapSet.put(edges, edge)
-        Logger.debug(inspect(edges))
-      end)
-    end)
-
-    Logger.debug(inspect(edges))
-
-    now = NaiveDateTime.truncate(NaiveDateTime.utc_now(), :second)
-
-    Repo.delete_all(Edge)
+    # Get edges and their weights
+    Repo.transaction(fn ->
+      Edge
+      |> Repo.delete_all()

       edges =
-      edges
-      |> MapSet.to_list()
-      |> Enum.map(fn %{source_domain: source_domain, target_domain: target_domain} ->
-        %Edge{
+        interactions
+        # Get a map of %{{source, target} => {total_mention_count, total_statuses_seen}}
+        |> Enum.reduce(%{}, fn
+          %{
            source_domain: source_domain,
            target_domain: target_domain,
-          updated_at: now,
-          inserted_at: now
+            mentions: mentions,
+            source_statuses_seen: source_statuses_seen,
+            target_statuses_seen: target_statuses_seen
+          } = x,
+          acc ->
+            Logger.info(inspect(x))
+            key = get_interaction_key(source_domain, target_domain)
+
+            # target_statuses_seen might be nil if that instance was never crawled. default to 0.
+            target_statuses_seen =
+              case target_statuses_seen do
+                nil -> 0
+                _ -> target_statuses_seen
+              end
+
+            statuses_seen = source_statuses_seen + target_statuses_seen
+
+            Map.update(acc, key, {mentions, statuses_seen}, fn {curr_mentions, curr_statuses_seen} ->
+              {curr_mentions + mentions, curr_statuses_seen}
+            end)
+        end)
+        |> Enum.map(fn {{source_domain, target_domain}, {mention_count, statuses_seen}} ->
+          %{
+            source_domain: source_domain,
+            target_domain: target_domain,
+            weight: mention_count / statuses_seen,
+            inserted_at: now,
+            updated_at: now
          }
        end)

-    Repo.insert_all(Edge, edges)
+      Edge
+      |> Repo.insert_all(edges)
+    end)
   end
 end
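To make the two aggregations concrete: `generate_insularity_scores/0` stores self-mentions divided by all interactions seen for an instance, and `generate_edges/0` stores, per undirected pair of instances, the mentions in both directions divided by the combined number of statuses crawled on the two instances. A worked example with made-up numbers (domains and counts are purely illustrative):

```elixir
# Insularity for a single instance (hypothetical numbers):
self_mentions = 300        # mentions of example.social by its own users
interactions_seen = 1_000  # all interactions seen while crawling example.social
insularity = self_mentions / interactions_seen
# => 0.3

# Weight of the undirected edge between a.example and b.example:
mentions_a_to_b = 12
mentions_b_to_a = 8
statuses_seen_a = 5_000
statuses_seen_b = 3_000

weight = (mentions_a_to_b + mentions_b_to_a) / (statuses_seen_a + statuses_seen_b)
# => 0.0025
```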
@@ -16,21 +16,28 @@ defmodule BackendWeb.GraphView do
        false -> 1
      end

+    # This is the format that cytoscape.js expects.
     %{
+      data: %{
         id: node.domain,
         label: node.domain,
-        size: size,
+        size: size
+      },
+      position: %{
         x: node.x,
         y: node.y
+      }
     }
   end

   def render("edge.json", %{graph: edge}) do
     %{
+      data: %{
         id: edge.id,
         source: edge.source_domain,
         target: edge.target_domain,
-        size: edge.weight
+        weight: edge.weight
+      }
     }
   end
 end
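With this view change, each node and edge in the graph response is wrapped in the nested `data`/`position` shape that cytoscape.js consumes directly. Rendered output would look roughly like this (the domain, coordinates, and weight below are illustrative, not real data):

```elixir
# node.json
%{
  data: %{
    id: "example.social",
    label: "example.social",
    size: 2.5
  },
  position: %{
    x: 12.3,
    y: -45.6
  }
}

# edge.json
%{
  data: %{
    id: 42,
    source: "example.social",
    target: "other.example",
    weight: 0.0025
  }
}
```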
@@ -10,6 +10,9 @@ defmodule Backend.Repo.Migrations.CreateInstances do
       add :version, :string
       add :insularity, :float

+      add :x, :float
+      add :y, :float
+
       timestamps()
     end
@@ -19,8 +22,6 @@ defmodule Backend.Repo.Migrations.CreateInstances do
       add :source_domain, references(:instances, column: :domain, type: :string)
       add :target_domain, references(:instances, column: :domain, type: :string)

-      add :weight, :float
-
       timestamps()
     end
@@ -6,6 +6,8 @@ defmodule Backend.Repo.Migrations.CreateEdges do
       add :source_domain, references(:instances, column: :domain, type: :string), null: false
       add :target_domain, references(:instances, column: :domain, type: :string), null: false

+      add :weight, :float, null: false
+
       timestamps()
     end
@@ -1,10 +0,0 @@
-defmodule Backend.Repo.Migrations.AddInstanceCoords do
-  use Ecto.Migration
-
-  def change do
-    alter table(:instances) do
-      add :x, :float
-      add :y, :float
-    end
-  end
-end
@@ -69,11 +69,12 @@ class GraphImpl extends React.Component<IGraphProps, IGraphState> {

     // Check that all nodes have size & coordinates; otherwise the graph will look messed up
     const lengthBeforeFilter = graph.nodes.length;
-    graph = { ...graph, nodes: graph.nodes.filter(n => n.size && n.x && n.y) };
+    graph = { ...graph, nodes: graph.nodes.filter(n => n.data.size && n.position.x && n.position.y) };
     if (graph.nodes.length !== lengthBeforeFilter) {
       // tslint:disable-next-line:no-console
       console.error(
-        "Some nodes were missing details: " + graph.nodes.filter(n => !n.size || !n.x || !n.y).map(n => n.label)
+        "Some nodes were missing details: " +
+          graph.nodes.filter(n => !n.data.size || !n.position.x || !n.position.y).map(n => n.data.label)
       );
       this.setState({ didError: true });
     }
@@ -125,29 +126,9 @@ class GraphImpl extends React.Component<IGraphProps, IGraphState> {
       return;
     }
     this.cy = cytoscape({
-      autoungrabify: true,
+      autoungrabify: false,
       container: this.cytoscapeDiv.current,
-      elements: {
-        edges: graph.edges.map(edge => ({
-          data: {
-            id: edge.id || `${edge.source}${edge.target}`,
-            source: edge.source,
-            target: edge.target,
-            weight: edge.size
-          },
-          group: "edges" as "edges"
-        })),
-        nodes: graph.nodes.map(node => ({
-          data: {
-            id: node.id
-          },
-          group: "nodes" as "nodes",
-          position: {
-            x: node.x,
-            y: node.y
-          }
-        }))
-      },
+      elements: graph,
       layout: {
         name: "preset"
       },
@@ -83,7 +83,7 @@ class SidebarImpl extends React.Component<ISidebarProps, ISidebarState> {
     } else if (
       this.props.graph &&
       this.props.instanceName &&
-      this.props.graph.nodes.map(n => n.id).indexOf(this.props.instanceName) < 0
+      this.props.graph.nodes.map(n => n.data.id).indexOf(this.props.instanceName) < 0
     ) {
       return this.renderQuietInstanceState();
     }
@@ -177,13 +177,15 @@ class SidebarImpl extends React.Component<ISidebarProps, ISidebarState> {
     if (!this.props.graph || !this.props.instanceName) {
       return;
     }
-    const edges = this.props.graph.edges.filter(e => [e.source, e.target].indexOf(this.props.instanceName!) > -1);
+    const edges = this.props.graph.edges.filter(
+      e => [e.data.source, e.data.target].indexOf(this.props.instanceName!) > -1
+    );
     const neighbors: any[] = [];
     edges.forEach(e => {
-      if (e.source === this.props.instanceName) {
-        neighbors.push({ neighbor: e.target, weight: e.size });
+      if (e.data.source === this.props.instanceName) {
+        neighbors.push({ neighbor: e.data.target, weight: e.data.weight });
       } else {
-        neighbors.push({ neighbor: e.source, weight: e.size });
+        neighbors.push({ neighbor: e.data.source, weight: e.data.weight });
       }
     });
     const neighborRows = orderBy(neighbors, ["weight"], ["desc"]).map((neighborDetails: any, idx: number) => (
@@ -33,19 +33,24 @@ export interface IInstanceDetails {
 }

 interface IGraphNode {
+  data: {
     id: string;
     label: string;
+    size: number;
+  };
+  position: {
     x: number;
     y: number;
-  size?: number;
-  color?: string;
+  };
 }

 interface IGraphEdge {
+  data: {
     source: string;
     target: string;
-  id?: string;
-  size?: number;
+    id: string;
+    weight: number;
+  };
 }

 export interface IGraph {
Binary file not shown.
@@ -24,7 +24,6 @@ import java.io.IOException;
 import java.sql.Connection;
 import java.sql.PreparedStatement;
 import java.sql.SQLException;
-import java.util.Arrays;
 import java.util.concurrent.TimeUnit;

 public class GraphBuilder {