fix edge generation, serve graph in cytoscape format
This commit is contained in:
parent
82677fcd32
commit
9478017eb0
|
@ -80,7 +80,7 @@ defmodule Backend.Crawler do
|
||||||
|
|
||||||
# Save the state (after crawling) to the database.
|
# Save the state (after crawling) to the database.
|
||||||
defp save(%Crawler{domain: domain, result: result, found_api?: true, error: nil}) do
|
defp save(%Crawler{domain: domain, result: result, found_api?: true, error: nil}) do
|
||||||
now = NaiveDateTime.truncate(NaiveDateTime.utc_now(), :second)
|
now = get_now()
|
||||||
|
|
||||||
## Update the instance we crawled ##
|
## Update the instance we crawled ##
|
||||||
Repo.insert!(
|
Repo.insert!(
|
||||||
|
|
|
@ -13,6 +13,8 @@ defmodule Backend.Edge do
|
||||||
type: :string,
|
type: :string,
|
||||||
foreign_key: :target_domain
|
foreign_key: :target_domain
|
||||||
|
|
||||||
|
field :weight, :float
|
||||||
|
|
||||||
timestamps()
|
timestamps()
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -13,8 +13,6 @@ defmodule Backend.InstancePeer do
|
||||||
type: :string,
|
type: :string,
|
||||||
foreign_key: :target_domain
|
foreign_key: :target_domain
|
||||||
|
|
||||||
field :weight, :float, default: 0.0
|
|
||||||
|
|
||||||
timestamps()
|
timestamps()
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,8 @@ defmodule Backend.Scheduler do
|
||||||
|
|
||||||
use Quantum.Scheduler, otp_app: :backend
|
use Quantum.Scheduler, otp_app: :backend
|
||||||
|
|
||||||
alias Backend.{Crawl, Edge, Interaction, Instance, Repo}
|
alias Backend.{Crawl, Edge, CrawlInteraction, Instance, Repo}
|
||||||
|
import Backend.Util
|
||||||
import Ecto.Query
|
import Ecto.Query
|
||||||
require Logger
|
require Logger
|
||||||
|
|
||||||
|
@ -29,88 +30,136 @@ defmodule Backend.Scheduler do
|
||||||
Logger.info("Pruned #{deleted_num} old crawls.")
|
Logger.info("Pruned #{deleted_num} old crawls.")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@doc """
|
||||||
|
Calculates every instance's "insularity score" -- that is, the percentage of mentions that are among users on the
|
||||||
|
instance, rather than at other instances.
|
||||||
|
"""
|
||||||
|
def generate_insularity_scores() do
|
||||||
|
now = get_now()
|
||||||
|
|
||||||
|
crawls_subquery =
|
||||||
|
Crawl
|
||||||
|
|> select([c], %{
|
||||||
|
instance_domain: c.instance_domain,
|
||||||
|
interactions_seen: sum(c.interactions_seen)
|
||||||
|
})
|
||||||
|
|> where([c], is_nil(c.error))
|
||||||
|
|> group_by([c], c.instance_domain)
|
||||||
|
|
||||||
|
scores =
|
||||||
|
CrawlInteraction
|
||||||
|
|> join(:left, [ci], c in subquery(crawls_subquery),
|
||||||
|
on: ci.source_domain == c.instance_domain
|
||||||
|
)
|
||||||
|
|> where([ci], ci.source_domain == ci.target_domain)
|
||||||
|
|> group_by([ci], ci.source_domain)
|
||||||
|
|> select([ci, c], %{
|
||||||
|
domain: ci.source_domain,
|
||||||
|
mentions: sum(ci.mentions),
|
||||||
|
# we can take min() because every row is the same
|
||||||
|
interactions: min(c.interactions_seen)
|
||||||
|
})
|
||||||
|
|> Repo.all()
|
||||||
|
|> (fn o ->
|
||||||
|
Logger.info(inspect(o))
|
||||||
|
o
|
||||||
|
end).()
|
||||||
|
|> Enum.map(fn %{domain: domain, mentions: mentions, interactions: interactions} ->
|
||||||
|
%{
|
||||||
|
domain: domain,
|
||||||
|
insularity: mentions / interactions,
|
||||||
|
inserted_at: now,
|
||||||
|
updated_at: now
|
||||||
|
}
|
||||||
|
end)
|
||||||
|
|
||||||
|
Instance
|
||||||
|
|> Repo.insert_all(scores,
|
||||||
|
on_conflict: {:replace, [:insularity, :updated_at]},
|
||||||
|
conflict_target: :domain
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
@doc """
|
@doc """
|
||||||
This function aggregates statistics from the interactions in the database.
|
This function aggregates statistics from the interactions in the database.
|
||||||
It calculates the strength of edges between nodes.
|
It calculates the strength of edges between nodes.
|
||||||
|
|
||||||
TODO: generate edge weights. The weight of an edge between two instances will be
|
|
||||||
(number of mentions of each other) / (total number of statuses crawled).
|
|
||||||
This requires us to keep track of how many statuses we've seen.
|
|
||||||
"""
|
"""
|
||||||
def generate_edges() do
|
def generate_edges() do
|
||||||
|
now = get_now()
|
||||||
|
|
||||||
|
crawls_subquery =
|
||||||
|
Crawl
|
||||||
|
|> select([c], %{
|
||||||
|
instance_domain: c.instance_domain,
|
||||||
|
statuses_seen: sum(c.statuses_seen)
|
||||||
|
})
|
||||||
|
|> where([c], is_nil(c.error))
|
||||||
|
|> group_by([c], c.instance_domain)
|
||||||
|
|
||||||
interactions =
|
interactions =
|
||||||
Interaction
|
CrawlInteraction
|
||||||
|> select([inter], {inter.source_domain, inter.target_domain})
|
|> join(:left, [ci], c_source in subquery(crawls_subquery),
|
||||||
|> join(:left, [inter], i_source in Instance, on: inter.source_domain == i_source.domain)
|
on: ci.source_domain == c_source.instance_domain
|
||||||
|> join(:left, [inter], i_target in Instance, on: inter.target_domain == i_target.domain)
|
|
||||||
|> where(
|
|
||||||
[inter, i_source, i_target],
|
|
||||||
not is_nil(i_source.last_crawl_timestamp) and not is_nil(i_target.last_crawl_timestamp)
|
|
||||||
)
|
)
|
||||||
# Repo.all() returns a tuple like {"mastodon.social", "cursed.technology"}
|
|> join(:left, [ci], c_target in subquery(crawls_subquery),
|
||||||
|
on: ci.target_domain == c_target.instance_domain
|
||||||
|
)
|
||||||
|
|> group_by([ci], [ci.source_domain, ci.target_domain])
|
||||||
|
|> select([ci, c_source, c_target], %{
|
||||||
|
source_domain: ci.source_domain,
|
||||||
|
target_domain: ci.target_domain,
|
||||||
|
mentions: sum(ci.mentions),
|
||||||
|
# we can take min() because every row is the same
|
||||||
|
source_statuses_seen: min(c_source.statuses_seen),
|
||||||
|
target_statuses_seen: min(c_target.statuses_seen)
|
||||||
|
})
|
||||||
|> Repo.all()
|
|> Repo.all()
|
||||||
# Create a map of %{source_domain => [target_domains]}
|
|
||||||
|> Enum.group_by(fn tuple -> Kernel.elem(tuple, 0) end, fn tuple ->
|
|
||||||
Kernel.elem(tuple, 1)
|
|
||||||
end)
|
|
||||||
|
|
||||||
# Calculate insularity score
|
# Get edges and their weights
|
||||||
Repo.transaction(fn ->
|
Repo.transaction(fn ->
|
||||||
interactions
|
Edge
|
||||||
|> Enum.each(fn {source, targets} ->
|
|> Repo.delete_all()
|
||||||
total_mentions = length(targets)
|
|
||||||
self_mentions = Enum.count(targets, fn t -> t == source end)
|
|
||||||
|
|
||||||
insularity = self_mentions / total_mentions
|
|
||||||
|
|
||||||
Repo.insert!(
|
|
||||||
%Instance{
|
|
||||||
domain: source,
|
|
||||||
insularity: insularity
|
|
||||||
},
|
|
||||||
on_conflict: [set: [insularity: insularity]],
|
|
||||||
conflict_target: :domain
|
|
||||||
)
|
|
||||||
end)
|
|
||||||
|
|
||||||
# Get edges
|
|
||||||
edges = MapSet.new()
|
|
||||||
|
|
||||||
interactions
|
|
||||||
|> Enum.each(fn {source, targets} ->
|
|
||||||
targets
|
|
||||||
|> Enum.each(fn target ->
|
|
||||||
[key_a, key_b] = Enum.sort([source, target])
|
|
||||||
|
|
||||||
edge = %Edge{
|
|
||||||
source_domain: key_a,
|
|
||||||
target_domain: key_b
|
|
||||||
}
|
|
||||||
|
|
||||||
MapSet.put(edges, edge)
|
|
||||||
Logger.debug(inspect(edges))
|
|
||||||
end)
|
|
||||||
end)
|
|
||||||
|
|
||||||
Logger.debug(inspect(edges))
|
|
||||||
|
|
||||||
now = NaiveDateTime.truncate(NaiveDateTime.utc_now(), :second)
|
|
||||||
|
|
||||||
Repo.delete_all(Edge)
|
|
||||||
|
|
||||||
edges =
|
edges =
|
||||||
edges
|
interactions
|
||||||
|> MapSet.to_list()
|
# Get a map of %{{source, target} => {total_mention_count, total_statuses_seen}}
|
||||||
|> Enum.map(fn %{source_domain: source_domain, target_domain: target_domain} ->
|
|> Enum.reduce(%{}, fn
|
||||||
%Edge{
|
%{
|
||||||
source_domain: source_domain,
|
source_domain: source_domain,
|
||||||
target_domain: target_domain,
|
target_domain: target_domain,
|
||||||
updated_at: now,
|
mentions: mentions,
|
||||||
inserted_at: now
|
source_statuses_seen: source_statuses_seen,
|
||||||
|
target_statuses_seen: target_statuses_seen
|
||||||
|
} = x,
|
||||||
|
acc ->
|
||||||
|
Logger.info(inspect(x))
|
||||||
|
key = get_interaction_key(source_domain, target_domain)
|
||||||
|
|
||||||
|
# target_statuses_seen might be nil if that instance was never crawled. default to 0.
|
||||||
|
target_statuses_seen =
|
||||||
|
case target_statuses_seen do
|
||||||
|
nil -> 0
|
||||||
|
_ -> target_statuses_seen
|
||||||
|
end
|
||||||
|
|
||||||
|
statuses_seen = source_statuses_seen + target_statuses_seen
|
||||||
|
|
||||||
|
Map.update(acc, key, {mentions, statuses_seen}, fn {curr_mentions, curr_statuses_seen} ->
|
||||||
|
{curr_mentions + mentions, curr_statuses_seen}
|
||||||
|
end)
|
||||||
|
end)
|
||||||
|
|> Enum.map(fn {{source_domain, target_domain}, {mention_count, statuses_seen}} ->
|
||||||
|
%{
|
||||||
|
source_domain: source_domain,
|
||||||
|
target_domain: target_domain,
|
||||||
|
weight: mention_count / statuses_seen,
|
||||||
|
inserted_at: now,
|
||||||
|
updated_at: now
|
||||||
}
|
}
|
||||||
end)
|
end)
|
||||||
|
|
||||||
Repo.insert_all(Edge, edges)
|
Edge
|
||||||
|
|> Repo.insert_all(edges)
|
||||||
end)
|
end)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -16,21 +16,28 @@ defmodule BackendWeb.GraphView do
|
||||||
false -> 1
|
false -> 1
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# This is the format that cytoscape.js expects.
|
||||||
%{
|
%{
|
||||||
id: node.domain,
|
data: %{
|
||||||
label: node.domain,
|
id: node.domain,
|
||||||
size: size,
|
label: node.domain,
|
||||||
x: node.x,
|
size: size
|
||||||
y: node.y
|
},
|
||||||
|
position: %{
|
||||||
|
x: node.x,
|
||||||
|
y: node.y
|
||||||
|
}
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
def render("edge.json", %{graph: edge}) do
|
def render("edge.json", %{graph: edge}) do
|
||||||
%{
|
%{
|
||||||
id: edge.id,
|
data: %{
|
||||||
source: edge.source_domain,
|
id: edge.id,
|
||||||
target: edge.target_domain,
|
source: edge.source_domain,
|
||||||
size: edge.weight
|
target: edge.target_domain,
|
||||||
|
weight: edge.weight
|
||||||
|
}
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -10,6 +10,9 @@ defmodule Backend.Repo.Migrations.CreateInstances do
|
||||||
add :version, :string
|
add :version, :string
|
||||||
add :insularity, :float
|
add :insularity, :float
|
||||||
|
|
||||||
|
add :x, :float
|
||||||
|
add :y, :float
|
||||||
|
|
||||||
timestamps()
|
timestamps()
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -19,8 +22,6 @@ defmodule Backend.Repo.Migrations.CreateInstances do
|
||||||
add :source_domain, references(:instances, column: :domain, type: :string)
|
add :source_domain, references(:instances, column: :domain, type: :string)
|
||||||
add :target_domain, references(:instances, column: :domain, type: :string)
|
add :target_domain, references(:instances, column: :domain, type: :string)
|
||||||
|
|
||||||
add :weight, :float
|
|
||||||
|
|
||||||
timestamps()
|
timestamps()
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,8 @@ defmodule Backend.Repo.Migrations.CreateEdges do
|
||||||
add :source_domain, references(:instances, column: :domain, type: :string), null: false
|
add :source_domain, references(:instances, column: :domain, type: :string), null: false
|
||||||
add :target_domain, references(:instances, column: :domain, type: :string), null: false
|
add :target_domain, references(:instances, column: :domain, type: :string), null: false
|
||||||
|
|
||||||
|
add :weight, :float, null: false
|
||||||
|
|
||||||
timestamps()
|
timestamps()
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -1,10 +0,0 @@
|
||||||
defmodule Backend.Repo.Migrations.AddInstanceCoords do
|
|
||||||
use Ecto.Migration
|
|
||||||
|
|
||||||
def change do
|
|
||||||
alter table(:instances) do
|
|
||||||
add :x, :float
|
|
||||||
add :y, :float
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
|
@ -69,11 +69,12 @@ class GraphImpl extends React.Component<IGraphProps, IGraphState> {
|
||||||
|
|
||||||
// Check that all nodes have size & coordinates; otherwise the graph will look messed up
|
// Check that all nodes have size & coordinates; otherwise the graph will look messed up
|
||||||
const lengthBeforeFilter = graph.nodes.length;
|
const lengthBeforeFilter = graph.nodes.length;
|
||||||
graph = { ...graph, nodes: graph.nodes.filter(n => n.size && n.x && n.y) };
|
graph = { ...graph, nodes: graph.nodes.filter(n => n.data.size && n.position.x && n.position.y) };
|
||||||
if (graph.nodes.length !== lengthBeforeFilter) {
|
if (graph.nodes.length !== lengthBeforeFilter) {
|
||||||
// tslint:disable-next-line:no-console
|
// tslint:disable-next-line:no-console
|
||||||
console.error(
|
console.error(
|
||||||
"Some nodes were missing details: " + graph.nodes.filter(n => !n.size || !n.x || !n.y).map(n => n.label)
|
"Some nodes were missing details: " +
|
||||||
|
graph.nodes.filter(n => !n.data.size || !n.position.x || !n.position.y).map(n => n.data.label)
|
||||||
);
|
);
|
||||||
this.setState({ didError: true });
|
this.setState({ didError: true });
|
||||||
}
|
}
|
||||||
|
@ -125,29 +126,9 @@ class GraphImpl extends React.Component<IGraphProps, IGraphState> {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
this.cy = cytoscape({
|
this.cy = cytoscape({
|
||||||
autoungrabify: true,
|
autoungrabify: false,
|
||||||
container: this.cytoscapeDiv.current,
|
container: this.cytoscapeDiv.current,
|
||||||
elements: {
|
elements: graph,
|
||||||
edges: graph.edges.map(edge => ({
|
|
||||||
data: {
|
|
||||||
id: edge.id || `${edge.source}${edge.target}`,
|
|
||||||
source: edge.source,
|
|
||||||
target: edge.target,
|
|
||||||
weight: edge.size
|
|
||||||
},
|
|
||||||
group: "edges" as "edges"
|
|
||||||
})),
|
|
||||||
nodes: graph.nodes.map(node => ({
|
|
||||||
data: {
|
|
||||||
id: node.id
|
|
||||||
},
|
|
||||||
group: "nodes" as "nodes",
|
|
||||||
position: {
|
|
||||||
x: node.x,
|
|
||||||
y: node.y
|
|
||||||
}
|
|
||||||
}))
|
|
||||||
},
|
|
||||||
layout: {
|
layout: {
|
||||||
name: "preset"
|
name: "preset"
|
||||||
},
|
},
|
||||||
|
|
|
@ -83,7 +83,7 @@ class SidebarImpl extends React.Component<ISidebarProps, ISidebarState> {
|
||||||
} else if (
|
} else if (
|
||||||
this.props.graph &&
|
this.props.graph &&
|
||||||
this.props.instanceName &&
|
this.props.instanceName &&
|
||||||
this.props.graph.nodes.map(n => n.id).indexOf(this.props.instanceName) < 0
|
this.props.graph.nodes.map(n => n.data.id).indexOf(this.props.instanceName) < 0
|
||||||
) {
|
) {
|
||||||
return this.renderQuietInstanceState();
|
return this.renderQuietInstanceState();
|
||||||
}
|
}
|
||||||
|
@ -177,13 +177,15 @@ class SidebarImpl extends React.Component<ISidebarProps, ISidebarState> {
|
||||||
if (!this.props.graph || !this.props.instanceName) {
|
if (!this.props.graph || !this.props.instanceName) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const edges = this.props.graph.edges.filter(e => [e.source, e.target].indexOf(this.props.instanceName!) > -1);
|
const edges = this.props.graph.edges.filter(
|
||||||
|
e => [e.data.source, e.data.target].indexOf(this.props.instanceName!) > -1
|
||||||
|
);
|
||||||
const neighbors: any[] = [];
|
const neighbors: any[] = [];
|
||||||
edges.forEach(e => {
|
edges.forEach(e => {
|
||||||
if (e.source === this.props.instanceName) {
|
if (e.data.source === this.props.instanceName) {
|
||||||
neighbors.push({ neighbor: e.target, weight: e.size });
|
neighbors.push({ neighbor: e.data.target, weight: e.data.weight });
|
||||||
} else {
|
} else {
|
||||||
neighbors.push({ neighbor: e.source, weight: e.size });
|
neighbors.push({ neighbor: e.data.source, weight: e.data.weight });
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
const neighborRows = orderBy(neighbors, ["weight"], ["desc"]).map((neighborDetails: any, idx: number) => (
|
const neighborRows = orderBy(neighbors, ["weight"], ["desc"]).map((neighborDetails: any, idx: number) => (
|
||||||
|
|
|
@ -33,19 +33,24 @@ export interface IInstanceDetails {
|
||||||
}
|
}
|
||||||
|
|
||||||
interface IGraphNode {
|
interface IGraphNode {
|
||||||
id: string;
|
data: {
|
||||||
label: string;
|
id: string;
|
||||||
x: number;
|
label: string;
|
||||||
y: number;
|
size: number;
|
||||||
size?: number;
|
};
|
||||||
color?: string;
|
position: {
|
||||||
|
x: number;
|
||||||
|
y: number;
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
interface IGraphEdge {
|
interface IGraphEdge {
|
||||||
source: string;
|
data: {
|
||||||
target: string;
|
source: string;
|
||||||
id?: string;
|
target: string;
|
||||||
size?: number;
|
id: string;
|
||||||
|
weight: number;
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface IGraph {
|
export interface IGraph {
|
||||||
|
|
Binary file not shown.
|
@ -24,7 +24,6 @@ import java.io.IOException;
|
||||||
import java.sql.Connection;
|
import java.sql.Connection;
|
||||||
import java.sql.PreparedStatement;
|
import java.sql.PreparedStatement;
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
public class GraphBuilder {
|
public class GraphBuilder {
|
||||||
|
|
Loading…
Reference in a new issue