diff --git a/backend/lib/backend/crawler/crawler.ex b/backend/lib/backend/crawler/crawler.ex
index 1538a77..534e4c3 100644
--- a/backend/lib/backend/crawler/crawler.ex
+++ b/backend/lib/backend/crawler/crawler.ex
@@ -80,7 +80,7 @@ defmodule Backend.Crawler do
 
   # Save the state (after crawling) to the database.
   defp save(%Crawler{domain: domain, result: result, found_api?: true, error: nil}) do
-    now = NaiveDateTime.truncate(NaiveDateTime.utc_now(), :second)
+    now = get_now()
 
     ## Update the instance we crawled ##
     Repo.insert!(
diff --git a/backend/lib/backend/edge.ex b/backend/lib/backend/edge.ex
index f808271..3182218 100644
--- a/backend/lib/backend/edge.ex
+++ b/backend/lib/backend/edge.ex
@@ -13,6 +13,8 @@ defmodule Backend.Edge do
       type: :string,
       foreign_key: :target_domain
 
+    field :weight, :float
+
     timestamps()
   end
 
diff --git a/backend/lib/backend/instance_peer.ex b/backend/lib/backend/instance_peer.ex
index fbe2731..914bf4f 100644
--- a/backend/lib/backend/instance_peer.ex
+++ b/backend/lib/backend/instance_peer.ex
@@ -13,8 +13,6 @@ defmodule Backend.InstancePeer do
       type: :string,
       foreign_key: :target_domain
 
-    field :weight, :float, default: 0.0
-
     timestamps()
   end
 
diff --git a/backend/lib/backend/scheduler.ex b/backend/lib/backend/scheduler.ex
index 81b1ad5..ea88070 100644
--- a/backend/lib/backend/scheduler.ex
+++ b/backend/lib/backend/scheduler.ex
@@ -5,7 +5,8 @@ defmodule Backend.Scheduler do
 
   use Quantum.Scheduler, otp_app: :backend
 
-  alias Backend.{Crawl, Edge, Interaction, Instance, Repo}
+  alias Backend.{Crawl, Edge, CrawlInteraction, Instance, Repo}
+  import Backend.Util
   import Ecto.Query
   require Logger
 
@@ -29,88 +30,136 @@ defmodule Backend.Scheduler do
     Logger.info("Pruned #{deleted_num} old crawls.")
   end
 
+  @doc """
+  Calculates every instance's "insularity score" -- that is, the percentage of mentions that are among users on the
+  instance, rather than at other instances.
+  """
+  def generate_insularity_scores() do
+    now = get_now()
+
+    crawls_subquery =
+      Crawl
+      |> select([c], %{
+        instance_domain: c.instance_domain,
+        interactions_seen: sum(c.interactions_seen)
+      })
+      |> where([c], is_nil(c.error))
+      |> group_by([c], c.instance_domain)
+
+    scores =
+      CrawlInteraction
+      |> join(:left, [ci], c in subquery(crawls_subquery),
+        on: ci.source_domain == c.instance_domain
+      )
+      |> where([ci], ci.source_domain == ci.target_domain)
+      |> group_by([ci], ci.source_domain)
+      |> select([ci, c], %{
+        domain: ci.source_domain,
+        mentions: sum(ci.mentions),
+        # we can take min() because every row is the same
+        interactions: min(c.interactions_seen)
+      })
+      |> Repo.all()
+      |> (fn o ->
+            Logger.info(inspect(o))
+            o
+          end).()
+      |> Enum.map(fn %{domain: domain, mentions: mentions, interactions: interactions} ->
+        %{
+          domain: domain,
+          insularity: mentions / interactions,
+          inserted_at: now,
+          updated_at: now
+        }
+      end)
+
+    Instance
+    |> Repo.insert_all(scores,
+      on_conflict: {:replace, [:insularity, :updated_at]},
+      conflict_target: :domain
+    )
+  end
+
   @doc """
   This function aggregates statistics from the interactions in the database.
   It calculates the strength of edges between nodes.
-
-  TODO: generate edge weights. The weight of an edge between two instances will be
-  (number of mentions of each other) / (total number of statuses crawled).
-  This requires us to keep track of how many statuses we've seen.
   """
   def generate_edges() do
+    now = get_now()
+
+    crawls_subquery =
+      Crawl
+      |> select([c], %{
+        instance_domain: c.instance_domain,
+        statuses_seen: sum(c.statuses_seen)
+      })
+      |> where([c], is_nil(c.error))
+      |> group_by([c], c.instance_domain)
+
     interactions =
-      Interaction
-      |> select([inter], {inter.source_domain, inter.target_domain})
-      |> join(:left, [inter], i_source in Instance, on: inter.source_domain == i_source.domain)
-      |> join(:left, [inter], i_target in Instance, on: inter.target_domain == i_target.domain)
-      |> where(
-        [inter, i_source, i_target],
-        not is_nil(i_source.last_crawl_timestamp) and not is_nil(i_target.last_crawl_timestamp)
+      CrawlInteraction
+      |> join(:left, [ci], c_source in subquery(crawls_subquery),
+        on: ci.source_domain == c_source.instance_domain
       )
-      # Repo.all() returns a tuple like {"mastodon.social", "cursed.technology"}
+      |> join(:left, [ci], c_target in subquery(crawls_subquery),
+        on: ci.target_domain == c_target.instance_domain
+      )
+      |> group_by([ci], [ci.source_domain, ci.target_domain])
+      |> select([ci, c_source, c_target], %{
+        source_domain: ci.source_domain,
+        target_domain: ci.target_domain,
+        mentions: sum(ci.mentions),
+        # we can take min() because every row is the same
+        source_statuses_seen: min(c_source.statuses_seen),
+        target_statuses_seen: min(c_target.statuses_seen)
+      })
      |> Repo.all()
-      # Create a map of %{source_domain => [target_domains]}
-      |> Enum.group_by(fn tuple -> Kernel.elem(tuple, 0) end, fn tuple ->
-        Kernel.elem(tuple, 1)
-      end)
 
-    # Calculate insularity score
+    # Get edges and their weights
     Repo.transaction(fn ->
-      interactions
-      |> Enum.each(fn {source, targets} ->
-        total_mentions = length(targets)
-        self_mentions = Enum.count(targets, fn t -> t == source end)
-
-        insularity = self_mentions / total_mentions
-
-        Repo.insert!(
-          %Instance{
-            domain: source,
-            insularity: insularity
-          },
-          on_conflict: [set: [insularity: insularity]],
-          conflict_target: :domain
-        )
-      end)
-
-      # Get edges
-      edges = MapSet.new()
-
-      interactions
-      |> Enum.each(fn {source, targets} ->
-        targets
-        |> Enum.each(fn target ->
-          [key_a, key_b] = Enum.sort([source, target])
-
-          edge = %Edge{
-            source_domain: key_a,
-            target_domain: key_b
-          }
-
-          MapSet.put(edges, edge)
-          Logger.debug(inspect(edges))
-        end)
-      end)
-
-      Logger.debug(inspect(edges))
-
-      now = NaiveDateTime.truncate(NaiveDateTime.utc_now(), :second)
-
-      Repo.delete_all(Edge)
+      Edge
+      |> Repo.delete_all()
 
       edges =
-        edges
-        |> MapSet.to_list()
-        |> Enum.map(fn %{source_domain: source_domain, target_domain: target_domain} ->
-          %Edge{
+        interactions
+        # Get a map of %{{source, target} => {total_mention_count, total_statuses_seen}}
+        |> Enum.reduce(%{}, fn
+          %{
             source_domain: source_domain,
             target_domain: target_domain,
-            updated_at: now,
-            inserted_at: now
+            mentions: mentions,
+            source_statuses_seen: source_statuses_seen,
+            target_statuses_seen: target_statuses_seen
+          } = x,
+          acc ->
+            Logger.info(inspect(x))
+            key = get_interaction_key(source_domain, target_domain)
+
+            # target_statuses_seen might be nil if that instance was never crawled. default to 0.
+            target_statuses_seen =
+              case target_statuses_seen do
+                nil -> 0
+                _ -> target_statuses_seen
+              end
+
+            statuses_seen = source_statuses_seen + target_statuses_seen
+
+            Map.update(acc, key, {mentions, statuses_seen}, fn {curr_mentions, curr_statuses_seen} ->
+              {curr_mentions + mentions, curr_statuses_seen}
+            end)
+        end)
+        |> Enum.map(fn {{source_domain, target_domain}, {mention_count, statuses_seen}} ->
+          %{
+            source_domain: source_domain,
+            target_domain: target_domain,
+            weight: mention_count / statuses_seen,
+            inserted_at: now,
+            updated_at: now
           }
         end)
 
-      Repo.insert_all(Edge, edges)
+      Edge
+      |> Repo.insert_all(edges)
     end)
   end
 end
diff --git a/backend/lib/backend_web/views/graph_view.ex b/backend/lib/backend_web/views/graph_view.ex
index b978e4b..a8915c1 100644
--- a/backend/lib/backend_web/views/graph_view.ex
+++ b/backend/lib/backend_web/views/graph_view.ex
@@ -16,21 +16,28 @@ defmodule BackendWeb.GraphView do
         false -> 1
       end
 
+    # This is the format that cytoscape.js expects.
     %{
-      id: node.domain,
-      label: node.domain,
-      size: size,
-      x: node.x,
-      y: node.y
+      data: %{
+        id: node.domain,
+        label: node.domain,
+        size: size
+      },
+      position: %{
+        x: node.x,
+        y: node.y
+      }
     }
   end
 
   def render("edge.json", %{graph: edge}) do
     %{
-      id: edge.id,
-      source: edge.source_domain,
-      target: edge.target_domain,
-      size: edge.weight
+      data: %{
+        id: edge.id,
+        source: edge.source_domain,
+        target: edge.target_domain,
+        weight: edge.weight
+      }
     }
   end
 end
diff --git a/backend/priv/repo/migrations/20190624090436_create_instances.exs b/backend/priv/repo/migrations/20190624090436_create_instances.exs
index fa27d26..fa1b999 100644
--- a/backend/priv/repo/migrations/20190624090436_create_instances.exs
+++ b/backend/priv/repo/migrations/20190624090436_create_instances.exs
@@ -10,6 +10,9 @@ defmodule Backend.Repo.Migrations.CreateInstances do
       add :version, :string
       add :insularity, :float
 
+      add :x, :float
+      add :y, :float
+
       timestamps()
     end
 
@@ -19,8 +22,6 @@ defmodule Backend.Repo.Migrations.CreateInstances do
       add :source_domain, references(:instances, column: :domain, type: :string)
       add :target_domain, references(:instances, column: :domain, type: :string)
 
-      add :weight, :float
-
       timestamps()
     end
 
diff --git a/backend/priv/repo/migrations/20190710133755_create_edges.exs b/backend/priv/repo/migrations/20190710133755_create_edges.exs
index 1bfd546..60d118b 100644
--- a/backend/priv/repo/migrations/20190710133755_create_edges.exs
+++ b/backend/priv/repo/migrations/20190710133755_create_edges.exs
@@ -6,6 +6,8 @@ defmodule Backend.Repo.Migrations.CreateEdges do
       add :source_domain, references(:instances, column: :domain, type: :string), null: false
       add :target_domain, references(:instances, column: :domain, type: :string), null: false
 
+      add :weight, :float, null: false
+
       timestamps()
     end
 
diff --git a/backend/priv/repo/migrations/20190712133009_add_instance_coords.exs b/backend/priv/repo/migrations/20190712133009_add_instance_coords.exs
deleted file mode 100644
index 8d68453..0000000
--- a/backend/priv/repo/migrations/20190712133009_add_instance_coords.exs
+++ /dev/null
@@ -1,10 +0,0 @@
-defmodule Backend.Repo.Migrations.AddInstanceCoords do
-  use Ecto.Migration
-
-  def change do
-    alter table(:instances) do
-      add :x, :float
-      add :y, :float
-    end
-  end
-end
diff --git a/frontend/src/components/CytoscapeGraph.tsx b/frontend/src/components/CytoscapeGraph.tsx
index 453103a..06a225c 100644
--- a/frontend/src/components/CytoscapeGraph.tsx
+++ b/frontend/src/components/CytoscapeGraph.tsx
@@ -69,11 +69,12 @@ class GraphImpl extends React.Component {
 
     // Check that all nodes have size & coordinates; otherwise the graph will look messed up
    const lengthBeforeFilter = graph.nodes.length;
-    graph = { ...graph, nodes: graph.nodes.filter(n => n.size && n.x && n.y) };
+    graph = { ...graph, nodes: graph.nodes.filter(n => n.data.size && n.position.x && n.position.y) };
     if (graph.nodes.length !== lengthBeforeFilter) {
       // tslint:disable-next-line:no-console
       console.error(
-        "Some nodes were missing details: " + graph.nodes.filter(n => !n.size || !n.x || !n.y).map(n => n.label)
+        "Some nodes were missing details: " +
+          graph.nodes.filter(n => !n.data.size || !n.position.x || !n.position.y).map(n => n.data.label)
       );
       this.setState({ didError: true });
     }
@@ -125,29 +126,9 @@ class GraphImpl extends React.Component {
       return;
     }
     this.cy = cytoscape({
-      autoungrabify: true,
+      autoungrabify: false,
       container: this.cytoscapeDiv.current,
-      elements: {
-        edges: graph.edges.map(edge => ({
-          data: {
-            id: edge.id || `${edge.source}${edge.target}`,
-            source: edge.source,
-            target: edge.target,
-            weight: edge.size
-          },
-          group: "edges" as "edges"
-        })),
-        nodes: graph.nodes.map(node => ({
-          data: {
-            id: node.id
-          },
-          group: "nodes" as "nodes",
-          position: {
-            x: node.x,
-            y: node.y
-          }
-        }))
-      },
+      elements: graph,
       layout: {
         name: "preset"
       },
diff --git a/frontend/src/components/Sidebar.tsx b/frontend/src/components/Sidebar.tsx
index f058c3a..db23360 100644
--- a/frontend/src/components/Sidebar.tsx
+++ b/frontend/src/components/Sidebar.tsx
@@ -83,7 +83,7 @@ class SidebarImpl extends React.Component {
     } else if (
       this.props.graph &&
       this.props.instanceName &&
-      this.props.graph.nodes.map(n => n.id).indexOf(this.props.instanceName) < 0
+      this.props.graph.nodes.map(n => n.data.id).indexOf(this.props.instanceName) < 0
     ) {
       return this.renderQuietInstanceState();
     }
@@ -177,13 +177,15 @@ class SidebarImpl extends React.Component {
     if (!this.props.graph || !this.props.instanceName) {
       return;
     }
-    const edges = this.props.graph.edges.filter(e => [e.source, e.target].indexOf(this.props.instanceName!) > -1);
+    const edges = this.props.graph.edges.filter(
+      e => [e.data.source, e.data.target].indexOf(this.props.instanceName!) > -1
+    );
     const neighbors: any[] = [];
     edges.forEach(e => {
-      if (e.source === this.props.instanceName) {
-        neighbors.push({ neighbor: e.target, weight: e.size });
+      if (e.data.source === this.props.instanceName) {
+        neighbors.push({ neighbor: e.data.target, weight: e.data.weight });
       } else {
-        neighbors.push({ neighbor: e.source, weight: e.size });
+        neighbors.push({ neighbor: e.data.source, weight: e.data.weight });
       }
     });
     const neighborRows = orderBy(neighbors, ["weight"], ["desc"]).map((neighborDetails: any, idx: number) => (
diff --git a/frontend/src/redux/types.ts b/frontend/src/redux/types.ts
index 36c2c58..4c4de59 100644
--- a/frontend/src/redux/types.ts
+++ b/frontend/src/redux/types.ts
@@ -33,19 +33,24 @@ export interface IInstanceDetails {
 }
 
 interface IGraphNode {
-  id: string;
-  label: string;
-  x: number;
-  y: number;
-  size?: number;
-  color?: string;
+  data: {
+    id: string;
+    label: string;
+    size: number;
+  };
+  position: {
+    x: number;
+    y: number;
+  };
 }
 
 interface IGraphEdge {
-  source: string;
-  target: string;
-  id?: string;
-  size?: number;
+  data: {
+    source: string;
+    target: string;
+    id: string;
+    weight: number;
+  };
 }
 
 export interface IGraph {
diff --git a/gephi/bin/main/space/fediverse/graph/GraphBuilder.class b/gephi/bin/main/space/fediverse/graph/GraphBuilder.class
index 9bbada1..b5339a6 100644
Binary files a/gephi/bin/main/space/fediverse/graph/GraphBuilder.class and b/gephi/bin/main/space/fediverse/graph/GraphBuilder.class differ
diff --git a/gephi/src/main/java/space/fediverse/graph/GraphBuilder.java b/gephi/src/main/java/space/fediverse/graph/GraphBuilder.java
index 9b4eb94..b7de6e1 100644
--- a/gephi/src/main/java/space/fediverse/graph/GraphBuilder.java
+++ b/gephi/src/main/java/space/fediverse/graph/GraphBuilder.java
@@ -24,7 +24,6 @@ import java.io.IOException;
 import java.sql.Connection;
 import java.sql.PreparedStatement;
 import java.sql.SQLException;
-import java.util.Arrays;
 import java.util.concurrent.TimeUnit;
 
 public class GraphBuilder {
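To make the scoring logic above easier to review, here is a minimal sketch of the two scores this patch introduces, written as pure Elixir functions over rows shaped like the query results in `Backend.Scheduler`. It assumes `Backend.Util.get_interaction_key/2` (imported but not shown in this diff) returns an order-independent key for a domain pair, which is approximated here by sorting the pair; all domains and numbers are invented for the example.

```elixir
# Sketch only -- not part of the patch. Pure-function version of the scores
# computed in Backend.Scheduler above, for rows shaped like the query results.
defmodule ScoreSketch do
  # Insularity: self-mentions divided by all interactions seen for the instance.
  def insularity(self_mentions, interactions_seen) when interactions_seen > 0,
    do: self_mentions / interactions_seen

  # Edge weight: mentions in both directions divided by the combined number of
  # statuses seen on the two instances, keyed by the unordered domain pair.
  def edges(rows) do
    rows
    |> Enum.reduce(%{}, fn row, acc ->
      key = interaction_key(row)
      # An instance that was never crawled has no statuses_seen; default to 0.
      statuses_seen = row.source_statuses_seen + (row.target_statuses_seen || 0)

      # statuses_seen is symmetric for the pair, so only mentions accumulate.
      Map.update(acc, key, {row.mentions, statuses_seen}, fn {mentions, seen} ->
        {mentions + row.mentions, seen}
      end)
    end)
    |> Enum.map(fn {{source, target}, {mentions, statuses_seen}} ->
      %{source_domain: source, target_domain: target, weight: mentions / statuses_seen}
    end)
  end

  # Approximation of Backend.Util.get_interaction_key/2: sort the domain pair.
  defp interaction_key(row),
    do: [row.source_domain, row.target_domain] |> Enum.sort() |> List.to_tuple()
end

# 5 + 3 mentions between two instances over 200 + 100 statuses seen yields one
# undirected edge with weight 8 / 300.
ScoreSketch.edges([
  %{source_domain: "a.example", target_domain: "b.example",
    mentions: 5, source_statuses_seen: 200, target_statuses_seen: 100},
  %{source_domain: "b.example", target_domain: "a.example",
    mentions: 3, source_statuses_seen: 100, target_statuses_seen: 200}
])

# 120 of the 400 interactions seen on an instance stay on that instance.
ScoreSketch.insularity(120, 400)
# => 0.3
```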
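On the frontend side, the `GraphView` and `types.ts` changes move the graph into the element format cytoscape.js consumes directly (attributes under `data`, preset coordinates under `position`), which is why `CytoscapeGraph.tsx` can now pass `elements: graph` straight through instead of remapping. For reference, the rendered payload looks roughly like the following; the ids, domains, and numbers are illustrative only.

```elixir
# Illustrative shape of the "node.json" / "edge.json" output after this change.
%{
  nodes: [
    %{
      data: %{id: "a.example", label: "a.example", size: 4},
      position: %{x: 12.5, y: -3.0}
    }
  ],
  edges: [
    %{data: %{id: 17, source: "a.example", target: "b.example", weight: 8 / 300}}
  ]
}
# The graph is initialised with the "preset" layout (see CytoscapeGraph.tsx),
# so the stored x/y coordinates are used as-is rather than being re-laid-out.
```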