fix no edges between blocked instances
This commit is contained in:
parent
693cf2b2d9
commit
c2f842263c
|
@ -13,6 +13,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
|
|
||||||
|
- Edges are no longer shown between instances where one blocks the other (based on the federation list in nodeinfo).
|
||||||
|
- Edges between timeline-crawlable instance types (Mastodon, Pleroma, Gab, Misskey, and GNU Social) are only shown
|
||||||
|
if there's a mention in each direction. This is to avoid edges between instances where one blocks the other.
|
||||||
|
|
||||||
### Deprecated
|
### Deprecated
|
||||||
|
|
||||||
### Removed
|
### Removed
|
||||||
|
|
|
@ -58,7 +58,7 @@ config :backend, Backend.Repo,
|
||||||
|
|
||||||
config :backend, :crawler,
|
config :backend, :crawler,
|
||||||
status_age_limit_days: 28,
|
status_age_limit_days: 28,
|
||||||
status_count_limit: 100,
|
status_count_limit: 1000,
|
||||||
personal_instance_threshold: 5,
|
personal_instance_threshold: 5,
|
||||||
crawl_interval_mins: 60,
|
crawl_interval_mins: 60,
|
||||||
crawl_workers: 10,
|
crawl_workers: 10,
|
||||||
|
|
|
@ -163,16 +163,20 @@ defmodule Backend.Scheduler do
|
||||||
|> join(:inner, [ci], c_target in subquery(crawls_subquery),
|
|> join(:inner, [ci], c_target in subquery(crawls_subquery),
|
||||||
on: ci.target_domain == c_target.instance_domain
|
on: ci.target_domain == c_target.instance_domain
|
||||||
)
|
)
|
||||||
|> where([ci], ci.source_domain != ci.target_domain)
|
|> join(:inner, [ci], i_source in Instance, on: ci.source_domain == i_source.domain)
|
||||||
|> group_by([ci], [ci.source_domain, ci.target_domain])
|
|> join(:inner, [ci], i_target in Instance, on: ci.target_domain == i_target.domain)
|
||||||
|> select([ci, c_source, c_target], %{
|
|> select([ci, c_source, c_target, i_source, i_target], %{
|
||||||
source_domain: ci.source_domain,
|
source_domain: ci.source_domain,
|
||||||
target_domain: ci.target_domain,
|
target_domain: ci.target_domain,
|
||||||
mentions: sum(ci.mentions),
|
mentions: sum(ci.mentions),
|
||||||
# we can take min() because every row is the same
|
# we can take min() because every row is the same
|
||||||
|
source_type: min(i_source.type),
|
||||||
|
target_type: min(i_target.type),
|
||||||
source_statuses_seen: min(c_source.statuses_seen),
|
source_statuses_seen: min(c_source.statuses_seen),
|
||||||
target_statuses_seen: min(c_target.statuses_seen)
|
target_statuses_seen: min(c_target.statuses_seen)
|
||||||
})
|
})
|
||||||
|
|> where([ci], ci.source_domain != ci.target_domain)
|
||||||
|
|> group_by([ci], [ci.source_domain, ci.target_domain])
|
||||||
|> Repo.all(timeout: :infinity)
|
|> Repo.all(timeout: :infinity)
|
||||||
|
|
||||||
federation_blocks =
|
federation_blocks =
|
||||||
|
@ -182,35 +186,28 @@ defmodule Backend.Scheduler do
|
||||||
|> Repo.all()
|
|> Repo.all()
|
||||||
|> MapSet.new()
|
|> MapSet.new()
|
||||||
|
|
||||||
|
new_edges =
|
||||||
|
interactions
|
||||||
|
|> filter_to_eligible_interactions(federation_blocks)
|
||||||
|
|> combine_mention_directions()
|
||||||
|
|> Enum.map(fn {{source_domain, target_domain}, {mention_count, statuses_seen}} ->
|
||||||
|
%{
|
||||||
|
source_domain: source_domain,
|
||||||
|
target_domain: target_domain,
|
||||||
|
weight: mention_count / statuses_seen,
|
||||||
|
inserted_at: now,
|
||||||
|
updated_at: now
|
||||||
|
}
|
||||||
|
end)
|
||||||
|
|
||||||
# Get edges and their weights
|
# Get edges and their weights
|
||||||
Repo.transaction(
|
Repo.transaction(
|
||||||
fn ->
|
fn ->
|
||||||
Edge
|
Edge
|
||||||
|> Repo.delete_all(timeout: :infinity)
|
|> Repo.delete_all(timeout: :infinity)
|
||||||
|
|
||||||
mentions =
|
|
||||||
interactions
|
|
||||||
|> reduce_mention_count(federation_blocks)
|
|
||||||
|
|
||||||
# Filter down to mentions where there are interactions in both directions
|
|
||||||
filtered_mentions =
|
|
||||||
mentions
|
|
||||||
|> Enum.filter(&has_opposite_mention?(&1, mentions))
|
|
||||||
|
|
||||||
edges =
|
|
||||||
filtered_mentions
|
|
||||||
|> Enum.map(fn {{source_domain, target_domain}, {mention_count, statuses_seen}} ->
|
|
||||||
%{
|
|
||||||
source_domain: source_domain,
|
|
||||||
target_domain: target_domain,
|
|
||||||
weight: mention_count / statuses_seen,
|
|
||||||
inserted_at: now,
|
|
||||||
updated_at: now
|
|
||||||
}
|
|
||||||
end)
|
|
||||||
|
|
||||||
Edge
|
Edge
|
||||||
|> Repo.insert_all(edges, timeout: :infinity)
|
|> Repo.insert_all(new_edges, timeout: :infinity)
|
||||||
end,
|
end,
|
||||||
timeout: :infinity
|
timeout: :infinity
|
||||||
)
|
)
|
||||||
|
@ -266,9 +263,9 @@ defmodule Backend.Scheduler do
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Takes a list of Interactions and a MapSet of blocks in the form {source_domain, target_domain}
|
# Takes a list of Interactions
|
||||||
# Returns a map of %{{source, target} => {total_mention_count, total_statuses_seen}}
|
# Returns a map of %{{source, target} => {total_mention_count, total_statuses_seen}}
|
||||||
defp reduce_mention_count(interactions, federation_blocks) do
|
defp combine_mention_directions(interactions) do
|
||||||
Enum.reduce(interactions, %{}, fn
|
Enum.reduce(interactions, %{}, fn
|
||||||
%{
|
%{
|
||||||
source_domain: source_domain,
|
source_domain: source_domain,
|
||||||
|
@ -289,46 +286,58 @@ defmodule Backend.Scheduler do
|
||||||
|
|
||||||
statuses_seen = source_statuses_seen + target_statuses_seen
|
statuses_seen = source_statuses_seen + target_statuses_seen
|
||||||
|
|
||||||
maybe_update_map(
|
Map.update(acc, key, {mentions, statuses_seen}, fn {curr_mentions, curr_statuses_seen} ->
|
||||||
acc,
|
{curr_mentions + mentions, curr_statuses_seen}
|
||||||
key,
|
end)
|
||||||
source_domain,
|
|
||||||
target_domain,
|
|
||||||
mentions,
|
|
||||||
statuses_seen,
|
|
||||||
federation_blocks
|
|
||||||
)
|
|
||||||
end)
|
end)
|
||||||
end
|
end
|
||||||
|
|
||||||
defp maybe_update_map(
|
defp filter_to_eligible_interactions(interactions, federation_blocks) do
|
||||||
acc,
|
# A map of {source_domain, target_domain} => mention_count. Used to find out whether a mention in the reverse
|
||||||
key,
|
# direction has been seen.
|
||||||
source_domain,
|
mention_directions =
|
||||||
target_domain,
|
interactions
|
||||||
mentions,
|
|> Enum.reduce(%{}, fn %{source_domain: source, target_domain: target, mentions: mentions},
|
||||||
statuses_seen,
|
acc ->
|
||||||
|
Map.put(acc, {source, target}, mentions)
|
||||||
|
end)
|
||||||
|
|
||||||
|
interactions
|
||||||
|
|> Enum.filter(&is_eligible_interaction?(&1, mention_directions, federation_blocks))
|
||||||
|
end
|
||||||
|
|
||||||
|
# Returns true if
|
||||||
|
# * there's no federation block in either direction between the two instances
|
||||||
|
# * there are mentions in this direction, and also in the reverse direction when both instances are of a timeline-crawlable type
|
||||||
|
defp is_eligible_interaction?(
|
||||||
|
%{
|
||||||
|
source_domain: source,
|
||||||
|
target_domain: target,
|
||||||
|
mentions: mention_count,
|
||||||
|
source_type: source_type,
|
||||||
|
target_type: target_type
|
||||||
|
},
|
||||||
|
mention_directions,
|
||||||
federation_blocks
|
federation_blocks
|
||||||
) do
|
) do
|
||||||
if not MapSet.member?(federation_blocks, {source_domain, target_domain}) and
|
mentions_were_seen = mention_count > 0
|
||||||
not MapSet.member?(federation_blocks, {target_domain, source_domain}) do
|
|
||||||
Map.update(acc, key, {mentions, statuses_seen}, fn {curr_mentions, curr_statuses_seen} ->
|
opposite_mention_exists =
|
||||||
{curr_mentions + mentions, curr_statuses_seen}
|
if is_timeline_crawlable_type?(source_type) and is_timeline_crawlable_type?(target_type) do
|
||||||
end)
|
Map.has_key?(mention_directions, {target, source}) and
|
||||||
end
|
Map.get(mention_directions, {target, source}) > 0
|
||||||
|
else
|
||||||
|
true
|
||||||
|
end
|
||||||
|
|
||||||
|
federation_block_exists =
|
||||||
|
MapSet.member?(federation_blocks, {source, target}) or
|
||||||
|
MapSet.member?(federation_blocks, {target, source})
|
||||||
|
|
||||||
|
mentions_were_seen and opposite_mention_exists and not federation_block_exists
|
||||||
end
|
end
|
||||||
|
|
||||||
defp has_opposite_mention?(mention, all_mentions) do
|
defp is_timeline_crawlable_type?(type) do
|
||||||
{{source_domain, target_domain}, {mention_count, _statuses_seen}} = mention
|
Enum.member?(["mastodon", "gab", "pleroma", "gnusocial", "misskey"], type)
|
||||||
other_direction_key = {target_domain, source_domain}
|
|
||||||
|
|
||||||
if mention_count > 0 and Map.has_key?(all_mentions, other_direction_key) do
|
|
||||||
{other_direction_mentions, _other_statuses_seen} =
|
|
||||||
Map.get(all_mentions, other_direction_key)
|
|
||||||
|
|
||||||
other_direction_mentions > 0
|
|
||||||
else
|
|
||||||
false
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue