don't create edges between blocking instances
This commit is contained in:
parent
b1d6665bb8
commit
3e52431435
|
@ -63,7 +63,9 @@ config :backend, :crawler,
|
|||
crawl_workers: 20,
|
||||
blacklist: [
|
||||
"gab.best",
|
||||
"4chan.icu"
|
||||
"4chan.icu",
|
||||
"pleroma.site",
|
||||
"pleroma.online"
|
||||
],
|
||||
user_agent: "fediverse.space crawler",
|
||||
admin_phone: System.get_env("ADMIN_PHONE"),
|
||||
|
|
|
@ -62,8 +62,4 @@ config :backend, :crawler,
|
|||
personal_instance_threshold: 5,
|
||||
crawl_interval_mins: 60,
|
||||
crawl_workers: 10,
|
||||
blacklist: [
|
||||
"gab.best",
|
||||
"4chan.icu"
|
||||
],
|
||||
frontend_domain: "localhost:3000"
|
||||
|
|
|
@ -26,25 +26,39 @@ defmodule Backend.Crawler.ApiCrawler do
|
|||
:peers,
|
||||
:interactions,
|
||||
:statuses_seen,
|
||||
:instance_type
|
||||
:instance_type,
|
||||
:blocked_domains
|
||||
]
|
||||
|
||||
@type t() :: %__MODULE__{
|
||||
version: String.t(),
|
||||
description: String.t(),
|
||||
version: String.t() | nil,
|
||||
description: String.t() | nil,
|
||||
user_count: integer | nil,
|
||||
status_count: integer | nil,
|
||||
peers: [String.t()],
|
||||
interactions: instance_interactions,
|
||||
statuses_seen: integer,
|
||||
instance_type: instance_type
|
||||
instance_type: instance_type | nil,
|
||||
blocked_domains: [String.t()]
|
||||
}
|
||||
|
||||
@empty_result %{
|
||||
version: nil,
|
||||
description: nil,
|
||||
user_count: nil,
|
||||
status_count: nil,
|
||||
peers: [],
|
||||
interactions: %{},
|
||||
statuses_seen: 0,
|
||||
instance_type: nil,
|
||||
blocked_domains: []
|
||||
}
|
||||
|
||||
@doc """
|
||||
Check whether the instance at the given domain is of the type that this ApiCrawler implements.
|
||||
Arguments are the instance domain and the nodeinfo results.
|
||||
"""
|
||||
@callback is_instance_type?(String.t(), Nodeinfo.t()) :: boolean()
|
||||
@callback is_instance_type?(String.t(), ApiCrawler.t()) :: boolean()
|
||||
|
||||
@doc """
|
||||
Check whether the instance allows crawling according to its robots.txt or otherwise.
|
||||
|
@ -56,4 +70,11 @@ defmodule Backend.Crawler.ApiCrawler do
|
|||
Takes two arguments: the domain to crawl and the existing results (from nodeinfo).
|
||||
"""
|
||||
@callback crawl(String.t(), Nodeinfo.t()) :: t()
|
||||
|
||||
@doc """
|
||||
Returns the default, empty state
|
||||
"""
|
||||
def get_default do
|
||||
@empty_result
|
||||
end
|
||||
end
|
||||
|
|
|
@ -4,7 +4,17 @@ defmodule Backend.Crawler do
|
|||
"""
|
||||
|
||||
alias __MODULE__
|
||||
alias Backend.{Crawl, CrawlInteraction, Instance, InstancePeer, MostRecentCrawl, Repo}
|
||||
|
||||
alias Backend.{
|
||||
Crawl,
|
||||
CrawlInteraction,
|
||||
FederationRestriction,
|
||||
Instance,
|
||||
InstancePeer,
|
||||
MostRecentCrawl,
|
||||
Repo
|
||||
}
|
||||
|
||||
alias Backend.Crawler.ApiCrawler
|
||||
alias Backend.Crawler.Crawlers.{Friendica, GnuSocial, Mastodon, Misskey, Nodeinfo}
|
||||
|
||||
|
@ -75,14 +85,24 @@ defmodule Backend.Crawler do
|
|||
# a) it should always be run first
|
||||
# b) it passes the results on to the next crawlers (e.g. user_count)
|
||||
defp crawl(%Crawler{api_crawlers: [Nodeinfo | remaining_crawlers], domain: domain} = state) do
|
||||
with true <- Nodeinfo.allows_crawling?(domain), {:ok, nodeinfo} <- Nodeinfo.crawl(domain) do
|
||||
Logger.debug("Found nodeinfo for #{domain}.")
|
||||
result = Map.merge(nodeinfo, %{peers: [], interactions: %{}, statuses_seen: 0})
|
||||
crawl(%Crawler{state | result: result, found_api?: true, api_crawlers: remaining_crawlers})
|
||||
else
|
||||
_ ->
|
||||
if Nodeinfo.allows_crawling?(domain) do
|
||||
nodeinfo = Nodeinfo.crawl(domain, nil)
|
||||
|
||||
if nodeinfo != nil do
|
||||
Logger.debug("Found nodeinfo for #{domain}.")
|
||||
|
||||
crawl(%Crawler{
|
||||
state
|
||||
| result: nodeinfo,
|
||||
found_api?: true,
|
||||
api_crawlers: remaining_crawlers
|
||||
})
|
||||
else
|
||||
Logger.debug("Did not find nodeinfo for #{domain}.")
|
||||
crawl(%Crawler{state | api_crawlers: remaining_crawlers})
|
||||
end
|
||||
else
|
||||
crawl(%Crawler{state | api_crawlers: remaining_crawlers, allows_crawling?: false})
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -165,7 +185,7 @@ defmodule Backend.Crawler do
|
|||
|
||||
Elasticsearch.put_document!(Backend.Elasticsearch.Cluster, instance, "instances/_doc")
|
||||
|
||||
# Save details of a new crawl
|
||||
## Save details of a new crawl ##
|
||||
curr_crawl =
|
||||
Repo.insert!(%Crawl{
|
||||
instance_domain: domain,
|
||||
|
@ -202,12 +222,13 @@ defmodule Backend.Crawler do
|
|||
raise "#{domain} has invalid peers: #{Enum.join(invalid_peers, ", ")}"
|
||||
end
|
||||
|
||||
peers =
|
||||
new_instances =
|
||||
peers_domains
|
||||
|> list_union(result.blocked_domains)
|
||||
|> Enum.map(&%{domain: &1, inserted_at: now, updated_at: now, next_crawl: now})
|
||||
|
||||
Instance
|
||||
|> Repo.insert_all(peers, on_conflict: :nothing, conflict_target: :domain)
|
||||
|> Repo.insert_all(new_instances, on_conflict: :nothing, conflict_target: :domain)
|
||||
|
||||
Repo.transaction(fn ->
|
||||
## Save peer relationships ##
|
||||
|
@ -249,6 +270,56 @@ defmodule Backend.Crawler do
|
|||
|> Repo.insert_all(new_instance_peers)
|
||||
end)
|
||||
|
||||
## Save federation restrictions ##
|
||||
Repo.transaction(fn ->
|
||||
current_restrictions =
|
||||
FederationRestriction
|
||||
|> select([fr], {fr.target_domain, fr.type})
|
||||
|> where(source_domain: ^domain)
|
||||
|> Repo.all()
|
||||
|
||||
wanted_restrictions_set =
|
||||
result.blocked_domains
|
||||
|> Enum.map(&{&1, "reject"})
|
||||
|> MapSet.new()
|
||||
|
||||
current_restrictions_set = MapSet.new(current_restrictions)
|
||||
|
||||
# Delete the ones we don't want
|
||||
restrictions_to_delete =
|
||||
current_restrictions_set
|
||||
|> MapSet.difference(wanted_restrictions_set)
|
||||
|> MapSet.to_list()
|
||||
|> Enum.map(fn {target_domain, _type} -> target_domain end)
|
||||
|
||||
if length(restrictions_to_delete) > 0 do
|
||||
FederationRestriction
|
||||
|> where(
|
||||
[fr],
|
||||
fr.source_domain == ^domain and fr.target_domain in ^restrictions_to_delete
|
||||
)
|
||||
|> Repo.delete_all()
|
||||
end
|
||||
|
||||
# Save the new ones
|
||||
new_restrictions =
|
||||
wanted_restrictions_set
|
||||
|> MapSet.difference(current_restrictions_set)
|
||||
|> MapSet.to_list()
|
||||
|> Enum.map(fn {target_domain, type} ->
|
||||
%{
|
||||
source_domain: domain,
|
||||
target_domain: target_domain,
|
||||
type: type,
|
||||
inserted_at: now,
|
||||
updated_at: now
|
||||
}
|
||||
end)
|
||||
|
||||
FederationRestriction
|
||||
|> Repo.insert_all(new_restrictions)
|
||||
end)
|
||||
|
||||
## Save interactions ##
|
||||
interactions =
|
||||
result.interactions
|
||||
|
|
|
@ -62,12 +62,11 @@ defmodule Backend.Crawler.Crawlers.Friendica do
|
|||
end)
|
||||
|
||||
if details |> Map.get(:user_count, 0) |> is_above_user_threshold?() do
|
||||
Map.merge(
|
||||
%{peers: peers, interactions: %{}, statuses_seen: 0, instance_type: :friendica},
|
||||
Map.take(details, [:description, :version, :user_count, :status_count])
|
||||
)
|
||||
ApiCrawler.get_default()
|
||||
|> Map.merge(%{peers: peers, instance_type: :friendica})
|
||||
|> Map.merge(Map.take(details, [:description, :version, :user_count, :status_count]))
|
||||
else
|
||||
nodeinfo_result
|
||||
Map.merge(ApiCrawler.get_default(), nodeinfo_result)
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -3,7 +3,6 @@ defmodule Backend.Crawler.Crawlers.GnuSocial do
|
|||
Crawler for GNU Social servers.
|
||||
"""
|
||||
alias Backend.Crawler.ApiCrawler
|
||||
alias Backend.Crawler.Crawlers.Nodeinfo
|
||||
import Backend.Crawler.Util
|
||||
import Backend.Util
|
||||
require Logger
|
||||
|
@ -32,17 +31,17 @@ defmodule Backend.Crawler.Crawlers.GnuSocial do
|
|||
end
|
||||
|
||||
@impl ApiCrawler
|
||||
def crawl(domain, nodeinfo_result) do
|
||||
if nodeinfo_result == nil or
|
||||
nodeinfo_result |> Map.get(:user_count) |> is_above_user_threshold?() do
|
||||
crawl_large_instance(domain, nodeinfo_result)
|
||||
def crawl(domain, nodeinfo) do
|
||||
if nodeinfo == nil or
|
||||
nodeinfo |> Map.get(:user_count) |> is_above_user_threshold?() do
|
||||
Map.merge(crawl_large_instance(domain), nodeinfo)
|
||||
else
|
||||
nodeinfo_result
|
||||
Map.merge(ApiCrawler.get_default(), nodeinfo)
|
||||
end
|
||||
end
|
||||
|
||||
@spec crawl_large_instance(String.t(), Nodeinfo.t()) :: ApiCrawler.t()
|
||||
defp crawl_large_instance(domain, nodeinfo_result) do
|
||||
@spec crawl_large_instance(String.t()) :: ApiCrawler.t()
|
||||
defp crawl_large_instance(domain) do
|
||||
status_datetime_threshold =
|
||||
NaiveDateTime.utc_now()
|
||||
|> NaiveDateTime.add(get_config(:status_age_limit_days) * 24 * 3600 * -1, :second)
|
||||
|
@ -52,24 +51,14 @@ defmodule Backend.Crawler.Crawlers.GnuSocial do
|
|||
|
||||
{interactions, statuses_seen} = get_interactions(domain, min_timestamp)
|
||||
|
||||
if nodeinfo_result != nil do
|
||||
Map.merge(nodeinfo_result, %{
|
||||
interactions: interactions,
|
||||
statuses_seen: statuses_seen,
|
||||
peers: []
|
||||
})
|
||||
else
|
||||
Map.merge(
|
||||
ApiCrawler.get_default(),
|
||||
%{
|
||||
version: nil,
|
||||
description: nil,
|
||||
user_count: nil,
|
||||
status_count: nil,
|
||||
peers: [],
|
||||
interactions: interactions,
|
||||
statuses_seen: statuses_seen,
|
||||
instance_type: :gnusocial
|
||||
}
|
||||
end
|
||||
)
|
||||
end
|
||||
|
||||
@spec get_interactions(
|
||||
|
|
|
@ -34,26 +34,19 @@ defmodule Backend.Crawler.Crawlers.Mastodon do
|
|||
end
|
||||
|
||||
@impl ApiCrawler
|
||||
def crawl(domain, _current_result) do
|
||||
def crawl(domain, nodeinfo) do
|
||||
instance = get_and_decode!("https://#{domain}/api/v1/instance")
|
||||
user_count = get_in(instance, ["stats", "user_count"])
|
||||
|
||||
if is_above_user_threshold?(user_count) or has_opted_in?(domain) do
|
||||
crawl_large_instance(domain, instance)
|
||||
Map.merge(nodeinfo, crawl_large_instance(domain, instance))
|
||||
else
|
||||
Map.merge(
|
||||
Map.take(instance["stats"], ["user_count"])
|
||||
|> convert_keys_to_atoms(),
|
||||
%{
|
||||
instance_type: get_instance_type(instance),
|
||||
peers: [],
|
||||
interactions: %{},
|
||||
statuses_seen: 0,
|
||||
description: nil,
|
||||
version: nil,
|
||||
status_count: nil
|
||||
}
|
||||
)
|
||||
ApiCrawler.get_default()
|
||||
|> Map.merge(nodeinfo)
|
||||
|> Map.merge(%{
|
||||
instance_type: get_instance_type(instance),
|
||||
user_count: get_in(instance, ["stats", "user_count"])
|
||||
})
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -35,22 +35,18 @@ defmodule Backend.Crawler.Crawlers.Misskey do
|
|||
end
|
||||
|
||||
@impl ApiCrawler
|
||||
def crawl(domain, _result) do
|
||||
def crawl(domain, nodeinfo) do
|
||||
with {:ok, %{"originalUsersCount" => user_count, "originalNotesCount" => status_count}} <-
|
||||
post_and_decode("https://#{domain}/api/stats") do
|
||||
if is_above_user_threshold?(user_count) or has_opted_in?(domain) do
|
||||
crawl_large_instance(domain, user_count, status_count)
|
||||
Map.merge(nodeinfo, crawl_large_instance(domain, user_count, status_count))
|
||||
else
|
||||
%{
|
||||
instance_type: :misskey,
|
||||
version: nil,
|
||||
description: nil,
|
||||
ApiCrawler.get_default()
|
||||
|> Map.merge(nodeinfo)
|
||||
|> Map.merge(%{
|
||||
user_count: user_count,
|
||||
status_count: nil,
|
||||
peers: [],
|
||||
interactions: %{},
|
||||
statuses_seen: 0
|
||||
}
|
||||
# The crawl result key is :instance_type (as declared in the ApiCrawler struct); :type is not one of its fields.
instance_type: :misskey
|
||||
})
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,34 +1,16 @@
|
|||
defmodule Backend.Crawler.Crawlers.Nodeinfo do
|
||||
@moduledoc """
|
||||
This module is slightly different from the other crawlers.
|
||||
It doesn't implement the ApiCrawler spec because it isn't run as a self-contained crawler.
|
||||
Instead, it's run before all the other crawlers.
|
||||
|
||||
This is to get the user count. Some servers don't publish this in other places (e.g. GNU Social, PeerTube) so we need
|
||||
nodeinfo to know whether it's a personal instance or not.
|
||||
This module is slightly different from the other crawlers. It's run before all the others and its
|
||||
result is included in theirs.
|
||||
"""
|
||||
|
||||
alias Backend.Crawler.ApiCrawler
|
||||
require Logger
|
||||
import Backend.Util
|
||||
import Backend.Crawler.Util
|
||||
@behaviour ApiCrawler
|
||||
|
||||
defstruct [
|
||||
:description,
|
||||
:user_count,
|
||||
:status_count,
|
||||
:instance_type,
|
||||
:version
|
||||
]
|
||||
|
||||
@type t() :: %__MODULE__{
|
||||
description: String.t(),
|
||||
user_count: integer,
|
||||
status_count: integer,
|
||||
instance_type: ApiCrawler.instance_type(),
|
||||
version: String.t()
|
||||
}
|
||||
|
||||
@spec allows_crawling?(String.t()) :: boolean()
|
||||
@impl ApiCrawler
|
||||
def allows_crawling?(domain) do
|
||||
[
|
||||
".well-known/nodeinfo"
|
||||
|
@ -37,13 +19,19 @@ defmodule Backend.Crawler.Crawlers.Nodeinfo do
|
|||
|> urls_are_crawlable?()
|
||||
end
|
||||
|
||||
@spec crawl(String.t()) :: {:ok, t()} | {:error, nil}
|
||||
def crawl(domain) do
|
||||
@impl ApiCrawler
|
||||
def is_instance_type?(_domain, _nodeinfo) do
|
||||
# This crawler is used slightly differently from the others -- we always check for nodeinfo.
|
||||
true
|
||||
end
|
||||
|
||||
@impl ApiCrawler
|
||||
def crawl(domain, _curr_result) do
|
||||
with {:ok, nodeinfo_url} <- get_nodeinfo_url(domain),
|
||||
{:ok, nodeinfo} <- get_nodeinfo(nodeinfo_url) do
|
||||
{:ok, nodeinfo}
|
||||
nodeinfo
|
||||
else
|
||||
_other -> {:error, nil}
|
||||
_other -> ApiCrawler.get_default()
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -65,8 +53,7 @@ defmodule Backend.Crawler.Crawlers.Nodeinfo do
|
|||
|> Map.get("href")
|
||||
end
|
||||
|
||||
@spec get_nodeinfo(String.t()) ::
|
||||
{:ok, t()} | {:error, Jason.DecodeError.t() | HTTPoison.Error.t()}
|
||||
@spec get_nodeinfo(String.t()) :: ApiCrawler.t()
|
||||
defp get_nodeinfo(nodeinfo_url) do
|
||||
case get_and_decode(nodeinfo_url) do
|
||||
{:ok, nodeinfo} -> {:ok, process_nodeinfo(nodeinfo)}
|
||||
|
@ -74,7 +61,7 @@ defmodule Backend.Crawler.Crawlers.Nodeinfo do
|
|||
end
|
||||
end
|
||||
|
||||
@spec process_nodeinfo(any()) :: t()
|
||||
@spec process_nodeinfo(any()) :: ApiCrawler.t()
|
||||
defp process_nodeinfo(nodeinfo) do
|
||||
user_count = get_in(nodeinfo, ["usage", "users", "total"])
|
||||
|
||||
|
@ -90,21 +77,33 @@ defmodule Backend.Crawler.Crawlers.Nodeinfo do
|
|||
|
||||
type = nodeinfo |> get_in(["software", "name"]) |> String.downcase() |> String.to_atom()
|
||||
|
||||
%__MODULE__{
|
||||
description: description,
|
||||
user_count: user_count,
|
||||
status_count: get_in(nodeinfo, ["usage", "localPosts"]),
|
||||
instance_type: type,
|
||||
version: get_in(nodeinfo, ["software", "version"])
|
||||
}
|
||||
Map.merge(
|
||||
ApiCrawler.get_default(),
|
||||
%{
|
||||
description: description,
|
||||
user_count: user_count,
|
||||
status_count: get_in(nodeinfo, ["usage", "localPosts"]),
|
||||
instance_type: type,
|
||||
version: get_in(nodeinfo, ["software", "version"]),
|
||||
blocked_domains:
|
||||
get_in(nodeinfo, ["metadata", "federation", "mrf_simple", "reject"])
|
||||
|> (fn b ->
|
||||
if b == nil do
|
||||
[]
|
||||
else
|
||||
b
|
||||
end
|
||||
end).()
|
||||
|> Enum.map(&clean_domain(&1))
|
||||
}
|
||||
)
|
||||
else
|
||||
%{
|
||||
description: nil,
|
||||
user_count: user_count,
|
||||
status_count: nil,
|
||||
instance_type: nil,
|
||||
version: nil
|
||||
}
|
||||
Map.merge(
|
||||
ApiCrawler.get_default(),
|
||||
%{
|
||||
user_count: user_count
|
||||
}
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
defmodule Backend.FederationRestriction do
  @moduledoc """
  A federation restriction that one instance (the source) applies to another (the target).

  Both ends reference `Backend.Instance` by domain through the `source_domain` and
  `target_domain` foreign keys. `type` is a string naming the kind of restriction.
  """
  use Ecto.Schema
  import Ecto.Changeset

  schema "federation_restrictions" do
    belongs_to :source, Backend.Instance,
      references: :domain,
      type: :string,
      foreign_key: :source_domain

    belongs_to :target, Backend.Instance,
      references: :domain,
      type: :string,
      foreign_key: :target_domain

    field :type, :string

    timestamps()
  end

  @doc false
  def changeset(federation_restriction, attrs) do
    # `:source` and `:target` are associations, which `cast/3` does not accept.
    # Cast and require the underlying foreign-key columns instead.
    federation_restriction
    |> cast(attrs, [:source_domain, :target_domain, :type])
    |> validate_required([:source_domain, :target_domain, :type])
  end
end
|
|
@ -3,10 +3,9 @@ defmodule Backend.Scheduler do
|
|||
This module runs recurring tasks.
|
||||
"""
|
||||
|
||||
use Appsignal.Instrumentation.Decorators
|
||||
use Quantum.Scheduler, otp_app: :backend
|
||||
|
||||
alias Backend.{Crawl, CrawlInteraction, Edge, Instance, Repo}
|
||||
alias Backend.{Crawl, CrawlInteraction, Edge, FederationRestriction, Instance, Repo}
|
||||
alias Backend.Mailer.AdminEmail
|
||||
|
||||
import Backend.Util
|
||||
|
@ -21,7 +20,6 @@ defmodule Backend.Scheduler do
|
|||
`unit` must be singular, e.g. "second", "minute", "hour", "month", "year", etc...
|
||||
"""
|
||||
@spec prune_crawls(integer, String.t()) :: any
|
||||
@decorate transaction()
|
||||
def prune_crawls(amount, unit) do
|
||||
{deleted_num, _} =
|
||||
Crawl
|
||||
|
@ -39,7 +37,6 @@ defmodule Backend.Scheduler do
|
|||
Calculates every instance's "insularity score" -- that is, the percentage of mentions that are among users on the
|
||||
instance, rather than at other instances.
|
||||
"""
|
||||
@decorate transaction()
|
||||
def generate_insularity_scores do
|
||||
now = get_now()
|
||||
|
||||
|
@ -85,7 +82,6 @@ defmodule Backend.Scheduler do
|
|||
@doc """
|
||||
This function calculates the average number of statuses per hour over the last month.
|
||||
"""
|
||||
@decorate transaction()
|
||||
def generate_status_rate do
|
||||
now = get_now()
|
||||
# We want the earliest successful crawl so that we can exclude it from the statistics.
|
||||
|
@ -143,9 +139,10 @@ defmodule Backend.Scheduler do
|
|||
@doc """
|
||||
This function aggregates statistics from the interactions in the database.
|
||||
It calculates the strength of edges between nodes. Self-edges are not generated.
|
||||
Edges are only generated if both instances have been succesfully crawled.
|
||||
Edges are only generated if
|
||||
* both instances have been successfully crawled
|
||||
* neither instance has blocked the other
|
||||
"""
|
||||
@decorate transaction()
|
||||
def generate_edges do
|
||||
now = get_now()
|
||||
|
||||
|
@ -177,6 +174,13 @@ defmodule Backend.Scheduler do
|
|||
})
|
||||
|> Repo.all(timeout: :infinity)
|
||||
|
||||
federation_blocks =
|
||||
FederationRestriction
|
||||
|> select([fr], {fr.source_domain, fr.target_domain})
|
||||
|> where([fr], fr.type == "reject")
|
||||
|> Repo.all()
|
||||
|> MapSet.new()
|
||||
|
||||
# Get edges and their weights
|
||||
Repo.transaction(
|
||||
fn ->
|
||||
|
@ -185,7 +189,7 @@ defmodule Backend.Scheduler do
|
|||
|
||||
edges =
|
||||
interactions
|
||||
|> reduce_mention_count()
|
||||
|> reduce_mention_count(federation_blocks)
|
||||
|> Enum.map(fn {{source_domain, target_domain}, {mention_count, statuses_seen}} ->
|
||||
%{
|
||||
source_domain: source_domain,
|
||||
|
@ -207,7 +211,6 @@ defmodule Backend.Scheduler do
|
|||
This function checks to see if a lot of instances on the same base domain have been created recently. If so,
|
||||
notifies the server admin over SMS.
|
||||
"""
|
||||
@decorate transaction()
|
||||
def check_for_spam_instances do
|
||||
hour_range = 3
|
||||
|
||||
|
@ -254,10 +257,9 @@ defmodule Backend.Scheduler do
|
|||
end
|
||||
end
|
||||
|
||||
# Takes a list of Interactions
|
||||
# Takes a list of Interactions and a MapSet of blocks in the form {source_domain, target_domain}
|
||||
# Returns a map of %{{source, target} => {total_mention_count, total_statuses_seen}}
|
||||
@decorate transaction_event()
|
||||
defp reduce_mention_count(interactions) do
|
||||
defp reduce_mention_count(interactions, federation_blocks) do
|
||||
Enum.reduce(interactions, %{}, fn
|
||||
%{
|
||||
source_domain: source_domain,
|
||||
|
@ -278,9 +280,12 @@ defmodule Backend.Scheduler do
|
|||
|
||||
statuses_seen = source_statuses_seen + target_statuses_seen
|
||||
|
||||
Map.update(acc, key, {mentions, statuses_seen}, fn {curr_mentions, curr_statuses_seen} ->
|
||||
{curr_mentions + mentions, curr_statuses_seen}
|
||||
end)
|
||||
# Skip pairs where either side blocks the other. The accumulator must be returned
# unchanged in that case: an `if` without `else` evaluates to nil, which would
# replace the accumulator and break the next reduction step.
blocked? =
  MapSet.member?(federation_blocks, {source_domain, target_domain}) or
    MapSet.member?(federation_blocks, {target_domain, source_domain})

if blocked? do
  acc
else
  Map.update(acc, key, {mentions, statuses_seen}, fn {curr_mentions, curr_statuses_seen} ->
    {curr_mentions + mentions, curr_statuses_seen}
  end)
end
|
||||
end)
|
||||
end
|
||||
end
|
||||
|
|
|
@ -128,6 +128,7 @@ defmodule Backend.Util do
|
|||
end
|
||||
end
|
||||
|
||||
@spec clean_domain(String.t()) :: String.t()
|
||||
def clean_domain(domain) do
|
||||
cleaned =
|
||||
domain
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
defmodule Backend.Repo.Migrations.CreateFederationRestrictions do
  use Ecto.Migration

  # Creates the federation_restrictions table: a typed, directed link between two
  # rows of the instances table, each referenced by its domain column.
  def change do
    create table(:federation_restrictions) do
      add :source_domain, instance_domain_reference(), null: false
      add :target_domain, instance_domain_reference(), null: false
      add :type, :string, null: false

      timestamps()
    end

    # One single-column index per side of the relationship.
    for column <- [:source_domain, :target_domain] do
      create index(:federation_restrictions, [column])
    end
  end

  # Foreign key to instances.domain; rows are deleted along with the referenced instance.
  defp instance_domain_reference do
    references(:instances, column: :domain, type: :string, on_delete: :delete_all)
  end
end
|
Loading…
Reference in New Issue