From 693cf2b2d98f32eae3b0793f84d2dae6036ff993 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tao=20Bojl=C3=A9n?= <2803708-taobojlen@users.noreply.gitlab.com> Date: Tue, 27 Aug 2019 13:50:16 +0000 Subject: [PATCH] improved edges --- backend/config/config.exs | 4 +- backend/config/dev.exs | 4 - backend/lib/backend/api.ex | 7 + backend/lib/backend/crawler/api_crawler.ex | 31 ++++- backend/lib/backend/crawler/crawler.ex | 104 +++++++++++--- .../lib/backend/crawler/crawlers/friendica.ex | 9 +- .../backend/crawler/crawlers/gnu_social.ex | 31 ++--- .../lib/backend/crawler/crawlers/mastodon.ex | 23 ++-- .../lib/backend/crawler/crawlers/misskey.ex | 18 +-- .../lib/backend/crawler/crawlers/nodeinfo.ex | 87 ++++++------ backend/lib/backend/federation_restriction.ex | 28 ++++ backend/lib/backend/repo.ex | 2 +- backend/lib/backend/scheduler.ex | 80 ++++++++--- backend/lib/backend/util.ex | 5 +- .../controllers/instance_controller.ex | 21 +++ backend/lib/backend_web/router.ex | 2 +- .../lib/backend_web/views/instance_view.ex | 57 ++++++-- backend/mix.exs | 4 +- backend/mix.lock | 2 + ...7092226_create_federation_restrictions.exs | 22 +++ frontend/src/AppRouter.tsx | 10 +- frontend/src/components/organisms/Graph.tsx | 2 +- .../components/organisms/InstanceTable.tsx | 127 ++++++++++++++++++ frontend/src/components/organisms/Nav.tsx | 11 +- frontend/src/components/organisms/index.ts | 1 + .../src/components/screens/GraphScreen.tsx | 2 +- .../src/components/screens/TableScreen.tsx | 17 +++ frontend/src/components/screens/index.ts | 1 + frontend/src/redux/actions.ts | 26 ++++ frontend/src/redux/reducers.ts | 29 +++- frontend/src/redux/types.ts | 21 ++- 31 files changed, 622 insertions(+), 166 deletions(-) create mode 100644 backend/lib/backend/federation_restriction.ex create mode 100644 backend/priv/repo/migrations/20190827092226_create_federation_restrictions.exs create mode 100644 frontend/src/components/organisms/InstanceTable.tsx create mode 100644 frontend/src/components/screens/TableScreen.tsx diff --git a/backend/config/config.exs b/backend/config/config.exs index b43791d..09836b8 100644 --- a/backend/config/config.exs +++ b/backend/config/config.exs @@ -63,7 +63,9 @@ config :backend, :crawler, crawl_workers: 20, blacklist: [ "gab.best", - "4chan.icu" + "4chan.icu", + "pleroma.site", + "pleroma.online" ], user_agent: "fediverse.space crawler", admin_phone: System.get_env("ADMIN_PHONE"), diff --git a/backend/config/dev.exs b/backend/config/dev.exs index 0471a82..5bcfe6c 100644 --- a/backend/config/dev.exs +++ b/backend/config/dev.exs @@ -62,8 +62,4 @@ config :backend, :crawler, personal_instance_threshold: 5, crawl_interval_mins: 60, crawl_workers: 10, - blacklist: [ - "gab.best", - "4chan.icu" - ], frontend_domain: "localhost:3000" diff --git a/backend/lib/backend/api.ex b/backend/lib/backend/api.ex index 8ed3db0..b0ffe21 100644 --- a/backend/lib/backend/api.ex +++ b/backend/lib/backend/api.ex @@ -6,6 +6,13 @@ defmodule Backend.Api do import Backend.Util import Ecto.Query + @spec get_instances(Integer.t() | nil) :: Scrivener.Page.t() + def get_instances(page \\ nil) do + Instance + |> where([i], not is_nil(i.type)) + |> Repo.paginate(page: page) + end + @spec get_instance(String.t()) :: Instance.t() | nil def get_instance(domain) do Instance diff --git a/backend/lib/backend/crawler/api_crawler.ex b/backend/lib/backend/crawler/api_crawler.ex index 8b4d265..c6d0164 100644 --- a/backend/lib/backend/crawler/api_crawler.ex +++ b/backend/lib/backend/crawler/api_crawler.ex @@ -26,25 +26,39 @@ defmodule Backend.Crawler.ApiCrawler do :peers, :interactions, :statuses_seen, - :instance_type + :instance_type, + :blocked_domains ] @type t() :: %__MODULE__{ - version: String.t(), - description: String.t(), + version: String.t() | nil, + description: String.t() | nil, user_count: integer | nil, status_count: integer | nil, peers: [String.t()], interactions: instance_interactions, statuses_seen: integer, - instance_type: instance_type + instance_type: instance_type | nil, + blocked_domains: [String.t()] } + @empty_result %{ + version: nil, + description: nil, + user_count: nil, + status_count: nil, + peers: [], + interactions: %{}, + statuses_seen: 0, + instance_type: nil, + blocked_domains: [] + } + @doc """ Check whether the instance at the given domain is of the type that this ApiCrawler implements. Arguments are the instance domain and the nodeinfo results. """ - @callback is_instance_type?(String.t(), Nodeinfo.t()) :: boolean() + @callback is_instance_type?(String.t(), ApiCrawler.t()) :: boolean() @doc """ Check whether the instance allows crawling according to its robots.txt or otherwise. @@ -56,4 +70,11 @@ defmodule Backend.Crawler.ApiCrawler do Takes two arguments: the domain to crawl and the existing results (from nodeinfo). """ @callback crawl(String.t(), Nodeinfo.t()) :: t() + + @doc """ + Returns the default, empty state + """ + def get_default do + @empty_result + end end diff --git a/backend/lib/backend/crawler/crawler.ex b/backend/lib/backend/crawler/crawler.ex index d5f661a..4b8b1b0 100644 --- a/backend/lib/backend/crawler/crawler.ex +++ b/backend/lib/backend/crawler/crawler.ex @@ -4,7 +4,17 @@ defmodule Backend.Crawler do """ alias __MODULE__ - alias Backend.{Crawl, CrawlInteraction, Instance, InstancePeer, MostRecentCrawl, Repo} + + alias Backend.{ + Crawl, + CrawlInteraction, + FederationRestriction, + Instance, + InstancePeer, + MostRecentCrawl, + Repo + } + alias Backend.Crawler.ApiCrawler alias Backend.Crawler.Crawlers.{Friendica, GnuSocial, Mastodon, Misskey, Nodeinfo} @@ -75,14 +85,24 @@ defmodule Backend.Crawler do # a) it should always be run first # b) it passes the results on to the next crawlers (e.g. user_count) defp crawl(%Crawler{api_crawlers: [Nodeinfo | remaining_crawlers], domain: domain} = state) do - with true <- Nodeinfo.allows_crawling?(domain), {:ok, nodeinfo} <- Nodeinfo.crawl(domain) do - Logger.debug("Found nodeinfo for #{domain}.") - result = Map.merge(nodeinfo, %{peers: [], interactions: %{}, statuses_seen: 0}) - crawl(%Crawler{state | result: result, found_api?: true, api_crawlers: remaining_crawlers}) - else - _ -> + if Nodeinfo.allows_crawling?(domain) do + nodeinfo = Nodeinfo.crawl(domain, nil) + + if nodeinfo != nil do + Logger.debug("Found nodeinfo for #{domain}.") + + crawl(%Crawler{ + state + | result: nodeinfo, + found_api?: true, + api_crawlers: remaining_crawlers + }) + else Logger.debug("Did not find nodeinfo for #{domain}.") crawl(%Crawler{state | api_crawlers: remaining_crawlers}) + end + else + crawl(%Crawler{state | api_crawlers: remaining_crawlers, allows_crawling?: false}) end end @@ -165,7 +185,7 @@ defmodule Backend.Crawler do Elasticsearch.put_document!(Backend.Elasticsearch.Cluster, instance, "instances/_doc") - # Save details of a new crawl + ## Save details of a new crawl ## curr_crawl = Repo.insert!(%Crawl{ instance_domain: domain, @@ -196,18 +216,22 @@ defmodule Backend.Crawler do |> list_union(result.peers) |> Enum.filter(fn domain -> domain != nil and not is_blacklisted?(domain) end) |> Enum.map(&clean_domain(&1)) + |> Enum.filter(fn peer_domain -> + if is_valid_domain?(peer_domain) do + true + else + Logger.info("Found invalid peer domain from #{domain}: #{peer_domain}") + false + end + end) - if not Enum.all?(peers_domains, &is_valid_domain?(&1)) do - invalid_peers = Enum.filter(peers_domains, fn d -> not is_valid_domain?(d) end) - raise "#{domain} has invalid peers: #{Enum.join(invalid_peers, ", ")}" - end - - peers = + new_instances = peers_domains + |> list_union(result.blocked_domains) |> Enum.map(&%{domain: &1, inserted_at: now, updated_at: now, next_crawl: now}) Instance - |> Repo.insert_all(peers, on_conflict: :nothing, conflict_target: :domain) + |> Repo.insert_all(new_instances, on_conflict: :nothing, conflict_target: :domain) Repo.transaction(fn -> ## Save peer relationships ## @@ -249,6 +273,56 @@ defmodule Backend.Crawler do |> Repo.insert_all(new_instance_peers) end) + ## Save federation restrictions ## + Repo.transaction(fn -> + current_restrictions = + FederationRestriction + |> select([fr], {fr.target_domain, fr.type}) + |> where(source_domain: ^domain) + |> Repo.all() + + wanted_restrictions_set = + result.blocked_domains + |> Enum.map(&{&1, "reject"}) + |> MapSet.new() + + current_restrictions_set = MapSet.new(current_restrictions) + + # Delete the ones we don't want + restrictions_to_delete = + current_restrictions_set + |> MapSet.difference(wanted_restrictions_set) + |> MapSet.to_list() + |> Enum.map(fn {target_domain, _type} -> target_domain end) + + if length(restrictions_to_delete) > 0 do + FederationRestriction + |> where( + [fr], + fr.source_domain == ^domain and fr.target_domain in ^restrictions_to_delete + ) + |> Repo.delete_all() + end + + # Save the new ones + new_restrictions = + wanted_restrictions_set + |> MapSet.difference(current_restrictions_set) + |> MapSet.to_list() + |> Enum.map(fn {target_domain, type} -> + %{ + source_domain: domain, + target_domain: target_domain, + type: type, + inserted_at: now, + updated_at: now + } + end) + + FederationRestriction + |> Repo.insert_all(new_restrictions) + end) + ## Save interactions ## interactions = result.interactions diff --git a/backend/lib/backend/crawler/crawlers/friendica.ex b/backend/lib/backend/crawler/crawlers/friendica.ex index 4a624c9..27c748d 100644 --- a/backend/lib/backend/crawler/crawlers/friendica.ex +++ b/backend/lib/backend/crawler/crawlers/friendica.ex @@ -62,12 +62,11 @@ defmodule Backend.Crawler.Crawlers.Friendica do end) if details |> Map.get(:user_count, 0) |> is_above_user_threshold?() do - Map.merge( - %{peers: peers, interactions: %{}, statuses_seen: 0, instance_type: :friendica}, - Map.take(details, [:description, :version, :user_count, :status_count]) - ) + ApiCrawler.get_default() + |> Map.merge(%{peers: peers, instance_type: :friendica}) + |> Map.merge(Map.take(details, [:description, :version, :user_count, :status_count])) else - nodeinfo_result + Map.merge(ApiCrawler.get_default(), nodeinfo_result) end end diff --git a/backend/lib/backend/crawler/crawlers/gnu_social.ex b/backend/lib/backend/crawler/crawlers/gnu_social.ex index 1deb5c2..0f71238 100644 --- a/backend/lib/backend/crawler/crawlers/gnu_social.ex +++ b/backend/lib/backend/crawler/crawlers/gnu_social.ex @@ -3,7 +3,6 @@ defmodule Backend.Crawler.Crawlers.GnuSocial do Crawler for GNU Social servers. """ alias Backend.Crawler.ApiCrawler - alias Backend.Crawler.Crawlers.Nodeinfo import Backend.Crawler.Util import Backend.Util require Logger @@ -32,17 +31,17 @@ defmodule Backend.Crawler.Crawlers.GnuSocial do end @impl ApiCrawler - def crawl(domain, nodeinfo_result) do - if nodeinfo_result == nil or - nodeinfo_result |> Map.get(:user_count) |> is_above_user_threshold?() do - crawl_large_instance(domain, nodeinfo_result) + def crawl(domain, nodeinfo) do + if nodeinfo == nil or + nodeinfo |> Map.get(:user_count) |> is_above_user_threshold?() do + Map.merge(crawl_large_instance(domain), nodeinfo) else - nodeinfo_result + Map.merge(ApiCrawler.get_default(), nodeinfo) end end - @spec crawl_large_instance(String.t(), Nodeinfo.t()) :: ApiCrawler.t() - defp crawl_large_instance(domain, nodeinfo_result) do + @spec crawl_large_instance(String.t()) :: ApiCrawler.t() + defp crawl_large_instance(domain) do status_datetime_threshold = NaiveDateTime.utc_now() |> NaiveDateTime.add(get_config(:status_age_limit_days) * 24 * 3600 * -1, :second) @@ -52,24 +51,14 @@ defmodule Backend.Crawler.Crawlers.GnuSocial do {interactions, statuses_seen} = get_interactions(domain, min_timestamp) - if nodeinfo_result != nil do - Map.merge(nodeinfo_result, %{ - interactions: interactions, - statuses_seen: statuses_seen, - peers: [] - }) - else + Map.merge( + ApiCrawler.get_default(), %{ - version: nil, - description: nil, - user_count: nil, - status_count: nil, - peers: [], interactions: interactions, statuses_seen: statuses_seen, instance_type: :gnusocial } - end + ) end @spec get_interactions( diff --git a/backend/lib/backend/crawler/crawlers/mastodon.ex b/backend/lib/backend/crawler/crawlers/mastodon.ex index 74b6b03..68442b0 100644 --- a/backend/lib/backend/crawler/crawlers/mastodon.ex +++ b/backend/lib/backend/crawler/crawlers/mastodon.ex @@ -34,26 +34,19 @@ defmodule Backend.Crawler.Crawlers.Mastodon do end @impl ApiCrawler - def crawl(domain, _current_result) do + def crawl(domain, nodeinfo) do instance = get_and_decode!("https://#{domain}/api/v1/instance") user_count = get_in(instance, ["stats", "user_count"]) if is_above_user_threshold?(user_count) or has_opted_in?(domain) do - crawl_large_instance(domain, instance) + Map.merge(nodeinfo, crawl_large_instance(domain, instance)) else - Map.merge( - Map.take(instance["stats"], ["user_count"]) - |> convert_keys_to_atoms(), - %{ - instance_type: get_instance_type(instance), - peers: [], - interactions: %{}, - statuses_seen: 0, - description: nil, - version: nil, - status_count: nil - } - ) + ApiCrawler.get_default() + |> Map.merge(nodeinfo) + |> Map.merge(%{ + instance_type: get_instance_type(instance), + user_count: get_in(instance, ["stats", "user_count"]) + }) end end diff --git a/backend/lib/backend/crawler/crawlers/misskey.ex b/backend/lib/backend/crawler/crawlers/misskey.ex index 0a7c6c3..fe8f116 100644 --- a/backend/lib/backend/crawler/crawlers/misskey.ex +++ b/backend/lib/backend/crawler/crawlers/misskey.ex @@ -35,22 +35,18 @@ defmodule Backend.Crawler.Crawlers.Misskey do end @impl ApiCrawler - def crawl(domain, _result) do + def crawl(domain, nodeinfo) do with {:ok, %{"originalUsersCount" => user_count, "originalNotesCount" => status_count}} <- post_and_decode("https://#{domain}/api/stats") do if is_above_user_threshold?(user_count) or has_opted_in?(domain) do - crawl_large_instance(domain, user_count, status_count) + Map.merge(nodeinfo, crawl_large_instance(domain, user_count, status_count)) else - %{ - instance_type: :misskey, - version: nil, - description: nil, + ApiCrawler.get_default() + |> Map.merge(nodeinfo) + |> Map.merge(%{ user_count: user_count, - status_count: nil, - peers: [], - interactions: %{}, - statuses_seen: 0 - } + type: :misskey + }) end end end diff --git a/backend/lib/backend/crawler/crawlers/nodeinfo.ex b/backend/lib/backend/crawler/crawlers/nodeinfo.ex index 1f58f4f..413d8d2 100644 --- a/backend/lib/backend/crawler/crawlers/nodeinfo.ex +++ b/backend/lib/backend/crawler/crawlers/nodeinfo.ex @@ -1,34 +1,16 @@ defmodule Backend.Crawler.Crawlers.Nodeinfo do @moduledoc """ - This module is slightly different from the other crawlers. - It doesn't implement the ApiCrawler spec because it isn't run as a self-contained crawler. - Instead, it's run before all the other crawlers. - - This is to get the user count. Some servers don't publish this in other places (e.g. GNU Social, PeerTube) so we need - nodeinfo to know whether it's a personal instance or not. + This module is slightly different from the other crawlers. It's run before all the others and its + result is included in theirs. """ + alias Backend.Crawler.ApiCrawler require Logger import Backend.Util import Backend.Crawler.Util + @behaviour ApiCrawler - defstruct [ - :description, - :user_count, - :status_count, - :instance_type, - :version - ] - - @type t() :: %__MODULE__{ - description: String.t(), - user_count: integer, - status_count: integer, - instance_type: ApiCrawler.instance_type(), - version: String.t() - } - - @spec allows_crawling?(String.t()) :: boolean() + @impl ApiCrawler def allows_crawling?(domain) do [ ".well-known/nodeinfo" @@ -37,13 +19,19 @@ defmodule Backend.Crawler.Crawlers.Nodeinfo do |> urls_are_crawlable?() end - @spec crawl(String.t()) :: {:ok, t()} | {:error, nil} - def crawl(domain) do + @impl ApiCrawler + def is_instance_type?(_domain, _nodeinfo) do + # This crawler is used slightly differently from the others -- we always check for nodeinfo. + true + end + + @impl ApiCrawler + def crawl(domain, _curr_result) do with {:ok, nodeinfo_url} <- get_nodeinfo_url(domain), {:ok, nodeinfo} <- get_nodeinfo(nodeinfo_url) do - {:ok, nodeinfo} + nodeinfo else - _other -> {:error, nil} + _other -> ApiCrawler.get_default() end end @@ -65,8 +53,7 @@ defmodule Backend.Crawler.Crawlers.Nodeinfo do |> Map.get("href") end - @spec get_nodeinfo(String.t()) :: - {:ok, t()} | {:error, Jason.DecodeError.t() | HTTPoison.Error.t()} + @spec get_nodeinfo(String.t()) :: ApiCrawler.t() defp get_nodeinfo(nodeinfo_url) do case get_and_decode(nodeinfo_url) do {:ok, nodeinfo} -> {:ok, process_nodeinfo(nodeinfo)} @@ -74,7 +61,7 @@ defmodule Backend.Crawler.Crawlers.Nodeinfo do end end - @spec process_nodeinfo(any()) :: t() + @spec process_nodeinfo(any()) :: ApiCrawler.t() defp process_nodeinfo(nodeinfo) do user_count = get_in(nodeinfo, ["usage", "users", "total"]) @@ -90,21 +77,33 @@ defmodule Backend.Crawler.Crawlers.Nodeinfo do type = nodeinfo |> get_in(["software", "name"]) |> String.downcase() |> String.to_atom() - %__MODULE__{ - description: description, - user_count: user_count, - status_count: get_in(nodeinfo, ["usage", "localPosts"]), - instance_type: type, - version: get_in(nodeinfo, ["software", "version"]) - } + Map.merge( + ApiCrawler.get_default(), + %{ + description: description, + user_count: user_count, + status_count: get_in(nodeinfo, ["usage", "localPosts"]), + instance_type: type, + version: get_in(nodeinfo, ["software", "version"]), + blocked_domains: + get_in(nodeinfo, ["metadata", "federation", "mrf_simple", "reject"]) + |> (fn b -> + if b == nil do + [] + else + b + end + end).() + |> Enum.map(&clean_domain(&1)) + } + ) else - %{ - description: nil, - user_count: user_count, - status_count: nil, - instance_type: nil, - version: nil - } + Map.merge( + ApiCrawler.get_default(), + %{ + user_count: user_count + } + ) end end diff --git a/backend/lib/backend/federation_restriction.ex b/backend/lib/backend/federation_restriction.ex new file mode 100644 index 0000000..370848c --- /dev/null +++ b/backend/lib/backend/federation_restriction.ex @@ -0,0 +1,28 @@ +defmodule Backend.FederationRestriction do + @moduledoc false + use Ecto.Schema + import Ecto.Changeset + + schema "federation_restrictions" do + belongs_to :source, Backend.Instance, + references: :domain, + type: :string, + foreign_key: :source_domain + + belongs_to :target, Backend.Instance, + references: :domain, + type: :string, + foreign_key: :target_domain + + field :type, :string + + timestamps() + end + + @doc false + def changeset(federation_restriction, attrs) do + federation_restriction + |> cast(attrs, [:source, :target, :type]) + |> validate_required([:source, :target, :type]) + end +end diff --git a/backend/lib/backend/repo.ex b/backend/lib/backend/repo.ex index 071627a..02a04a1 100644 --- a/backend/lib/backend/repo.ex +++ b/backend/lib/backend/repo.ex @@ -4,7 +4,7 @@ defmodule Backend.Repo do adapter: Ecto.Adapters.Postgres, timeout: 25_000 - use Paginator + use Scrivener, page_size: 20 def init(_type, config) do {:ok, Keyword.put(config, :url, System.get_env("DATABASE_URL"))} diff --git a/backend/lib/backend/scheduler.ex b/backend/lib/backend/scheduler.ex index 6ad86a1..56ffcb9 100644 --- a/backend/lib/backend/scheduler.ex +++ b/backend/lib/backend/scheduler.ex @@ -3,10 +3,9 @@ defmodule Backend.Scheduler do This module runs recurring tasks. """ - use Appsignal.Instrumentation.Decorators use Quantum.Scheduler, otp_app: :backend - alias Backend.{Crawl, CrawlInteraction, Edge, Instance, Repo} + alias Backend.{Crawl, CrawlInteraction, Edge, FederationRestriction, Instance, Repo} alias Backend.Mailer.AdminEmail import Backend.Util @@ -21,7 +20,6 @@ defmodule Backend.Scheduler do `unit` must singular, e.g. "second", "minute", "hour", "month", "year", etc... """ @spec prune_crawls(integer, String.t()) :: any - @decorate transaction() def prune_crawls(amount, unit) do {deleted_num, _} = Crawl @@ -39,7 +37,6 @@ defmodule Backend.Scheduler do Calculates every instance's "insularity score" -- that is, the percentage of mentions that are among users on the instance, rather than at other instances. """ - @decorate transaction() def generate_insularity_scores do now = get_now() @@ -85,7 +82,6 @@ defmodule Backend.Scheduler do @doc """ This function calculates the average number of statuses per hour over the last month. """ - @decorate transaction() def generate_status_rate do now = get_now() # We want the earliest sucessful crawl so that we can exclude it from the statistics. @@ -143,9 +139,11 @@ defmodule Backend.Scheduler do @doc """ This function aggregates statistics from the interactions in the database. It calculates the strength of edges between nodes. Self-edges are not generated. - Edges are only generated if both instances have been succesfully crawled. + Edges are only generated if + * both instances have been succesfully crawled + * neither of the instances have blocked each other + * there are interactions in each direction """ - @decorate transaction() def generate_edges do now = get_now() @@ -177,15 +175,30 @@ defmodule Backend.Scheduler do }) |> Repo.all(timeout: :infinity) + federation_blocks = + FederationRestriction + |> select([fr], {fr.source_domain, fr.target_domain}) + |> where([fr], fr.type == "reject") + |> Repo.all() + |> MapSet.new() + # Get edges and their weights Repo.transaction( fn -> Edge |> Repo.delete_all(timeout: :infinity) - edges = + mentions = interactions - |> reduce_mention_count() + |> reduce_mention_count(federation_blocks) + + # Filter down to mentions where there are interactions in both directions + filtered_mentions = + mentions + |> Enum.filter(&has_opposite_mention?(&1, mentions)) + + edges = + filtered_mentions |> Enum.map(fn {{source_domain, target_domain}, {mention_count, statuses_seen}} -> %{ source_domain: source_domain, @@ -207,7 +220,6 @@ defmodule Backend.Scheduler do This function checks to see if a lot of instances on the same base domain have been created recently. If so, notifies the server admin over SMS. """ - @decorate transaction() def check_for_spam_instances do hour_range = 3 @@ -254,10 +266,9 @@ defmodule Backend.Scheduler do end end - # Takes a list of Interactions + # Takes a list of Interactions and a MapSet of blocks in the form {source_domain, target_domain} # Returns a map of %{{source, target} => {total_mention_count, total_statuses_seen}} - @decorate transaction_event() - defp reduce_mention_count(interactions) do + defp reduce_mention_count(interactions, federation_blocks) do Enum.reduce(interactions, %{}, fn %{ source_domain: source_domain, @@ -278,9 +289,46 @@ defmodule Backend.Scheduler do statuses_seen = source_statuses_seen + target_statuses_seen - Map.update(acc, key, {mentions, statuses_seen}, fn {curr_mentions, curr_statuses_seen} -> - {curr_mentions + mentions, curr_statuses_seen} - end) + maybe_update_map( + acc, + key, + source_domain, + target_domain, + mentions, + statuses_seen, + federation_blocks + ) end) end + + defp maybe_update_map( + acc, + key, + source_domain, + target_domain, + mentions, + statuses_seen, + federation_blocks + ) do + if not MapSet.member?(federation_blocks, {source_domain, target_domain}) and + not MapSet.member?(federation_blocks, {target_domain, source_domain}) do + Map.update(acc, key, {mentions, statuses_seen}, fn {curr_mentions, curr_statuses_seen} -> + {curr_mentions + mentions, curr_statuses_seen} + end) + end + end + + defp has_opposite_mention?(mention, all_mentions) do + {{source_domain, target_domain}, {mention_count, _statuses_seen}} = mention + other_direction_key = {target_domain, source_domain} + + if mention_count > 0 and Map.has_key?(all_mentions, other_direction_key) do + {other_direction_mentions, _other_statuses_seen} = + Map.get(all_mentions, other_direction_key) + + other_direction_mentions > 0 + else + false + end + end end diff --git a/backend/lib/backend/util.ex b/backend/lib/backend/util.ex index 225dbbd..8102313 100644 --- a/backend/lib/backend/util.ex +++ b/backend/lib/backend/util.ex @@ -128,6 +128,7 @@ defmodule Backend.Util do end end + @spec clean_domain(String.t()) :: String.t() def clean_domain(domain) do cleaned = domain @@ -136,7 +137,7 @@ defmodule Backend.Util do |> String.trim() |> String.downcase() - Regex.replace(~r/:\d+/, cleaned, "") + Regex.replace(~r/(:\d+|\.)$/, cleaned, "") end def get_account(username, domain) do @@ -209,6 +210,6 @@ defmodule Backend.Util do @spec is_valid_domain?(String.t()) :: boolean def is_valid_domain?(domain) do - Regex.match?(~r/^[\w\.\-_]+$/, domain) + Regex.match?(~r/^[\pL\d\.\-_]+\.[a-zA-Z]+$/, domain) end end diff --git a/backend/lib/backend_web/controllers/instance_controller.ex b/backend/lib/backend_web/controllers/instance_controller.ex index af72d67..294597d 100644 --- a/backend/lib/backend_web/controllers/instance_controller.ex +++ b/backend/lib/backend_web/controllers/instance_controller.ex @@ -1,9 +1,30 @@ defmodule BackendWeb.InstanceController do use BackendWeb, :controller + alias Backend.Api alias Graph.Cache action_fallback(BackendWeb.FallbackController) + def index(conn, params) do + page = Map.get(params, "page") + + %{ + entries: instances, + total_pages: total_pages, + page_number: page_number, + total_entries: total_entries, + page_size: page_size + } = Api.get_instances(page) + + render(conn, "index.json", + instances: instances, + total_pages: total_pages, + page_number: page_number, + total_entries: total_entries, + page_size: page_size + ) + end + def show(conn, %{"id" => domain}) do instance = Cache.get_instance_with_peers(domain) diff --git a/backend/lib/backend_web/router.ex b/backend/lib/backend_web/router.ex index dfa4f01..c62c51d 100644 --- a/backend/lib/backend_web/router.ex +++ b/backend/lib/backend_web/router.ex @@ -8,7 +8,7 @@ defmodule BackendWeb.Router do scope "/api", BackendWeb do pipe_through(:api) - resources("/instances", InstanceController, only: [:show]) + resources("/instances", InstanceController, only: [:index, :show]) resources("/graph", GraphController, only: [:index, :show]) resources("/search", SearchController, only: [:index]) diff --git a/backend/lib/backend_web/views/instance_view.ex b/backend/lib/backend_web/views/instance_view.ex index c7d0956..c5844a9 100644 --- a/backend/lib/backend_web/views/instance_view.ex +++ b/backend/lib/backend_web/views/instance_view.ex @@ -3,6 +3,40 @@ defmodule BackendWeb.InstanceView do alias BackendWeb.InstanceView import Backend.Util + def render("index.json", %{ + instances: instances, + total_pages: total_pages, + page_number: page_number, + total_entries: total_entries, + page_size: page_size + }) do + %{ + instances: render_many(instances, InstanceView, "index_instance.json"), + pageNumber: page_number, + totalPages: total_pages, + totalEntries: total_entries, + pageSize: page_size + } + end + + @doc """ + Used when rendering the index of all instances (the different from show.json is primarily that it does not + include peers). + """ + def render("index_instance.json", %{instance: instance}) do + %{ + name: instance.domain, + description: instance.description, + version: instance.version, + userCount: instance.user_count, + insularity: instance.insularity, + statusCount: instance.status_count, + type: instance.type, + statusesPerDay: instance.statuses_per_day, + statusesPerUserPerDay: get_statuses_per_user_per_day(instance) + } + end + def render("show.json", %{instance: instance, crawl: crawl}) do user_threshold = get_config(:personal_instance_threshold) @@ -21,7 +55,7 @@ defmodule BackendWeb.InstanceView do end end - def render("instance.json", %{instance: instance}) do + def render("peer.json", %{instance: instance}) do %{name: instance.domain} end @@ -46,14 +80,6 @@ defmodule BackendWeb.InstanceView do instance.peers |> Enum.filter(fn peer -> not peer.opt_out end) - statuses_per_user_per_day = - if instance.statuses_per_day != nil and instance.user_count != nil and - instance.user_count > 0 do - instance.statuses_per_day / instance.user_count - else - nil - end - %{ name: instance.domain, description: instance.description, @@ -62,12 +88,21 @@ defmodule BackendWeb.InstanceView do insularity: instance.insularity, statusCount: instance.status_count, domainCount: length(instance.peers), - peers: render_many(filtered_peers, InstanceView, "instance.json"), + peers: render_many(filtered_peers, InstanceView, "peer.json"), lastUpdated: last_updated, status: "success", type: instance.type, statusesPerDay: instance.statuses_per_day, - statusesPerUserPerDay: statuses_per_user_per_day + statusesPerUserPerDay: get_statuses_per_user_per_day(instance) } end + + defp get_statuses_per_user_per_day(instance) do + if instance.statuses_per_day != nil and instance.user_count != nil and + instance.user_count > 0 do + instance.statuses_per_day / instance.user_count + else + nil + end + end end diff --git a/backend/mix.exs b/backend/mix.exs index e0f9626..c10edb4 100644 --- a/backend/mix.exs +++ b/backend/mix.exs @@ -56,7 +56,6 @@ defmodule Backend.MixProject do {:corsica, "~> 1.1.2"}, {:sobelow, "~> 0.8", only: [:dev, :test]}, {:gollum, "~> 0.3.2"}, - {:paginator, "~> 0.6.0"}, {:public_suffix, "~> 0.6.0"}, {:idna, "~> 5.1.2", override: true}, {:swoosh, "~> 0.23.3"}, @@ -66,7 +65,8 @@ defmodule Backend.MixProject do {:credo, "~> 1.1", only: [:dev, :test], runtime: false}, {:nebulex, "~> 1.1"}, {:hunter, "~> 0.5.1"}, - {:poison, "~> 4.0", override: true} + {:poison, "~> 4.0", override: true}, + {:scrivener_ecto, "~> 2.2"} ] end diff --git a/backend/mix.lock b/backend/mix.lock index 406198d..df47d89 100644 --- a/backend/mix.lock +++ b/backend/mix.lock @@ -50,6 +50,8 @@ "public_suffix": {:hex, :public_suffix, "0.6.0", "100cfe86f13f9f6f0cf67e743b1b83c78dd1223a2c422fa03ebf4adff514cbc3", [:mix], [{:idna, ">= 1.2.0 and < 6.0.0", [hex: :idna, repo: "hexpm", optional: false]}], "hexpm"}, "quantum": {:hex, :quantum, "2.3.4", "72a0e8855e2adc101459eac8454787cb74ab4169de6ca50f670e72142d4960e9", [:mix], [{:calendar, "~> 0.17", [hex: :calendar, repo: "hexpm", optional: true]}, {:crontab, "~> 1.1", [hex: :crontab, repo: "hexpm", optional: false]}, {:gen_stage, "~> 0.12", [hex: :gen_stage, repo: "hexpm", optional: false]}, {:swarm, "~> 3.3", [hex: :swarm, repo: "hexpm", optional: false]}, {:timex, "~> 3.1", [hex: :timex, repo: "hexpm", optional: true]}], "hexpm"}, "ranch": {:hex, :ranch, "1.7.1", "6b1fab51b49196860b733a49c07604465a47bdb78aa10c1c16a3d199f7f8c881", [:rebar3], [], "hexpm"}, + "scrivener": {:hex, :scrivener, "2.7.0", "fa94cdea21fad0649921d8066b1833d18d296217bfdf4a5389a2f45ee857b773", [:mix], [], "hexpm"}, + "scrivener_ecto": {:hex, :scrivener_ecto, "2.2.0", "53d5f1ba28f35f17891cf526ee102f8f225b7024d1cdaf8984875467158c9c5e", [:mix], [{:ecto, "~> 3.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:scrivener, "~> 2.4", [hex: :scrivener, repo: "hexpm", optional: false]}], "hexpm"}, "shards": {:hex, :shards, "0.6.0", "678d292ad74a4598a872930f9b12251f43e97f6050287f1fb712fbfd3d282f75", [:make, :rebar3], [], "hexpm"}, "sobelow": {:hex, :sobelow, "0.8.0", "a3ec73e546dfde19f14818e5000c418e3f305d9edb070e79dd391de0ae1cd1ea", [:mix], [], "hexpm"}, "ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.4", "f0eafff810d2041e93f915ef59899c923f4568f4585904d010387ed74988e77b", [:make, :mix, :rebar3], [], "hexpm"}, diff --git a/backend/priv/repo/migrations/20190827092226_create_federation_restrictions.exs b/backend/priv/repo/migrations/20190827092226_create_federation_restrictions.exs new file mode 100644 index 0000000..f5084af --- /dev/null +++ b/backend/priv/repo/migrations/20190827092226_create_federation_restrictions.exs @@ -0,0 +1,22 @@ +defmodule Backend.Repo.Migrations.CreateFederationRestrictions do + use Ecto.Migration + + def change do + create table(:federation_restrictions) do + add :source_domain, + references(:instances, column: :domain, type: :string, on_delete: :delete_all), + null: false + + add :target_domain, + references(:instances, column: :domain, type: :string, on_delete: :delete_all), + null: false + + add :type, :string, null: false + + timestamps() + end + + create index(:federation_restrictions, [:source_domain]) + create index(:federation_restrictions, [:target_domain]) + end +end diff --git a/frontend/src/AppRouter.tsx b/frontend/src/AppRouter.tsx index 9bdea62..4ddda2b 100644 --- a/frontend/src/AppRouter.tsx +++ b/frontend/src/AppRouter.tsx @@ -5,13 +5,21 @@ import { Classes } from "@blueprintjs/core"; import { ConnectedRouter } from "connected-react-router"; import { Route } from "react-router-dom"; import { Nav } from "./components/organisms/"; -import { AboutScreen, AdminScreen, GraphScreen, LoginScreen, VerifyLoginScreen } from "./components/screens/"; +import { + AboutScreen, + AdminScreen, + GraphScreen, + LoginScreen, + TableScreen, + VerifyLoginScreen +} from "./components/screens/"; import { history } from "./index"; const AppRouter: React.FC = () => (