Merge branch 'develop' into 'master'

release 2.7.1

See merge request fediverse.space/fediverse.space!90
This commit is contained in:
Tao Bojlén 2019-08-23 11:53:03 +00:00
commit 8b036be23f
47 changed files with 2369 additions and 1584 deletions

View File

@ -22,15 +22,14 @@ test-frontend:
changes:
- frontend/*
backend-sobelow:
test-backend:
stage: test
image: elixir:1.9
variables:
MIX_ENV: test
only:
changes:
- backend/*
except:
- develop
- master
before_script:
- cd backend
script:
@ -38,6 +37,7 @@ backend-sobelow:
- mix local.rebar --force
- mix deps.get
- mix deps.compile
- mix credo --strict
- mix sobelow --config
cache:
paths:

View File

@ -17,6 +17,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed
## [2.7.1 - 2019-08-23]
### Added
- Add caching to graph + instance endpoints to better handle traffic spikes.
### Fixed
- Added ON DELETE to `most_recent_crawl` table, such that it can handle previously-crawled but now-dead instances.
- You can now login to the admin view by clicking, not just by pressing enter.
- Add handling for weirdly-formatted Friendica peers.
- If the details of an instance fail to load, it's now easy to dismiss the error.
## [2.7.0 - 2019-08-18]
### Added

View File

@ -34,10 +34,15 @@ config :logger, :console,
config :phoenix, :json_library, Jason
config :gollum,
# 24 hrs
refresh_secs: 86_400,
lazy_refresh: true,
user_agent: "fediverse.space crawler"
config :backend, Graph.Cache,
# 1 hour
gc_interval: 3600
config :ex_twilio,
account_sid: System.get_env("TWILIO_ACCOUNT_SID"),
auth_token: System.get_env("TWILIO_AUTH_TOKEN")

View File

@ -1,4 +1,7 @@
defmodule Backend.Api do
@moduledoc """
Functions used in the API controllers. Most of these simply return data from the database.
"""
alias Backend.{Edge, Instance, Repo}
import Backend.Util
import Ecto.Query
@ -67,6 +70,7 @@ defmodule Backend.Api do
end
@spec list_edges() :: [Edge.t()]
# credo:disable-for-next-line Credo.Check.Refactor.CyclomaticComplexity
def list_edges(domain \\ nil) do
user_threshold = get_config(:personal_instance_threshold)

View File

@ -22,7 +22,7 @@ defmodule Backend.Application do
# Start the endpoint when the application starts
BackendWeb.Endpoint,
# Crawler children
:hackney_pool.child_spec(:crawler, timeout: 15000, max_connections: crawl_worker_count),
:hackney_pool.child_spec(:crawler, timeout: 15_000, max_connections: crawl_worker_count),
Supervisor.child_spec(
{Task,
fn ->
@ -33,7 +33,8 @@ defmodule Backend.Application do
),
Supervisor.child_spec({Task, fn -> HTTPoison.start() end}, id: :start_httpoison),
Backend.Scheduler,
Backend.Elasticsearch.Cluster
Backend.Elasticsearch.Cluster,
Graph.Cache
]
children =

View File

@ -1,4 +1,7 @@
defmodule Backend.Auth do
@moduledoc """
Functions related to authentication.
"""
alias Phoenix.Token
import Backend.Util
@ -12,6 +15,6 @@ defmodule Backend.Auth do
def verify_token(token) do
# tokens are valid for 12 hours
Token.verify(BackendWeb.Endpoint, @salt, token, max_age: 43200)
Token.verify(BackendWeb.Endpoint, @salt, token, max_age: 43_200)
end
end

View File

@ -1,4 +1,7 @@
defmodule Backend.Crawl do
@moduledoc """
Stores aggregate data about a single crawl (i.e. not individual statuses, but the number of statuses seen etc.)
"""
use Ecto.Schema
import Ecto.Changeset

View File

@ -1,4 +1,8 @@
defmodule Backend.CrawlInteraction do
@moduledoc """
Model for tracking interactions between instances. Stores the source and target instance, as well as the number
of mentions seen in the given crawl.
"""
use Ecto.Schema
import Ecto.Changeset

View File

@ -4,11 +4,13 @@ defmodule Backend.Crawler do
"""
alias __MODULE__
alias Backend.Crawler.Crawlers.{Friendica, GnuSocial, Mastodon, Misskey, Nodeinfo}
alias Backend.{Crawl, CrawlInteraction, Instance, InstancePeer, MostRecentCrawl, Repo}
alias Backend.Crawler.ApiCrawler
alias Backend.{Crawl, CrawlInteraction, MostRecentCrawl, Repo, Instance, InstancePeer}
alias Backend.Crawler.Crawlers.{Friendica, GnuSocial, Mastodon, Misskey, Nodeinfo}
import Ecto.Query
import Backend.Util
require Logger
defstruct [
@ -195,6 +197,11 @@ defmodule Backend.Crawler do
|> Enum.filter(fn domain -> domain != nil and not is_blacklisted?(domain) end)
|> Enum.map(&clean_domain(&1))
if not Enum.all?(peers_domains, &is_valid_domain?(&1)) do
invalid_peers = Enum.filter(peers_domains, fn d -> not is_valid_domain?(d) end)
raise "#{domain} has invalid peers: #{Enum.join(invalid_peers, ", ")}"
end
peers =
peers_domains
|> Enum.map(&%{domain: &1, inserted_at: now, updated_at: now, next_crawl: now})

View File

@ -1,4 +1,9 @@
defmodule Backend.Crawler.Crawlers.Friendica do
@moduledoc """
A crawler for Friendica servers.
These don't expose a public list of statuses. This crawler combines nodeinfo data with the /statistics.json endpoint
in Friendica, and gets a list of peers from /poco/@server.
"""
alias Backend.Crawler.ApiCrawler
import Backend.Crawler.Util
import Backend.Util
@ -21,7 +26,8 @@ defmodule Backend.Crawler.Crawlers.Friendica do
@impl ApiCrawler
def allows_crawling?(domain) do
[
"/statistics.json"
"/statistics.json",
"/poco/@server"
]
|> Enum.map(fn endpoint -> "https://#{domain}#{endpoint}" end)
|> urls_are_crawlable?()
@ -70,13 +76,15 @@ defmodule Backend.Crawler.Crawlers.Friendica do
end
defp to_domain(url) do
url =
cond do
String.starts_with?(url, "https://") -> String.slice(url, 8..-1)
String.starts_with?(url, "http://") -> String.slice(url, 7..-1)
true -> url
end
url
|> String.replace_prefix("http://", "")
|> String.replace_prefix("https://", "")
|> strip_username()
end
# Sometimes a url at the poco/@server endpoint has the form username@domain.tld, in which case we only want domain.tld
defp strip_username(string) do
[_match, _username, domain] = Regex.run(~r/([\w\-_]+@)?([\w\.\-_]+)/, string)
domain
end
end

View File

@ -1,4 +1,7 @@
defmodule Backend.Crawler.Crawlers.GnuSocial do
@moduledoc """
Crawler for GNU Social servers.
"""
alias Backend.Crawler.ApiCrawler
alias Backend.Crawler.Crawlers.Nodeinfo
import Backend.Crawler.Util

View File

@ -1,4 +1,7 @@
defmodule Backend.Crawler.Crawlers.Mastodon do
@moduledoc """
Crawler for the Mastodon API (used by Mastodon, its forks like Gab or Glitch, and Pleroma).
"""
require Logger
import Backend.Crawler.Util
import Backend.Util

View File

@ -1,4 +1,7 @@
defmodule Backend.Crawler.Crawlers.Misskey do
@moduledoc """
Crawler for Misskey servers.
"""
alias Backend.Crawler.ApiCrawler
@behaviour ApiCrawler
@ -202,7 +205,7 @@ defmodule Backend.Crawler.Crawlers.Misskey do
defp extract_mentions_from_status(status) do
status_content = Map.get(status, "text")
Regex.scan(~r/@\w+@([\w.-]+)/, status_content)
Regex.scan(~r/@\w+@([\w\._\-]+)/, status_content)
|> Enum.map(fn [_match, domain] -> domain end)
|> Enum.reduce(%{}, fn domain, acc ->
Map.update(acc, domain, 1, &(&1 + 1))

View File

@ -1,9 +1,4 @@
defmodule Backend.Crawler.Crawlers.Nodeinfo do
alias Backend.Crawler.ApiCrawler
require Logger
import Backend.Util
import Backend.Crawler.Util
@moduledoc """
This module is slightly different from the other crawlers.
It doesn't implement the ApiCrawler spec because it isn't run as a self-contained crawler.
@ -12,6 +7,10 @@ defmodule Backend.Crawler.Crawlers.Nodeinfo do
This is to get the user count. Some servers don't publish this in other places (e.g. GNU Social, PeerTube) so we need
nodeinfo to know whether it's a personal instance or not.
"""
alias Backend.Crawler.ApiCrawler
require Logger
import Backend.Util
import Backend.Crawler.Util
defstruct [
:description,

View File

@ -1,15 +1,15 @@
defmodule Backend.Crawler.StaleInstanceManager do
@moduledoc """
This module regularly finds stale instances (i.e. instances that haven't been updated for longer than the crawl
interval) and adds them to the job queue. It runs once a minute.
"""
use GenServer
alias Backend.{Instance, Repo}
import Ecto.Query
import Backend.Util
require Logger
@moduledoc """
This module regularly finds stale instances (i.e. instances that haven't been updated for longer than the crawl
interval) and adds them to the job queue. It runs once a minute.
"""
def start_link(_opts) do
GenServer.start_link(__MODULE__, [], name: __MODULE__)
end
@ -44,11 +44,11 @@ defmodule Backend.Crawler.StaleInstanceManager do
{:noreply, state}
end
defp schedule_add() do
defp schedule_add do
Process.send_after(self(), :queue_stale_domains, 60_000)
end
defp queue_stale_domains() do
defp queue_stale_domains do
now = get_now()
stale_domains =

View File

@ -1,4 +1,5 @@
defmodule Backend.Crawler.Util do
@moduledoc false
require Logger
alias Backend.{Instance, Repo}
import Backend.Util
@ -8,7 +9,7 @@ defmodule Backend.Crawler.Util do
# (e.g. https://mastodon.social/@demouser or https://pleroma.site/users/demouser)
@spec get_domain(String.t()) :: String.t()
def get_domain(url) do
[_match, domain] = Regex.run(~r/https?:\/\/([\w.-]+)\/.*/, url)
[_match, domain] = Regex.run(~r/https?:\/\/([\w\.\-_]+)\/.*/, url)
domain
end

View File

@ -1,4 +1,5 @@
defmodule Backend.Edge do
@moduledoc false
use Ecto.Schema
import Ecto.Changeset

View File

@ -1,4 +1,5 @@
defmodule Backend.Elasticsearch.Cluster do
@moduledoc false
use Elasticsearch.Cluster, otp_app: :backend
def init(config) do

View File

@ -1,4 +1,5 @@
defmodule Backend.Elasticsearch.Store do
@moduledoc false
@behaviour Elasticsearch.Store
alias Backend.Repo

View File

@ -1,4 +1,8 @@
defmodule Backend.Instance do
@moduledoc """
Model for storing everything related to an instance: not only the data from crawls, but also statistics, the time
of the next scheduled crawl, X and Y coordinates on the graph, and so on.
"""
use Ecto.Schema
import Ecto.Changeset

View File

@ -1,4 +1,8 @@
defmodule Backend.InstancePeer do
@moduledoc """
Model for tracking which other instances a given instance knows of
(the data returned from /api/v1/instance/peers from Mastodon, for example)
"""
use Ecto.Schema
import Ecto.Changeset

View File

@ -1,4 +1,8 @@
defmodule Backend.MostRecentCrawl do
@moduledoc """
Model for fast access to the most recent crawl ID for a given domain.
You could also just look this up in the crawls table, but that table gets very large so this is much faster.
"""
use Ecto.Schema
import Ecto.Changeset

View File

@ -1,4 +1,8 @@
defmodule Backend.Release do
@moduledoc """
Functions related to releases. Can be run against the compiled binary with e.g.
`/bin/backend eval "Backend.Release.migrate()"`
"""
@app :backend
@start_apps [
:crypto,
@ -31,7 +35,7 @@ defmodule Backend.Release do
{:ok, _, _} = Ecto.Migrator.with_repo(repo, &Ecto.Migrator.run(&1, :down, to: version))
end
def build_elasticsearch_indexes() do
def build_elasticsearch_indexes do
start_services()
IO.puts("Building indexes...")
Enum.each(@indexes, &Elasticsearch.Index.hot_swap(Backend.Elasticsearch.Cluster, &1))

View File

@ -3,11 +3,15 @@ defmodule Backend.Scheduler do
This module runs recurring tasks.
"""
use Appsignal.Instrumentation.Decorators
use Quantum.Scheduler, otp_app: :backend
alias Backend.{Crawl, Edge, CrawlInteraction, Instance, Repo}
alias Backend.{Crawl, CrawlInteraction, Edge, Instance, Repo}
alias Backend.Mailer.AdminEmail
import Backend.Util
import Ecto.Query
require Logger
@doc """
@ -17,6 +21,7 @@ defmodule Backend.Scheduler do
`unit` must singular, e.g. "second", "minute", "hour", "month", "year", etc...
"""
@spec prune_crawls(integer, String.t()) :: any
@decorate transaction()
def prune_crawls(amount, unit) do
{deleted_num, _} =
Crawl
@ -34,7 +39,8 @@ defmodule Backend.Scheduler do
Calculates every instance's "insularity score" -- that is, the percentage of mentions that are among users on the
instance, rather than at other instances.
"""
def generate_insularity_scores() do
@decorate transaction()
def generate_insularity_scores do
now = get_now()
crawls_subquery =
@ -79,7 +85,8 @@ defmodule Backend.Scheduler do
@doc """
This function calculates the average number of statuses per hour over the last month.
"""
def generate_status_rate() do
@decorate transaction()
def generate_status_rate do
now = get_now()
# We want the earliest successful crawl so that we can exclude it from the statistics.
# This is because the first crawl goes up to one month into the past -- this would mess up the counts!
@ -138,7 +145,8 @@ defmodule Backend.Scheduler do
It calculates the strength of edges between nodes. Self-edges are not generated.
Edges are only generated if both instances have been successfully crawled.
"""
def generate_edges() do
@decorate transaction()
def generate_edges do
now = get_now()
crawls_subquery =
@ -177,32 +185,7 @@ defmodule Backend.Scheduler do
edges =
interactions
# Get a map of %{{source, target} => {total_mention_count, total_statuses_seen}}
|> Enum.reduce(%{}, fn
%{
source_domain: source_domain,
target_domain: target_domain,
mentions: mentions,
source_statuses_seen: source_statuses_seen,
target_statuses_seen: target_statuses_seen
},
acc ->
key = get_interaction_key(source_domain, target_domain)
# target_statuses_seen might be nil if that instance was never crawled. default to 0.
target_statuses_seen =
case target_statuses_seen do
nil -> 0
_ -> target_statuses_seen
end
statuses_seen = source_statuses_seen + target_statuses_seen
Map.update(acc, key, {mentions, statuses_seen}, fn {curr_mentions,
curr_statuses_seen} ->
{curr_mentions + mentions, curr_statuses_seen}
end)
end)
|> reduce_mention_count()
|> Enum.map(fn {{source_domain, target_domain}, {mention_count, statuses_seen}} ->
%{
source_domain: source_domain,
@ -224,7 +207,8 @@ defmodule Backend.Scheduler do
This function checks to see if a lot of instances on the same base domain have been created recently. If so,
notifies the server admin over SMS.
"""
def check_for_spam_instances() do
@decorate transaction()
def check_for_spam_instances do
hour_range = 3
count_subquery =
@ -264,9 +248,39 @@ defmodule Backend.Scheduler do
Logger.info(message)
send_admin_sms(message)
Backend.Mailer.AdminEmail.send("Potential spam", message)
AdminEmail.send("Potential spam", message)
else
Logger.debug("Did not find potential spam instances.")
end
end
# Takes a list of Interactions
# Returns a map of %{{source, target} => {total_mention_count, total_statuses_seen}}
@decorate transaction_event()
defp reduce_mention_count(interactions) do
Enum.reduce(interactions, %{}, fn
%{
source_domain: source_domain,
target_domain: target_domain,
mentions: mentions,
source_statuses_seen: source_statuses_seen,
target_statuses_seen: target_statuses_seen
},
acc ->
key = get_interaction_key(source_domain, target_domain)
# target_statuses_seen might be nil if that instance was never crawled. default to 0.
target_statuses_seen =
case target_statuses_seen do
nil -> 0
_ -> target_statuses_seen
end
statuses_seen = source_statuses_seen + target_statuses_seen
Map.update(acc, key, {mentions, statuses_seen}, fn {curr_mentions, curr_statuses_seen} ->
{curr_mentions + mentions, curr_statuses_seen}
end)
end)
end
end

View File

@ -1,4 +1,5 @@
defmodule Backend.Util do
@moduledoc false
import Ecto.Query
require Logger
alias Backend.{Crawl, MostRecentCrawl, Repo}
@ -53,7 +54,7 @@ defmodule Backend.Util do
@doc """
Gets the current UTC time as a NaiveDateTime in a format that can be inserted into the database.
"""
def get_now() do
def get_now do
NaiveDateTime.truncate(NaiveDateTime.utc_now(), :second)
end
@ -128,10 +129,14 @@ defmodule Backend.Util do
end
def clean_domain(domain) do
domain
|> String.replace_prefix("https://", "")
|> String.trim_trailing("/")
|> String.downcase()
cleaned =
domain
|> String.replace_prefix("https://", "")
|> String.trim_trailing("/")
|> String.trim()
|> String.downcase()
Regex.replace(~r/:\d+/, cleaned, "")
end
def get_account(username, domain) do
@ -160,8 +165,8 @@ defmodule Backend.Util do
def get_and_decode(url) do
case HTTPoison.get(url, [{"User-Agent", get_config(:user_agent)}],
hackney: [pool: :crawler],
recv_timeout: 15000,
timeout: 15000
recv_timeout: 15_000,
timeout: 15_000
) do
{:ok, %{status_code: 200, body: body}} -> Jason.decode(body)
{:ok, _} -> {:error, %HTTPoison.Error{reason: "Non-200 response"}}
@ -185,8 +190,8 @@ defmodule Backend.Util do
def post_and_decode(url, body \\ "") do
case HTTPoison.post(url, body, [{"User-Agent", get_config(:user_agent)}],
hackney: [pool: :crawler],
recv_timeout: 15000,
timeout: 15000
recv_timeout: 15_000,
timeout: 15_000
) do
{:ok, %{status_code: 200, body: response_body}} -> Jason.decode(response_body)
{:ok, _} -> {:error, %HTTPoison.Error{reason: "Non-200 response"}}
@ -201,4 +206,9 @@ defmodule Backend.Util do
{:error, error} -> raise error
end
end
@spec is_valid_domain?(String.t()) :: boolean
def is_valid_domain?(domain) do
Regex.match?(~r/^[\w\.\-_]+$/, domain)
end
end

View File

@ -1,7 +1,6 @@
defmodule BackendWeb.AdminController do
alias Backend.{Api, Auth, Instance}
use BackendWeb, :controller
alias Backend.{Auth, Api, Instance}
require Logger
action_fallback BackendWeb.FallbackController

View File

@ -10,8 +10,6 @@ defmodule BackendWeb.AdminLoginController do
choose one or the other by POSTing back.
"""
def show(conn, %{"id" => domain}) do
# TODO: this should really be handled in a more async manner
# TODO: this assumes mastodon/pleroma API
cleaned_domain = clean_domain(domain)
instance_data = get_and_decode!("https://#{cleaned_domain}/api/v1/instance")
@ -24,6 +22,7 @@ defmodule BackendWeb.AdminLoginController do
instance_data = get_and_decode!("https://#{cleaned_domain}/api/v1/instance")
# credo:disable-for-lines:16 Credo.Check.Refactor.CondStatements
error =
cond do
type == "email" ->

View File

@ -1,19 +1,16 @@
defmodule BackendWeb.GraphController do
use BackendWeb, :controller
alias Backend.Api
alias Graph.Cache
action_fallback BackendWeb.FallbackController
def index(conn, _params) do
nodes = Api.list_nodes()
edges = Api.list_edges()
%{nodes: nodes, edges: edges} = Cache.get_graph()
render(conn, "index.json", nodes: nodes, edges: edges)
end
def show(conn, %{"id" => domain}) do
nodes = Api.list_nodes(domain)
edges = Api.list_edges(domain)
%{nodes: nodes, edges: edges} = Cache.get_graph(domain)
render(conn, "index.json", nodes: nodes, edges: edges)
end
end

View File

@ -1,18 +1,16 @@
defmodule BackendWeb.InstanceController do
use BackendWeb, :controller
import Backend.Util
alias Backend.Api
alias Graph.Cache
action_fallback(BackendWeb.FallbackController)
def show(conn, %{"id" => domain}) do
instance = Api.get_instance_with_peers(domain)
instance = Cache.get_instance_with_peers(domain)
if instance == nil or instance.opt_out == true do
send_resp(conn, 404, "Not found")
else
last_crawl = get_last_crawl(domain)
last_crawl = Cache.get_last_crawl(domain)
render(conn, "show.json", instance: instance, crawl: last_crawl)
end
end

View File

@ -8,57 +8,66 @@ defmodule BackendWeb.InstanceView do
cond do
instance.user_count < user_threshold and not instance.opt_in ->
%{
name: instance.domain,
status: "personal instance"
}
render_personal_instance(instance)
instance.crawl_error == "robots.txt" ->
%{
name: instance.domain,
status: instance.crawl_error
}
render_domain_and_error(instance)
instance.crawl_error != nil and instance.type == nil ->
%{
name: instance.domain,
status: instance.crawl_error
}
render_domain_and_error(instance)
true ->
last_updated = max_datetime(crawl.inserted_at, instance.updated_at)
filtered_peers =
instance.peers
|> Enum.filter(fn peer -> not peer.opt_out end)
statuses_per_user_per_day =
if instance.statuses_per_day != nil and instance.user_count != nil and
instance.user_count > 0 do
instance.statuses_per_day / instance.user_count
else
nil
end
%{
name: instance.domain,
description: instance.description,
version: instance.version,
userCount: instance.user_count,
insularity: instance.insularity,
statusCount: instance.status_count,
domainCount: length(instance.peers),
peers: render_many(filtered_peers, InstanceView, "instance.json"),
lastUpdated: last_updated,
status: "success",
type: instance.type,
statusesPerDay: instance.statuses_per_day,
statusesPerUserPerDay: statuses_per_user_per_day
}
render_instance(instance, crawl)
end
end
def render("instance.json", %{instance: instance}) do
%{name: instance.domain}
end
defp render_personal_instance(instance) do
%{
name: instance.domain,
status: "personal instance"
}
end
defp render_domain_and_error(instance) do
%{
name: instance.domain,
status: instance.crawl_error
}
end
defp render_instance(instance, crawl) do
last_updated = max_datetime(crawl.inserted_at, instance.updated_at)
filtered_peers =
instance.peers
|> Enum.filter(fn peer -> not peer.opt_out end)
statuses_per_user_per_day =
if instance.statuses_per_day != nil and instance.user_count != nil and
instance.user_count > 0 do
instance.statuses_per_day / instance.user_count
else
nil
end
%{
name: instance.domain,
description: instance.description,
version: instance.version,
userCount: instance.user_count,
insularity: instance.insularity,
statusCount: instance.status_count,
domainCount: length(instance.peers),
peers: render_many(filtered_peers, InstanceView, "instance.json"),
lastUpdated: last_updated,
status: "success",
type: instance.type,
statusesPerDay: instance.statuses_per_day,
statusesPerUserPerDay: statuses_per_user_per_day
}
end
end

View File

@ -0,0 +1,93 @@
defmodule Graph.Cache do
@moduledoc false
use Nebulex.Cache,
otp_app: :backend,
adapter: Nebulex.Adapters.Local
alias Backend.{Api, Crawl, Edge, Instance, MostRecentCrawl, Repo}
alias __MODULE__
require Logger
import Ecto.Query
@spec get_graph(String.t() | nil) :: %{
nodes: [Instance.t()],
edges: [Edge.t()]
}
def get_graph(domain \\ nil) do
key =
if domain != nil do
"graph_" <> domain
else
"graph"
end
case Cache.get(key) do
nil ->
Appsignal.increment_counter("graph_cache.misses", 1)
Logger.debug("Graph cache: miss")
nodes = Api.list_nodes(domain)
edges = Api.list_edges(domain)
# Cache for 10 minutes
Cache.set(key, %{nodes: nodes, edges: edges}, ttl: 600)
%{nodes: nodes, edges: edges}
data ->
Appsignal.increment_counter("graph_cache.hits", 1)
Logger.debug("Graph cache: hit")
data
end
end
@spec get_instance_with_peers(String.t()) :: Instance.t()
def get_instance_with_peers(domain) do
key = "instance_" <> domain
case Cache.get(key) do
nil ->
Appsignal.increment_counter("instance_cache.misses", 1)
Logger.debug("Instance cache: miss")
instance = Api.get_instance_with_peers(domain)
# Cache for one minute
Cache.set(key, instance, ttl: 60)
instance
data ->
Appsignal.increment_counter("instance_cache.hits", 1)
Logger.debug("Instance cache: hit")
data
end
end
@spec get_last_crawl(String.t()) :: Crawl.t() | nil
def get_last_crawl(domain) do
key = "most_recent_crawl_" <> domain
most_recent_crawl_subquery =
MostRecentCrawl
|> select([mrc], %{
most_recent_id: mrc.crawl_id
})
|> where([mrc], mrc.instance_domain == ^domain)
case Cache.get(key) do
nil ->
Appsignal.increment_counter("most_recent_crawl_cache.misses", 1)
Logger.debug("Most recent crawl cache: miss")
crawl =
Crawl
|> join(:inner, [c], mrc in subquery(most_recent_crawl_subquery),
on: c.id == mrc.most_recent_id
)
|> Repo.one()
# Cache for one minute
Cache.set(key, crawl, ttl: 60)
data ->
Appsignal.increment_counter("most_recent_crawl_cache.hits", 1)
Logger.debug("Most recent crawl cache: hit")
data
end
end
end

View File

@ -1,4 +1,7 @@
defmodule Backend.Mailer.AdminEmail do
@moduledoc """
Module for sending emails to the server administrator.
"""
import Swoosh.Email
import Backend.Util
require Logger

View File

@ -1,3 +1,4 @@
defmodule Backend.Mailer do
@moduledoc false
use Swoosh.Mailer, otp_app: :backend
end

View File

@ -1,4 +1,7 @@
defmodule Backend.Mailer.UserEmail do
@moduledoc """
Module for sending emails to users.
"""
import Swoosh.Email
import Backend.{Auth, Util}
require Logger

View File

@ -1,13 +0,0 @@
defmodule Mix.Tasks.Crawl do
alias Backend.Crawler
use Mix.Task
@shortdoc "Crawl a given instance."
def run(domain) do
Mix.Task.run("app.start")
# Application.ensure_all_started(:timex)
# Mix.Task.run("loadconfig")
Crawler.run(domain)
end
end

View File

@ -4,7 +4,7 @@ defmodule Backend.MixProject do
def project do
[
app: :backend,
version: "2.7.0",
version: "2.7.1",
elixir: "~> 1.5",
elixirc_paths: elixirc_paths(Mix.env()),
compilers: [:phoenix, :gettext] ++ Mix.compilers(),
@ -54,7 +54,7 @@ defmodule Backend.MixProject do
{:honeydew, "~> 1.4.3"},
{:quantum, "~> 2.3"},
{:corsica, "~> 1.1.2"},
{:sobelow, "~> 0.8", only: :dev},
{:sobelow, "~> 0.8", only: [:dev, :test]},
{:gollum, "~> 0.3.2"},
{:paginator, "~> 0.6.0"},
{:public_suffix, "~> 0.6.0"},
@ -62,7 +62,9 @@ defmodule Backend.MixProject do
{:swoosh, "~> 0.23.3"},
{:ex_twilio, "~> 0.7.0"},
{:elasticsearch, "~> 1.0"},
{:appsignal, "~> 1.10.1"}
{:appsignal, "~> 1.10.1"},
{:credo, "~> 1.1", only: [:dev, :test], runtime: false},
{:nebulex, "~> 1.1"}
]
end

View File

@ -2,12 +2,14 @@
"appsignal": {:hex, :appsignal, "1.10.11", "5df2546d6ea15e392a4384b175ebc1bb33f4ccf8fe9872c11542d3ae2043ff88", [:make, :mix], [{:decorator, "~> 1.2.3", [hex: :decorator, repo: "hexpm", optional: false]}, {:hackney, "~> 1.6", [hex: :hackney, repo: "hexpm", optional: false]}, {:phoenix, ">= 1.2.0", [hex: :phoenix, repo: "hexpm", optional: true]}, {:plug, ">= 1.1.0", [hex: :plug, repo: "hexpm", optional: true]}, {:poison, ">= 1.3.0", [hex: :poison, repo: "hexpm", optional: false]}], "hexpm"},
"artificery": {:hex, :artificery, "0.4.2", "3ded6e29e13113af52811c72f414d1e88f711410cac1b619ab3a2666bbd7efd4", [:mix], [], "hexpm"},
"base64url": {:hex, :base64url, "0.0.1", "36a90125f5948e3afd7be97662a1504b934dd5dac78451ca6e9abf85a10286be", [:rebar], [], "hexpm"},
"bunt": {:hex, :bunt, "0.2.0", "951c6e801e8b1d2cbe58ebbd3e616a869061ddadcc4863d0a2182541acae9a38", [:mix], [], "hexpm"},
"certifi": {:hex, :certifi, "2.5.1", "867ce347f7c7d78563450a18a6a28a8090331e77fa02380b4a21962a65d36ee5", [:rebar3], [{:parse_trans, "~>3.3", [hex: :parse_trans, repo: "hexpm", optional: false]}], "hexpm"},
"combine": {:hex, :combine, "0.10.0", "eff8224eeb56498a2af13011d142c5e7997a80c8f5b97c499f84c841032e429f", [:mix], [], "hexpm"},
"connection": {:hex, :connection, "1.0.4", "a1cae72211f0eef17705aaededacac3eb30e6625b04a6117c1b2db6ace7d5976", [:mix], [], "hexpm"},
"corsica": {:hex, :corsica, "1.1.2", "5ad8b9dcbeeda4762d78a57c0c8c2f88e1eef8741508517c98cb79e0db1f107d", [:mix], [{:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm"},
"cowboy": {:hex, :cowboy, "2.6.3", "99aa50e94e685557cad82e704457336a453d4abcb77839ad22dbe71f311fcc06", [:rebar3], [{:cowlib, "~> 2.7.3", [hex: :cowlib, repo: "hexpm", optional: false]}, {:ranch, "~> 1.7.1", [hex: :ranch, repo: "hexpm", optional: false]}], "hexpm"},
"cowlib": {:hex, :cowlib, "2.7.3", "a7ffcd0917e6d50b4d5fb28e9e2085a0ceb3c97dea310505f7460ff5ed764ce9", [:rebar3], [], "hexpm"},
"credo": {:hex, :credo, "1.1.3", "bf31887b8914a4b7e1810ae2b5aab7c657698abbf4cca6a2335a094d57995168", [:mix], [{:bunt, "~> 0.2.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm"},
"crontab": {:hex, :crontab, "1.1.7", "b9219f0bdc8678b94143655a8f229716c5810c0636a4489f98c0956137e53985", [:mix], [{:ecto, "~> 1.0 or ~> 2.0 or ~> 3.0", [hex: :ecto, repo: "hexpm", optional: true]}], "hexpm"},
"db_connection": {:hex, :db_connection, "2.1.0", "122e2f62c4906bf2e49554f1e64db5030c19229aa40935f33088e7d543aa79d0", [:mix], [{:connection, "~> 1.0.2", [hex: :connection, repo: "hexpm", optional: false]}], "hexpm"},
"decimal": {:hex, :decimal, "1.8.0", "ca462e0d885f09a1c5a342dbd7c1dcf27ea63548c65a65e67334f4b61803822e", [:mix], [], "hexpm"},
@ -33,6 +35,7 @@
"metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm"},
"mime": {:hex, :mime, "1.3.1", "30ce04ab3175b6ad0bdce0035cba77bba68b813d523d1aac73d9781b4d193cf8", [:mix], [], "hexpm"},
"mimerl": {:hex, :mimerl, "1.2.0", "67e2d3f571088d5cfd3e550c383094b47159f3eee8ffa08e64106cdf5e981be3", [:rebar3], [], "hexpm"},
"nebulex": {:hex, :nebulex, "1.1.0", "be45cc3a2b7d01eb7da05747d38072d336187d05796ad9ef2d9dad9be430f915", [:mix], [{:shards, "~> 0.6", [hex: :shards, repo: "hexpm", optional: false]}], "hexpm"},
"paginator": {:hex, :paginator, "0.6.0", "bc2c01abdd98281ff39b6a7439cf540091122a7927bdaabc167c61d4508f9cbb", [:mix], [{:ecto, "~> 3.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:ecto_sql, "~> 3.0", [hex: :ecto_sql, repo: "hexpm", optional: false]}, {:postgrex, "~> 0.13", [hex: :postgrex, repo: "hexpm", optional: true]}], "hexpm"},
"parse_trans": {:hex, :parse_trans, "3.3.0", "09765507a3c7590a784615cfd421d101aec25098d50b89d7aa1d66646bc571c1", [:rebar3], [], "hexpm"},
"phoenix": {:hex, :phoenix, "1.4.9", "746d098e10741c334d88143d3c94cab1756435f94387a63441792e66ec0ee974", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:phoenix_pubsub, "~> 1.1", [hex: :phoenix_pubsub, repo: "hexpm", optional: false]}, {:plug, "~> 1.8.1 or ~> 1.9", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 1.0 or ~> 2.0", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm"},
@ -46,6 +49,7 @@
"public_suffix": {:hex, :public_suffix, "0.6.0", "100cfe86f13f9f6f0cf67e743b1b83c78dd1223a2c422fa03ebf4adff514cbc3", [:mix], [{:idna, ">= 1.2.0 and < 6.0.0", [hex: :idna, repo: "hexpm", optional: false]}], "hexpm"},
"quantum": {:hex, :quantum, "2.3.4", "72a0e8855e2adc101459eac8454787cb74ab4169de6ca50f670e72142d4960e9", [:mix], [{:calendar, "~> 0.17", [hex: :calendar, repo: "hexpm", optional: true]}, {:crontab, "~> 1.1", [hex: :crontab, repo: "hexpm", optional: false]}, {:gen_stage, "~> 0.12", [hex: :gen_stage, repo: "hexpm", optional: false]}, {:swarm, "~> 3.3", [hex: :swarm, repo: "hexpm", optional: false]}, {:timex, "~> 3.1", [hex: :timex, repo: "hexpm", optional: true]}], "hexpm"},
"ranch": {:hex, :ranch, "1.7.1", "6b1fab51b49196860b733a49c07604465a47bdb78aa10c1c16a3d199f7f8c881", [:rebar3], [], "hexpm"},
"shards": {:hex, :shards, "0.6.0", "678d292ad74a4598a872930f9b12251f43e97f6050287f1fb712fbfd3d282f75", [:make, :rebar3], [], "hexpm"},
"sobelow": {:hex, :sobelow, "0.8.0", "a3ec73e546dfde19f14818e5000c418e3f305d9edb070e79dd391de0ae1cd1ea", [:mix], [], "hexpm"},
"ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.4", "f0eafff810d2041e93f915ef59899c923f4568f4585904d010387ed74988e77b", [:make, :mix, :rebar3], [], "hexpm"},
"swarm": {:hex, :swarm, "3.4.0", "64f8b30055d74640d2186c66354b33b999438692a91be275bb89cdc7e401f448", [:mix], [{:gen_state_machine, "~> 2.0", [hex: :gen_state_machine, repo: "hexpm", optional: false]}, {:libring, "~> 1.0", [hex: :libring, repo: "hexpm", optional: false]}], "hexpm"},

View File

@ -0,0 +1,25 @@
defmodule Backend.Repo.Migrations.AddMostRecentCrawlOnDelete do
use Ecto.Migration
def change do
execute(
"ALTER TABLE most_recent_crawl DROP CONSTRAINT most_recent_crawl_crawl_id_fkey",
"ALTER TABLE most_recent_crawl ADD CONSTRAINT most_recent_crawl_crawl_id_fkey FOREIGN KEY (crawl_id) REFERENCES crawls(id)"
)
execute(
"ALTER TABLE most_recent_crawl ADD CONSTRAINT most_recent_crawl_crawl_id_fkey FOREIGN KEY (crawl_id) REFERENCES crawls(id) ON DELETE CASCADE",
"ALTER TABLE most_recent_crawl DROP CONSTRAINT most_recent_crawl_crawl_id_fkey"
)
execute(
"ALTER TABLE most_recent_crawl DROP CONSTRAINT most_recent_crawl_instance_domain_fkey",
"ALTER TABLE most_recent_crawl ADD CONSTRAINT most_recent_crawl_instance_domain_fkey FOREIGN KEY (instance_domain) REFERENCES instances(domain)"
)
execute(
"ALTER TABLE most_recent_crawl ADD CONSTRAINT most_recent_crawl_instance_domain_fkey FOREIGN KEY (instance_domain) REFERENCES instances(domain) ON DELETE CASCADE",
"ALTER TABLE most_recent_crawl DROP CONSTRAINT most_recent_crawl_instance_domain_fkey"
)
end
end

View File

@ -0,0 +1,25 @@
defmodule Backend.Repo.Migrations.AddCrawlInteractionOnDelete do
  @moduledoc false

  use Ecto.Migration

  # Rebuilds the crawl_interactions foreign keys so that deleting an instance
  # cascades to its interaction rows. Both keys go through the same
  # drop-then-re-add sequence; each execute/2 includes the reverse statement so
  # the migration can be rolled back.
  def change do
    recreate_fkey_with_cascade("source_domain")
    recreate_fkey_with_cascade("target_domain")
  end

  # Drops the existing FK on the given column and re-adds it with
  # ON DELETE CASCADE (Postgres cannot alter the ON DELETE action in place).
  defp recreate_fkey_with_cascade(column) do
    constraint = "crawl_interactions_#{column}_fkey"
    reference = "FOREIGN KEY (#{column}) REFERENCES instances(domain)"

    execute(
      "ALTER TABLE crawl_interactions DROP CONSTRAINT #{constraint}",
      "ALTER TABLE crawl_interactions ADD CONSTRAINT #{constraint} #{reference}"
    )

    execute(
      "ALTER TABLE crawl_interactions ADD CONSTRAINT #{constraint} #{reference} ON DELETE CASCADE",
      "ALTER TABLE crawl_interactions DROP CONSTRAINT #{constraint}"
    )
  end
end

View File

@ -14,6 +14,7 @@ defmodule BackendWeb.ChannelCase do
"""
use ExUnit.CaseTemplate
alias Ecto.Adapters.SQL.Sandbox
using do
quote do
@ -26,10 +27,10 @@ defmodule BackendWeb.ChannelCase do
end
setup tags do
:ok = Ecto.Adapters.SQL.Sandbox.checkout(Backend.Repo)
:ok = Sandbox.checkout(Backend.Repo)
unless tags[:async] do
Ecto.Adapters.SQL.Sandbox.mode(Backend.Repo, {:shared, self()})
Sandbox.mode(Backend.Repo, {:shared, self()})
end
:ok

View File

@ -14,6 +14,7 @@ defmodule BackendWeb.ConnCase do
"""
use ExUnit.CaseTemplate
alias Ecto.Adapters.SQL.Sandbox
using do
quote do
@ -27,10 +28,10 @@ defmodule BackendWeb.ConnCase do
end
setup tags do
:ok = Ecto.Adapters.SQL.Sandbox.checkout(Backend.Repo)
:ok = Sandbox.checkout(Backend.Repo)
unless tags[:async] do
Ecto.Adapters.SQL.Sandbox.mode(Backend.Repo, {:shared, self()})
Sandbox.mode(Backend.Repo, {:shared, self()})
end
{:ok, conn: Phoenix.ConnTest.build_conn()}

View File

@ -13,6 +13,7 @@ defmodule Backend.DataCase do
"""
use ExUnit.CaseTemplate
alias Ecto.Adapters.SQL.Sandbox
using do
quote do
@ -26,10 +27,10 @@ defmodule Backend.DataCase do
end
setup tags do
:ok = Ecto.Adapters.SQL.Sandbox.checkout(Backend.Repo)
:ok = Sandbox.checkout(Backend.Repo)
unless tags[:async] do
Ecto.Adapters.SQL.Sandbox.mode(Backend.Repo, {:shared, self()})
Sandbox.mode(Backend.Repo, {:shared, self()})
end
:ok

View File

@ -1,6 +1,6 @@
{
"name": "frontend",
"version": "2.7.0",
"version": "2.7.1",
"private": true,
"scripts": {
"start": "NODE_ENV=development react-scripts start",
@ -9,7 +9,7 @@
"lint": "yarn typecheck && tslint -p tsconfig.json -c tslint.json \"src/**/*.{ts,tsx}\"",
"lint:fix": "yarn lint --fix",
"pretty": "prettier --write \"src/**/*.{ts,tsx}\"",
"test": "yarn lint && react-scripts test",
"test": "yarn lint && react-scripts test --ci",
"eject": "react-scripts eject"
},
"husky": {
@ -28,50 +28,49 @@
"printWidth": 120
},
"dependencies": {
"@blueprintjs/core": "^3.17.1",
"@blueprintjs/icons": "^3.9.1",
"@blueprintjs/select": "^3.9.0",
"@blueprintjs/core": "^3.18.0",
"@blueprintjs/icons": "^3.10.0",
"@blueprintjs/select": "^3.10.0",
"classnames": "^2.2.6",
"connected-react-router": "^6.5.2",
"cross-fetch": "^3.0.4",
"cytoscape": "^3.8.1",
"cytoscape": "^3.9.0",
"cytoscape-popper": "^1.0.4",
"inflection": "^1.12.0",
"lodash": "^4.17.15",
"moment": "^2.22.2",
"normalize.css": "^8.0.0",
"numeral": "^2.0.6",
"react": "^16.8.0",
"react-dom": "^16.8.0",
"react": "^16.9.0",
"react-dom": "^16.9.0",
"react-redux": "^7.1.0",
"react-router-dom": "^5.0.1",
"react-scripts": "^3.0.1",
"react-scripts": "^3.1.1",
"react-sigma": "^1.2.30",
"react-virtualized": "^9.21.1",
"redux": "^4.0.4",
"redux-thunk": "^2.3.0",
"sanitize-html": "^1.20.1",
"styled-components": "^4.3.2",
"tippy.js": "^4.3.4"
"tippy.js": "^4.3.5"
},
"devDependencies": {
"@blueprintjs/tslint-config": "^1.8.1",
"@blueprintjs/tslint-config": "^1.9.0",
"@types/classnames": "^2.2.9",
"@types/cytoscape": "^3.8.0",
"@types/cytoscape": "^3.8.1",
"@types/inflection": "^1.5.28",
"@types/jest": "^24.0.15",
"@types/lodash": "^4.14.136",
"@types/node": "^12.6.8",
"@types/numeral": "^0.0.25",
"@types/react": "^16.8.23",
"@types/react-dom": "^16.8.4",
"@types/react-redux": "^7.1.1",
"@types/jest": "^24.0.18",
"@types/lodash": "^4.14.137",
"@types/node": "^12.7.2",
"@types/numeral": "^0.0.26",
"@types/react": "^16.9.2",
"@types/react-dom": "^16.9.0",
"@types/react-redux": "^7.1.2",
"@types/react-router-dom": "^4.3.4",
"@types/react-virtualized": "^9.21.3",
"@types/sanitize-html": "^1.20.1",
"@types/styled-components": "4.1.18",
"husky": "^3.0.1",
"lint-staged": "^9.2.0",
"husky": "^3.0.4",
"lint-staged": "^9.2.3",
"react-axe": "^3.2.0",
"tslint": "^5.18.0",
"tslint-config-security": "^1.16.0",

View File

@ -14,16 +14,25 @@ const AboutScreen: React.FC = () => (
. It works by crawling every instance it can find and aggregating statistics on communication between these.
</p>
<p>
You can follow the project on{" "}
<a href="https://x0r.be/@fediversespace" target="_blank" rel="noopener noreferrer">
Mastodon
</a>
.
</p>
<br />
<H2>FAQ</H2>
<H4>Why can't I see details about my instance?</H4>
<p className={Classes.RUNNING_TEXT}>
Currently, fediverse.space only supports Mastodon and Pleroma instances. In addition, instances with 10 or fewer
users won't be scraped -- it's a tool for understanding communities, not individuals.
fediverse.space only supports servers using the Mastodon API, the Misskey API, the GNU Social API, or Nodeinfo.
Instances with 10 or fewer users won't be scraped -- it's a tool for understanding communities, not individuals.
</p>
<H4>
When is <Code>$OTHER_ACTIVITYPUB_SERVER</Code> going to be added?
When is <Code>$OTHER_FEDIVERSE_SERVER</Code> going to be added?
</H4>
<p className={Classes.RUNNING_TEXT}>
Check out{" "}
@ -38,12 +47,13 @@ const AboutScreen: React.FC = () => (
<H4>How do you calculate the strength of relationships between instances?</H4>
<p className={Classes.RUNNING_TEXT}>
fediverse.space looks at statuses from within the last month on the public timeline of each instance. It
fediverse.space looks at public statuses from within the last month on the public timeline of each instance. It
calculates the ratio of
<Code>mentions of an instance / total statuses</Code>. It uses a ratio rather than an absolute number of mentions
to reflect that smaller instances can play a large role in a community.
</p>
<br />
<H2>Credits</H2>
<a href="https://nlnet.nl/project/fediverse_space/" target="_blank" rel="noopener noreferrer">

View File

@ -105,7 +105,7 @@ class InstanceScreenImpl extends React.PureComponent<IInstanceScreenProps, IInst
if (this.props.isLoadingInstanceDetails || this.state.isProcessingNeighbors || this.state.isLoadingLocalGraph) {
content = this.renderLoadingState();
} else if (this.props.instanceLoadError || this.state.localGraphLoadError || !this.props.instanceDetails) {
return (content = <ErrorState />);
content = <ErrorState />;
} else if (this.props.instanceDetails.status.toLowerCase().indexOf("personal instance") > -1) {
content = this.renderPersonalInstanceErrorState();
} else if (this.props.instanceDetails.status.toLowerCase().indexOf("robots.txt") > -1) {

View File

@ -86,6 +86,7 @@ class LoginScreen extends React.PureComponent<{}, ILoginScreenState> {
private renderChooseInstance = () => {
const { isGettingLoginTypes } = this.state;
const onButtonClick = () => this.getLoginTypes();
return (
<form onSubmit={this.getLoginTypes}>
<FormGroup label="Instance domain" labelFor="domain-input" disabled={isGettingLoginTypes} inline={true}>
@ -101,6 +102,7 @@ class LoginScreen extends React.PureComponent<{}, ILoginScreenState> {
rightIcon={IconNames.ARROW_RIGHT}
title="submit"
loading={isGettingLoginTypes}
onClick={onButtonClick}
/>
}
placeholder="mastodon.social"
@ -166,8 +168,10 @@ class LoginScreen extends React.PureComponent<{}, ILoginScreenState> {
this.setState({ domain: event.target.value });
};
private getLoginTypes = (e: React.FormEvent<HTMLFormElement>) => {
e.preventDefault();
private getLoginTypes = (e?: React.FormEvent<HTMLFormElement>) => {
if (!!e) {
e.preventDefault();
}
this.setState({ isGettingLoginTypes: true });
let { domain } = this.state;
if (domain.startsWith("https://")) {

File diff suppressed because it is too large Load Diff