Save base domain

This commit is contained in:
Tao Bojlén 2019-07-25 15:56:03 +00:00
parent 99a6dc2023
commit 490b518587
12 changed files with 173 additions and 29 deletions

View file

@ -27,6 +27,14 @@ config :logger, :console,
# Use Jason for JSON parsing in Phoenix
config :phoenix, :json_library, Jason
# Twilio credentials used by ExTwilio. Both values are read from the environment
# when this config file is evaluated; System.get_env/1 yields nil for unset variables.
config :ex_twilio,
account_sid: System.get_env("TWILIO_ACCOUNT_SID"),
auth_token: System.get_env("TWILIO_AUTH_TOKEN")
# Backend.Mailer delivers through Swoosh's SendGrid adapter; the API key is likewise
# taken from the environment (nil when SENDGRID_API_KEY is unset).
config :backend, Backend.Mailer,
adapter: Swoosh.Adapters.Sendgrid,
api_key: System.get_env("SENDGRID_API_KEY")
config :backend, :crawler,
status_age_limit_days: 28,
status_count_limit: 5000,
@ -37,7 +45,10 @@ config :backend, :crawler,
"gab.best",
"4chan.icu"
],
user_agent: "fediverse.space crawler"
user_agent: "fediverse.space crawler",
admin_phone: System.get_env("ADMIN_PHONE"),
twilio_phone: System.get_env("TWILIO_PHONE"),
admin_email: System.get_env("ADMIN_EMAIL")
config :backend, Backend.Scheduler,
jobs: [
@ -46,7 +57,9 @@ config :backend, Backend.Scheduler,
# 00.15 daily
{"15 0 * * *", {Backend.Scheduler, :generate_edges, []}},
# 00.30 every night
{"30 0 * * *", {Backend.Scheduler, :generate_insularity_scores, []}}
{"30 0 * * *", {Backend.Scheduler, :generate_insularity_scores, []}},
# Every 30 minutes
{"*/30 * * * *", {Backend.Scheduler, :check_for_spam_instances, []}}
]
# Import environment specific config. This must remain at the bottom

View file

@ -63,11 +63,6 @@ config :backend, :crawler,
crawl_interval_mins: 1,
crawl_workers: 10,
blacklist: [
"gab.best"
]
config :backend, Backend.Scheduler,
jobs: [
# Every 5 minutes
{"*/5 * * * *", {Backend.Scheduler, :prune_crawls, [12, "month"]}}
"gab.best",
"4chan.icu"
]

View file

@ -24,3 +24,16 @@ config :backend, BackendWeb.Endpoint,
root: ".",
secret_key_base: System.get_env("SECRET_KEY_BASE"),
server: true
# Twilio credentials used by ExTwilio, read from the environment when this file is evaluated.
config :ex_twilio,
account_sid: System.get_env("TWILIO_ACCOUNT_SID"),
auth_token: System.get_env("TWILIO_AUTH_TOKEN")
# Contact details for admin alerts. Any of these is nil when its environment variable is unset.
# NOTE(review): presumably these are the values looked up through get_config/1 when sending
# the admin SMS/email — confirm against Backend.Util.
config :backend, :crawler,
admin_phone: System.get_env("ADMIN_PHONE"),
twilio_phone: System.get_env("TWILIO_PHONE"),
admin_email: System.get_env("ADMIN_EMAIL")
# Backend.Mailer delivers through Swoosh's SendGrid adapter, keyed from the environment.
config :backend, Backend.Mailer,
adapter: Swoosh.Adapters.Sendgrid,
api_key: System.get_env("SENDGRID_API_KEY")

View file

@ -113,22 +113,16 @@ defmodule Backend.Crawler do
Repo.insert!(
%Instance{
domain: domain,
description: result.description,
version: result.version,
user_count: result.user_count,
status_count: result.status_count,
type: instance_type
},
on_conflict: [
set: [
description: result.description,
version: result.version,
user_count: result.user_count,
status_count: result.status_count,
type: instance_type,
updated_at: now
]
],
base_domain: get_base_domain(domain)
},
on_conflict:
{:replace,
[:description, :version, :user_count, :status_count, :type, :base_domain, :updated_at]},
conflict_target: :domain
)
@ -224,9 +218,24 @@ defmodule Backend.Crawler do
true -> "unknown error"
end
Repo.transaction(fn ->
Repo.insert!(
%Instance{
domain: domain,
base_domain: get_base_domain(domain)
},
on_conflict: {:replace, [:base_domain]},
conflict_target: :domain
)
Repo.insert!(%Crawl{
instance_domain: domain,
error: error
})
end)
end
# Derives the value stored in an instance's `base_domain` column by delegating to
# PublicSuffix.registrable_domain/2 with `ignore_private: true`.
# NOTE(review): presumably this can yield nil for hosts PublicSuffix cannot resolve —
# confirm against the PublicSuffix documentation.
defp get_base_domain(domain),
  do: PublicSuffix.registrable_domain(domain, ignore_private: true)
end

View file

@ -10,6 +10,7 @@ defmodule Backend.Instance do
field :version, :string
field :insularity, :float
field :type, :string
field :base_domain, :string
many_to_many :peers, Backend.Instance,
join_through: Backend.InstancePeer,
@ -35,7 +36,8 @@ defmodule Backend.Instance do
:version,
:insularity,
:updated_at,
:type
:type,
:base_domain
])
|> validate_required([:domain])
|> put_assoc(:peers, attrs.peers)

View file

@ -159,4 +159,54 @@ defmodule Backend.Scheduler do
|> Repo.insert_all(edges)
end)
end
@doc """
Checks whether several instances sharing a base domain were created recently and, if so,
alerts the server admin.

Instances inserted within the last 6 hours are grouped by `base_domain` and counted. Every
base domain with more than 2 new instances is reported. When at least one is found, the
report is logged at info level, sent by SMS through `send_admin_sms/1`, and emailed through
`Backend.Mailer.AdminEmail.send/2` with the subject "Potential spam". When none is found,
only a debug line is logged.
"""
def check_for_spam_instances() do
  hour_range = 6

  # Number of instances created inside the time window, per base domain.
  count_subquery =
    Instance
    |> where(
      [i],
      i.inserted_at > datetime_add(^NaiveDateTime.utc_now(), -1 * ^hour_range, "hour")
    )
    |> group_by(:base_domain)
    |> select([i], %{
      count: count(i.id),
      base_domain: i.base_domain
    })

  # NOTE(review): the join matches instances whose `domain` equals the grouped base domain,
  # so a base domain is only reported when an instance exists at exactly that domain.
  # Confirm this is intended; subdomain-only spam would not be reported.
  potential_spam_instances =
    Instance
    |> join(:inner, [i], c in subquery(count_subquery), on: i.domain == c.base_domain)
    |> where([i, c], c.count > 2)
    |> select([i, c], %{
      base_domain: i.base_domain,
      count: c.count
    })
    |> Repo.all()

  case potential_spam_instances do
    [] ->
      Logger.debug("Did not find potential spam instances.")

    instances ->
      lines =
        Enum.map_join(instances, "\n", fn %{count: count, base_domain: base_domain} ->
          "* #{count} new at #{base_domain}"
        end)

      message =
        "fediverse.space detected the following potential spam domains from the last " <>
          "#{hour_range} hours:\n#{lines}"

      Logger.info(message)
      send_admin_sms(message)
      Backend.Mailer.AdminEmail.send("Potential spam", message)
  end
end
end

View file

@ -1,5 +1,6 @@
defmodule Backend.Util do
import Ecto.Query
require Logger
alias Backend.{Crawl, Repo}
@doc """
@ -126,4 +127,19 @@ defmodule Backend.Util do
Map.update(acc, key, val, &(&1 + val))
end)
end
@doc """
Sends an SMS containing `body` to the admin phone number, if configured.

The recipient (`:admin_phone`) and sender (`:twilio_phone`) are looked up with `get_config/1`.
When either is `nil`, nothing is sent and an info line is logged instead.

Returns whatever `ExTwilio.Message.create/1` returns. Any result other than an `{:ok, _}`
tuple is also logged at error level, so a failed alert does not go unnoticed.
"""
def send_admin_sms(body) do
  # Read each setting once so the check and the send use the same values.
  admin_phone = get_config(:admin_phone)
  twilio_phone = get_config(:twilio_phone)

  if admin_phone != nil and twilio_phone != nil do
    case ExTwilio.Message.create(to: admin_phone, from: twilio_phone, body: body) do
      {:ok, _message} = result ->
        result

      error ->
        # Callers ignore the return value, so surface the failure here.
        Logger.error("Could not send SMS to admin: #{inspect(error)}")
        error
    end
  else
    Logger.info("Could not send SMS to admin; not configured.")
  end
end
end

View file

@ -0,0 +1,20 @@
defmodule Backend.Mailer.AdminEmail do
  @moduledoc """
  Plain-text notification emails addressed to the configured admin.
  """
  import Swoosh.Email
  import Backend.Util
  require Logger

  @doc """
  Emails `body` with the given `subject` to the admin address.

  The recipient is looked up with `get_config(:admin_email)`. When it is `nil`, no email is
  sent and an info line is logged instead. Otherwise the message is sent from
  "noreply@fediverse.space" via `Backend.Mailer.deliver!/1`.
  """
  def send(subject, body) do
    case get_config(:admin_email) do
      nil ->
        Logger.info("Could not send email to admin; not configured.")

      recipient ->
        email =
          new()
          |> to(recipient)
          |> from("noreply@fediverse.space")
          |> subject(subject)
          |> text_body(body)

        Backend.Mailer.deliver!(email)
    end
  end
end

View file

@ -0,0 +1,3 @@
defmodule Backend.Mailer do
@moduledoc """
Swoosh mailer for the `:backend` OTP application.

Its settings are taken from the `:backend` application's configuration under this module's name.
"""
use Swoosh.Mailer, otp_app: :backend
end

View file

@ -20,7 +20,7 @@ defmodule Backend.MixProject do
def application do
[
mod: {Backend.Application, []},
extra_applications: [:logger, :runtime_tools, :mnesia, :gollum]
extra_applications: [:logger, :runtime_tools, :mnesia, :gollum, :ex_twilio]
]
end
@ -48,7 +48,11 @@ defmodule Backend.MixProject do
{:corsica, "~> 1.1.2"},
{:sobelow, "~> 0.8", only: :dev},
{:gollum, "~> 0.3.2"},
{:paginator, "~> 0.6.0"}
{:paginator, "~> 0.6.0"},
{:public_suffix, "~> 0.6.0"},
{:idna, "~> 5.1.2", override: true},
{:swoosh, "~> 0.23.3"},
{:ex_twilio, "~> 0.7.0"}
]
end

View file

@ -1,5 +1,6 @@
%{
"artificery": {:hex, :artificery, "0.4.2", "3ded6e29e13113af52811c72f414d1e88f711410cac1b619ab3a2666bbd7efd4", [:mix], [], "hexpm"},
"base64url": {:hex, :base64url, "0.0.1", "36a90125f5948e3afd7be97662a1504b934dd5dac78451ca6e9abf85a10286be", [:rebar], [], "hexpm"},
"certifi": {:hex, :certifi, "2.5.1", "867ce347f7c7d78563450a18a6a28a8090331e77fa02380b4a21962a65d36ee5", [:rebar3], [{:parse_trans, "~>3.3", [hex: :parse_trans, repo: "hexpm", optional: false]}], "hexpm"},
"combine": {:hex, :combine, "0.10.0", "eff8224eeb56498a2af13011d142c5e7997a80c8f5b97c499f84c841032e429f", [:mix], [], "hexpm"},
"connection": {:hex, :connection, "1.0.4", "a1cae72211f0eef17705aaededacac3eb30e6625b04a6117c1b2db6ace7d5976", [:mix], [], "hexpm"},
@ -12,6 +13,7 @@
"distillery": {:hex, :distillery, "2.1.1", "f9332afc2eec8a1a2b86f22429e068ef35f84a93ea1718265e740d90dd367814", [:mix], [{:artificery, "~> 0.2", [hex: :artificery, repo: "hexpm", optional: false]}], "hexpm"},
"ecto": {:hex, :ecto, "3.1.7", "fa21d06ef56cdc2fdaa62574e8c3ba34a2751d44ea34c30bc65f0728421043e5", [:mix], [{:decimal, "~> 1.6", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm"},
"ecto_sql": {:hex, :ecto_sql, "3.1.6", "1e80e30d16138a729c717f73dcb938590bcdb3a4502f3012414d0cbb261045d8", [:mix], [{:db_connection, "~> 2.0", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.1.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:mariaex, "~> 0.9.1", [hex: :mariaex, repo: "hexpm", optional: true]}, {:myxql, "~> 0.2.0", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.14.0 or ~> 0.15.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm"},
"ex_twilio": {:hex, :ex_twilio, "0.7.0", "d7ce624ef4661311ae28c3e3aa060ecb66a9f4843184d7400c29072f7d3f5a4a", [:mix], [{:httpoison, ">= 0.9.0", [hex: :httpoison, repo: "hexpm", optional: false]}, {:inflex, "~> 1.0", [hex: :inflex, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:joken, "~> 2.0", [hex: :joken, repo: "hexpm", optional: false]}, {:poison, "~> 3.0", [hex: :poison, repo: "hexpm", optional: false]}], "hexpm"},
"gen_stage": {:hex, :gen_stage, "0.14.2", "6a2a578a510c5bfca8a45e6b27552f613b41cf584b58210f017088d3d17d0b14", [:mix], [], "hexpm"},
"gen_state_machine": {:hex, :gen_state_machine, "2.0.5", "9ac15ec6e66acac994cc442dcc2c6f9796cf380ec4b08267223014be1c728a95", [:mix], [], "hexpm"},
"gettext": {:hex, :gettext, "0.17.0", "abe21542c831887a2b16f4c94556db9c421ab301aee417b7c4fbde7fbdbe01ec", [:mix], [], "hexpm"},
@ -19,8 +21,11 @@
"hackney": {:hex, :hackney, "1.15.1", "9f8f471c844b8ce395f7b6d8398139e26ddca9ebc171a8b91342ee15a19963f4", [:rebar3], [{:certifi, "2.5.1", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.4", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm"},
"honeydew": {:hex, :honeydew, "1.4.3", "f2d976aaf8b9b914a635d2d483f1a71d2f6d8651809474dd5db581953cbebb30", [:mix], [{:ecto, "~> 3.0", [hex: :ecto, repo: "hexpm", optional: true]}], "hexpm"},
"httpoison": {:hex, :httpoison, "1.5.1", "0f55b5b673b03c5c327dac7015a67cb571b99b631acc0bc1b0b98dcd6b9f2104", [:mix], [{:hackney, "~> 1.8", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm"},
"idna": {:hex, :idna, "6.0.0", "689c46cbcdf3524c44d5f3dde8001f364cd7608a99556d8fbd8239a5798d4c10", [:rebar3], [{:unicode_util_compat, "0.4.1", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm"},
"idna": {:hex, :idna, "5.1.2", "e21cb58a09f0228a9e0b95eaa1217f1bcfc31a1aaa6e1fdf2f53a33f7dbd9494", [:rebar3], [{:unicode_util_compat, "0.3.1", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm"},
"inflex": {:hex, :inflex, "1.10.0", "8366a7696e70e1813aca102e61274addf85d99f4a072b2f9c7984054ea1b9d29", [:mix], [], "hexpm"},
"jason": {:hex, :jason, "1.1.2", "b03dedea67a99223a2eaf9f1264ce37154564de899fd3d8b9a21b1a6fd64afe7", [:mix], [{:decimal, "~> 1.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm"},
"joken": {:hex, :joken, "2.1.0", "bf21a73105d82649f617c5e59a7f8919aa47013d2519ebcc39d998d8d12adda9", [:mix], [{:jose, "~> 1.9", [hex: :jose, repo: "hexpm", optional: false]}], "hexpm"},
"jose": {:hex, :jose, "1.9.0", "4167c5f6d06ffaebffd15cdb8da61a108445ef5e85ab8f5a7ad926fdf3ada154", [:mix, :rebar3], [{:base64url, "~> 0.0.1", [hex: :base64url, repo: "hexpm", optional: false]}], "hexpm"},
"libring": {:hex, :libring, "1.4.0", "41246ba2f3fbc76b3971f6bce83119dfec1eee17e977a48d8a9cfaaf58c2a8d6", [:mix], [], "hexpm"},
"metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm"},
"mime": {:hex, :mime, "1.3.1", "30ce04ab3175b6ad0bdce0035cba77bba68b813d523d1aac73d9781b4d193cf8", [:mix], [], "hexpm"},
@ -33,14 +38,17 @@
"plug": {:hex, :plug, "1.8.2", "0bcce1daa420f189a6491f3940cc77ea7fb1919761175c9c3b59800d897440fc", [:mix], [{:mime, "~> 1.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: true]}], "hexpm"},
"plug_cowboy": {:hex, :plug_cowboy, "2.1.0", "b75768153c3a8a9e8039d4b25bb9b14efbc58e9c4a6e6a270abff1cd30cbe320", [:mix], [{:cowboy, "~> 2.5", [hex: :cowboy, repo: "hexpm", optional: false]}, {:plug, "~> 1.7", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm"},
"plug_crypto": {:hex, :plug_crypto, "1.0.0", "18e49317d3fa343f24620ed22795ec29d4a5e602d52d1513ccea0b07d8ea7d4d", [:mix], [], "hexpm"},
"poison": {:hex, :poison, "3.1.0", "d9eb636610e096f86f25d9a46f35a9facac35609a7591b3be3326e99a0484665", [:mix], [], "hexpm"},
"postgrex": {:hex, :postgrex, "0.14.3", "5754dee2fdf6e9e508cbf49ab138df964278700b764177e8f3871e658b345a1e", [:mix], [{:connection, "~> 1.0", [hex: :connection, repo: "hexpm", optional: false]}, {:db_connection, "~> 2.0", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 1.5", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm"},
"public_suffix": {:hex, :public_suffix, "0.6.0", "100cfe86f13f9f6f0cf67e743b1b83c78dd1223a2c422fa03ebf4adff514cbc3", [:mix], [{:idna, ">= 1.2.0 and < 6.0.0", [hex: :idna, repo: "hexpm", optional: false]}], "hexpm"},
"quantum": {:hex, :quantum, "2.3.4", "72a0e8855e2adc101459eac8454787cb74ab4169de6ca50f670e72142d4960e9", [:mix], [{:calendar, "~> 0.17", [hex: :calendar, repo: "hexpm", optional: true]}, {:crontab, "~> 1.1", [hex: :crontab, repo: "hexpm", optional: false]}, {:gen_stage, "~> 0.12", [hex: :gen_stage, repo: "hexpm", optional: false]}, {:swarm, "~> 3.3", [hex: :swarm, repo: "hexpm", optional: false]}, {:timex, "~> 3.1", [hex: :timex, repo: "hexpm", optional: true]}], "hexpm"},
"ranch": {:hex, :ranch, "1.7.1", "6b1fab51b49196860b733a49c07604465a47bdb78aa10c1c16a3d199f7f8c881", [:rebar3], [], "hexpm"},
"sobelow": {:hex, :sobelow, "0.8.0", "a3ec73e546dfde19f14818e5000c418e3f305d9edb070e79dd391de0ae1cd1ea", [:mix], [], "hexpm"},
"ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.4", "f0eafff810d2041e93f915ef59899c923f4568f4585904d010387ed74988e77b", [:make, :mix, :rebar3], [], "hexpm"},
"swarm": {:hex, :swarm, "3.4.0", "64f8b30055d74640d2186c66354b33b999438692a91be275bb89cdc7e401f448", [:mix], [{:gen_state_machine, "~> 2.0", [hex: :gen_state_machine, repo: "hexpm", optional: false]}, {:libring, "~> 1.0", [hex: :libring, repo: "hexpm", optional: false]}], "hexpm"},
"swoosh": {:hex, :swoosh, "0.23.3", "750a6d4e2b72e4307e2ff53209fd990cebb46edbf7cb4479678d4e68eb17fe98", [:mix], [{:cowboy, "~> 1.0.1 or ~> 1.1 or ~> 2.4", [hex: :cowboy, repo: "hexpm", optional: true]}, {:gen_smtp, "~> 0.13", [hex: :gen_smtp, repo: "hexpm", optional: true]}, {:hackney, "~> 1.9", [hex: :hackney, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mail, "~> 0.2", [hex: :mail, repo: "hexpm", optional: true]}, {:mime, "~> 1.1", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_cowboy, ">= 1.0.0", [hex: :plug_cowboy, repo: "hexpm", optional: true]}], "hexpm"},
"telemetry": {:hex, :telemetry, "0.4.0", "8339bee3fa8b91cb84d14c2935f8ecf399ccd87301ad6da6b71c09553834b2ab", [:rebar3], [], "hexpm"},
"timex": {:hex, :timex, "3.6.1", "efdf56d0e67a6b956cc57774353b0329c8ab7726766a11547e529357ffdc1d56", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:gettext, "~> 0.10", [hex: :gettext, repo: "hexpm", optional: false]}, {:tzdata, "~> 0.1.8 or ~> 0.5 or ~> 1.0.0", [hex: :tzdata, repo: "hexpm", optional: false]}], "hexpm"},
"tzdata": {:hex, :tzdata, "1.0.1", "f6027a331af7d837471248e62733c6ebee86a72e57c613aa071ebb1f750fc71a", [:mix], [{:hackney, "~> 1.0", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm"},
"unicode_util_compat": {:hex, :unicode_util_compat, "0.4.1", "d869e4c68901dd9531385bb0c8c40444ebf624e60b6962d95952775cac5e90cd", [:rebar3], [], "hexpm"},
"unicode_util_compat": {:hex, :unicode_util_compat, "0.3.1", "a1f612a7b512638634a603c8f401892afbf99b8ce93a45041f8aaca99cadb85e", [:rebar3], [], "hexpm"},
}

View file

@ -0,0 +1,11 @@
# Adds a `base_domain` string column to the `instances` table, plus an index on it.
defmodule Backend.Repo.Migrations.AddBaseDomain do
use Ecto.Migration
def change do
alter table(:instances) do
# No default is given here; this migration does not backfill existing rows.
add :base_domain, :string
end
# Index on the new column.
create index(:instances, [:base_domain])
end
end