Revert "add metadata endpoint"

This reverts commit 82153b283b.
Tao Bojlén 2019-08-09 16:59:51 +00:00
parent 9a0bbbb7d9
commit 3320e050c8
14 changed files with 477 additions and 170 deletions


@@ -11,10 +11,12 @@ defmodule Backend.Crawler.ApiCrawler do
* Make sure to check the most recent crawl of the instance so you don't re-crawl old statuses
"""
alias Backend.Crawler.Crawlers.Nodeinfo
# {domain_mentioned, count}
@type instance_interactions :: %{String.t() => integer}
@type instance_type :: :mastodon | :pleroma | :gab | :misskey
@type instance_type :: :mastodon | :pleroma | :gab | :misskey | :gnusocial
defstruct [
:version,
@@ -30,8 +32,8 @@ defmodule Backend.Crawler.ApiCrawler do
@type t() :: %__MODULE__{
version: String.t(),
description: String.t(),
user_count: integer,
status_count: integer,
user_count: integer | nil,
status_count: integer | nil,
peers: [String.t()],
interactions: instance_interactions,
statuses_seen: integer,
@@ -40,8 +42,9 @@ defmodule Backend.Crawler.ApiCrawler do
@doc """
Check whether the instance at the given domain is of the type that this ApiCrawler implements.
Arguments are the instance domain and the nodeinfo results.
"""
@callback is_instance_type?(String.t()) :: boolean()
@callback is_instance_type?(String.t(), Nodeinfo.t()) :: boolean()
@doc """
Check whether the instance allows crawling according to its robots.txt or otherwise.
@@ -50,6 +53,7 @@ defmodule Backend.Crawler.ApiCrawler do
@doc """
Crawl the instance at the given domain.
Takes two arguments: the domain to crawl and the existing results (from nodeinfo).
"""
@callback crawl(String.t()) :: t()
@callback crawl(String.t(), Nodeinfo.t()) :: t()
end
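
For reference, a minimal sketch of a crawler module implementing the updated two-argument callbacks (the module name and the :examplefediserver type are hypothetical; a real implementation would also check robots.txt in allows_crawling?/1):

defmodule Backend.Crawler.Crawlers.ExampleCrawler do
  alias Backend.Crawler.ApiCrawler

  @behaviour ApiCrawler

  # Nodeinfo has already identified the server software, so no extra request is needed here.
  @impl ApiCrawler
  def is_instance_type?(_domain, nodeinfo_result) do
    nodeinfo_result != nil and Map.get(nodeinfo_result, :instance_type) == :examplefediserver
  end

  @impl ApiCrawler
  def allows_crawling?(_domain), do: true

  # Merge whatever this API adds on top of the nodeinfo result, as the GnuSocial crawler does below.
  @impl ApiCrawler
  def crawl(_domain, nodeinfo_result) do
    Map.merge(nodeinfo_result, %{peers: [], interactions: %{}, statuses_seen: 0})
  end
end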


@@ -4,7 +4,7 @@ defmodule Backend.Crawler do
"""
alias __MODULE__
alias Backend.Crawler.Crawlers.{Mastodon, Misskey}
alias Backend.Crawler.Crawlers.{GnuSocial, Mastodon, Misskey, Nodeinfo}
alias Backend.Crawler.ApiCrawler
alias Backend.{Crawl, CrawlInteraction, Repo, Instance, InstancePeer}
import Ecto.Query
@@ -16,8 +16,8 @@ defmodule Backend.Crawler do
:domain,
# a list of ApiCrawlers that will be attempted
:api_crawlers,
:found_api?,
:allows_crawling?,
:found_api?,
:result,
:error
]
@@ -25,8 +25,8 @@ defmodule Backend.Crawler do
@type t() :: %__MODULE__{
domain: String.t(),
api_crawlers: [ApiCrawler.t()],
found_api?: boolean,
allows_crawling?: boolean,
found_api?: boolean,
result: ApiCrawler.t() | nil,
error: String.t() | nil
}
@@ -37,16 +37,18 @@ defmodule Backend.Crawler do
state = %Crawler{
domain: domain,
api_crawlers: [],
found_api?: false,
allows_crawling?: true,
found_api?: false,
result: nil,
error: nil
}
state
# register APICrawlers here
# These crawlers are run in the order they're registered. Nodeinfo should be the first one.
|> register(Nodeinfo)
|> register(Mastodon)
|> register(Misskey)
|> register(GnuSocial)
# go!
|> crawl()
|> save()
@@ -56,33 +58,47 @@ defmodule Backend.Crawler do
# Adds a new ApiCrawler that run/1 will check.
defp register(%Crawler{api_crawlers: crawlers} = state, api_crawler) do
Map.put(state, :api_crawlers, [api_crawler | crawlers])
Map.put(state, :api_crawlers, crawlers ++ [api_crawler])
end
# Recursive function to check whether `domain` has an API that the head of the api_crawlers list can read.
# If so, crawls it. If not, continues with the tail of the api_crawlers list.
defp crawl(%Crawler{api_crawlers: [], domain: domain} = state) do
Logger.debug("Found no compatible API for #{domain}")
Map.put(state, :found_api?, false)
state
end
defp crawl(%Crawler{domain: domain, api_crawlers: [curr | remaining_crawlers]} = state) do
if curr.is_instance_type?(domain) do
# Nodeinfo is distinct from other crawlers in that
# a) it should always be run first
# b) it passes the results on to the next crawlers (e.g. user_count)
defp crawl(%Crawler{api_crawlers: [Nodeinfo | remaining_crawlers], domain: domain} = state) do
with true <- Nodeinfo.allows_crawling?(domain), {:ok, nodeinfo} <- Nodeinfo.crawl(domain) do
Logger.debug("Found nodeinfo for #{domain}.")
result = Map.merge(nodeinfo, %{peers: [], interactions: %{}, statuses_seen: 0})
crawl(%Crawler{state | result: result, found_api?: true, api_crawlers: remaining_crawlers})
else
_ ->
Logger.debug("Did not find nodeinfo for #{domain}.")
crawl(%Crawler{state | api_crawlers: remaining_crawlers})
end
end
defp crawl(
%Crawler{domain: domain, result: result, api_crawlers: [curr | remaining_crawlers]} =
state
) do
if curr.is_instance_type?(domain, result) do
Logger.debug("Found #{curr} instance")
state = Map.put(state, :found_api?, true)
if curr.allows_crawling?(domain) do
try do
%Crawler{state | result: curr.crawl(domain), api_crawlers: []}
%Crawler{state | result: curr.crawl(domain, result), found_api?: true}
rescue
e in HTTPoison.Error ->
Map.put(state, :error, "HTTPoison error: " <> HTTPoison.Error.message(e))
e in Jason.DecodeError ->
Map.put(state, :error, "Jason DecodeError: " <> Jason.DecodeError.message(e))
e in _ ->
Map.put(state, :error, "Unknown error: " <> inspect(e))
end
else
Logger.debug("#{domain} does not allow crawling.")
@@ -99,9 +115,9 @@ defmodule Backend.Crawler do
defp save(%Crawler{
domain: domain,
result: result,
found_api?: true,
error: nil,
allows_crawling?: true
allows_crawling?: true,
found_api?: true
}) do
now = get_now()
@@ -240,7 +256,7 @@ defmodule Backend.Crawler do
cond do
not allows_crawling -> "robots.txt"
error == nil -> "no api found"
true -> "unknown error"
true -> error
end
# The "+1" is this error!
@@ -250,25 +266,25 @@ defmodule Backend.Crawler do
|> Map.get(:crawl_error_count)
|> Kernel.+(1)
# The crawl interval grows exponentially at first but never goes above 72 hours
# The crawl interval grows exponentially at first but never goes above 24 hours
crawl_interval_mins =
min(get_config(:crawl_interval_mins) * round(:math.pow(2, error_count)), 4320)
min(get_config(:crawl_interval_mins) * round(:math.pow(2, error_count)), 1440)
next_crawl = NaiveDateTime.add(now, crawl_interval_mins * 60, :second)
Repo.transaction(fn ->
Repo.insert!(
%Instance{
domain: domain,
base_domain: get_base_domain(domain),
crawl_error: error,
crawl_error_count: error_count,
next_crawl: next_crawl
},
on_conflict: {:replace, [:base_domain, :crawl_error, :crawl_error_count, :next_crawl]},
conflict_target: :domain
)
end)
Repo.insert!(
%Instance{
domain: domain,
base_domain: get_base_domain(domain),
crawl_error: error,
crawl_error_count: error_count,
next_crawl: next_crawl,
updated_at: now
},
on_conflict:
{:replace, [:base_domain, :crawl_error, :crawl_error_count, :next_crawl, :updated_at]},
conflict_target: :domain
)
Appsignal.increment_counter("crawler.failure", 1)
end
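
To make the backoff above concrete: assuming a hypothetical base crawl_interval_mins of 30, the retry delay doubles with each consecutive crawl error and is capped at 1440 minutes (24 hours):

base = 30

for error_count <- 0..6 do
  # error_count 0..6 -> 30, 60, 120, 240, 480, 960, 1440 minutes
  min(base * round(:math.pow(2, error_count)), 1440)
end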


@@ -0,0 +1,178 @@
defmodule Backend.Crawler.Crawlers.GnuSocial do
alias Backend.Crawler.ApiCrawler
alias Backend.Crawler.Crawlers.Nodeinfo
import Backend.Crawler.Util
import Backend.Util
require Logger
@behaviour ApiCrawler
@impl ApiCrawler
def is_instance_type?(_domain, nodeinfo_result) do
nodeinfo_result != nil and Map.get(nodeinfo_result, :instance_type) == :gnusocial
end
@impl ApiCrawler
def allows_crawling?(domain) do
[
"/api/statuses/public_timeline.json"
]
|> Enum.map(fn endpoint -> "https://#{domain}#{endpoint}" end)
|> urls_are_crawlable?()
end
@impl ApiCrawler
def crawl(domain, nodeinfo_result) do
if nodeinfo_result |> Map.get(:user_count) |> is_above_user_threshold?() do
crawl_large_instance(domain, nodeinfo_result)
else
nodeinfo_result
end
end
@spec crawl_large_instance(String.t(), Nodeinfo.t()) :: ApiCrawler.t()
defp crawl_large_instance(domain, nodeinfo_result) do
status_datetime_threshold =
NaiveDateTime.utc_now()
|> NaiveDateTime.add(get_config(:status_age_limit_days) * 24 * 3600 * -1, :second)
# Don't get any statuses older than this
min_timestamp = max_datetime(get_last_crawl_timestamp(domain), status_datetime_threshold)
{interactions, statuses_seen} = get_interactions(domain, min_timestamp)
Map.merge(nodeinfo_result, %{
interactions: interactions,
statuses_seen: statuses_seen,
peers: []
})
end
@spec get_interactions(
String.t(),
NaiveDateTime.t(),
String.t() | nil,
ApiCrawler.instance_interactions(),
integer()
) :: {ApiCrawler.instance_interactions(), integer()}
defp get_interactions(
domain,
min_timestamp,
max_id \\ nil,
interactions \\ %{},
statuses_seen \\ 0
) do
endpoint = "https://#{domain}/api/statuses/public_timeline.json"
endpoint =
if max_id != nil do
endpoint <> "?max_id=#{max_id}"
else
endpoint
end
Logger.debug("Crawling #{endpoint}")
statuses = get_and_decode!(endpoint)
# Filter to statuses that are in the correct timeframe
filtered_statuses =
statuses
|> Enum.filter(fn s ->
s["created_at"]
|> parse_timestamp()
|> is_after?(min_timestamp)
end)
if length(filtered_statuses) > 0 do
# Filter down further to statuses that a) aren't faves and b) aren't from #nobot users
eligible_statuses =
filtered_statuses |> Enum.filter(fn s -> not is_fave?(s) and not has_nobot?(s) end)
# get statuses that are eligible (i.e. users don't have #nobot in their profile), have mentions, and are not faves
interactions =
eligible_statuses
|> statuses_to_interactions()
|> merge_count_maps(interactions)
statuses_seen =
eligible_statuses
|> Kernel.length()
|> Kernel.+(statuses_seen)
oldest_status = Enum.at(filtered_statuses, -1)
oldest_status_datetime =
oldest_status
|> Map.get("created_at")
|> parse_timestamp()
if NaiveDateTime.compare(oldest_status_datetime, min_timestamp) == :gt and
statuses_seen < get_config(:status_count_limit) and
length(filtered_statuses) == length(statuses) do
get_interactions(domain, min_timestamp, oldest_status["id"], interactions, statuses_seen)
else
{interactions, statuses_seen}
end
else
{interactions, statuses_seen}
end
end
@spec statuses_to_interactions(any()) :: ApiCrawler.instance_interactions()
defp statuses_to_interactions(statuses) do
statuses
|> Enum.filter(fn status -> is_mention?(status) end)
|> Enum.map(fn status -> extract_mentions_from_status(status) end)
|> Enum.reduce(%{}, fn map, acc ->
Map.merge(acc, map)
end)
end
# Checks whether the status contains one or more mentions
@spec is_mention?(any()) :: boolean()
defp is_mention?(%{"attentions" => []}) do
false
end
defp is_mention?(_status) do
true
end
@spec is_fave?(any()) :: boolean()
defp is_fave?(status) do
uri_elements = status |> Map.get("uri") |> String.split(":")
Enum.member?(uri_elements, "fave")
end
@spec has_nobot?(any()) :: boolean()
defp has_nobot?(status) do
case get_in(status, ["user", "description"]) do
nil ->
false
description ->
description
|> String.downcase()
|> String.contains?("nobot")
end
end
@spec extract_mentions_from_status(any()) :: ApiCrawler.instance_interactions()
defp extract_mentions_from_status(status) do
status["attentions"]
|> Enum.map(fn mention -> get_domain(mention["profileurl"]) end)
|> Enum.reduce(%{}, fn domain, acc ->
Map.update(acc, domain, 1, &(&1 + 1))
end)
end
# Parses the messed-up time format that GNU social uses
# Like seriously, it's 2019, why *wouldn't* you use iso8601?
@spec parse_timestamp(String.t()) :: NaiveDateTime.t()
defp parse_timestamp(timestamp) do
timestamp
|> Timex.parse!("{WDshort} {Mshort} {0D} {h24}:{0m}:{0s} {0Z} {YYYY}")
|> Timex.to_naive_datetime()
end
end
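
A usage sketch for parse_timestamp/1 above, assuming GNU Social returns Twitter-style timestamps such as the illustrative value below (requires the :timex dependency):

"Fri Aug 09 16:59:51 +0000 2019"
|> Timex.parse!("{WDshort} {Mshort} {0D} {h24}:{0m}:{0s} {0Z} {YYYY}")
|> Timex.to_naive_datetime()
# => ~N[2019-08-09 16:59:51]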


@@ -7,10 +7,15 @@ defmodule Backend.Crawler.Crawlers.Mastodon do
@behaviour ApiCrawler
@impl ApiCrawler
def is_instance_type?(domain) do
case get("https://#{domain}/api/v1/instance") do
{:ok, response} -> if is_http_200?(response), do: has_title?(response.body), else: false
{:error, _error} -> false
def is_instance_type?(domain, result) do
# We might already know that this is a Pleroma instance from nodeinfo
if result != nil and Map.get(result, :instance_type) == :pleroma do
true
else
case get_and_decode("https://#{domain}/api/v1/instance") do
{:ok, %{"title" => _title}} -> true
_other -> false
end
end
end
@@ -26,8 +31,8 @@ defmodule Backend.Crawler.Crawlers.Mastodon do
end
@impl ApiCrawler
def crawl(domain) do
instance = Jason.decode!(get!("https://#{domain}/api/v1/instance").body)
def crawl(domain, _current_result) do
instance = get_and_decode!("https://#{domain}/api/v1/instance")
user_count = get_in(instance, ["stats", "user_count"])
if is_above_user_threshold?(user_count) or has_opted_in?(domain) do
@@ -51,12 +56,7 @@ defmodule Backend.Crawler.Crawlers.Mastodon do
@spec crawl_large_instance(String.t(), any()) :: ApiCrawler.t()
defp crawl_large_instance(domain, instance) do
# servers may not publish peers
peers =
case get("https://#{domain}/api/v1/instance/peers") do
{:ok, response} -> if is_http_200?(response), do: Jason.decode!(response.body), else: []
{:error, _error} -> []
end
peers = get_peers(domain)
Logger.debug("Found #{length(peers)} peers.")
@@ -124,15 +124,15 @@ defmodule Backend.Crawler.Crawlers.Mastodon do
Logger.debug("Crawling #{endpoint}")
statuses =
endpoint
|> get!()
|> Map.get(:body)
|> Jason.decode!()
statuses = get_and_decode!(endpoint)
filtered_statuses =
statuses
|> Enum.filter(fn s -> is_after?(s["created_at"], min_timestamp) end)
|> Enum.filter(fn s ->
s["created_at"]
|> NaiveDateTime.from_iso8601!()
|> is_after?(min_timestamp)
end)
if length(filtered_statuses) > 0 do
# get statuses that are eligible (i.e. users don't have #nobot in their profile) and have mentions
@@ -166,12 +166,11 @@ defmodule Backend.Crawler.Crawlers.Mastodon do
end
end
# To check if the endpoint works as expected
@spec has_title?(String.t()) :: boolean
defp has_title?(body) do
case Jason.decode(body) do
{:ok, decoded} -> Map.has_key?(decoded, "title")
{:error, _error} -> false
defp get_peers(domain) do
# servers may not publish peers
case get_and_decode("https://#{domain}/api/v1/instance/peers") do
{:ok, peers} -> peers
{:error, _err} -> []
end
end
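
For context, a truncated sketch of the responses the Mastodon crawler relies on; the values are made up and only keys referenced in the hunks above are shown:

# GET /api/v1/instance
%{
  "title" => "Example Instance",
  "stats" => %{"user_count" => 1200, "status_count" => 340_000}
}

# GET /api/v1/instance/peers (servers may not publish this)
["pleroma.example", "misskey.example"]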


@@ -7,10 +7,15 @@ defmodule Backend.Crawler.Crawlers.Misskey do
require Logger
@impl ApiCrawler
def is_instance_type?(domain) do
case get_version_and_description(domain) do
{:ok, _} -> true
{:error, _} -> false
def is_instance_type?(domain, result) do
# We may already know that this is a Misskey instance from nodeinfo
if result != nil and Map.get(result, :instance_type) == :misskey do
true
else
case get_version_and_description(domain) do
{:ok, _} -> true
{:error, _} -> false
end
end
end
@@ -27,11 +32,9 @@ defmodule Backend.Crawler.Crawlers.Misskey do
end
@impl ApiCrawler
def crawl(domain) do
with {:ok, %{status_code: 200, body: stats_body}} <- post("https://#{domain}/api/stats") do
%{"originalUsersCount" => user_count, "originalNotesCount" => status_count} =
Jason.decode!(stats_body)
def crawl(domain, _result) do
with {:ok, %{"originalUsersCount" => user_count, "originalNotesCount" => status_count}} <-
post_and_decode("https://#{domain}/api/stats") do
if is_above_user_threshold?(user_count) or has_opted_in?(domain) do
crawl_large_instance(domain, user_count, status_count)
else
@@ -107,15 +110,15 @@ defmodule Backend.Crawler.Crawlers.Misskey do
Logger.debug("Crawling #{endpoint} with untilId=#{until_id}")
statuses =
endpoint
|> post!(Jason.encode!(params))
|> Map.get(:body)
|> Jason.decode!()
statuses = post_and_decode!(endpoint, Jason.encode!(params))
filtered_statuses =
statuses
|> Enum.filter(fn s -> is_after?(s["createdAt"], min_timestamp) end)
|> Enum.filter(fn s ->
s["createdAt"]
|> NaiveDateTime.from_iso8601!()
|> is_after?(min_timestamp)
end)
if length(filtered_statuses) > 0 do
# get statuses that are eligible (i.e. users don't have #nobot in their profile) and have mentions
@@ -151,35 +154,22 @@ defmodule Backend.Crawler.Crawlers.Misskey do
end
@spec get_version_and_description(String.t()) ::
{:ok, {String.t(), String.t()}} | {:error, String.t()}
{:ok, {String.t(), String.t()}} | {:error, Jason.DecodeError.t() | HTTPoison.Error.t()}
defp get_version_and_description(domain) do
case post("https://#{domain}/api/meta") do
{:ok, %{status_code: 200, body: body}} ->
case Jason.decode(body) do
{:ok, decoded} ->
{:ok, {Map.get(decoded, "version"), Map.get(decoded, "description")}}
case post_and_decode("https://#{domain}/api/meta") do
{:ok, %{"version" => version, "description" => description}} ->
{:ok, {version, description}}
{:error, _error} ->
{:error, "invalid response"}
end
_ ->
{:error, "unsuccesful request"}
{:error, err} ->
{:error, err}
end
end
@spec get_peers(String.t()) :: {:ok, [String.t()]} | {:error, Jason.DecodeError.t()}
defp get_peers(domain) do
case get("https://#{domain}/api/v1/instance/peers") do
{:ok, response} ->
with %{status_code: 200, body: body} <- response do
Jason.decode(body)
else
_ -> {:ok, []}
end
{:error, _} ->
{:ok, []}
case get_and_decode("https://#{domain}/api/v1/instance/peers") do
{:ok, peers} -> {:ok, peers}
{:error, _} -> {:ok, []}
end
end
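
Likewise, a sketch of the Misskey responses pattern-matched above, with illustrative values:

# POST /api/stats
%{"originalUsersCount" => 850, "originalNotesCount" => 120_000}

# POST /api/meta
%{"version" => "11.0.0", "description" => "An example Misskey server"}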


@@ -0,0 +1,117 @@
defmodule Backend.Crawler.Crawlers.Nodeinfo do
alias Backend.Crawler.ApiCrawler
require Logger
import Backend.Util
import Backend.Crawler.Util
@moduledoc """
This module is slightly different from the other crawlers.
It doesn't implement the ApiCrawler spec because it isn't run as a self-contained crawler.
Instead, it's run before all the other crawlers.
This is to get the user count. Some servers don't publish this in other places (e.g. GNU Social, PeerTube) so we need
nodeinfo to know whether it's a personal instance or not.
"""
defstruct [
:description,
:user_count,
:status_count,
:instance_type,
:version
]
@type t() :: %__MODULE__{
description: String.t(),
user_count: integer,
status_count: integer,
instance_type: ApiCrawler.instance_type(),
version: String.t()
}
@spec allows_crawling?(String.t()) :: boolean()
def allows_crawling?(domain) do
[
".well-known/nodeinfo"
]
|> Enum.map(fn endpoint -> "https://#{domain}#{endpoint}" end)
|> urls_are_crawlable?()
end
@spec crawl(String.t()) :: {:ok, t()} | {:error, nil}
def crawl(domain) do
with {:ok, nodeinfo_url} <- get_nodeinfo_url(domain),
{:ok, nodeinfo} <- get_nodeinfo(nodeinfo_url) do
{:ok, nodeinfo}
else
_other -> {:error, nil}
end
end
@spec get_nodeinfo_url(String.t()) ::
{:ok, String.t()} | {:error, Jason.DecodeError.t() | HTTPoison.Error.t()}
defp get_nodeinfo_url(domain) do
case get_and_decode("https://#{domain}/.well-known/nodeinfo") do
{:ok, response} -> {:ok, process_nodeinfo_url(response)}
{:error, err} -> {:error, err}
end
end
@spec process_nodeinfo_url(any()) :: String.t()
defp process_nodeinfo_url(response) do
response
|> Map.get("links")
|> Enum.filter(fn %{"rel" => rel} -> is_compatible_nodeinfo_version?(rel) end)
|> Kernel.hd()
|> Map.get("href")
end
@spec get_nodeinfo(String.t()) ::
{:ok, t()} | {:error, Jason.DecodeError.t() | HTTPoison.Error.t()}
defp get_nodeinfo(nodeinfo_url) do
case get_and_decode(nodeinfo_url) do
{:ok, nodeinfo} -> {:ok, process_nodeinfo(nodeinfo)}
{:error, err} -> {:error, err}
end
end
@spec process_nodeinfo(any()) :: t()
defp process_nodeinfo(nodeinfo) do
user_count = get_in(nodeinfo, ["usage", "users", "total"])
if is_above_user_threshold?(user_count) do
# Both of these are used, depending on the server implementation
description =
[
get_in(nodeinfo, ["metadata", "description"]),
get_in(nodeinfo, ["metadata", "nodeDescription"])
]
|> Enum.filter(fn d -> d != nil end)
|> Enum.at(0)
type = nodeinfo |> get_in(["software", "name"]) |> String.downcase() |> String.to_atom()
%__MODULE__{
description: description,
user_count: user_count,
status_count: get_in(nodeinfo, ["usage", "localPosts"]),
instance_type: type,
version: get_in(nodeinfo, ["software", "version"])
}
else
%{
description: nil,
user_count: user_count,
status_count: nil,
instance_type: nil,
version: nil
}
end
end
@spec is_compatible_nodeinfo_version?(String.t()) :: boolean()
defp is_compatible_nodeinfo_version?(schema_url) do
version = String.slice(schema_url, (String.length(schema_url) - 3)..-1)
Enum.member?(["1.0", "1.1", "2.0"], version)
end
end
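
An illustrative .well-known/nodeinfo discovery document for the code above (the domain and href are hypothetical; the rel URLs follow the nodeinfo schema convention):

%{
  "links" => [
    %{
      "rel" => "http://nodeinfo.diaspora.software/ns/schema/2.0",
      "href" => "https://example.social/nodeinfo/2.0"
    }
  ]
}
# process_nodeinfo_url/1 picks the first link whose "rel" ends in a supported
# version ("1.0", "1.1" or "2.0"), and get_nodeinfo/1 then fetches its "href".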


@@ -54,7 +54,7 @@ defmodule Backend.Crawler.StaleInstanceManager do
stale_domains =
Instance
|> select([i], i.domain)
|> where([i], i.next_crawl < ^now)
|> where([i], i.next_crawl < ^now and not i.opt_out)
|> Repo.all()
|> MapSet.new()


@@ -8,27 +8,19 @@ defmodule Backend.Crawler.Util do
# (e.g. https://mastodon.social/@demouser or https://pleroma.site/users/demouser)
@spec get_domain(String.t()) :: String.t()
def get_domain(url) do
String.slice(url, 8..-1)
|> String.split("/")
|> Enum.at(0)
[_match, domain] = Regex.run(~r/https?:\/\/([\w.-]+)\/.*/, url)
domain
end
@spec is_http_200?(HTTPoison.Response.t()) :: boolean
def is_http_200?(%{status_code: 200}) do
true
end
def is_http_200?(_) do
false
end
@spec is_after?(String.t(), NaiveDateTime.t() | nil) :: boolean()
@doc """
Returns true if the first argument is after the second.
"""
@spec is_after?(NaiveDateTime.t(), NaiveDateTime.t() | nil) :: boolean()
def is_after?(timestamp, threshold) do
if threshold == nil do
true
else
timestamp
|> NaiveDateTime.from_iso8601!()
# :second is the granularity used in the database
|> NaiveDateTime.truncate(:second)
|> NaiveDateTime.compare(threshold)
@@ -36,49 +28,6 @@ defmodule Backend.Crawler.Util do
end
end
def get(url) do
# TODO: add version number to user agent?
HTTPoison.get(url, [{"User-Agent", get_config(:user_agent)}],
hackney: [pool: :crawler],
recv_timeout: 15000,
timeout: 15000
)
end
@spec get!(binary) :: %{
:__struct__ => HTTPoison.AsyncResponse | HTTPoison.Response,
optional(:body) => any,
optional(:headers) => [any],
optional(:id) => reference,
optional(:request) => HTTPoison.Request.t(),
optional(:request_url) => any,
optional(:status_code) => integer
}
def get!(url) do
# TODO: add version number to user agent?
HTTPoison.get!(url, [{"User-Agent", get_config(:user_agent)}],
hackney: [pool: :crawler],
recv_timeout: 15000,
timeout: 15000
)
end
def post(url, body \\ "") do
HTTPoison.post(url, body, [{"User-Agent", get_config(:user_agent)}],
hackney: [pool: :crawler],
recv_timeout: 15000,
timeout: 15000
)
end
def post!(url, body \\ "") do
HTTPoison.post!(url, body, [{"User-Agent", get_config(:user_agent)}],
hackney: [pool: :crawler],
recv_timeout: 15000,
timeout: 15000
)
end
@spec urls_are_crawlable?([String.t()]) :: boolean()
def urls_are_crawlable?(urls) do
user_agent = get_config(:user_agent)


@@ -145,4 +145,54 @@ defmodule Backend.Util do
def convert_keys_to_atoms(map) do
map |> Map.new(fn {k, v} -> {String.to_atom(k), v} end)
end
@doc """
Gets and decodes a HTTP response.
"""
@spec get_and_decode(String.t()) ::
{:ok, any()} | {:error, Jason.DecodeError.t() | HTTPoison.Error.t()}
def get_and_decode(url) do
case HTTPoison.get(url, [{"User-Agent", get_config(:user_agent)}],
hackney: [pool: :crawler],
recv_timeout: 15000,
timeout: 15000
) do
{:ok, %{status_code: 200, body: body}} -> Jason.decode(body)
{:ok, _} -> {:error, %HTTPoison.Error{reason: "Non-200 response"}}
{:error, err} -> {:error, err}
end
end
@spec get_and_decode!(String.t()) :: any()
def get_and_decode!(url) do
case get_and_decode(url) do
{:ok, decoded} -> decoded
{:error, error} -> raise error
end
end
@doc """
POSTS to a HTTP endpoint and decodes the JSON response.
"""
@spec post_and_decode(String.t(), String.t()) ::
{:ok, any()} | {:error, Jason.DecodeError.t() | HTTPoison.Error.t()}
def post_and_decode(url, body \\ "") do
case HTTPoison.post(url, body, [{"User-Agent", get_config(:user_agent)}],
hackney: [pool: :crawler],
recv_timeout: 15000,
timeout: 15000
) do
{:ok, %{status_code: 200, body: response_body}} -> Jason.decode(response_body)
{:ok, _} -> {:error, %HTTPoison.Error{reason: "Non-200 response"}}
{:error, err} -> {:error, err}
end
end
@spec post_and_decode!(String.t(), String.t()) :: any()
def post_and_decode!(url, body \\ "") do
case post_and_decode(url, body) do
{:ok, decoded} -> decoded
{:error, error} -> raise error
end
end
end
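
A usage sketch for the new helpers (the URL is hypothetical); the bang variants raise the same errors instead of returning tuples:

case Backend.Util.get_and_decode("https://example.social/api/v1/instance") do
  {:ok, decoded} -> {:ok, Map.get(decoded, "title")}
  {:error, %HTTPoison.Error{} = err} -> {:error, err}
  {:error, %Jason.DecodeError{} = err} -> {:error, err}
end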


@@ -14,10 +14,7 @@ defmodule BackendWeb.AdminLoginController do
# TODO: this assumes mastodon/pleroma API
cleaned_domain = clean_domain(domain)
instance_data =
HTTPoison.get!("https://#{cleaned_domain}/api/v1/instance")
|> Map.get(:body)
|> Jason.decode!()
instance_data = get_and_decode!("https://#{cleaned_domain}/api/v1/instance")
render(conn, "show.json", instance_data: instance_data, cleaned_domain: cleaned_domain)
end
@@ -25,10 +22,7 @@ defmodule BackendWeb.AdminLoginController do
def create(conn, %{"domain" => domain, "type" => type}) do
cleaned_domain = clean_domain(domain)
instance_data =
HTTPoison.get!("https://#{cleaned_domain}/api/v1/instance")
|> Map.get(:body)
|> Jason.decode!()
instance_data = get_and_decode!("https://#{cleaned_domain}/api/v1/instance")
error =
cond do


@@ -3,7 +3,7 @@ import { IconNames } from "@blueprintjs/icons";
import React from "react";
import { QUALITATIVE_COLOR_SCHEME } from "../../constants";
import { typeColorScheme } from "../../types";
import { capitalize } from "../../util";
import { getTypeDisplayString } from "../../util";
interface IInstanceTypeProps {
type: string;
@@ -15,7 +15,7 @@
*/
const InstanceType: React.FC<IInstanceTypeProps> = ({ type, colorAfterName }) => {
const idx = typeColorScheme.values.indexOf(type);
const name = " " + capitalize(type);
const name = " " + getTypeDisplayString(type);
return (
<>
{!!colorAfterName && name}


@@ -4,7 +4,7 @@ import React, { MouseEvent } from "react";
import styled from "styled-components";
import { INSTANCE_TYPES } from "../../constants";
import { getSearchFilterDisplayValue, ISearchFilter } from "../../searchFilters";
import { capitalize } from "../../util";
import { getTypeDisplayString } from "../../util";
const SearchFilterContainer = styled.div`
margin: 10px 0 0 0;
@@ -30,7 +30,7 @@ const SearchFilters: React.FC<ISearchFiltersProps> = ({ selectedFilters, selectF
const handleSelectInstanceType = (e: MouseEvent<HTMLElement>) => {
const field = "type";
const relation = "eq";
const value = e.currentTarget.innerText.toLowerCase();
const value = e.currentTarget.innerText.toLowerCase().replace(" ", "");
const filter: ISearchFilter = {
displayValue: getSearchFilterDisplayValue(field, relation, value),
field,
@@ -43,7 +43,7 @@ const SearchFilters: React.FC<ISearchFiltersProps> = ({ selectedFilters, selectF
<Menu>
<MenuItem icon={IconNames.SYMBOL_CIRCLE} text="Instance type" disabled={hasInstanceTypeFilter}>
{INSTANCE_TYPES.map(t => (
<MenuItem key={t} text={capitalize(t)} onClick={handleSelectInstanceType} />
<MenuItem key={t} text={getTypeDisplayString(t)} onClick={handleSelectInstanceType} />
))}
</MenuItem>
</Menu>


@@ -40,4 +40,4 @@ export interface IInstanceDomainPath {
}
// We could also extract the values from the server response, but this would slow things down...
export const INSTANCE_TYPES = ["mastodon", "gab", "pleroma", "misskey"];
export const INSTANCE_TYPES = ["mastodon", "gab", "pleroma", "misskey", "gnusocial"];


@@ -68,3 +68,13 @@ export const getBuckets = (min: number, max: number, steps: number, exponential:
return range(min, max, bucketSize);
}
};
const typeToDisplay = {
gnusocial: "GNU Social"
};
export const getTypeDisplayString = (key: string) => {
if (key in typeToDisplay) {
return typeToDisplay[key];
}
return capitalize(key);
};