parent 9a0bbbb7d9
commit 3320e050c8

@@ -11,10 +11,12 @@ defmodule Backend.Crawler.ApiCrawler do
   * Make sure to check the most recent crawl of the instance so you don't re-crawl old statuses
   """
 
+  alias Backend.Crawler.Crawlers.Nodeinfo
+
   # {domain_mentioned, count}
   @type instance_interactions :: %{String.t() => integer}
 
-  @type instance_type :: :mastodon | :pleroma | :gab | :misskey
+  @type instance_type :: :mastodon | :pleroma | :gab | :misskey | :gnusocial
 
   defstruct [
     :version,
@@ -30,8 +32,8 @@ defmodule Backend.Crawler.ApiCrawler do
   @type t() :: %__MODULE__{
           version: String.t(),
           description: String.t(),
-          user_count: integer,
-          status_count: integer,
+          user_count: integer | nil,
+          status_count: integer | nil,
           peers: [String.t()],
           interactions: instance_interactions,
           statuses_seen: integer,
@@ -40,8 +42,9 @@ defmodule Backend.Crawler.ApiCrawler do
 
   @doc """
   Check whether the instance at the given domain is of the type that this ApiCrawler implements.
+  Arguments are the instance domain and the nodeinfo results.
   """
-  @callback is_instance_type?(String.t()) :: boolean()
+  @callback is_instance_type?(String.t(), Nodeinfo.t()) :: boolean()
 
   @doc """
   Check whether the instance allows crawling according to its robots.txt or otherwise.
@@ -50,6 +53,7 @@ defmodule Backend.Crawler.ApiCrawler do
 
   @doc """
   Crawl the instance at the given domain.
+  Takes two arguments: the domain to crawl and the existing results (from nodeinfo).
   """
-  @callback crawl(String.t()) :: t()
+  @callback crawl(String.t(), Nodeinfo.t()) :: t()
 end
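For orientation, a minimal module satisfying the updated behaviour could look like the sketch below. This is a hypothetical `Dummy` crawler, not part of this commit; `:dummy` is not a real instance_type and is only there to show the new two-argument callback shapes.

defmodule Backend.Crawler.Crawlers.Dummy do
  # Hypothetical example only: shows the shape of the updated callbacks.
  alias Backend.Crawler.ApiCrawler

  @behaviour ApiCrawler

  @impl ApiCrawler
  def is_instance_type?(_domain, nodeinfo_result) do
    # The nodeinfo result (or nil) is now passed to every crawler.
    nodeinfo_result != nil and Map.get(nodeinfo_result, :instance_type) == :dummy
  end

  @impl ApiCrawler
  def allows_crawling?(_domain), do: true

  @impl ApiCrawler
  def crawl(_domain, nodeinfo_result) do
    # Merge whatever nodeinfo already found with this crawler's own results.
    Map.merge(nodeinfo_result, %{peers: [], interactions: %{}, statuses_seen: 0})
  end
end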
@@ -4,7 +4,7 @@ defmodule Backend.Crawler do
   """
 
   alias __MODULE__
-  alias Backend.Crawler.Crawlers.{Mastodon, Misskey}
+  alias Backend.Crawler.Crawlers.{GnuSocial, Mastodon, Misskey, Nodeinfo}
   alias Backend.Crawler.ApiCrawler
   alias Backend.{Crawl, CrawlInteraction, Repo, Instance, InstancePeer}
   import Ecto.Query
@@ -16,8 +16,8 @@ defmodule Backend.Crawler do
     :domain,
     # a list of ApiCrawlers that will be attempted
     :api_crawlers,
-    :found_api?,
     :allows_crawling?,
+    :found_api?,
     :result,
     :error
   ]
@@ -25,8 +25,8 @@ defmodule Backend.Crawler do
   @type t() :: %__MODULE__{
           domain: String.t(),
           api_crawlers: [ApiCrawler.t()],
-          found_api?: boolean,
           allows_crawling?: boolean,
+          found_api?: boolean,
           result: ApiCrawler.t() | nil,
           error: String.t() | nil
         }
@@ -37,16 +37,18 @@ defmodule Backend.Crawler do
     state = %Crawler{
       domain: domain,
       api_crawlers: [],
-      found_api?: false,
       allows_crawling?: true,
+      found_api?: false,
       result: nil,
       error: nil
     }
 
     state
-    # register APICrawlers here
+    # These crawlers are run in the order they're registered. Nodeinfo should be the first one.
+    |> register(Nodeinfo)
     |> register(Mastodon)
     |> register(Misskey)
+    |> register(GnuSocial)
     # go!
     |> crawl()
     |> save()
@@ -56,33 +58,47 @@ defmodule Backend.Crawler do
 
   # Adds a new ApiCrawler that run/1 will check.
   defp register(%Crawler{api_crawlers: crawlers} = state, api_crawler) do
-    Map.put(state, :api_crawlers, [api_crawler | crawlers])
+    Map.put(state, :api_crawlers, crawlers ++ [api_crawler])
   end
 
   # Recursive function to check whether `domain` has an API that the head of the api_crawlers list can read.
   # If so, crawls it. If not, continues with the tail of the api_crawlers list.
   defp crawl(%Crawler{api_crawlers: [], domain: domain} = state) do
     Logger.debug("Found no compatible API for #{domain}")
-    Map.put(state, :found_api?, false)
+    state
   end
 
-  defp crawl(%Crawler{domain: domain, api_crawlers: [curr | remaining_crawlers]} = state) do
-    if curr.is_instance_type?(domain) do
+  # Nodeinfo is distinct from other crawlers in that
+  # a) it should always be run first
+  # b) it passes the results on to the next crawlers (e.g. user_count)
+  defp crawl(%Crawler{api_crawlers: [Nodeinfo | remaining_crawlers], domain: domain} = state) do
+    with true <- Nodeinfo.allows_crawling?(domain), {:ok, nodeinfo} <- Nodeinfo.crawl(domain) do
+      Logger.debug("Found nodeinfo for #{domain}.")
+      result = Map.merge(nodeinfo, %{peers: [], interactions: %{}, statuses_seen: 0})
+      crawl(%Crawler{state | result: result, found_api?: true, api_crawlers: remaining_crawlers})
+    else
+      _ ->
+        Logger.debug("Did not find nodeinfo for #{domain}.")
+        crawl(%Crawler{state | api_crawlers: remaining_crawlers})
+    end
+  end
+
+  defp crawl(
+         %Crawler{domain: domain, result: result, api_crawlers: [curr | remaining_crawlers]} =
+           state
+       ) do
+    if curr.is_instance_type?(domain, result) do
       Logger.debug("Found #{curr} instance")
-      state = Map.put(state, :found_api?, true)
 
       if curr.allows_crawling?(domain) do
        try do
-          %Crawler{state | result: curr.crawl(domain), api_crawlers: []}
+          %Crawler{state | result: curr.crawl(domain, result), found_api?: true}
        rescue
          e in HTTPoison.Error ->
            Map.put(state, :error, "HTTPoison error: " <> HTTPoison.Error.message(e))
 
          e in Jason.DecodeError ->
            Map.put(state, :error, "Jason DecodeError: " <> Jason.DecodeError.message(e))
-
-          e in _ ->
-            Map.put(state, :error, "Unknown error: " <> inspect(e))
        end
      else
        Logger.debug("#{domain} does not allow crawling.")
@@ -99,9 +115,9 @@ defmodule Backend.Crawler do
   defp save(%Crawler{
          domain: domain,
          result: result,
-         found_api?: true,
          error: nil,
-         allows_crawling?: true
+         allows_crawling?: true,
+         found_api?: true
        }) do
     now = get_now()
 
@@ -240,7 +256,7 @@ defmodule Backend.Crawler do
       cond do
         not allows_crawling -> "robots.txt"
         error == nil -> "no api found"
-        true -> "unknown error"
+        true -> error
       end
 
     # The "+1" is this error!
@@ -250,25 +266,25 @@ defmodule Backend.Crawler do
       |> Map.get(:crawl_error_count)
      |> Kernel.+(1)
 
-    # The crawl interval grows exponentially at first but never goes above 72 hours
+    # The crawl interval grows exponentially at first but never goes above 24 hours
    crawl_interval_mins =
-      min(get_config(:crawl_interval_mins) * round(:math.pow(2, error_count)), 4320)
+      min(get_config(:crawl_interval_mins) * round(:math.pow(2, error_count)), 1440)
 
    next_crawl = NaiveDateTime.add(now, crawl_interval_mins * 60, :second)
 
-    Repo.transaction(fn ->
    Repo.insert!(
      %Instance{
        domain: domain,
        base_domain: get_base_domain(domain),
        crawl_error: error,
        crawl_error_count: error_count,
-        next_crawl: next_crawl
+        next_crawl: next_crawl,
+        updated_at: now
      },
-      on_conflict: {:replace, [:base_domain, :crawl_error, :crawl_error_count, :next_crawl]},
+      on_conflict:
+        {:replace, [:base_domain, :crawl_error, :crawl_error_count, :next_crawl, :updated_at]},
      conflict_target: :domain
    )
-    end)
 
    Appsignal.increment_counter("crawler.failure", 1)
  end
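A note on the register/2 change above: the pipeline now relies on registration order, because Nodeinfo must run before every other crawler and pass its results along. A small sketch (not from the diff) of why appending rather than prepending matters:

# Why register/2 now appends instead of prepending:
crawlers = []
crawlers = crawlers ++ [Nodeinfo]   # => [Nodeinfo]
crawlers = crawlers ++ [Mastodon]   # => [Nodeinfo, Mastodon]  (Nodeinfo stays first)
# With the old [api_crawler | crawlers] the list would have been
# [Mastodon, Nodeinfo], i.e. the registration order reversed.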
backend/lib/backend/crawler/crawlers/gnu_social.ex (new file, 178 lines)
@@ -0,0 +1,178 @@
+defmodule Backend.Crawler.Crawlers.GnuSocial do
+  alias Backend.Crawler.ApiCrawler
+  alias Backend.Crawler.Crawlers.Nodeinfo
+  import Backend.Crawler.Util
+  import Backend.Util
+  require Logger
+
+  @behaviour ApiCrawler
+
+  @impl ApiCrawler
+  def is_instance_type?(_domain, nodeinfo_result) do
+    nodeinfo_result != nil and Map.get(nodeinfo_result, :instance_type) == :gnusocial
+  end
+
+  @impl ApiCrawler
+  def allows_crawling?(domain) do
+    [
+      "/api/statuses/public_timeline.json"
+    ]
+    |> Enum.map(fn endpoint -> "https://#{domain}#{endpoint}" end)
+    |> urls_are_crawlable?()
+  end
+
+  @impl ApiCrawler
+  def crawl(domain, nodeinfo_result) do
+    if nodeinfo_result |> Map.get(:user_count) |> is_above_user_threshold?() do
+      crawl_large_instance(domain, nodeinfo_result)
+    else
+      nodeinfo_result
+    end
+  end
+
+  @spec crawl_large_instance(String.t(), Nodeinfo.t()) :: ApiCrawler.t()
+  defp crawl_large_instance(domain, nodeinfo_result) do
+    status_datetime_threshold =
+      NaiveDateTime.utc_now()
+      |> NaiveDateTime.add(get_config(:status_age_limit_days) * 24 * 3600 * -1, :second)
+
+    # Don't get any statuses older than this
+    min_timestamp = max_datetime(get_last_crawl_timestamp(domain), status_datetime_threshold)
+
+    {interactions, statuses_seen} = get_interactions(domain, min_timestamp)
+
+    Map.merge(nodeinfo_result, %{
+      interactions: interactions,
+      statuses_seen: statuses_seen,
+      peers: []
+    })
+  end
+
+  @spec get_interactions(
+          String.t(),
+          NaiveDateTime.t(),
+          String.t() | nil,
+          ApiCrawler.instance_interactions(),
+          integer()
+        ) :: {ApiCrawler.instance_interactions(), integer()}
+  defp get_interactions(
+         domain,
+         min_timestamp,
+         max_id \\ nil,
+         interactions \\ %{},
+         statuses_seen \\ 0
+       ) do
+    endpoint = "https://#{domain}/api/statuses/public_timeline.json"
+
+    endpoint =
+      if max_id != nil do
+        endpoint <> "?max_id=#{max_id}"
+      else
+        endpoint
+      end
+
+    Logger.debug("Crawling #{endpoint}")
+
+    statuses = get_and_decode!(endpoint)
+
+    # Filter to statuses that are in the correct timeframe
+    filtered_statuses =
+      statuses
+      |> Enum.filter(fn s ->
+        s["created_at"]
+        |> parse_timestamp()
+        |> is_after?(min_timestamp)
+      end)
+
+    if length(filtered_statuses) > 0 do
+      # Filter down further to statuses that a) aren't faves and b) aren't from #nobot users
+      eligible_statuses =
+        filtered_statuses |> Enum.filter(fn s -> not is_fave?(s) and not has_nobot?(s) end)
+
+      # get statuses that are eligible (i.e. users don't have #nobot in their profile), have mentions, and are not faves
+      interactions =
+        eligible_statuses
+        |> statuses_to_interactions()
+        |> merge_count_maps(interactions)
+
+      statuses_seen =
+        eligible_statuses
+        |> Kernel.length()
+        |> Kernel.+(statuses_seen)
+
+      oldest_status = Enum.at(filtered_statuses, -1)
+
+      oldest_status_datetime =
+        oldest_status
+        |> Map.get("created_at")
+        |> parse_timestamp()
+
+      if NaiveDateTime.compare(oldest_status_datetime, min_timestamp) == :gt and
+           statuses_seen < get_config(:status_count_limit) and
+           length(filtered_statuses) == length(statuses) do
+        get_interactions(domain, min_timestamp, oldest_status["id"], interactions, statuses_seen)
+      else
+        {interactions, statuses_seen}
+      end
+    else
+      {interactions, statuses_seen}
+    end
+  end
+
+  @spec statuses_to_interactions(any()) :: ApiCrawler.instance_interactions()
+  defp statuses_to_interactions(statuses) do
+    statuses
+    |> Enum.filter(fn status -> is_mention?(status) end)
+    |> Enum.map(fn status -> extract_mentions_from_status(status) end)
+    |> Enum.reduce(%{}, fn map, acc ->
+      Map.merge(acc, map)
+    end)
+  end
+
+  # Checks whether the status contains one or more mentions
+  @spec is_mention?(any()) :: boolean()
+  defp is_mention?(%{"attentions" => []}) do
+    false
+  end
+
+  defp is_mention?(_status) do
+    true
+  end
+
+  @spec is_fave?(any()) :: boolean()
+  defp is_fave?(status) do
+    uri_elements = status |> Map.get("uri") |> String.split(":")
+    Enum.member?(uri_elements, "fave")
+  end
+
+  @spec has_nobot?(any()) :: boolean()
+  defp has_nobot?(status) do
+    case get_in(status, ["user", "description"]) do
+      nil ->
+        false
+
+      description ->
+        description
+        |> String.downcase()
+        |> String.contains?("nobot")
+    end
+  end
+
+  @spec extract_mentions_from_status(any()) :: ApiCrawler.instance_interactions()
+  defp extract_mentions_from_status(status) do
+    status["attentions"]
+    |> Enum.map(fn mention -> get_domain(mention["profileurl"]) end)
+    |> Enum.reduce(%{}, fn domain, acc ->
+      Map.update(acc, domain, 1, &(&1 + 1))
+    end)
+  end
+
+  # Parses the messed-up time format that GNU social uses
+  # Like seriously, it's 2019, why *wouldn't* you use iso8601?
+  @spec parse_timestamp(String.t()) :: NaiveDateTime.t()
+  defp parse_timestamp(timestamp) do
+    timestamp
+    |> Timex.parse!("{WDshort} {Mshort} {0D} {h24}:{0m}:{0s} {0Z} {YYYY}")
+    |> Timex.to_naive_datetime()
+  end
+end
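To illustrate the Timex format string in parse_timestamp/1 above: GNU Social's public timeline returns Twitter-style created_at values. The sample value below is illustrative, not taken from the diff; the format string is the one used in the new module.

# Illustrative only: parsing a GNU Social-style timestamp with the same format string.
"Tue Jul 30 13:45:59 +0000 2019"
|> Timex.parse!("{WDshort} {Mshort} {0D} {h24}:{0m}:{0s} {0Z} {YYYY}")
|> Timex.to_naive_datetime()
# => roughly ~N[2019-07-30 13:45:59]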
@@ -7,10 +7,15 @@ defmodule Backend.Crawler.Crawlers.Mastodon do
   @behaviour ApiCrawler
 
   @impl ApiCrawler
-  def is_instance_type?(domain) do
-    case get("https://#{domain}/api/v1/instance") do
-      {:ok, response} -> if is_http_200?(response), do: has_title?(response.body), else: false
-      {:error, _error} -> false
+  def is_instance_type?(domain, result) do
+    # We might already know that this is a Pleroma instance from nodeinfo
+    if result != nil and Map.get(result, :instance_type) == :pleroma do
+      true
+    else
+      case get_and_decode("https://#{domain}/api/v1/instance") do
+        {:ok, %{"title" => _title}} -> true
+        _other -> false
+      end
     end
   end
 
@@ -26,8 +31,8 @@ defmodule Backend.Crawler.Crawlers.Mastodon do
   end
 
   @impl ApiCrawler
-  def crawl(domain) do
-    instance = Jason.decode!(get!("https://#{domain}/api/v1/instance").body)
+  def crawl(domain, _current_result) do
+    instance = get_and_decode!("https://#{domain}/api/v1/instance")
     user_count = get_in(instance, ["stats", "user_count"])
 
     if is_above_user_threshold?(user_count) or has_opted_in?(domain) do
@@ -51,12 +56,7 @@ defmodule Backend.Crawler.Crawlers.Mastodon do
 
   @spec crawl_large_instance(String.t(), any()) :: ApiCrawler.t()
   defp crawl_large_instance(domain, instance) do
-    # servers may not publish peers
-    peers =
-      case get("https://#{domain}/api/v1/instance/peers") do
-        {:ok, response} -> if is_http_200?(response), do: Jason.decode!(response.body), else: []
-        {:error, _error} -> []
-      end
+    peers = get_peers(domain)
 
     Logger.debug("Found #{length(peers)} peers.")
 
@@ -124,15 +124,15 @@ defmodule Backend.Crawler.Crawlers.Mastodon do
 
     Logger.debug("Crawling #{endpoint}")
 
-    statuses =
-      endpoint
-      |> get!()
-      |> Map.get(:body)
-      |> Jason.decode!()
+    statuses = get_and_decode!(endpoint)
 
     filtered_statuses =
       statuses
-      |> Enum.filter(fn s -> is_after?(s["created_at"], min_timestamp) end)
+      |> Enum.filter(fn s ->
+        s["created_at"]
+        |> NaiveDateTime.from_iso8601!()
+        |> is_after?(min_timestamp)
+      end)
 
     if length(filtered_statuses) > 0 do
       # get statuses that are eligible (i.e. users don't have #nobot in their profile) and have mentions
@@ -166,12 +166,11 @@ defmodule Backend.Crawler.Crawlers.Mastodon do
     end
   end
 
-  # To check if the endpoint works as expected
-  @spec has_title?(String.t()) :: boolean
-  defp has_title?(body) do
-    case Jason.decode(body) do
-      {:ok, decoded} -> Map.has_key?(decoded, "title")
-      {:error, _error} -> false
+  defp get_peers(domain) do
+    # servers may not publish peers
+    case get_and_decode("https://#{domain}/api/v1/instance/peers") do
+      {:ok, peers} -> peers
+      {:error, _err} -> []
     end
   end
 
@@ -7,12 +7,17 @@ defmodule Backend.Crawler.Crawlers.Misskey do
   require Logger
 
   @impl ApiCrawler
-  def is_instance_type?(domain) do
+  def is_instance_type?(domain, result) do
+    # We may already know that this is a Misskey instance from nodeinfo
+    if result != nil and Map.get(result, :instance_type) == :misskey do
+      true
+    else
       case get_version_and_description(domain) do
         {:ok, _} -> true
         {:error, _} -> false
       end
     end
+  end
 
   @impl ApiCrawler
   def allows_crawling?(domain) do
@@ -27,11 +32,9 @@ defmodule Backend.Crawler.Crawlers.Misskey do
   end
 
   @impl ApiCrawler
-  def crawl(domain) do
-    with {:ok, %{status_code: 200, body: stats_body}} <- post("https://#{domain}/api/stats") do
-      %{"originalUsersCount" => user_count, "originalNotesCount" => status_count} =
-        Jason.decode!(stats_body)
-
+  def crawl(domain, _result) do
+    with {:ok, %{"originalUsersCount" => user_count, "originalNotesCount" => status_count}} <-
+           post_and_decode("https://#{domain}/api/stats") do
       if is_above_user_threshold?(user_count) or has_opted_in?(domain) do
         crawl_large_instance(domain, user_count, status_count)
       else
@@ -107,15 +110,15 @@ defmodule Backend.Crawler.Crawlers.Misskey do
 
     Logger.debug("Crawling #{endpoint} with untilId=#{until_id}")
 
-    statuses =
-      endpoint
-      |> post!(Jason.encode!(params))
-      |> Map.get(:body)
-      |> Jason.decode!()
+    statuses = post_and_decode!(endpoint, Jason.encode!(params))
 
     filtered_statuses =
       statuses
-      |> Enum.filter(fn s -> is_after?(s["createdAt"], min_timestamp) end)
+      |> Enum.filter(fn s ->
+        s["createdAt"]
+        |> NaiveDateTime.from_iso8601!()
+        |> is_after?(min_timestamp)
+      end)
 
     if length(filtered_statuses) > 0 do
       # get statuses that are eligible (i.e. users don't have #nobot in their profile) and have mentions
@@ -151,35 +154,22 @@ defmodule Backend.Crawler.Crawlers.Misskey do
   end
 
   @spec get_version_and_description(String.t()) ::
-          {:ok, {String.t(), String.t()}} | {:error, String.t()}
+          {:ok, {String.t(), String.t()}} | {:error, Jason.DecodeError.t() | HTTPoison.Error.t()}
   defp get_version_and_description(domain) do
-    case post("https://#{domain}/api/meta") do
-      {:ok, %{status_code: 200, body: body}} ->
-        case Jason.decode(body) do
-          {:ok, decoded} ->
-            {:ok, {Map.get(decoded, "version"), Map.get(decoded, "description")}}
+    case post_and_decode("https://#{domain}/api/meta") do
+      {:ok, %{"version" => version, "description" => description}} ->
+        {:ok, {version, description}}
 
-          {:error, _error} ->
-            {:error, "invalid response"}
-        end
-
-      _ ->
-        {:error, "unsuccesful request"}
+      {:error, err} ->
+        {:error, err}
     end
   end
 
   @spec get_peers(String.t()) :: {:ok, [String.t()]} | {:error, Jason.DecodeError.t()}
   defp get_peers(domain) do
-    case get("https://#{domain}/api/v1/instance/peers") do
-      {:ok, response} ->
-        with %{status_code: 200, body: body} <- response do
-          Jason.decode(body)
-        else
-          _ -> {:ok, []}
-        end
-
-      {:error, _} ->
-        {:ok, []}
+    case get_and_decode("https://#{domain}/api/v1/instance/peers") do
+      {:ok, peers} -> {:ok, peers}
+      {:error, _} -> {:ok, []}
     end
   end
 
backend/lib/backend/crawler/crawlers/nodeinfo.ex (new file, 117 lines)
@@ -0,0 +1,117 @@
+defmodule Backend.Crawler.Crawlers.Nodeinfo do
+  alias Backend.Crawler.ApiCrawler
+  require Logger
+  import Backend.Util
+  import Backend.Crawler.Util
+
+  @moduledoc """
+  This module is slightly different from the other crawlers.
+  It doesn't implement the ApiCrawler spec because it isn't run as a self-contained crawler.
+  Instead, it's run before all the other crawlers.
+
+  This is to get the user count. Some servers don't publish this in other places (e.g. GNU Social, PeerTube) so we need
+  nodeinfo to know whether it's a personal instance or not.
+  """
+
+  defstruct [
+    :description,
+    :user_count,
+    :status_count,
+    :instance_type,
+    :version
+  ]
+
+  @type t() :: %__MODULE__{
+          description: String.t(),
+          user_count: integer,
+          status_count: integer,
+          instance_type: ApiCrawler.instance_type(),
+          version: String.t()
+        }
+
+  @spec allows_crawling?(String.t()) :: boolean()
+  def allows_crawling?(domain) do
+    [
+      ".well-known/nodeinfo"
+    ]
+    |> Enum.map(fn endpoint -> "https://#{domain}#{endpoint}" end)
+    |> urls_are_crawlable?()
+  end
+
+  @spec crawl(String.t()) :: {:ok, t()} | {:error, nil}
+  def crawl(domain) do
+    with {:ok, nodeinfo_url} <- get_nodeinfo_url(domain),
+         {:ok, nodeinfo} <- get_nodeinfo(nodeinfo_url) do
+      {:ok, nodeinfo}
+    else
+      _other -> {:error, nil}
+    end
+  end
+
+  @spec get_nodeinfo_url(String.t()) ::
+          {:ok, String.t()} | {:error, Jason.DecodeError.t() | HTTPoison.Error.t()}
+  defp get_nodeinfo_url(domain) do
+    case get_and_decode("https://#{domain}/.well-known/nodeinfo") do
+      {:ok, response} -> {:ok, process_nodeinfo_url(response)}
+      {:error, err} -> {:error, err}
+    end
+  end
+
+  @spec process_nodeinfo_url(any()) :: String.t()
+  defp process_nodeinfo_url(response) do
+    response
+    |> Map.get("links")
+    |> Enum.filter(fn %{"rel" => rel} -> is_compatible_nodeinfo_version?(rel) end)
+    |> Kernel.hd()
+    |> Map.get("href")
+  end
+
+  @spec get_nodeinfo(String.t()) ::
+          {:ok, t()} | {:error, Jason.DecodeError.t() | HTTPoison.Error.t()}
+  defp get_nodeinfo(nodeinfo_url) do
+    case get_and_decode(nodeinfo_url) do
+      {:ok, nodeinfo} -> {:ok, process_nodeinfo(nodeinfo)}
+      {:error, err} -> {:error, err}
+    end
+  end
+
+  @spec process_nodeinfo(any()) :: t()
+  defp process_nodeinfo(nodeinfo) do
+    user_count = get_in(nodeinfo, ["usage", "users", "total"])
+
+    if is_above_user_threshold?(user_count) do
+      # Both of these are used, depending on the server implementation
+      description =
+        [
+          get_in(nodeinfo, ["metadata", "description"]),
+          get_in(nodeinfo, ["metadata", "nodeDescription"])
+        ]
+        |> Enum.filter(fn d -> d != nil end)
+        |> Enum.at(0)
+
+      type = nodeinfo |> get_in(["software", "name"]) |> String.downcase() |> String.to_atom()
+
+      %__MODULE__{
+        description: description,
+        user_count: user_count,
+        status_count: get_in(nodeinfo, ["usage", "localPosts"]),
+        instance_type: type,
+        version: get_in(nodeinfo, ["software", "version"])
+      }
+    else
+      %{
+        description: nil,
+        user_count: user_count,
+        status_count: nil,
+        instance_type: nil,
+        version: nil
+      }
+    end
+  end
+
+  @spec is_compatible_nodeinfo_version?(String.t()) :: boolean()
+  defp is_compatible_nodeinfo_version?(schema_url) do
+    version = String.slice(schema_url, (String.length(schema_url) - 3)..-1)
+    Enum.member?(["1.0", "1.1", "2.0"], version)
+  end
+end
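For context on what process_nodeinfo_url/1 consumes: the well-known nodeinfo document is a JSON object with a "links" array. The decoded shape is roughly the sketch below (illustrative values, not taken from the diff).

# Illustrative shape of a decoded /.well-known/nodeinfo response:
%{
  "links" => [
    %{
      "rel" => "http://nodeinfo.diaspora.software/ns/schema/2.0",
      "href" => "https://example.social/nodeinfo/2.0"
    }
  ]
}
# process_nodeinfo_url/1 keeps the first link whose "rel" ends in a supported
# schema version ("1.0", "1.1" or "2.0") and returns its "href".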
@@ -54,7 +54,7 @@ defmodule Backend.Crawler.StaleInstanceManager do
     stale_domains =
       Instance
       |> select([i], i.domain)
-      |> where([i], i.next_crawl < ^now)
+      |> where([i], i.next_crawl < ^now and not i.opt_out)
       |> Repo.all()
      |> MapSet.new()
 
@@ -8,27 +8,19 @@ defmodule Backend.Crawler.Util do
   # (e.g. https://mastodon.social/@demouser or https://pleroma.site/users/demouser)
   @spec get_domain(String.t()) :: String.t()
   def get_domain(url) do
-    String.slice(url, 8..-1)
-    |> String.split("/")
-    |> Enum.at(0)
+    [_match, domain] = Regex.run(~r/https?:\/\/([\w.-]+)\/.*/, url)
+    domain
   end
 
-  @spec is_http_200?(HTTPoison.Response.t()) :: boolean
-  def is_http_200?(%{status_code: 200}) do
-    true
-  end
-
-  def is_http_200?(_) do
-    false
-  end
-
-  @spec is_after?(String.t(), NaiveDateTime.t() | nil) :: boolean()
+  @doc """
+  Returns true if the first argument is after the second.
+  """
+  @spec is_after?(NaiveDateTime.t(), NaiveDateTime.t() | nil) :: boolean()
   def is_after?(timestamp, threshold) do
     if threshold == nil do
       true
     else
       timestamp
-      |> NaiveDateTime.from_iso8601!()
       # :second is the granularity used in the database
       |> NaiveDateTime.truncate(:second)
       |> NaiveDateTime.compare(threshold)
@@ -36,49 +28,6 @@ defmodule Backend.Crawler.Util do
     end
   end
 
-  def get(url) do
-    # TODO: add version number to user agent?
-    HTTPoison.get(url, [{"User-Agent", get_config(:user_agent)}],
-      hackney: [pool: :crawler],
-      recv_timeout: 15000,
-      timeout: 15000
-    )
-  end
-
-  @spec get!(binary) :: %{
-          :__struct__ => HTTPoison.AsyncResponse | HTTPoison.Response,
-          optional(:body) => any,
-          optional(:headers) => [any],
-          optional(:id) => reference,
-          optional(:request) => HTTPoison.Request.t(),
-          optional(:request_url) => any,
-          optional(:status_code) => integer
-        }
-  def get!(url) do
-    # TODO: add version number to user agent?
-    HTTPoison.get!(url, [{"User-Agent", get_config(:user_agent)}],
-      hackney: [pool: :crawler],
-      recv_timeout: 15000,
-      timeout: 15000
-    )
-  end
-
-  def post(url, body \\ "") do
-    HTTPoison.post(url, body, [{"User-Agent", get_config(:user_agent)}],
-      hackney: [pool: :crawler],
-      recv_timeout: 15000,
-      timeout: 15000
-    )
-  end
-
-  def post!(url, body \\ "") do
-    HTTPoison.post!(url, body, [{"User-Agent", get_config(:user_agent)}],
-      hackney: [pool: :crawler],
-      recv_timeout: 15000,
-      timeout: 15000
-    )
-  end
-
   @spec urls_are_crawlable?([String.t()]) :: boolean()
   def urls_are_crawlable?(urls) do
     user_agent = get_config(:user_agent)
@@ -145,4 +145,54 @@ defmodule Backend.Util do
   def convert_keys_to_atoms(map) do
     map |> Map.new(fn {k, v} -> {String.to_atom(k), v} end)
   end
+
+  @doc """
+  Gets and decodes a HTTP response.
+  """
+  @spec get_and_decode(String.t()) ::
+          {:ok, any()} | {:error, Jason.DecodeError.t() | HTTPoison.Error.t()}
+  def get_and_decode(url) do
+    case HTTPoison.get(url, [{"User-Agent", get_config(:user_agent)}],
+           hackney: [pool: :crawler],
+           recv_timeout: 15000,
+           timeout: 15000
+         ) do
+      {:ok, %{status_code: 200, body: body}} -> Jason.decode(body)
+      {:ok, _} -> {:error, %HTTPoison.Error{reason: "Non-200 response"}}
+      {:error, err} -> {:error, err}
+    end
+  end
+
+  @spec get_and_decode!(String.t()) :: any()
+  def get_and_decode!(url) do
+    case get_and_decode(url) do
+      {:ok, decoded} -> decoded
+      {:error, error} -> raise error
+    end
+  end
+
+  @doc """
+  POSTS to a HTTP endpoint and decodes the JSON response.
+  """
+  @spec post_and_decode(String.t(), String.t()) ::
+          {:ok, any()} | {:error, Jason.DecodeError.t() | HTTPoison.Error.t()}
+  def post_and_decode(url, body \\ "") do
+    case HTTPoison.post(url, body, [{"User-Agent", get_config(:user_agent)}],
+           hackney: [pool: :crawler],
+           recv_timeout: 15000,
+           timeout: 15000
+         ) do
+      {:ok, %{status_code: 200, body: response_body}} -> Jason.decode(response_body)
+      {:ok, _} -> {:error, %HTTPoison.Error{reason: "Non-200 response"}}
+      {:error, err} -> {:error, err}
+    end
+  end
+
+  @spec post_and_decode!(String.t(), String.t()) :: any()
+  def post_and_decode!(url, body \\ "") do
+    case post_and_decode(url, body) do
+      {:ok, decoded} -> decoded
+      {:error, error} -> raise error
+    end
+  end
 end
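The crawlers above consume these helpers through the {:ok, decoded} | {:error, err} shape. A small illustrative call site (the domain is a placeholder):

# Illustrative only: how callers use the new helper.
case get_and_decode("https://example.social/api/v1/instance") do
  {:ok, instance} -> get_in(instance, ["stats", "user_count"])
  {:error, _err} -> nil
end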
@@ -14,10 +14,7 @@ defmodule BackendWeb.AdminLoginController do
     # TODO: this assumes mastodon/pleroma API
     cleaned_domain = clean_domain(domain)
 
-    instance_data =
-      HTTPoison.get!("https://#{cleaned_domain}/api/v1/instance")
-      |> Map.get(:body)
-      |> Jason.decode!()
+    instance_data = get_and_decode!("https://#{cleaned_domain}/api/v1/instance")
 
     render(conn, "show.json", instance_data: instance_data, cleaned_domain: cleaned_domain)
   end
@@ -25,10 +22,7 @@ defmodule BackendWeb.AdminLoginController do
   def create(conn, %{"domain" => domain, "type" => type}) do
     cleaned_domain = clean_domain(domain)
 
-    instance_data =
-      HTTPoison.get!("https://#{cleaned_domain}/api/v1/instance")
-      |> Map.get(:body)
-      |> Jason.decode!()
+    instance_data = get_and_decode!("https://#{cleaned_domain}/api/v1/instance")
 
     error =
       cond do
@ -3,7 +3,7 @@ import { IconNames } from "@blueprintjs/icons";
|
||||||
import React from "react";
|
import React from "react";
|
||||||
import { QUALITATIVE_COLOR_SCHEME } from "../../constants";
|
import { QUALITATIVE_COLOR_SCHEME } from "../../constants";
|
||||||
import { typeColorScheme } from "../../types";
|
import { typeColorScheme } from "../../types";
|
||||||
import { capitalize } from "../../util";
|
import { getTypeDisplayString } from "../../util";
|
||||||
|
|
||||||
interface IInstanceTypeProps {
|
interface IInstanceTypeProps {
|
||||||
type: string;
|
type: string;
|
||||||
|
@ -15,7 +15,7 @@ interface IInstanceTypeProps {
|
||||||
*/
|
*/
|
||||||
const InstanceType: React.FC<IInstanceTypeProps> = ({ type, colorAfterName }) => {
|
const InstanceType: React.FC<IInstanceTypeProps> = ({ type, colorAfterName }) => {
|
||||||
const idx = typeColorScheme.values.indexOf(type);
|
const idx = typeColorScheme.values.indexOf(type);
|
||||||
const name = " " + capitalize(type);
|
const name = " " + getTypeDisplayString(type);
|
||||||
return (
|
return (
|
||||||
<>
|
<>
|
||||||
{!!colorAfterName && name}
|
{!!colorAfterName && name}
|
||||||
|
|
|
@ -4,7 +4,7 @@ import React, { MouseEvent } from "react";
|
||||||
import styled from "styled-components";
|
import styled from "styled-components";
|
||||||
import { INSTANCE_TYPES } from "../../constants";
|
import { INSTANCE_TYPES } from "../../constants";
|
||||||
import { getSearchFilterDisplayValue, ISearchFilter } from "../../searchFilters";
|
import { getSearchFilterDisplayValue, ISearchFilter } from "../../searchFilters";
|
||||||
import { capitalize } from "../../util";
|
import { getTypeDisplayString } from "../../util";
|
||||||
|
|
||||||
const SearchFilterContainer = styled.div`
|
const SearchFilterContainer = styled.div`
|
||||||
margin: 10px 0 0 0;
|
margin: 10px 0 0 0;
|
||||||
|
@ -30,7 +30,7 @@ const SearchFilters: React.FC<ISearchFiltersProps> = ({ selectedFilters, selectF
|
||||||
const handleSelectInstanceType = (e: MouseEvent<HTMLElement>) => {
|
const handleSelectInstanceType = (e: MouseEvent<HTMLElement>) => {
|
||||||
const field = "type";
|
const field = "type";
|
||||||
const relation = "eq";
|
const relation = "eq";
|
||||||
const value = e.currentTarget.innerText.toLowerCase();
|
const value = e.currentTarget.innerText.toLowerCase().replace(" ", "");
|
||||||
const filter: ISearchFilter = {
|
const filter: ISearchFilter = {
|
||||||
displayValue: getSearchFilterDisplayValue(field, relation, value),
|
displayValue: getSearchFilterDisplayValue(field, relation, value),
|
||||||
field,
|
field,
|
||||||
|
@ -43,7 +43,7 @@ const SearchFilters: React.FC<ISearchFiltersProps> = ({ selectedFilters, selectF
|
||||||
<Menu>
|
<Menu>
|
||||||
<MenuItem icon={IconNames.SYMBOL_CIRCLE} text="Instance type" disabled={hasInstanceTypeFilter}>
|
<MenuItem icon={IconNames.SYMBOL_CIRCLE} text="Instance type" disabled={hasInstanceTypeFilter}>
|
||||||
{INSTANCE_TYPES.map(t => (
|
{INSTANCE_TYPES.map(t => (
|
||||||
<MenuItem key={t} text={capitalize(t)} onClick={handleSelectInstanceType} />
|
<MenuItem key={t} text={getTypeDisplayString(t)} onClick={handleSelectInstanceType} />
|
||||||
))}
|
))}
|
||||||
</MenuItem>
|
</MenuItem>
|
||||||
</Menu>
|
</Menu>
|
||||||
|
|
|
@ -40,4 +40,4 @@ export interface IInstanceDomainPath {
|
||||||
}
|
}
|
||||||
|
|
||||||
// We could also extract the values from the server response, but this would slow things down...
|
// We could also extract the values from the server response, but this would slow things down...
|
||||||
export const INSTANCE_TYPES = ["mastodon", "gab", "pleroma", "misskey"];
|
export const INSTANCE_TYPES = ["mastodon", "gab", "pleroma", "misskey", "gnusocial"];
|
||||||
|
|
|
@ -68,3 +68,13 @@ export const getBuckets = (min: number, max: number, steps: number, exponential:
|
||||||
return range(min, max, bucketSize);
|
return range(min, max, bucketSize);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const typeToDisplay = {
|
||||||
|
gnusocial: "GNU Social"
|
||||||
|
};
|
||||||
|
export const getTypeDisplayString = (key: string) => {
|
||||||
|
if (key in typeToDisplay) {
|
||||||
|
return typeToDisplay[key];
|
||||||
|
}
|
||||||
|
return capitalize(key);
|
||||||
|
};
|
||||||
|
|