add friendica crawler

This commit is contained in:
Tao Bror Bojlén 2019-08-18 15:27:50 +02:00
parent 4a7804d987
commit f033b1eec0
No known key found for this signature in database
GPG key ID: C6EC7AAB905F9E6F
3 changed files with 94 additions and 2 deletions

View file

@ -4,7 +4,7 @@ defmodule Backend.Crawler do
""" """
alias __MODULE__ alias __MODULE__
alias Backend.Crawler.Crawlers.{GnuSocial, Mastodon, Misskey, Nodeinfo} alias Backend.Crawler.Crawlers.{Friendica, GnuSocial, Mastodon, Misskey, Nodeinfo}
alias Backend.Crawler.ApiCrawler alias Backend.Crawler.ApiCrawler
alias Backend.{Crawl, CrawlInteraction, MostRecentCrawl, Repo, Instance, InstancePeer} alias Backend.{Crawl, CrawlInteraction, MostRecentCrawl, Repo, Instance, InstancePeer}
import Ecto.Query import Ecto.Query
@ -49,6 +49,7 @@ defmodule Backend.Crawler do
|> register(Mastodon) |> register(Mastodon)
|> register(Misskey) |> register(Misskey)
|> register(GnuSocial) |> register(GnuSocial)
|> register(Friendica)
# go! # go!
|> crawl() |> crawl()
|> save() |> save()

View file

@ -0,0 +1,82 @@
defmodule Backend.Crawler.Crawlers.Friendica do
  @moduledoc """
  `ApiCrawler` implementation for Friendica instances.

  Instance details are read from `/statistics.json` and the list of peers from
  `/poco/@server`. Both endpoints are served by the remote instance, so their
  payloads are treated as untrusted: unexpected shapes are ignored instead of
  crashing the crawl.
  """

  alias Backend.Crawler.ApiCrawler

  import Backend.Crawler.Util
  import Backend.Util
  require Logger

  @behaviour ApiCrawler

  @statistics_path "/statistics.json"
  @peers_path "/poco/@server"

  # Maps the keys of the crawl result to the keys read from `/statistics.json`.
  @statistics_fields %{
    version: "version",
    user_count: "total_users",
    status_count: "local_posts"
  }

  @doc """
  Returns `true` when `domain` is a Friendica instance.

  When `nodeinfo_result` is given, its `:instance_type` decides. Otherwise the
  `"network"` field of `/statistics.json` is compared case-insensitively with
  `"friendica"`. A failed request, or a response without a string `"network"`
  field, yields `false`.
  """
  @impl ApiCrawler
  def is_instance_type?(domain, nodeinfo_result) do
    if nodeinfo_result != nil do
      Map.get(nodeinfo_result, :instance_type) == :friendica
    else
      case get_statistics(domain) do
        {:ok, %{"network" => network}} when is_binary(network) ->
          String.downcase(network) == "friendica"

        # Request failed, or the payload has no usable "network" field.
        _other ->
          false
      end
    end
  end

  @doc """
  Returns the result of `urls_are_crawlable?/1` for every endpoint that
  `crawl/2` requests on `domain`.
  """
  @impl ApiCrawler
  def allows_crawling?(domain) do
    [@statistics_path, @peers_path]
    |> Enum.map(fn endpoint -> "https://#{domain}#{endpoint}" end)
    |> urls_are_crawlable?()
  end

  @doc """
  Crawls `domain` and returns a map describing the instance.

  Values from `/statistics.json` are merged with `nodeinfo_result` (which may
  be `nil`); on conflicting keys the nodeinfo value wins. When the resulting
  user count passes `is_above_user_threshold?/1`, the returned map contains
  `:peers`, `:interactions`, `:statuses_seen` and `:instance_type`, plus
  whichever of `:description`, `:version`, `:user_count` and `:status_count`
  are known. Otherwise the nodeinfo map is returned as-is (an empty map when
  `nodeinfo_result` is `nil`).
  """
  @impl ApiCrawler
  def crawl(domain, nodeinfo_result) do
    nodeinfo = nodeinfo_result || %{}
    details = domain |> get_statistics_details() |> Map.merge(nodeinfo)

    # An explicit nil must count as 0: nil compares greater than any number.
    user_count = Map.get(details, :user_count) || 0

    if is_above_user_threshold?(user_count) do
      # Peers are only fetched once we know the result will be used.
      peers = get_peers(domain)

      Map.merge(
        %{peers: peers, interactions: %{}, statuses_seen: 0, instance_type: :friendica},
        Map.take(details, [:description, :version, :user_count, :status_count])
      )
    else
      nodeinfo
    end
  end

  # Fetches and decodes the instance statistics document.
  defp get_statistics(domain) do
    get_and_decode("https://#{domain}#{@statistics_path}")
  end

  # Extracts the known statistics fields; missing fields are left out and a
  # failed request yields an empty map. The string keys are read directly so
  # that no atoms are created from keys chosen by the remote server.
  defp get_statistics_details(domain) do
    case get_statistics(domain) do
      {:ok, stats} when is_map(stats) ->
        @statistics_fields
        |> Enum.map(fn {field, key} -> {field, Map.get(stats, key)} end)
        |> Enum.reject(fn {_field, value} -> is_nil(value) end)
        |> Map.new()

      _error ->
        %{}
    end
  end

  # Returns the peers listed by the instance, with the URL scheme stripped.
  # Entries without a string "url", and non-list responses, are ignored.
  defp get_peers(domain) do
    case get_and_decode("https://#{domain}#{@peers_path}") do
      {:ok, peers} when is_list(peers) ->
        for %{"url" => url} <- peers, is_binary(url), do: to_domain(url)

      _error ->
        []
    end
  end

  # Strips a leading "https://" or "http://" from `url`.
  @spec to_domain(String.t()) :: String.t()
  defp to_domain("https://" <> rest), do: rest
  defp to_domain("http://" <> rest), do: rest
  defp to_domain(url), do: url
end

View file

@ -39,4 +39,13 @@ export interface IInstanceDomainPath {
domain: string; domain: string;
} }
export const INSTANCE_TYPES = ["mastodon", "gab", "pleroma", "misskey", "gnusocial", "writefreely", "peertube"]; export const INSTANCE_TYPES = [
"mastodon",
"gab",
"pleroma",
"misskey",
"gnusocial",
"writefreely",
"peertube",
"friendica"
];