From f033b1eec046e0da246e877f322647ae2822ee96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tao=20Bror=20Bojl=C3=A9n?= Date: Sun, 18 Aug 2019 15:27:50 +0200 Subject: [PATCH] add friendica crawler --- backend/lib/backend/crawler/crawler.ex | 3 +- .../lib/backend/crawler/crawlers/friendica.ex | 82 +++++++++++++++++++ frontend/src/constants.tsx | 11 ++- 3 files changed, 94 insertions(+), 2 deletions(-) create mode 100644 backend/lib/backend/crawler/crawlers/friendica.ex diff --git a/backend/lib/backend/crawler/crawler.ex b/backend/lib/backend/crawler/crawler.ex index f747a22..0d3f852 100644 --- a/backend/lib/backend/crawler/crawler.ex +++ b/backend/lib/backend/crawler/crawler.ex @@ -4,7 +4,7 @@ defmodule Backend.Crawler do """ alias __MODULE__ - alias Backend.Crawler.Crawlers.{GnuSocial, Mastodon, Misskey, Nodeinfo} + alias Backend.Crawler.Crawlers.{Friendica, GnuSocial, Mastodon, Misskey, Nodeinfo} alias Backend.Crawler.ApiCrawler alias Backend.{Crawl, CrawlInteraction, MostRecentCrawl, Repo, Instance, InstancePeer} import Ecto.Query @@ -49,6 +49,7 @@ defmodule Backend.Crawler do |> register(Mastodon) |> register(Misskey) |> register(GnuSocial) + |> register(Friendica) # go! 
defmodule Backend.Crawler.Crawlers.Friendica do
  @moduledoc """
  Crawler for Friendica instances.

  Friendica exposes aggregate statistics at `/statistics.json` and its peer
  list at `/poco/@server`; both endpoints are fetched over HTTPS.
  """

  alias Backend.Crawler.ApiCrawler
  import Backend.Crawler.Util
  import Backend.Util
  require Logger

  @behaviour ApiCrawler

  @impl ApiCrawler
  def is_instance_type?(domain, nodeinfo_result) do
    if nodeinfo_result != nil do
      Map.get(nodeinfo_result, :instance_type) == :friendica
    else
      case get_statistics(domain) do
        # Default the "network" key to "" — String.downcase/1 raises on nil
        # when the instance omits the field.
        {:ok, stats} -> stats |> Map.get("network", "") |> String.downcase() == "friendica"
        {:error, _other} -> false
      end
    end
  end

  @impl ApiCrawler
  def allows_crawling?(domain) do
    [
      "/statistics.json"
    ]
    |> Enum.map(fn endpoint -> "https://#{domain}#{endpoint}" end)
    |> urls_are_crawlable?()
  end

  @impl ApiCrawler
  def crawl(domain, nodeinfo_result) do
    details =
      case get_statistics(domain) do
        {:ok, s} -> s
        {:error, _err} -> %{}
      end
      |> convert_keys_to_atoms()
      # Use Map.get/3 rather than dot access: when the statistics request
      # failed we are holding an empty map, and even a successful response
      # may omit individual keys — m.version would raise KeyError.
      |> (fn m ->
            %{
              version: Map.get(m, :version),
              user_count: Map.get(m, :total_users, 0),
              status_count: Map.get(m, :local_posts, 0)
            }
          end).()
      # nodeinfo_result may be nil (see is_instance_type?/2);
      # Map.merge/2 requires both arguments to be maps.
      |> Map.merge(nodeinfo_result || %{})

    peers =
      case get_and_decode("https://#{domain}/poco/@server") do
        {:ok, p} -> p
        {:error, _err} -> []
      end
      |> Enum.map(fn peer ->
        peer
        |> Map.get("url")
        |> to_domain()
      end)
      # Drop entries whose "url" key was missing or nil.
      |> Enum.reject(&is_nil/1)

    if details |> Map.get(:user_count, 0) |> is_above_user_threshold?() do
      Map.merge(
        %{peers: peers, interactions: %{}, statuses_seen: 0, instance_type: :friendica},
        Map.take(details, [:description, :version, :user_count, :status_count])
      )
    else
      nodeinfo_result
    end
  end

  # Fetches and JSON-decodes the instance-level statistics document.
  defp get_statistics(domain) do
    get_and_decode("https://#{domain}/statistics.json")
  end

  # Strips an http:// or https:// scheme prefix, leaving the bare domain.
  # Returns nil for nil input so callers can filter missing URLs.
  defp to_domain(nil), do: nil

  defp to_domain(url) do
    url
    |> String.replace_prefix("https://", "")
    |> String.replace_prefix("http://", "")
  end
end