also detect gnusocial instances w/o nodeinfo

Tao Bror Bojlén 2019-08-10 14:16:55 +03:00
parent 71b76a4332
commit 271c67ea8e
GPG key ID: C6EC7AAB905F9E6F
2 changed files with 11 additions and 4 deletions


@@ -8,8 +8,15 @@ defmodule Backend.Crawler.Crawlers.GnuSocial do
   @behaviour ApiCrawler
 
   @impl ApiCrawler
-  def is_instance_type?(_domain, nodeinfo_result) do
-    nodeinfo_result != nil and Map.get(nodeinfo_result, :instance_type) == :gnusocial
+  def is_instance_type?(domain, nodeinfo_result) do
+    if nodeinfo_result != nil do
+      Map.get(nodeinfo_result, :instance_type) == :gnusocial
+    else
+      case get_and_decode("https://#{domain}/api/statuses/public_timeline.json") do
+        {:ok, statuses} -> is_list(statuses)
+        {:error, _other} -> false
+      end
+    end
   end
 
   @impl ApiCrawler
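
In plain terms, the fallback probes the classic StatusNet/GNU Social public-timeline endpoint and treats a JSON array response as a positive signal. A minimal standalone sketch of that check, assuming HTTPoison and Jason are available; the module and function names below are hypothetical, and the commit itself goes through the crawler's existing get_and_decode/1 instead:

defmodule GnuSocialProbe do
  @moduledoc "Illustrative only: detect a GNU Social instance without nodeinfo."

  @spec gnu_social_instance?(String.t()) :: boolean()
  def gnu_social_instance?(domain) do
    url = "https://#{domain}/api/statuses/public_timeline.json"

    with {:ok, %HTTPoison.Response{status_code: 200, body: body}} <-
           HTTPoison.get(url, [], follow_redirect: true),
         {:ok, statuses} <- Jason.decode(body) do
      # GNU Social serves its public timeline as a JSON array of statuses.
      is_list(statuses)
    else
      # Any HTTP error, non-200 status, or undecodable body: not confirmed.
      _ -> false
    end
  end
end

Called as, e.g., GnuSocialProbe.gnu_social_instance?("social.example.org"), it returns true only when the endpoint responds with a decodable JSON list, mirroring the is_list/1 check in the diff above.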


@@ -84,7 +84,7 @@ defmodule Backend.Scheduler do
     # We want the earliest sucessful crawl so that we can exclude it from the statistics.
     # This is because the first crawl goes up to one month into the past -- this would mess up the counts!
     # The statistics from here assume that all statuses were written at exactly the crawl's inserted_at timestamp.
-    earliest_successful_crawl_subquery =
+    earliest_crawl_subquery =
       Crawl
       |> group_by([c], c.instance_domain)
       |> select([c], %{
@@ -94,7 +94,7 @@
 
     instances =
       Crawl
-      |> join(:inner, [c], c2 in subquery(earliest_successful_crawl_subquery),
+      |> join(:inner, [c], c2 in subquery(earliest_crawl_subquery),
         on: c.instance_domain == c2.instance_domain
       )
       |> where(
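
For readers skimming the rename: the subquery in question finds each instance's earliest crawl so that it can be excluded from the statistics, because that first crawl backfills up to a month of statuses. A rough Ecto sketch of the pattern, assuming a Crawl schema with instance_domain and inserted_at fields; the select keys and the final where clause are illustrative, not copied from the project:

import Ecto.Query

earliest_crawl_subquery =
  Crawl
  |> group_by([c], c.instance_domain)
  |> select([c], %{
    instance_domain: c.instance_domain,
    earliest_crawl: min(c.inserted_at)
  })

instances =
  Crawl
  |> join(:inner, [c], c2 in subquery(earliest_crawl_subquery),
    on: c.instance_domain == c2.instance_domain
  )
  # Drop each instance's first crawl so its month-long backfill doesn't
  # skew the per-crawl status counts.
  |> where([c, c2], c.inserted_at > c2.earliest_crawl)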