Also detect GNU Social instances that do not expose nodeinfo
This commit is contained in:
parent
71b76a4332
commit
271c67ea8e
|
@ -8,8 +8,15 @@ defmodule Backend.Crawler.Crawlers.GnuSocial do
|
|||
@behaviour ApiCrawler
|
||||
|
||||
@impl ApiCrawler
|
||||
def is_instance_type?(_domain, nodeinfo_result) do
|
||||
nodeinfo_result != nil and Map.get(nodeinfo_result, :instance_type) == :gnusocial
|
||||
# Determines whether `domain` is a GNU Social instance.
#
# When a nodeinfo result is available, trust its reported `:instance_type`.
# GNU Social instances often lack nodeinfo entirely, so with no nodeinfo we
# fall back to probing the classic StatusNet public-timeline endpoint: a
# successful response that decodes to a JSON list is taken as GNU Social.
def is_instance_type?(domain, nil) do
  timeline_url = "https://#{domain}/api/statuses/public_timeline.json"

  case get_and_decode(timeline_url) do
    # GNU Social's public timeline decodes to a plain JSON array of statuses.
    {:ok, decoded} -> is_list(decoded)
    # Unreachable host, non-200, or undecodable body: not GNU Social.
    {:error, _reason} -> false
  end
end

def is_instance_type?(_domain, nodeinfo_result) do
  # Nodeinfo present: rely solely on its self-reported instance type.
  Map.get(nodeinfo_result, :instance_type) == :gnusocial
end
|
||||
|
||||
@impl ApiCrawler
|
||||
|
|
|
@ -84,7 +84,7 @@ defmodule Backend.Scheduler do
|
|||
# We want the earliest successful crawl so that we can exclude it from the statistics.
|
||||
# This is because the first crawl goes up to one month into the past -- this would mess up the counts!
|
||||
# The statistics from here assume that all statuses were written at exactly the crawl's inserted_at timestamp.
|
||||
earliest_successful_crawl_subquery =
|
||||
earliest_crawl_subquery =
|
||||
Crawl
|
||||
|> group_by([c], c.instance_domain)
|
||||
|> select([c], %{
|
||||
|
@ -94,7 +94,7 @@ defmodule Backend.Scheduler do
|
|||
|
||||
instances =
|
||||
Crawl
|
||||
|> join(:inner, [c], c2 in subquery(earliest_successful_crawl_subquery),
|
||||
|> join(:inner, [c], c2 in subquery(earliest_crawl_subquery),
|
||||
on: c.instance_domain == c2.instance_domain
|
||||
)
|
||||
|> where(
|
||||
|
|
Loading…
Reference in a new issue