also detect gnusocial instances w/o nodeinfo
This commit is contained in:
parent
71b76a4332
commit
271c67ea8e
|
@ -8,8 +8,15 @@ defmodule Backend.Crawler.Crawlers.GnuSocial do
|
||||||
@behaviour ApiCrawler
|
@behaviour ApiCrawler
|
||||||
|
|
||||||
@impl ApiCrawler
|
@impl ApiCrawler
|
||||||
def is_instance_type?(_domain, nodeinfo_result) do
|
def is_instance_type?(domain, nodeinfo_result) do
|
||||||
nodeinfo_result != nil and Map.get(nodeinfo_result, :instance_type) == :gnusocial
|
if nodeinfo_result != nil do
|
||||||
|
Map.get(nodeinfo_result, :instance_type) == :gnusocial
|
||||||
|
else
|
||||||
|
case get_and_decode("https://#{domain}/api/statuses/public_timeline.json") do
|
||||||
|
{:ok, statuses} -> is_list(statuses)
|
||||||
|
{:error, _other} -> false
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@impl ApiCrawler
|
@impl ApiCrawler
|
||||||
|
|
|
@ -84,7 +84,7 @@ defmodule Backend.Scheduler do
|
||||||
# We want the earliest successful crawl so that we can exclude it from the statistics.
|
# We want the earliest successful crawl so that we can exclude it from the statistics.
|
||||||
# This is because the first crawl goes up to one month into the past -- this would mess up the counts!
|
# This is because the first crawl goes up to one month into the past -- this would mess up the counts!
|
||||||
# The statistics from here assume that all statuses were written at exactly the crawl's inserted_at timestamp.
|
# The statistics from here assume that all statuses were written at exactly the crawl's inserted_at timestamp.
|
||||||
earliest_successful_crawl_subquery =
|
earliest_crawl_subquery =
|
||||||
Crawl
|
Crawl
|
||||||
|> group_by([c], c.instance_domain)
|
|> group_by([c], c.instance_domain)
|
||||||
|> select([c], %{
|
|> select([c], %{
|
||||||
|
@ -94,7 +94,7 @@ defmodule Backend.Scheduler do
|
||||||
|
|
||||||
instances =
|
instances =
|
||||||
Crawl
|
Crawl
|
||||||
|> join(:inner, [c], c2 in subquery(earliest_successful_crawl_subquery),
|
|> join(:inner, [c], c2 in subquery(earliest_crawl_subquery),
|
||||||
on: c.instance_domain == c2.instance_domain
|
on: c.instance_domain == c2.instance_domain
|
||||||
)
|
)
|
||||||
|> where(
|
|> where(
|
||||||
|
|
Loading…
Reference in a new issue