make sure crawl results are never overwritten with a blank ApiCrawler

Tao Bror Bojlén 2019-10-15 13:24:37 +01:00
parent 8558f96635
commit 6715d9395f
No known key found for this signature in database
GPG Key ID: C6EC7AAB905F9E6F
3 changed files with 6 additions and 2 deletions


@@ -62,9 +62,13 @@ config :backend, :crawler,
crawl_interval_mins: 60,
crawl_workers: 20,
blacklist: [
# spam
"gab.best",
# spam
"4chan.icu",
# *really* doesn't want to be listed on fediverse.space
"pleroma.site",
# dummy instances used for pleroma CI
"pleroma.online"
],
user_agent: "fediverse.space crawler",
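
For context, a minimal sketch (not the project's actual code) of how a crawler worker might consult this blacklist before visiting a domain. The BlacklistCheck module and blacklisted?/1 helper are hypothetical; only the config key matches the block above.

# Hypothetical helper: looks up :blacklist from the `config :backend, :crawler`
# block above and checks a domain against it before crawling.
defmodule BlacklistCheck do
  def blacklisted?(domain) do
    :backend
    |> Application.get_env(:crawler, [])
    |> Keyword.get(:blacklist, [])
    |> Enum.member?(domain)
  end
end

# BlacklistCheck.blacklisted?("gab.best")    # => true
# BlacklistCheck.blacklisted?("example.com") # => false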


@@ -34,7 +34,7 @@ defmodule Backend.Crawler.Crawlers.GnuSocial do
def crawl(domain, nodeinfo) do
if nodeinfo == nil or
nodeinfo |> Map.get(:user_count) |> is_above_user_threshold?() do
- Map.merge(crawl_large_instance(domain), nodeinfo)
+ Map.merge(nodeinfo, crawl_large_instance(domain))
else
Map.merge(ApiCrawler.get_default(), nodeinfo)
end
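
The fix relies on Map.merge/2 resolving duplicate keys in favor of its second argument: with the crawl result merged last, a sparse nodeinfo map can no longer overwrite freshly crawled values. A minimal sketch of the difference (the field values below are illustrative, not taken from the project):

# Map.merge/2 keeps the second map's value whenever a key appears in both maps.
nodeinfo = %{statuses_seen: 0, user_count: 100}
crawl_result = %{statuses_seen: 4_500, user_count: 120}

Map.merge(crawl_result, nodeinfo)
# => %{statuses_seen: 0, user_count: 100}       (old order: nodeinfo wins)

Map.merge(nodeinfo, crawl_result)
# => %{statuses_seen: 4_500, user_count: 120}   (new order: crawl result wins)

The unchanged else branch already follows this order: ApiCrawler.get_default() goes first, so the nodeinfo values override the blank defaults rather than the other way around.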


@@ -166,7 +166,7 @@ defmodule Backend.Scheduler do
Edges are only generated if
* both instances have been successfully crawled
* neither of the instances has blocked the other
- * there are interactions in each direction
+ * there are interactions in each direction (if :require_bidirectional_edges is true in config)
"""
def generate_edges do
now = get_now()
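
The docstring change makes the bidirectional-interaction requirement configurable. A hedged sketch of how generate_edges/0 might read the flag, assuming it lives under the config :backend, :crawler block from the first file and defaults to true (both assumptions, not confirmed by this diff):

# Sketch only: the key placement and default value are assumptions.
require_bidirectional? =
  :backend
  |> Application.get_env(:crawler, [])
  |> Keyword.get(:require_bidirectional_edges, true)

# When true, an edge needs interactions in both directions (A -> B and B -> A);
# when false, interactions in a single direction are enough.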