make sure crawl results are never overwritten with a blank ApiCrawler
This commit is contained in:
parent 8558f96635
commit 6715d9395f
@@ -62,9 +62,13 @@ config :backend, :crawler,
   crawl_interval_mins: 60,
   crawl_workers: 20,
   blacklist: [
+    # spam
     "gab.best",
+    # spam
     "4chan.icu",
+    # *really* doesn't want to be listed on fediverse.space
     "pleroma.site",
+    # dummy instances used for pleroma CI
     "pleroma.online"
   ],
   user_agent: "fediverse.space crawler",
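The blacklist above is plain config data that the crawler consults before fetching a domain. A minimal sketch of such a lookup, assuming the keyword-list config shown in this hunk (the Backend.Crawler.Blacklist module and its blacklisted?/1 helper are hypothetical, not part of this commit):

defmodule Backend.Crawler.Blacklist do
  # Hypothetical helper, not from this commit: reads the :blacklist key
  # out of the :backend, :crawler config shown above.
  def blacklisted?(domain) do
    :backend
    |> Application.get_env(:crawler, [])
    |> Keyword.get(:blacklist, [])
    |> Enum.member?(domain)
  end
end

# Usage: Backend.Crawler.Blacklist.blacklisted?("gab.best") #=> true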
@@ -34,7 +34,7 @@ defmodule Backend.Crawler.Crawlers.GnuSocial do
   def crawl(domain, nodeinfo) do
     if nodeinfo == nil or
          nodeinfo |> Map.get(:user_count) |> is_above_user_threshold?() do
-      Map.merge(crawl_large_instance(domain), nodeinfo)
+      Map.merge(nodeinfo, crawl_large_instance(domain))
     else
       Map.merge(ApiCrawler.get_default(), nodeinfo)
     end
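The GnuSocial change hinges on Map.merge/2 precedence: when both maps define a key, the value from the second argument wins. Before this commit, nodeinfo was the second argument, so any blank ApiCrawler-default fields it carried overwrote freshly crawled values; swapping the order lets the crawl results win. A sketch of the difference (the field values are illustrative, and only :user_count appears in this diff):

# Illustrative values only; :user_count is from the diff, the rest is made up.
crawled  = %{user_count: 120, status_count: 4_500}
nodeinfo = %{user_count: nil, status_count: 0}

Map.merge(crawled, nodeinfo)
#=> %{user_count: nil, status_count: 0}      (old order: blanks overwrite results)

Map.merge(nodeinfo, crawled)
#=> %{user_count: 120, status_count: 4_500}  (new order: crawl results win)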
@@ -166,7 +166,7 @@ defmodule Backend.Scheduler do
   Edges are only generated if
   * both instances have been succesfully crawled
   * neither of the instances have blocked each other
-  * there are interactions in each direction
+  * there are interactions in each direction (if :require_bidirectional_edges is true in config)
   """
   def generate_edges do
     now = get_now()
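Only the :require_bidirectional_edges flag name appears in this diff; how generate_edges/0 actually reads it is not shown. A hedged sketch of such a gate, assuming the flag lives alongside the crawler settings from the first hunk (the EdgeGate module and keep_edge?/2 are hypothetical):

defmodule EdgeGate do
  # Hypothetical sketch, not from this commit; assumes the flag sits in
  # the :backend, :crawler keyword list shown in the first hunk.
  def keep_edge?(a_to_b, b_to_a) do
    require_bidirectional? =
      :backend
      |> Application.get_env(:crawler, [])
      |> Keyword.get(:require_bidirectional_edges, true)

    if require_bidirectional? do
      a_to_b > 0 and b_to_a > 0
    else
      a_to_b > 0 or b_to_a > 0
    end
  end
end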