From 6715d9395f3c345a484947c4e87490991b974ebb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tao=20Bror=20Bojl=C3=A9n?= Date: Tue, 15 Oct 2019 13:24:37 +0100 Subject: [PATCH] make sure crawl results are never overwritten with a blank ApiCrawler --- backend/config/config.exs | 4 ++++ backend/lib/backend/crawler/crawlers/gnu_social.ex | 2 +- backend/lib/backend/scheduler.ex | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/backend/config/config.exs b/backend/config/config.exs index eb1ceae..88ae468 100644 --- a/backend/config/config.exs +++ b/backend/config/config.exs @@ -62,9 +62,13 @@ config :backend, :crawler, crawl_interval_mins: 60, crawl_workers: 20, blacklist: [ + # spam "gab.best", + # spam "4chan.icu", + # *really* doesn't want to be listed on fediverse.space "pleroma.site", + # dummy instances used for pleroma CI "pleroma.online" ], user_agent: "fediverse.space crawler", diff --git a/backend/lib/backend/crawler/crawlers/gnu_social.ex b/backend/lib/backend/crawler/crawlers/gnu_social.ex index 0f71238..67f55ea 100644 --- a/backend/lib/backend/crawler/crawlers/gnu_social.ex +++ b/backend/lib/backend/crawler/crawlers/gnu_social.ex @@ -34,7 +34,7 @@ defmodule Backend.Crawler.Crawlers.GnuSocial do def crawl(domain, nodeinfo) do if nodeinfo == nil or nodeinfo |> Map.get(:user_count) |> is_above_user_threshold?() do - Map.merge(crawl_large_instance(domain), nodeinfo) + Map.merge(nodeinfo, crawl_large_instance(domain)) else Map.merge(ApiCrawler.get_default(), nodeinfo) end diff --git a/backend/lib/backend/scheduler.ex b/backend/lib/backend/scheduler.ex index 907579c..4560d94 100644 --- a/backend/lib/backend/scheduler.ex +++ b/backend/lib/backend/scheduler.ex @@ -166,7 +166,7 @@ defmodule Backend.Scheduler do Edges are only generated if * both instances have been succesfully crawled * neither of the instances have blocked each other - * there are interactions in each direction + * there are interactions in each direction (if :require_bidirectional_edges is true in config) """ def generate_edges do now = get_now()