diff --git a/backend/lib/backend/crawler/crawler.ex b/backend/lib/backend/crawler/crawler.ex index 392425c..d471707 100644 --- a/backend/lib/backend/crawler/crawler.ex +++ b/backend/lib/backend/crawler/crawler.ex @@ -49,6 +49,8 @@ defmodule Backend.Crawler do # go! |> crawl() |> save() + + Appsignal.increment_counter("crawler.total", 1) end # Adds a new ApiCrawler that run/1 will check. @@ -207,6 +209,8 @@ defmodule Backend.Crawler do CrawlInteraction |> Repo.insert_all(interactions) + + Appsignal.increment_counter("crawler.success", 1) end defp save(%{domain: domain, error: error, allows_crawling?: allows_crawling}) do @@ -232,6 +236,8 @@ defmodule Backend.Crawler do error: error }) end) + + Appsignal.increment_counter("crawler.failure", 1) end defp get_base_domain(domain) do diff --git a/backend/lib/backend/crawler/stale_instance_manager.ex b/backend/lib/backend/crawler/stale_instance_manager.ex index 4306b9c..0792d29 100644 --- a/backend/lib/backend/crawler/stale_instance_manager.ex +++ b/backend/lib/backend/crawler/stale_instance_manager.ex @@ -74,10 +74,28 @@ defmodule Backend.Crawler.StaleInstanceManager do ) |> select([i], i.domain) |> Repo.all() + |> MapSet.new() - Logger.debug("Adding #{length(stale_domains)} stale domains to queue.") + # Don't add a domain that's already in the queue + domains_in_queue = + Honeydew.filter(:crawl_queue, fn job -> + is_pending_crawl_job = match?(%Honeydew.Job{completed_at: nil, task: {:run, [_]}}, job) - stale_domains + if is_pending_crawl_job do + %Honeydew.Job{completed_at: nil, task: {:run, [d]}} = job + MapSet.member?(stale_domains, d) + else + false + end + end) + |> Enum.map(fn %Honeydew.Job{task: {:run, [d]}} -> d end) + |> MapSet.new() + + domains_to_queue = MapSet.difference(stale_domains, domains_in_queue) + + Logger.debug("Adding #{MapSet.size(domains_to_queue)} stale domains to queue.") + + domains_to_queue |> Enum.each(fn domain -> add_to_queue(domain) end) end