don't add domains to the queue twice
This commit is contained in:
parent
74fc30e8a5
commit
ea0fae2152
|
@ -49,6 +49,8 @@ defmodule Backend.Crawler do
|
|||
# go!
|
||||
|> crawl()
|
||||
|> save()
|
||||
|
||||
Appsignal.increment_counter("crawler.total", 1)
|
||||
end
|
||||
|
||||
# Adds a new ApiCrawler that run/1 will check.
|
||||
|
@ -207,6 +209,8 @@ defmodule Backend.Crawler do
|
|||
|
||||
CrawlInteraction
|
||||
|> Repo.insert_all(interactions)
|
||||
|
||||
Appsignal.increment_counter("crawler.success", 1)
|
||||
end
|
||||
|
||||
defp save(%{domain: domain, error: error, allows_crawling?: allows_crawling}) do
|
||||
|
@ -232,6 +236,8 @@ defmodule Backend.Crawler do
|
|||
error: error
|
||||
})
|
||||
end)
|
||||
|
||||
Appsignal.increment_counter("crawler.failure", 1)
|
||||
end
|
||||
|
||||
defp get_base_domain(domain) do
|
||||
|
|
|
@ -74,10 +74,28 @@ defmodule Backend.Crawler.StaleInstanceManager do
|
|||
)
|
||||
|> select([i], i.domain)
|
||||
|> Repo.all()
|
||||
|> MapSet.new()
|
||||
|
||||
Logger.debug("Adding #{length(stale_domains)} stale domains to queue.")
|
||||
# Don't add a domain that's already in the queue
|
||||
domains_in_queue =
|
||||
Honeydew.filter(:crawl_queue, fn job ->
|
||||
is_pending_crawl_job = match?(%Honeydew.Job{completed_at: nil, task: {:run, [_]}}, job)
|
||||
|
||||
stale_domains
|
||||
if is_pending_crawl_job do
|
||||
%Honeydew.Job{completed_at: nil, task: {:run, [d]}} = job
|
||||
MapSet.member?(stale_domains, d)
|
||||
else
|
||||
false
|
||||
end
|
||||
end)
|
||||
|> Enum.map(fn %Honeydew.Job{task: {:run, [d]}} -> d end)
|
||||
|> MapSet.new()
|
||||
|
||||
domains_to_queue = MapSet.difference(stale_domains, domains_in_queue)
|
||||
|
||||
Logger.debug("Adding #{MapSet.size(domains_to_queue)} stale domains to queue.")
|
||||
|
||||
domains_to_queue
|
||||
|> Enum.each(fn domain -> add_to_queue(domain) end)
|
||||
end
|
||||
|
||||
|
|
Loading…
Reference in a new issue