diff --git a/CHANGELOG.md b/CHANGELOG.md index 845756c..3325ea3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Instances are now crawled hourly instead of every 30 minutes. -- The colors for color coding have been made brighter (more visible against the dark background. +- There are now 100 concurrent crawl workers by default (increased from 50). +- The colors for color coding have been made brighter (more visible against the dark background). ### Deprecated @@ -25,6 +26,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Fixed a process leak that could cause the server to OOM. +- Domains are no longer added to the queue twice. + ### Security - The server administrator can now be notified if there's a new potential spam domain. diff --git a/backend/config/config.exs b/backend/config/config.exs index a34caf3..ecef9a5 100644 --- a/backend/config/config.exs +++ b/backend/config/config.exs @@ -55,7 +55,7 @@ config :backend, :crawler, status_count_limit: 5000, personal_instance_threshold: 10, crawl_interval_mins: 60, - crawl_workers: 50, + crawl_workers: 100, blacklist: [ "gab.best", "4chan.icu" diff --git a/backend/config/dev.exs b/backend/config/dev.exs index a85c4c9..cc1c912 100644 --- a/backend/config/dev.exs +++ b/backend/config/dev.exs @@ -60,8 +60,8 @@ config :backend, :crawler, status_age_limit_days: 28, status_count_limit: 100, personal_instance_threshold: 5, - crawl_interval_mins: 1, - crawl_workers: 10, + crawl_interval_mins: 60, + crawl_workers: 1, blacklist: [ "gab.best", "4chan.icu" diff --git a/backend/lib/backend/application.ex b/backend/lib/backend/application.ex index 1e9c298..f600ed4 100644 --- a/backend/lib/backend/application.ex +++ b/backend/lib/backend/application.ex @@ -61,17 +61,23 @@ defmodule Backend.Application do Appsignal.Probes.register(:crawler, fn -> %{ queue: %{ - count: count, - 
in_progress: in_progress, mnesia: mnesia } } = Honeydew.status(:crawl_queue) + # How much memory the mnesia queue is using memory = mnesia |> Map.get(:"honeydew_:crawl_queue") |> Keyword.get(:memory) - - Appsignal.set_gauge("queue_length", count) - Appsignal.set_gauge("in_progress", in_progress) Appsignal.set_gauge("mnesia_memory", memory) + + # How many jobs are pending in the queue + queue_length = + Honeydew.filter( + :crawl_queue, + &match?(%Honeydew.Job{completed_at: nil, task: {:run, _}}, &1) + ) + |> Enum.count() + + Appsignal.set_gauge("queue_length", queue_length) end) end end