perf and metrics tweaks
This commit is contained in:
parent
ea0fae2152
commit
0f01620413
|
@ -17,7 +17,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||||
### Changed
|
### Changed
|
||||||
|
|
||||||
- Instances are now crawled hourly instead of every 30 minutes.
|
- Instances are now crawled hourly instead of every 30 minutes.
|
||||||
- The colors for color coding have been made brighter (more visible against the dark background.
|
- There are now 100 concurrent crawl workers by default (increased from 50).
|
||||||
|
- The colors for color coding have been made brighter (more visible against the dark background).
|
||||||
|
|
||||||
### Deprecated
|
### Deprecated
|
||||||
|
|
||||||
|
@ -25,6 +26,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
|
|
||||||
|
- Fixed a process leak that could cause the server to OOM.
|
||||||
|
- Domains are no longer added to the queue twice.
|
||||||
|
|
||||||
### Security
|
### Security
|
||||||
|
|
||||||
- The server administrator can now be notified if there's a new potential spam domain.
|
- The server administrator can now be notified if there's a new potential spam domain.
|
||||||
|
|
|
@ -55,7 +55,7 @@ config :backend, :crawler,
|
||||||
status_count_limit: 5000,
|
status_count_limit: 5000,
|
||||||
personal_instance_threshold: 10,
|
personal_instance_threshold: 10,
|
||||||
crawl_interval_mins: 60,
|
crawl_interval_mins: 60,
|
||||||
crawl_workers: 50,
|
crawl_workers: 100,
|
||||||
blacklist: [
|
blacklist: [
|
||||||
"gab.best",
|
"gab.best",
|
||||||
"4chan.icu"
|
"4chan.icu"
|
||||||
|
|
|
@ -60,8 +60,8 @@ config :backend, :crawler,
|
||||||
status_age_limit_days: 28,
|
status_age_limit_days: 28,
|
||||||
status_count_limit: 100,
|
status_count_limit: 100,
|
||||||
personal_instance_threshold: 5,
|
personal_instance_threshold: 5,
|
||||||
crawl_interval_mins: 1,
|
crawl_interval_mins: 60,
|
||||||
crawl_workers: 10,
|
crawl_workers: 1,
|
||||||
blacklist: [
|
blacklist: [
|
||||||
"gab.best",
|
"gab.best",
|
||||||
"4chan.icu"
|
"4chan.icu"
|
||||||
|
|
|
@ -61,17 +61,23 @@ defmodule Backend.Application do
|
||||||
Appsignal.Probes.register(:crawler, fn ->
|
Appsignal.Probes.register(:crawler, fn ->
|
||||||
%{
|
%{
|
||||||
queue: %{
|
queue: %{
|
||||||
count: count,
|
|
||||||
in_progress: in_progress,
|
|
||||||
mnesia: mnesia
|
mnesia: mnesia
|
||||||
}
|
}
|
||||||
} = Honeydew.status(:crawl_queue)
|
} = Honeydew.status(:crawl_queue)
|
||||||
|
|
||||||
|
# How much memory the mnesia queue is using
|
||||||
memory = mnesia |> Map.get(:"honeydew_:crawl_queue") |> Keyword.get(:memory)
|
memory = mnesia |> Map.get(:"honeydew_:crawl_queue") |> Keyword.get(:memory)
|
||||||
|
|
||||||
Appsignal.set_gauge("queue_length", count)
|
|
||||||
Appsignal.set_gauge("in_progress", in_progress)
|
|
||||||
Appsignal.set_gauge("mnesia_memory", memory)
|
Appsignal.set_gauge("mnesia_memory", memory)
|
||||||
|
|
||||||
|
# How many jobs are pending in the queue
|
||||||
|
queue_length =
|
||||||
|
Honeydew.filter(
|
||||||
|
:crawl_queue,
|
||||||
|
&match?(%Honeydew.Job{completed_at: nil, task: {:run, _}}, &1)
|
||||||
|
)
|
||||||
|
|> Enum.count()
|
||||||
|
|
||||||
|
Appsignal.set_gauge("queue_length", queue_length)
|
||||||
end)
|
end)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue