prune old interactions regularly

6 changed files with 65 additions and 4 deletions

@ -29,14 +29,20 @@ config :phoenix, :json_library, Jason
config :backend, :crawler,
status_age_limit_days: 28,
status_count_limit: 100,
personal_instance_threshold: 1,
status_count_limit: 5000,
personal_instance_threshold: 10,
crawl_interval_mins: 30,
crawl_workers: 50,
blacklist: [
config :backend, Backend.Crawler.InteractionPruner,
jobs: [
# At midnight every day
{"@daily", {Backend.Crawler.InteractionPruner, :prune, [1, "month"]}}
# Import environment specific config. This must remain at the bottom
# of this file so it overrides the configuration defined above.
import_config "#{Mix.env()}.exs"

@ -54,3 +54,19 @@ config :backend, Backend.Repo,
database: "backend_dev",
hostname: "localhost",
pool_size: 10
config :backend, :crawler,
status_age_limit_days: 28,
status_count_limit: 100,
personal_instance_threshold: 1,
crawl_interval_mins: 30,
crawl_workers: 10,
blacklist: [
config :backend, Backend.Crawler.InteractionPruner,
jobs: [
# Every minute
{"* * * * *", {Backend.Crawler.InteractionPruner, :prune, [30, "minute"]}}

@ -22,7 +22,8 @@ defmodule Backend.Application do
Honeydew.start_queue(:crawl_queue, failure_mode: Honeydew.FailureMode.Abandon)
Honeydew.start_workers(:crawl_queue, Backend.Crawler, num: crawl_worker_count)
@ -0,0 +1,31 @@
defmodule Backend.Crawler.InteractionPruner do
@moduledoc """
This module runs a recurring task that deletes old interaction metadata. The backend only uses recent data
to generate its graph; we aren't interested in storing data long-term.
alias Backend.{Interaction, Repo}
import Ecto.Query
require Logger
use Quantum.Scheduler, otp_app: :backend
@spec prune(integer, String.t()) :: any
@doc """
Prunes all interactions that are more than `amount` `unit`s old.
For example, to delete interactions older than one month, call `prune(1, "month")`.
`unit` must be singular, e.g. "second", "minute", "hour", "month", "year", etc.
def prune(amount, unit) do
{deleted_num, _} =
|> where(
i.timestamp <
datetime_add(^NaiveDateTime.utc_now(), -1 * ^amount, ^unit)
|> Repo.delete_all()"Pruned #{deleted_num} old interactions.")

@ -43,7 +43,8 @@ defmodule Backend.MixProject do
{:plug_cowboy, "~> 2.0"},
{:httpoison, "~> 1.5"},
{:timex, "~> 3.5"},
{:honeydew, "~> 1.4.3"}
{:honeydew, "~> 1.4.3"},
{:quantum, "~> 2.3"}

