Only crawl statuses posted since the last successful crawl
This commit is contained in:
parent
0b7df993b9
commit
d94e700e6a
|
@ -74,7 +74,8 @@ defmodule Backend.Crawler.Crawler do
|
|||
description: result.description,
|
||||
version: result.version,
|
||||
user_count: result.user_count,
|
||||
status_count: result.status_count
|
||||
status_count: result.status_count,
|
||||
last_crawl_timestamp: now
|
||||
},
|
||||
on_conflict: [
|
||||
set: [
|
||||
|
@ -82,6 +83,7 @@ defmodule Backend.Crawler.Crawler do
|
|||
version: result.version,
|
||||
user_count: result.user_count,
|
||||
status_count: result.status_count,
|
||||
last_crawl_timestamp: now,
|
||||
updated_at: now
|
||||
]
|
||||
],
|
||||
|
|
|
@ -2,7 +2,7 @@ defmodule Backend.Crawler.Crawlers.Mastodon do
|
|||
require Logger
|
||||
import Backend.Crawler.Util
|
||||
alias Backend.Crawler.ApiCrawler
|
||||
alias Backend.Interaction
|
||||
alias Backend.Instance
|
||||
alias Backend.Repo
|
||||
import Ecto.Query
|
||||
|
||||
|
@ -148,14 +148,12 @@ defmodule Backend.Crawler.Crawlers.Mastodon do
|
|||
# most recent status we have.
|
||||
min_timestamp =
|
||||
if statuses_seen == 0 do
|
||||
Interaction
|
||||
|> where(source_domain: ^domain)
|
||||
|> select([:timestamp])
|
||||
|> order_by(desc: :timestamp)
|
||||
|> limit(1)
|
||||
|> Repo.all()
|
||||
|> List.first()
|
||||
|> (fn i -> i.timestamp end).()
|
||||
Instance
|
||||
|> select([:last_crawl_timestamp])
|
||||
|> Repo.get_by(domain: domain)
|
||||
|> (fn result ->
|
||||
if result == nil, do: nil, else: Map.get(result, :last_crawl_timestamp)
|
||||
end).()
|
||||
else
|
||||
min_timestamp
|
||||
end
|
||||
|
|
|
@ -9,6 +9,10 @@ defmodule Backend.Instance do
|
|||
field :user_count, :integer
|
||||
field :version, :string
|
||||
|
||||
# This is distinct from `updated_at`: it records when the last *successful* crawl took place. `updated_at` also
|
||||
# gets updated if the crawl fails.
|
||||
field :last_crawl_timestamp, :naive_datetime
|
||||
|
||||
many_to_many :peers, Backend.Instance,
|
||||
join_through: Backend.InstancePeer,
|
||||
join_keys: [source: :domain, target: :domain]
|
||||
|
|
|
@ -9,6 +9,8 @@ defmodule Backend.Repo.Migrations.CreateInstances do
|
|||
add :description, :text
|
||||
add :version, :string
|
||||
|
||||
add :last_crawl_timestamp, :naive_datetime
|
||||
|
||||
timestamps()
|
||||
end
|
||||
|
||||
|
|
Loading…
Reference in New Issue