Add a most_recent_crawl table so the latest crawl of an instance is a keyed lookup.

Review notes (text before the first "diff --git" line is ignored by patch tools):
- Line breaks and indentation were reconstructed from a whitespace-collapsed copy
  of this patch; check the context lines against the target tree before applying.
- Backend.MostRecentCrawl.changeset/2 now casts the foreign-key fields
  (:instance_domain, :crawl_id). The previous version passed the association
  names (:instance, :crawl) to cast, which Ecto rejects.
- Explanatory comments and @moduledoc blocks were added; hunk line counts were
  updated accordingly. Blob ids on the "index" lines are those of the original patch.

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 51c0d69..3a4d0d0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,7 +17,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Fixed
 
-- Added missing indices on crawls and crawl_interactions tables.
+- Added missing indices on `crawls` and `crawl_interactions` tables.
+- Added table to store most recent crawl. This speeds up the instance view by a lot!
 
 ### Security
 
diff --git a/backend/lib/backend/crawler/crawler.ex b/backend/lib/backend/crawler/crawler.ex
index 0621f60..f747a22 100644
--- a/backend/lib/backend/crawler/crawler.ex
+++ b/backend/lib/backend/crawler/crawler.ex
@@ -6,7 +6,7 @@ defmodule Backend.Crawler do
   alias __MODULE__
   alias Backend.Crawler.Crawlers.{GnuSocial, Mastodon, Misskey, Nodeinfo}
   alias Backend.Crawler.ApiCrawler
-  alias Backend.{Crawl, CrawlInteraction, Repo, Instance, InstancePeer}
+  alias Backend.{Crawl, CrawlInteraction, MostRecentCrawl, Repo, Instance, InstancePeer}
   import Ecto.Query
   import Backend.Util
   require Logger
@@ -167,10 +167,25 @@ defmodule Backend.Crawler do
         Repo.insert!(%Crawl{
           instance_domain: domain,
           interactions_seen:
-            result.interactions |> Map.values() |> Enum.reduce(0, fn count, acc -> count + acc end),
+            result.interactions
+            |> Map.values()
+            |> Enum.reduce(0, fn count, acc -> count + acc end),
           statuses_seen: result.statuses_seen
         })
 
+      # Point this instance's most_recent_crawl row at the crawl we just saved,
+      # inserting the row if there is none yet for this domain.
+      Repo.insert!(
+        %MostRecentCrawl{
+          instance_domain: domain,
+          crawl_id: curr_crawl.id,
+          inserted_at: now,
+          updated_at: now
+        },
+        on_conflict: {:replace, [:crawl_id, :updated_at]},
+        conflict_target: :instance_domain
+      )
+
       # We get a list of peers from two places:
       # * the official peers endpoint (which may be disabled)
       # * the interactions
diff --git a/backend/lib/backend/crawler/crawlers/gnu_social.ex b/backend/lib/backend/crawler/crawlers/gnu_social.ex
index d3a6193..f292d54 100644
--- a/backend/lib/backend/crawler/crawlers/gnu_social.ex
+++ b/backend/lib/backend/crawler/crawlers/gnu_social.ex
@@ -49,11 +49,25 @@ defmodule Backend.Crawler.Crawlers.GnuSocial do
 
     {interactions, statuses_seen} = get_interactions(domain, min_timestamp)
 
-    Map.merge(nodeinfo_result, %{
-      interactions: interactions,
-      statuses_seen: statuses_seen,
-      peers: []
-    })
+    # When there is no nodeinfo result to merge into, build the result map from scratch.
+    if nodeinfo_result != nil do
+      Map.merge(nodeinfo_result, %{
+        interactions: interactions,
+        statuses_seen: statuses_seen,
+        peers: []
+      })
+    else
+      %{
+        version: nil,
+        description: nil,
+        user_count: nil,
+        status_count: nil,
+        peers: [],
+        interactions: interactions,
+        statuses_seen: statuses_seen,
+        instance_type: :gnusocial
+      }
+    end
   end
 
   @spec get_interactions(
diff --git a/backend/lib/backend/most_recent_crawl.ex b/backend/lib/backend/most_recent_crawl.ex
new file mode 100644
index 0000000..1958ae3
--- /dev/null
+++ b/backend/lib/backend/most_recent_crawl.ex
@@ -0,0 +1,32 @@
+defmodule Backend.MostRecentCrawl do
+  @moduledoc """
+  Maps each instance domain to the id of its most recent crawl.
+
+  There is one row per instance (enforced by a unique index on
+  `instance_domain`), so the latest crawl can be looked up without sorting
+  the `crawls` table.
+  """
+
+  use Ecto.Schema
+  import Ecto.Changeset
+
+  schema "most_recent_crawl" do
+    belongs_to :instance, Backend.Instance,
+      references: :domain,
+      type: :string,
+      foreign_key: :instance_domain
+
+    belongs_to :crawl, Backend.Crawl
+
+    timestamps()
+  end
+
+  @doc false
+  def changeset(most_recent_crawl, attrs) do
+    # cast only accepts schema fields, not association names, so cast the
+    # foreign keys that back the two belongs_to associations.
+    most_recent_crawl
+    |> cast(attrs, [:instance_domain, :crawl_id])
+    |> validate_required([:instance_domain, :crawl_id])
+  end
+end
diff --git a/backend/lib/backend/util.ex b/backend/lib/backend/util.ex
index b98de1d..9d21ca0 100644
--- a/backend/lib/backend/util.ex
+++ b/backend/lib/backend/util.ex
@@ -1,7 +1,7 @@
 defmodule Backend.Util do
   import Ecto.Query
   require Logger
-  alias Backend.{Crawl, Repo}
+  alias Backend.{Crawl, MostRecentCrawl, Repo}
 
   @doc """
   Returns the given key from :backend, :crawler in the config.
@@ -78,11 +78,19 @@ defmodule Backend.Util do
 
   @spec get_last_crawl(String.t()) :: Crawl.t() | nil
   def get_last_crawl(domain) do
+    # Resolve the latest crawl id from most_recent_crawl rather than ordering
+    # all of this instance's crawls.
+    most_recent_crawl_subquery =
+      MostRecentCrawl
+      |> select([mrc], %{
+        most_recent_id: mrc.crawl_id
+      })
+      |> where([mrc], mrc.instance_domain == ^domain)
+
     Crawl
-    |> select([c], c)
-    |> where([c], c.instance_domain == ^domain)
-    |> order_by(desc: :id)
-    |> limit(1)
+    |> join(:inner, [c], mrc in subquery(most_recent_crawl_subquery),
+      on: c.id == mrc.most_recent_id
+    )
     |> Repo.one()
   end
 
diff --git a/backend/priv/repo/migrations/20190810125304_add_most_recent_crawl_table.exs b/backend/priv/repo/migrations/20190810125304_add_most_recent_crawl_table.exs
new file mode 100644
index 0000000..e43c7f6
--- /dev/null
+++ b/backend/priv/repo/migrations/20190810125304_add_most_recent_crawl_table.exs
@@ -0,0 +1,39 @@
+defmodule Backend.Repo.Migrations.AddMostRecentCrawlTable do
+  @moduledoc """
+  Creates `most_recent_crawl` (one row per instance) and backfills it with
+  the highest crawl id recorded for each instance.
+  """
+
+  use Ecto.Migration
+
+  def change do
+    create table(:most_recent_crawl) do
+      add :instance_domain, references(:instances, column: :domain, type: :string)
+      add :crawl_id, references(:crawls)
+
+      timestamps()
+    end
+
+    create unique_index(:most_recent_crawl, [:instance_domain])
+
+    # Run the queued DDL now so the table exists before the backfill below.
+    flush()
+
+    # Backfill on the way up; empty the table on rollback.
+    execute(
+      "
+      INSERT INTO most_recent_crawl (instance_domain, crawl_id, updated_at, inserted_at)
+      SELECT
+        c.instance_domain,
+        MAX(c.id) AS crawl_id,
+        (SELECT NOW()) AS updated_at,
+        (SELECT NOW()) AS inserted_at
+      FROM
+        crawls c
+      GROUP BY
+        c.instance_domain
+      ",
+      "DELETE FROM most_recent_crawl"
+    )
+  end
+end