diff --git a/.vscode/settings.json b/.vscode/settings.json
index 193d895..b40ffd5 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,3 +1,4 @@
{
- "elixirLS.projectDir": "backend/"
+ "elixirLS.projectDir": "backend/",
+ "elixirLS.fetchDeps": false
}
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index fccd59a..845756c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,10 +10,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- Instance administrators can now log in to opt in or out of crawling.
+- Added ElasticSearch full-text search over instance domains and descriptions.
+- Search results are now highlighted on the graph.
+- When you hover over a search result, it is now highlighted on the graph.
### Changed
- Instances are now crawled hourly instead of every 30 minutes.
+- The colors for color coding have been made brighter (more visible against the dark background).
### Deprecated
diff --git a/backend/Procfile b/backend/Procfile
index bf37e9b..eb5141e 100644
--- a/backend/Procfile
+++ b/backend/Procfile
@@ -1,2 +1,2 @@
web: /app/bin/backend start
-release: /app/bin/backend eval "Backend.Release.migrate"
\ No newline at end of file
+release: /app/bin/backend eval "Backend.Release.run_all"
\ No newline at end of file
diff --git a/backend/config/config.exs b/backend/config/config.exs
index 2646339..db2fbb8 100644
--- a/backend/config/config.exs
+++ b/backend/config/config.exs
@@ -19,6 +19,20 @@ config :backend, BackendWeb.Endpoint,
config :backend, Backend.Repo, queue_target: 5000
+config :backend, Backend.Elasticsearch.Cluster,
+ url: "http://localhost:9200",
+ api: Elasticsearch.API.HTTP,
+ json_library: Jason,
+ indexes: %{
+ instances: %{
+ settings: "priv/elasticsearch/instances.json",
+ store: Backend.Elasticsearch.Store,
+ sources: [Backend.Instance],
+ bulk_page_size: 1000,
+ bulk_wait_interval: 1_000
+ }
+ }
+
# Configures Elixir's Logger
config :logger, :console,
format: "$time $metadata[$level] $message\n",
diff --git a/backend/config/releases.exs b/backend/config/releases.exs
index d5f50d1..cd7736a 100644
--- a/backend/config/releases.exs
+++ b/backend/config/releases.exs
@@ -14,7 +14,8 @@ config :backend, Backend.Repo,
pool_size: String.to_integer(System.get_env("POOL_SIZE") || "10"),
ssl: ssl
-# show_sensitive_data_on_connection_error: true
+config :backend, Backend.Elasticsearch.Cluster,
+ url: System.get_env("ELASTICSEARCH_URL") || "http://localhost:9200"
port = String.to_integer(System.get_env("PORT") || "4000")
diff --git a/backend/lib/backend/api.ex b/backend/lib/backend/api.ex
index 07381d3..4dcc230 100644
--- a/backend/lib/backend/api.ex
+++ b/backend/lib/backend/api.ex
@@ -101,15 +101,67 @@ defmodule Backend.Api do
end
end
- def search_instances(query, cursor_after \\ nil) do
- ilike_query = "%#{query}%"
+ def search_instances(query, from \\ 0) do
+ page_size = 50
- %{entries: instances, metadata: metadata} =
- Instance
- |> where([i], ilike(i.domain, ^ilike_query) and not i.opt_out)
- |> order_by(asc: :id)
- |> Repo.paginate(after: cursor_after, cursor_fields: [:id], limit: 50)
+ search_response =
+ Elasticsearch.post(Backend.Elasticsearch.Cluster, "/instances/_search", %{
+ "sort" => "_score",
+ "from" => from,
+ "size" => page_size,
+ "query" => %{
+ "bool" => %{
+ "should" => [
+ %{
+ "multi_match" => %{
+ "query" => query,
+ "fields" => [
+ "description.english"
+ ]
+ }
+ },
+ %{
+ "wildcard" => %{
+ "domain.keyword" => %{
+ "value" => query,
+ "boost" => 100
+ }
+ }
+ },
+ %{
+ "wildcard" => %{
+ "domain.keyword" => %{
+ "value" => "*#{query}*",
+ "boost" => 1
+ }
+ }
+ },
+ %{
+ "match" => %{
+ "domain.ngram^0.5" => query
+ }
+ }
+ ]
+ }
+ }
+ })
- %{instances: instances, next: metadata.after}
+ with {:ok, result} <- search_response do
+ hits =
+ get_in(result, ["hits", "hits"])
+ |> Enum.map(fn h -> h |> Map.get("_source") |> convert_keys_to_atoms() end)
+
+ next =
+ if length(hits) < page_size do
+ nil
+ else
+ from + page_size
+ end
+
+ %{
+ hits: hits,
+ next: next
+ }
+ end
end
end
diff --git a/backend/lib/backend/application.ex b/backend/lib/backend/application.ex
index 63b3c30..9a42828 100644
--- a/backend/lib/backend/application.ex
+++ b/backend/lib/backend/application.ex
@@ -21,7 +21,8 @@ defmodule Backend.Application do
Honeydew.start_queue(:crawl_queue, failure_mode: Honeydew.FailureMode.Abandon)
Honeydew.start_workers(:crawl_queue, Backend.Crawler, num: crawl_worker_count)
end},
- Backend.Scheduler
+ Backend.Scheduler,
+ Backend.Elasticsearch.Cluster
]
children =
diff --git a/backend/lib/backend/crawler/crawlers/mastodon.ex b/backend/lib/backend/crawler/crawlers/mastodon.ex
index 3c740aa..5e858f5 100644
--- a/backend/lib/backend/crawler/crawlers/mastodon.ex
+++ b/backend/lib/backend/crawler/crawlers/mastodon.ex
@@ -32,7 +32,6 @@ defmodule Backend.Crawler.Crawlers.Mastodon do
end
@impl ApiCrawler
- # sobelow_skip ["DOS.StringToAtom"]
def crawl(domain) do
instance = Jason.decode!(get!("https://#{domain}/api/v1/instance").body)
@@ -48,7 +47,7 @@ defmodule Backend.Crawler.Crawlers.Mastodon do
else
Map.merge(
Map.take(instance["stats"], ["user_count"])
- |> Map.new(fn {k, v} -> {String.to_atom(k), v} end),
+ |> convert_keys_to_atoms(),
%{
peers: [],
interactions: %{},
@@ -63,7 +62,6 @@ defmodule Backend.Crawler.Crawlers.Mastodon do
end
@spec crawl_large_instance(String.t(), any()) :: ApiCrawler.t()
- # sobelow_skip ["DOS.StringToAtom"]
defp crawl_large_instance(domain, instance) do
# servers may not publish peers
peers =
@@ -94,7 +92,7 @@ defmodule Backend.Crawler.Crawlers.Mastodon do
Map.take(instance, ["version", "description"]),
Map.take(instance["stats"], ["user_count", "status_count"])
)
- |> Map.new(fn {k, v} -> {String.to_atom(k), v} end),
+ |> convert_keys_to_atoms(),
%{
peers: peers,
interactions: interactions,
diff --git a/backend/lib/backend/elasticsearch/cluster.ex b/backend/lib/backend/elasticsearch/cluster.ex
new file mode 100644
index 0000000..3f130c9
--- /dev/null
+++ b/backend/lib/backend/elasticsearch/cluster.ex
@@ -0,0 +1,3 @@
+defmodule Backend.Elasticsearch.Cluster do
+ use Elasticsearch.Cluster, otp_app: :backend
+end
diff --git a/backend/lib/backend/elasticsearch/store.ex b/backend/lib/backend/elasticsearch/store.ex
new file mode 100644
index 0000000..a3c9b11
--- /dev/null
+++ b/backend/lib/backend/elasticsearch/store.ex
@@ -0,0 +1,16 @@
+defmodule Backend.Elasticsearch.Store do
+ @behaviour Elasticsearch.Store
+
+ alias Backend.Repo
+
+ @impl true
+ def stream(schema) do
+ Repo.stream(schema)
+ end
+
+ @impl true
+ def transaction(fun) do
+ {:ok, result} = Repo.transaction(fun, timeout: :infinity)
+ result
+ end
+end
diff --git a/backend/lib/backend/instance.ex b/backend/lib/backend/instance.ex
index a2d618f..e0d6891 100644
--- a/backend/lib/backend/instance.ex
+++ b/backend/lib/backend/instance.ex
@@ -46,4 +46,19 @@ defmodule Backend.Instance do
|> validate_required([:domain])
|> put_assoc(:peers, attrs.peers)
end
+
+ defimpl Elasticsearch.Document, for: Backend.Instance do
+ def id(instance), do: instance.id
+ def routing(_), do: false
+
+ def encode(instance) do
+      # Make sure this corresponds with priv/elasticsearch/instances.json
+ %{
+ domain: instance.domain,
+ description: instance.description,
+ type: instance.type,
+ user_count: instance.user_count
+ }
+ end
+ end
end
diff --git a/backend/lib/backend/release.ex b/backend/lib/backend/release.ex
index 3bb1e62..f44a63e 100644
--- a/backend/lib/backend/release.ex
+++ b/backend/lib/backend/release.ex
@@ -1,12 +1,24 @@
defmodule Backend.Release do
@app :backend
+ alias Elasticsearch.Index
+ alias Backend.Elasticsearch.Cluster
+
+ def run_all do
+ migrate()
+ index()
+ end
+
def migrate do
for repo <- repos() do
{:ok, _, _} = Ecto.Migrator.with_repo(repo, &Ecto.Migrator.run(&1, :up, all: true))
end
end
+ def index do
+ Index.hot_swap(Cluster, "instances")
+ end
+
def rollback(repo, version) do
{:ok, _, _} = Ecto.Migrator.with_repo(repo, &Ecto.Migrator.run(&1, :down, to: version))
end
diff --git a/backend/lib/backend/util.ex b/backend/lib/backend/util.ex
index 4081527..652d13e 100644
--- a/backend/lib/backend/util.ex
+++ b/backend/lib/backend/util.ex
@@ -157,4 +157,14 @@ defmodule Backend.Util do
"#{String.downcase(username)}@#{clean_domain(domain)}"
end
end
+
+ @doc """
+ Converts a map with string keys to a map with atom keys.
+ Be very careful with this -- only use it on maps where you know the keys! Never run it if the keys can be supplied
+ by the user.
+ """
+ # sobelow_skip ["DOS.StringToAtom"]
+ def convert_keys_to_atoms(map) do
+ map |> Map.new(fn {k, v} -> {String.to_atom(k), v} end)
+ end
end
diff --git a/backend/lib/backend_web/controllers/search_controller.ex b/backend/lib/backend_web/controllers/search_controller.ex
index e9bbe96..82a9d3a 100644
--- a/backend/lib/backend_web/controllers/search_controller.ex
+++ b/backend/lib/backend_web/controllers/search_controller.ex
@@ -6,8 +6,8 @@ defmodule BackendWeb.SearchController do
def index(conn, params) do
query = Map.get(params, "query")
- cursor_after = Map.get(params, "after", nil)
- %{instances: instances, next: next} = Api.search_instances(query, cursor_after)
- render(conn, "index.json", instances: instances, next: next)
+ from = Map.get(params, "after", "0") |> String.to_integer()
+ %{hits: hits, next: next} = Api.search_instances(query, from)
+ render(conn, "index.json", hits: hits, next: next)
end
end
diff --git a/backend/lib/backend_web/views/admin_view.ex b/backend/lib/backend_web/views/admin_view.ex
index cd72cb6..319dc2f 100644
--- a/backend/lib/backend_web/views/admin_view.ex
+++ b/backend/lib/backend_web/views/admin_view.ex
@@ -1,11 +1,8 @@
defmodule BackendWeb.AdminView do
use BackendWeb, :view
- import Backend.Util
require Logger
def render("show.json", %{instance: instance}) do
- Logger.info(inspect(instance))
-
%{
domain: domain,
opt_in: opt_in,
diff --git a/backend/lib/backend_web/views/instance_view.ex b/backend/lib/backend_web/views/instance_view.ex
index 2449c86..8391062 100644
--- a/backend/lib/backend_web/views/instance_view.ex
+++ b/backend/lib/backend_web/views/instance_view.ex
@@ -20,7 +20,7 @@ defmodule BackendWeb.InstanceView do
end
cond do
- instance.user_count < user_threshold ->
+ instance.user_count < user_threshold and not instance.opt_in ->
%{
name: instance.domain,
status: "personal instance"
diff --git a/backend/lib/backend_web/views/search_view.ex b/backend/lib/backend_web/views/search_view.ex
index 87d23c4..d570cda 100644
--- a/backend/lib/backend_web/views/search_view.ex
+++ b/backend/lib/backend_web/views/search_view.ex
@@ -3,28 +3,28 @@ defmodule BackendWeb.SearchView do
alias BackendWeb.SearchView
import Backend.Util
- def render("index.json", %{instances: instances, next: next}) do
+ def render("index.json", %{hits: hits, next: next}) do
%{
- results: render_many(instances, SearchView, "instance.json", as: :instance),
+ results: render_many(hits, SearchView, "instance.json", as: :hit),
next: next
}
end
- def render("instance.json", %{instance: instance}) do
+ def render("instance.json", %{hit: hit}) do
threshold = get_config(:personal_instance_threshold)
description =
- if instance.user_count != nil and instance.user_count < threshold do
+ if hit.user_count != nil and hit.user_count < threshold do
nil
else
- instance.description
+ hit.description
end
%{
- name: instance.domain,
+ name: hit.domain,
description: description,
- userCount: instance.user_count,
- type: instance.type
+ userCount: hit.user_count,
+ type: hit.type
}
end
end
diff --git a/backend/mix.exs b/backend/mix.exs
index 571063a..af593e0 100644
--- a/backend/mix.exs
+++ b/backend/mix.exs
@@ -52,7 +52,8 @@ defmodule Backend.MixProject do
{:public_suffix, "~> 0.6.0"},
{:idna, "~> 5.1.2", override: true},
{:swoosh, "~> 0.23.3"},
- {:ex_twilio, "~> 0.7.0"}
+ {:ex_twilio, "~> 0.7.0"},
+ {:elasticsearch, "~> 1.0"}
]
end
diff --git a/backend/mix.lock b/backend/mix.lock
index c9005b4..0c17276 100644
--- a/backend/mix.lock
+++ b/backend/mix.lock
@@ -13,6 +13,7 @@
"distillery": {:hex, :distillery, "2.1.1", "f9332afc2eec8a1a2b86f22429e068ef35f84a93ea1718265e740d90dd367814", [:mix], [{:artificery, "~> 0.2", [hex: :artificery, repo: "hexpm", optional: false]}], "hexpm"},
"ecto": {:hex, :ecto, "3.1.7", "fa21d06ef56cdc2fdaa62574e8c3ba34a2751d44ea34c30bc65f0728421043e5", [:mix], [{:decimal, "~> 1.6", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm"},
"ecto_sql": {:hex, :ecto_sql, "3.1.6", "1e80e30d16138a729c717f73dcb938590bcdb3a4502f3012414d0cbb261045d8", [:mix], [{:db_connection, "~> 2.0", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.1.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:mariaex, "~> 0.9.1", [hex: :mariaex, repo: "hexpm", optional: true]}, {:myxql, "~> 0.2.0", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.14.0 or ~> 0.15.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm"},
+ "elasticsearch": {:hex, :elasticsearch, "1.0.0", "626d3fb8e7554d9c93eb18817ae2a3d22c2a4191cc903c4644b1334469b15374", [:mix], [{:httpoison, ">= 0.0.0", [hex: :httpoison, repo: "hexpm", optional: false]}, {:poison, ">= 0.0.0", [hex: :poison, repo: "hexpm", optional: true]}, {:sigaws, "~> 0.7", [hex: :sigaws, repo: "hexpm", optional: true]}, {:vex, "~> 0.6.0", [hex: :vex, repo: "hexpm", optional: false]}], "hexpm"},
"ex_twilio": {:hex, :ex_twilio, "0.7.0", "d7ce624ef4661311ae28c3e3aa060ecb66a9f4843184d7400c29072f7d3f5a4a", [:mix], [{:httpoison, ">= 0.9.0", [hex: :httpoison, repo: "hexpm", optional: false]}, {:inflex, "~> 1.0", [hex: :inflex, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:joken, "~> 2.0", [hex: :joken, repo: "hexpm", optional: false]}, {:poison, "~> 3.0", [hex: :poison, repo: "hexpm", optional: false]}], "hexpm"},
"gen_stage": {:hex, :gen_stage, "0.14.2", "6a2a578a510c5bfca8a45e6b27552f613b41cf584b58210f017088d3d17d0b14", [:mix], [], "hexpm"},
"gen_state_machine": {:hex, :gen_state_machine, "2.0.5", "9ac15ec6e66acac994cc442dcc2c6f9796cf380ec4b08267223014be1c728a95", [:mix], [], "hexpm"},
@@ -51,4 +52,5 @@
"timex": {:hex, :timex, "3.6.1", "efdf56d0e67a6b956cc57774353b0329c8ab7726766a11547e529357ffdc1d56", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:gettext, "~> 0.10", [hex: :gettext, repo: "hexpm", optional: false]}, {:tzdata, "~> 0.1.8 or ~> 0.5 or ~> 1.0.0", [hex: :tzdata, repo: "hexpm", optional: false]}], "hexpm"},
"tzdata": {:hex, :tzdata, "1.0.1", "f6027a331af7d837471248e62733c6ebee86a72e57c613aa071ebb1f750fc71a", [:mix], [{:hackney, "~> 1.0", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm"},
"unicode_util_compat": {:hex, :unicode_util_compat, "0.3.1", "a1f612a7b512638634a603c8f401892afbf99b8ce93a45041f8aaca99cadb85e", [:rebar3], [], "hexpm"},
+ "vex": {:hex, :vex, "0.6.0", "4e79b396b2ec18cd909eed0450b19108d9631842598d46552dc05031100b7a56", [:mix], [], "hexpm"},
}
diff --git a/backend/priv/elasticsearch/instances.json b/backend/priv/elasticsearch/instances.json
new file mode 100644
index 0000000..cd985cb
--- /dev/null
+++ b/backend/priv/elasticsearch/instances.json
@@ -0,0 +1,53 @@
+{
+ "settings": {
+ "number_of_shards": 1,
+ "number_of_replicas": 0,
+ "analysis": {
+ "analyzer": {
+ "ngramAnalyzer": {
+ "tokenizer": "ngramTokenizer"
+ }
+ },
+ "tokenizer": {
+ "ngramTokenizer": {
+ "type": "ngram",
+ "min_gram": 5,
+ "max_gram": 5
+ }
+ }
+ }
+ },
+ "mappings": {
+ "_doc": {
+ "properties": {
+ "domain": {
+ "type": "text",
+ "fields": {
+ "ngram": {
+ "type": "text",
+ "analyzer": "ngramAnalyzer"
+ },
+ "keyword": {
+ "type": "keyword"
+ }
+ }
+ },
+ "description": {
+ "type": "text",
+ "fields": {
+ "english": {
+ "type": "text",
+ "analyzer": "english"
+ }
+ }
+ },
+ "type": {
+ "type": "keyword"
+ },
+ "user_count": {
+ "type": "integer"
+ }
+ }
+ }
+ }
+}
diff --git a/docker-compose.yml b/docker-compose.yml
index 7aec131..fb39f81 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -10,6 +10,22 @@ services:
- pgdata:/var/lib/postgresql/data
networks:
- database_network
+ elasticsearch:
+ image: elasticsearch:6.8.1
+ ports:
+ - "9200:9200"
+ volumes:
+ - esdata:/usr/share/elasticsearch/data
+ networks:
+ - phoenix_network
+ - es_network
+ # Kibana is just for development, really
+ kibana:
+ image: kibana:6.8.1
+ networks:
+ - es_network
+ ports:
+ - "5601:5601"
# This is for running the occasional graph layout task. It's in docker-compose.yml so that it's built at the same time
# as everything else, but it should be run regularly with a cron job or similar.
gephi:
@@ -26,6 +42,7 @@ services:
build: ./backend
networks:
- database_network
+ - phoenix_network
depends_on:
- db
ports:
@@ -37,7 +54,12 @@ services:
- BACKEND_HOSTNAME
volumes:
pgdata:
+ esdata:
gradle-cache:
networks:
database_network:
driver: bridge
+ phoenix_network:
+ driver: bridge
+ es_network:
+ driver: bridge
diff --git a/frontend/src/components/atoms/GraphKey.tsx b/frontend/src/components/atoms/GraphKey.tsx
index 4ce0a54..83f6908 100644
--- a/frontend/src/components/atoms/GraphKey.tsx
+++ b/frontend/src/components/atoms/GraphKey.tsx
@@ -47,7 +47,7 @@ const GraphKey: React.FCKey
{current.values.map(v => (
-