From 09708e74ab8cb298e0000dbfbc51dc61782f0237 Mon Sep 17 00:00:00 2001
From: Tao Bror Bojlén
Date: Fri, 2 Aug 2019 19:03:21 +0300
Subject: [PATCH] update ES index on crawl

---
 README.md                              |  6 ++++++
 backend/config/config.exs              |  2 +-
 backend/lib/backend/crawler/crawler.ex | 22 +++++++++++++---------
 3 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 08521af..6dcccec 100644
--- a/README.md
+++ b/README.md
@@ -34,6 +34,9 @@ Though dockerized, backend development is easiest if you have the following inst
 - `docker-compose build`
 - `docker-compose up -d phoenix`
   - if you don't specify `phoenix`, it'll also start `gephi` which should only be run as a regular one-off job
+- Create the Elasticsearch index:
+  - `iex -S mix app.start`
+  - `Elasticsearch.Index.hot_swap(Backend.Elasticsearch.Cluster, :instances)`
 
 ### Frontend
 
@@ -96,6 +99,9 @@ SHELL=/bin/bash
 
 10. (Optional) Set up caching with something like [dokku-nginx-cache](https://github.com/Aluxian/dokku-nginx-cache)
 
+Before starting the app, make sure the Elasticsearch index exists -- otherwise Elasticsearch will auto-create a concrete
+index named `instances`, a name that should belong to the alias, and future reindexes won't be able to hot swap.
+
 ## Acknowledgements
 
 [![NLnet logo](/nlnet-logo.png)](https://nlnet.nl/project/fediverse_space/)
diff --git a/backend/config/config.exs b/backend/config/config.exs
index e95c6df..6b19f97 100644
--- a/backend/config/config.exs
+++ b/backend/config/config.exs
@@ -23,7 +23,7 @@ config :backend, Backend.Repo,
   queue_target: 5000
 
 instances_config_path =
   if System.get_env("MIX_ENV") == "prod",
     do: "lib/backend-2.2.0/priv/elasticsearch/instances.json",
-    else: "instances.json"
+    else: "priv/elasticsearch/instances.json"
 
 config :backend, Backend.Elasticsearch.Cluster,
   url: "http://localhost:9200",
diff --git a/backend/lib/backend/crawler/crawler.ex b/backend/lib/backend/crawler/crawler.ex
index d471707..d62b779 100644
--- a/backend/lib/backend/crawler/crawler.ex
+++ b/backend/lib/backend/crawler/crawler.ex
@@ -111,22 +111,26 @@ defmodule Backend.Crawler do
     end
 
     ## Update the instance we crawled ##
+    instance = %Instance{
+      domain: domain,
+      description: result.description,
+      version: result.version,
+      user_count: result.user_count,
+      status_count: result.status_count,
+      type: instance_type,
+      base_domain: get_base_domain(domain)
+    }
+
     Repo.insert!(
-      %Instance{
-        domain: domain,
-        description: result.description,
-        version: result.version,
-        user_count: result.user_count,
-        status_count: result.status_count,
-        type: instance_type,
-        base_domain: get_base_domain(domain)
-      },
+      instance,
       on_conflict:
         {:replace,
          [:description, :version, :user_count, :status_count, :type, :base_domain, :updated_at]},
       conflict_target: :domain
     )
 
+    Elasticsearch.put_document(Backend.Elasticsearch.Cluster, instance, "instances/_doc")
+
     # Save details of a new crawl
     curr_crawl =
       Repo.insert!(%Crawl{
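
A note on the config change: `instances_config_path` is the settings file consumed by the `elasticsearch` Hex package's cluster configuration. Only the `url:` line is visible in the hunk above; the sketch below shows the shape that configuration usually takes per the library's README, with the store, sources, and bulk settings as illustrative assumptions rather than the repo's actual values.

```elixir
# Hedged sketch of the cluster config that consumes instances_config_path,
# in backend/config/config.exs (shape per the elasticsearch-elixir README;
# the values below are assumptions, not taken from the repo).
config :backend, Backend.Elasticsearch.Cluster,
  url: "http://localhost:9200",
  api: Elasticsearch.API.HTTP,
  json_library: Jason,
  indexes: %{
    instances: %{
      # JSON file holding the index settings/mappings. The :else branch fixed
      # above must point at a path that exists in dev, hence priv/elasticsearch/.
      settings: instances_config_path,
      # Module implementing the Elasticsearch.Store behaviour; hot_swap streams
      # documents from it when (re)building the index.
      store: Backend.Elasticsearch.Store,
      sources: [Backend.Instance],
      bulk_page_size: 1000,
      bulk_wait_interval: 0
    }
  }
```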
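The new `Elasticsearch.put_document(Backend.Elasticsearch.Cluster, instance, "instances/_doc")` call in the crawler only works if `Backend.Instance` implements the library's `Elasticsearch.Document` protocol, which supplies each document's ID and JSON body. A minimal sketch, assuming the indexed fields mirror the struct fields set in the crawler; the repo's real implementation may index more or fewer fields:

```elixir
# Hedged sketch: the Elasticsearch.Document implementation that
# Elasticsearch.put_document/3 relies on. Field selection is illustrative.
defimpl Elasticsearch.Document, for: Backend.Instance do
  # Keying documents by domain makes a re-crawl overwrite the previous
  # document for the same instance instead of creating a duplicate.
  def id(instance), do: instance.domain

  # No parent/child join fields, so no custom routing.
  def routing(_instance), do: false

  # The returned map becomes the JSON body stored in the index.
  def encode(instance) do
    %{
      domain: instance.domain,
      description: instance.description,
      user_count: instance.user_count,
      type: instance.type
    }
  end
end
```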
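Finally, on the README's deployment warning: `Elasticsearch.Index.hot_swap/2` builds a fresh timestamped index, loads it from the configured store, and then repoints the `instances` alias, which fails if a concrete index already owns that name. One way to guard a deployment, sketched under the assumption that `Elasticsearch.get/2` returns an error tuple when the alias is missing (the module name is hypothetical):

```elixir
# Hedged sketch of a one-off task enforcing the README's deployment note:
# create the first index via hot_swap/2 only if the `instances` alias is absent.
defmodule Backend.Release.EnsureIndex do
  alias Backend.Elasticsearch.Cluster

  def run do
    # GET /_alias/instances succeeds only when `instances` exists as an alias.
    case Elasticsearch.get(Cluster, "/_alias/instances") do
      {:ok, _owning_indexes} ->
        :ok

      {:error, _not_found} ->
        # Builds instances-<timestamp>, streams documents into it from the
        # configured store, then points the `instances` alias at it.
        Elasticsearch.Index.hot_swap(Cluster, :instances)
    end
  end
end
```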