add mastodon crawler

This commit is contained in:
Tao Bror Bojlén 2019-06-23 16:19:29 +02:00
parent 5b72d699f1
commit 79378cd9f4
No known key found for this signature in database
GPG Key ID: C6EC7AAB905F9E6F
15 changed files with 560 additions and 10 deletions

2
backend/.gitignore vendored
View File

@ -34,3 +34,5 @@ backend-*.tar
# secrets files as long as you replace their contents by environment
# variables.
/config/*.secret.exs
/.elixir_ls/

View File

@ -17,6 +17,11 @@ config :backend, BackendWeb.Endpoint,
render_errors: [view: BackendWeb.ErrorView, accepts: ~w(html json)],
pubsub: [name: Backend.PubSub, adapter: Phoenix.PubSub.PG2]
config :backend, Backend.Crawler,
status_age_limit_days: 28,
status_count_limit: 5000,
personal_instance_threshold: 10
# Configures Elixir's Logger
config :logger, :console,
format: "$time $metadata[$level] $message\n",

View File

@ -0,0 +1,104 @@
defmodule Backend.Crawler do
  @moduledoc """
  The Crawler context: CRUD operations for crawled instances.
  """

  import Ecto.Query, warn: false

  alias Backend.Crawler.Instance
  alias Backend.Repo

  @doc """
  Returns the list of all instances.

  ## Examples

      iex> list_instances()
      [%Instance{}, ...]

  """
  def list_instances, do: Repo.all(Instance)

  @doc """
  Gets a single instance by primary key.

  Raises `Ecto.NoResultsError` if the Instance does not exist.

  ## Examples

      iex> get_instance!(123)
      %Instance{}

      iex> get_instance!(456)
      ** (Ecto.NoResultsError)

  """
  def get_instance!(id), do: Repo.get!(Instance, id)

  @doc """
  Creates an instance from the given attributes.

  Returns `{:ok, %Instance{}}` on success or `{:error, %Ecto.Changeset{}}`
  when validation fails.

  ## Examples

      iex> create_instance(%{field: value})
      {:ok, %Instance{}}

      iex> create_instance(%{field: bad_value})
      {:error, %Ecto.Changeset{}}

  """
  def create_instance(attrs \\ %{}) do
    changeset = Instance.changeset(%Instance{}, attrs)
    Repo.insert(changeset)
  end

  @doc """
  Updates an existing instance with the given attributes.

  Returns `{:ok, %Instance{}}` on success or `{:error, %Ecto.Changeset{}}`
  when validation fails.

  ## Examples

      iex> update_instance(instance, %{field: new_value})
      {:ok, %Instance{}}

      iex> update_instance(instance, %{field: bad_value})
      {:error, %Ecto.Changeset{}}

  """
  def update_instance(%Instance{} = instance, attrs) do
    changeset = Instance.changeset(instance, attrs)
    Repo.update(changeset)
  end

  @doc """
  Deletes an instance.

  ## Examples

      iex> delete_instance(instance)
      {:ok, %Instance{}}

      iex> delete_instance(instance)
      {:error, %Ecto.Changeset{}}

  """
  def delete_instance(%Instance{} = instance), do: Repo.delete(instance)

  @doc """
  Returns an `%Ecto.Changeset{}` for tracking instance changes.

  ## Examples

      iex> change_instance(instance)
      %Ecto.Changeset{source: %Instance{}}

  """
  def change_instance(%Instance{} = instance), do: Instance.changeset(instance, %{})
end

View File

@ -0,0 +1,44 @@
defmodule Backend.Crawler.ApiCrawler do
  @moduledoc """
  This module is a specification. Crawlers for all instance types must implement its behaviour.

  TODO:
  Make sure to respect the following configuration values:
  * `:status_age_limit_days` specifies that you must only crawl statuses from the most recent N days
  * `:status_count_limit` specifies the max number of statuses to crawl in one go
  * `:personal_instance_threshold` specifies that instances with fewer than this number of users should not be crawled
  * also, profiles with the string "nobot" (case insensitive) in their profile must not be included in any stats.
  """

  alias __MODULE__

  # {domain_mentioned, datetime}
  # NOTE: bare `DateTime` in a spec denotes the module *atom*, not the struct
  # type — `DateTime.t()` is the actual datetime type.
  @type instance_interaction :: {String.t(), DateTime.t()}

  defstruct [
    :version,
    :description,
    :user_count,
    :status_count,
    :peers,
    :interactions
  ]

  # Counts are non-negative integers (`Number` is not a valid typespec type).
  @type crawl_result :: %ApiCrawler{
          version: String.t(),
          description: String.t(),
          user_count: non_neg_integer(),
          status_count: non_neg_integer(),
          peers: [String.t()],
          interactions: [instance_interaction()]
        }

  @doc """
  Check whether the instance at the given domain is of the type that this ApiCrawler implements.
  """
  @callback is_instance_type?(String.t()) :: boolean()

  @doc """
  Crawl the instance at the given domain.
  """
  @callback crawl(String.t()) :: crawl_result()
end

View File

@ -0,0 +1,61 @@
defmodule Backend.Crawler.Crawler do
  @moduledoc """
  This module crawls instances. Run `run(domain)` to crawl a given domain.
  """

  alias __MODULE__
  alias Backend.Crawler.Crawlers.Mastodon

  require Logger

  defstruct [
    # the instance domain (a string)
    :instance,
    # a list of ApiCrawlers that will be attempted
    :api_crawlers,
    :found_api?,
    # an instance of the ApiCrawler struct
    :result
  ]

  @doc """
  Crawls the instance at `domain` with the first registered ApiCrawler that
  recognises it, returning the final `%Crawler{}` state (with `:found_api?`
  and `:result` filled in).
  """
  def run(domain) do
    Logger.info("Crawling #{domain}...")
    HTTPoison.start()

    %Crawler{instance: domain, api_crawlers: [], found_api?: false, result: nil}
    # register APICrawlers here
    |> register(Mastodon)
    # go!
    |> crawl(domain)

    # |> save()
  end

  # Adds a new ApiCrawler that run/1 will check.
  defp register(%Crawler{api_crawlers: crawlers} = state, api_crawler) do
    # Struct-update syntax instead of Map.put/3 so unknown keys raise.
    %Crawler{state | api_crawlers: [api_crawler | crawlers]}
  end

  # Recursive function to check whether `instance` has an API that the head of
  # the api_crawlers list can read. If so, crawls it. If not, continues with
  # the tail of the api_crawlers list.
  defp crawl(%Crawler{api_crawlers: []} = state, instance) do
    Logger.info("Found no compatible API for #{instance}")
    %Crawler{state | found_api?: false}
  end

  # Pattern match the head/tail in the clause head instead of hd/1 + tl/1.
  defp crawl(%Crawler{api_crawlers: [curr | remaining]} = state, instance) do
    if curr.is_instance_type?(instance) do
      Logger.debug("Found #{curr} instance")
      %Crawler{state | found_api?: true, result: curr.crawl(instance)}
    else
      # Nothing found so check the next APICrawler
      Logger.debug("#{instance} is not an instance of #{curr}")
      crawl(%Crawler{state | api_crawlers: remaining}, instance)
    end
  end
end

View File

@ -0,0 +1,152 @@
defmodule Backend.Crawler.Crawlers.Mastodon do
  @moduledoc """
  An ApiCrawler implementation for Mastodon-compatible servers
  (`/api/v1/instance`, `/api/v1/instance/peers`, public timeline).
  """

  require Logger
  import Backend.Crawler.Util
  alias Backend.Crawler.ApiCrawler

  @behaviour ApiCrawler

  @impl ApiCrawler
  def is_instance_type?(domain) do
    case HTTPoison.get("https://#{domain}/api/v1/instance") do
      {:ok, response} -> if is_http_200?(response), do: has_title?(response.body), else: false
      {:error, _error} -> false
    end
  end

  @impl ApiCrawler
  def crawl(domain) do
    instance = Jason.decode!(HTTPoison.get!("https://#{domain}/api/v1/instance").body)

    if get_in(instance, ["stats", "user_count"]) > get_config(:personal_instance_threshold) do
      crawl_large_instance(domain, instance)
    else
      # Small (presumably personal) instance: record its stats but don't crawl statuses.
      Map.merge(
        Map.merge(
          Map.take(instance, ["version", "description"]),
          Map.take(instance["stats"], ["user_count", "status_count"])
        ),
        %{peers: [], interactions: []}
      )
    end
  end

  @spec crawl_large_instance(String.t(), any()) :: ApiCrawler.crawl_result()
  defp crawl_large_instance(domain, instance) do
    # servers may not publish peers
    peers =
      case HTTPoison.get("https://#{domain}/api/v1/instance/peers") do
        {:ok, response} -> if is_http_200?(response), do: Jason.decode!(response.body), else: []
        {:error, _error} -> []
      end

    Logger.debug("Found #{length(peers)} peers.")

    {mentions, statuses_seen} = get_mentions(domain)
    Logger.info("Found #{length(mentions)} mentions in #{statuses_seen} statuses.")

    Map.merge(
      Map.merge(
        Map.take(instance, ["version", "description"]),
        Map.take(instance["stats"], ["user_count", "status_count"])
      ),
      %{peers: peers, interactions: mentions}
    )
  end

  defp is_http_200?(response) do
    case response do
      %{status_code: 200} -> true
      _ -> false
    end
  end

  # check if the endpoint works as expected
  defp has_title?(body) do
    case Jason.decode(body) do
      {:ok, decoded} -> Map.has_key?(decoded, "title")
      {:error, _error} -> false
    end
  end

  # Does this status mention at least one account?
  # BUGFIX: statuses come from Jason.decode!, so keys are *strings*; the old
  # clauses matched atom keys (%{mentions: []}) and therefore never matched,
  # making every status look like a mention (and crashing later on nil).
  defp is_mention?(status) do
    case status["mentions"] do
      nil -> false
      [] -> false
      _mentions -> true
    end
  end

  # A status counts toward interaction stats only if it mentions someone AND
  # its author hasn't opted out with "nobot" in their profile.
  # BUGFIX: the old version wrote `if not is_mention?(status) do false end` as
  # an early return, but Elixir has no early return — that value was discarded
  # and the mention check was effectively skipped.
  defp is_eligible?(status) do
    is_mention?(status) and not author_has_nobot?(status)
  end

  # True if the author's note or profile fields contain "nobot" (case-insensitive).
  defp author_has_nobot?(status) do
    account = status["account"]

    fields =
      (account["fields"] || [])
      |> Enum.map_join(fn %{"name" => name, "value" => value} -> name <> value end)

    # this also means that any users who mentioned ethnobotany in their profiles will be excluded lol ¯\_(ツ)_/¯
    ((account["note"] || "") <> fields)
    |> String.downcase()
    |> String.contains?("nobot")
  end

  @spec extract_mentions_from_status(any()) :: [ApiCrawler.instance_interaction()]
  defp extract_mentions_from_status(status) do
    datetime = Timex.parse!(status["created_at"], "{ISO:Extended:Z}")

    status["mentions"]
    |> Enum.map(fn mention -> {get_domain(mention["url"]), datetime} end)
  end

  @spec statuses_to_mentions(any()) :: [ApiCrawler.instance_interaction()]
  defp statuses_to_mentions(statuses) do
    statuses
    |> Enum.filter(fn status -> is_eligible?(status) end)
    |> Enum.flat_map(fn status -> extract_mentions_from_status(status) end)
  end

  # Walks the public timeline backwards (via max_id pagination) accumulating
  # mentions, stopping at the configured status age / count limits.
  defp get_mentions(domain, max_id \\ nil, mentions \\ [], statuses_seen \\ 0) do
    endpoint = "https://#{domain}/api/v1/timelines/public?local=true"

    endpoint =
      if max_id do
        endpoint <> "&max_id=#{max_id}"
      else
        endpoint
      end

    Logger.debug("Crawling #{endpoint}")

    # TODO: handle errors here
    statuses = Jason.decode!(HTTPoison.get!(endpoint).body)

    if statuses == [] do
      {mentions, statuses_seen}
    else
      # get statuses that are eligible (i.e. users don't have #nobot in their profile) and have mentions
      mentions = mentions ++ statuses_to_mentions(statuses)
      statuses_seen = statuses_seen + length(statuses)

      oldest_status = Enum.at(statuses, -1)
      oldest_status_datetime = Timex.parse!(oldest_status["created_at"], "{ISO:Extended:Z}")

      status_datetime_threshold =
        Timex.shift(Timex.now(), days: -1 * get_config(:status_age_limit_days))

      # Keep paginating while the oldest status is newer than the age limit
      # and we haven't hit the per-crawl status cap.
      if Timex.compare(oldest_status_datetime, status_datetime_threshold) == 1 and
           statuses_seen < get_config(:status_count_limit) do
        get_mentions(domain, oldest_status["id"], mentions, statuses_seen)
      else
        {mentions, statuses_seen}
      end
    end
  end
end

View File

@ -0,0 +1,20 @@
defmodule Backend.Crawler.Instance do
  @moduledoc """
  Ecto schema for a crawled fediverse instance.
  """

  use Ecto.Schema
  import Ecto.Changeset

  # All columns a changeset may set; only :domain is mandatory.
  @cast_fields [:domain, :version, :num_users, :num_statuses]

  schema "instances" do
    field :domain, :string
    field :num_statuses, :integer
    field :num_users, :integer
    field :version, :string

    timestamps()
  end

  @doc false
  def changeset(instance, attrs) do
    instance
    |> cast(attrs, @cast_fields)
    |> validate_required([:domain])
  end
end

View File

@ -0,0 +1,20 @@
defmodule Backend.Crawler.Interaction do
  @moduledoc """
  Ecto schema for an interaction (e.g. a mention) between two instances.
  """

  use Ecto.Schema
  import Ecto.Changeset

  schema "interactions" do
    field :time, :utc_datetime
    field :type, :string
    # Foreign keys to the instances involved; set outside this changeset.
    field :source_id, :id
    field :target_id, :id

    timestamps()
  end

  @doc false
  def changeset(interaction, attrs) do
    interaction
    |> cast(attrs, [:type, :time])
    |> validate_required([:type, :time])
  end
end

View File

@ -0,0 +1,13 @@
defmodule Backend.Crawler.Util do
  @moduledoc """
  Small shared helpers for the crawler modules.
  """

  @doc """
  Reads a key from the `Backend.Crawler` application configuration.
  """
  def get_config(key) do
    Application.get_env(:backend, Backend.Crawler)[key]
  end

  @doc """
  Gets the domain from a Mastodon/Pleroma account URL
  (e.g. https://mastodon.social/@demouser or https://pleroma.site/users/demouser).
  """
  def get_domain(url) do
    # BUGFIX: the old `String.slice(url, 8..-1)` assumed exactly 8 leading
    # characters ("https://") and mangled http:// URLs. URI.parse/1 handles
    # any scheme. Returns nil for strings without a host.
    URI.parse(url).host
  end
end

View File

@ -0,0 +1,13 @@
defmodule Mix.Tasks.Crawl do
  @moduledoc """
  Crawls the given instance(s), e.g. `mix crawl mastodon.social`.
  """

  use Mix.Task

  alias Backend.Crawler.Crawler

  @shortdoc "Crawl a given instance."
  @impl Mix.Task
  def run(args) do
    # Start the application so config/Repo etc. are available.
    Mix.Task.run("app.start")
    # Application.ensure_all_started(:timex)
    # Mix.Task.run("loadconfig")

    # BUGFIX: Mix passes task arguments as a *list* of strings; the old clause
    # forwarded the whole list to Crawler.run/1, which expects a single domain.
    # List.wrap/1 also keeps direct calls with a bare string working.
    args
    |> List.wrap()
    |> Enum.each(&Crawler.run/1)
  end
end

View File

@ -42,7 +42,9 @@ defmodule Backend.MixProject do
{:phoenix_live_reload, "~> 1.2", only: :dev},
{:gettext, "~> 0.11"},
{:jason, "~> 1.0"},
{:plug_cowboy, "~> 2.0"}
{:plug_cowboy, "~> 2.0"},
{:httpoison, "~> 1.5"},
{:timex, "~> 3.5"}
]
end

View File

@ -1,24 +1,36 @@
%{
"certifi": {:hex, :certifi, "2.5.1", "867ce347f7c7d78563450a18a6a28a8090331e77fa02380b4a21962a65d36ee5", [:rebar3], [{:parse_trans, "~>3.3", [hex: :parse_trans, repo: "hexpm", optional: false]}], "hexpm"},
"combine": {:hex, :combine, "0.10.0", "eff8224eeb56498a2af13011d142c5e7997a80c8f5b97c499f84c841032e429f", [:mix], [], "hexpm"},
"connection": {:hex, :connection, "1.0.4", "a1cae72211f0eef17705aaededacac3eb30e6625b04a6117c1b2db6ace7d5976", [:mix], [], "hexpm"},
"cowboy": {:hex, :cowboy, "2.6.3", "99aa50e94e685557cad82e704457336a453d4abcb77839ad22dbe71f311fcc06", [:rebar3], [{:cowlib, "~> 2.7.3", [hex: :cowlib, repo: "hexpm", optional: false]}, {:ranch, "~> 1.7.1", [hex: :ranch, repo: "hexpm", optional: false]}], "hexpm"},
"cowlib": {:hex, :cowlib, "2.7.3", "a7ffcd0917e6d50b4d5fb28e9e2085a0ceb3c97dea310505f7460ff5ed764ce9", [:rebar3], [], "hexpm"},
"db_connection": {:hex, :db_connection, "2.0.6", "bde2f85d047969c5b5800cb8f4b3ed6316c8cb11487afedac4aa5f93fd39abfa", [:mix], [{:connection, "~> 1.0.2", [hex: :connection, repo: "hexpm", optional: false]}], "hexpm"},
"db_connection": {:hex, :db_connection, "2.1.0", "122e2f62c4906bf2e49554f1e64db5030c19229aa40935f33088e7d543aa79d0", [:mix], [{:connection, "~> 1.0.2", [hex: :connection, repo: "hexpm", optional: false]}], "hexpm"},
"decimal": {:hex, :decimal, "1.7.0", "30d6b52c88541f9a66637359ddf85016df9eb266170d53105f02e4a67e00c5aa", [:mix], [], "hexpm"},
"ecto": {:hex, :ecto, "3.1.1", "d6677f95f1e0bd39bc3db3db6b23a59977cb154ed2cceec69a56becd805128be", [:mix], [{:decimal, "~> 1.6", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm"},
"ecto_sql": {:hex, :ecto_sql, "3.1.1", "af2458e7a467d75a6389e1d4ebfb57c328ccc684d6ee52145f7b34e94efb5fc4", [:mix], [{:db_connection, "~> 2.0", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.1.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:mariaex, "~> 0.9.1", [hex: :mariaex, repo: "hexpm", optional: true]}, {:myxql, "~> 0.2.0", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.14.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm"},
"file_system": {:hex, :file_system, "0.2.6", "fd4dc3af89b9ab1dc8ccbcc214a0e60c41f34be251d9307920748a14bf41f1d3", [:mix], [], "hexpm"},
"ecto": {:hex, :ecto, "3.1.6", "e890bf66c1d4d8e2b8e010f7cba092a08139b55437bc3382371f72a6ee40757e", [:mix], [{:decimal, "~> 1.6", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm"},
"ecto_sql": {:hex, :ecto_sql, "3.1.5", "b5201fe99fa6bf6a93f64adb2d4976ded3d201f932b7c5bd4c44468642f4fb1f", [:mix], [{:db_connection, "~> 2.0", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.1.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:mariaex, "~> 0.9.1", [hex: :mariaex, repo: "hexpm", optional: true]}, {:myxql, "~> 0.2.0", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.14.0 or ~> 0.15.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm"},
"file_system": {:hex, :file_system, "0.2.7", "e6f7f155970975789f26e77b8b8d8ab084c59844d8ecfaf58cbda31c494d14aa", [:mix], [], "hexpm"},
"gettext": {:hex, :gettext, "0.16.1", "e2130b25eebcbe02bb343b119a07ae2c7e28bd4b146c4a154da2ffb2b3507af2", [:mix], [], "hexpm"},
"hackney": {:hex, :hackney, "1.15.1", "9f8f471c844b8ce395f7b6d8398139e26ddca9ebc171a8b91342ee15a19963f4", [:rebar3], [{:certifi, "2.5.1", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.4", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm"},
"httpoison": {:hex, :httpoison, "1.5.1", "0f55b5b673b03c5c327dac7015a67cb571b99b631acc0bc1b0b98dcd6b9f2104", [:mix], [{:hackney, "~> 1.8", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm"},
"idna": {:hex, :idna, "6.0.0", "689c46cbcdf3524c44d5f3dde8001f364cd7608a99556d8fbd8239a5798d4c10", [:rebar3], [{:unicode_util_compat, "0.4.1", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm"},
"jason": {:hex, :jason, "1.1.2", "b03dedea67a99223a2eaf9f1264ce37154564de899fd3d8b9a21b1a6fd64afe7", [:mix], [{:decimal, "~> 1.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm"},
"metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm"},
"mime": {:hex, :mime, "1.3.1", "30ce04ab3175b6ad0bdce0035cba77bba68b813d523d1aac73d9781b4d193cf8", [:mix], [], "hexpm"},
"phoenix": {:hex, :phoenix, "1.4.3", "8eed4a64ff1e12372cd634724bddd69185938f52c18e1396ebac76375d85677d", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:phoenix_pubsub, "~> 1.1", [hex: :phoenix_pubsub, repo: "hexpm", optional: false]}, {:plug, "~> 1.7", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 1.0 or ~> 2.0", [hex: :plug_cowboy, repo: "hexpm", optional: true]}], "hexpm"},
"mimerl": {:hex, :mimerl, "1.2.0", "67e2d3f571088d5cfd3e550c383094b47159f3eee8ffa08e64106cdf5e981be3", [:rebar3], [], "hexpm"},
"parse_trans": {:hex, :parse_trans, "3.3.0", "09765507a3c7590a784615cfd421d101aec25098d50b89d7aa1d66646bc571c1", [:rebar3], [], "hexpm"},
"phoenix": {:hex, :phoenix, "1.4.8", "c72dc3adeb49c70eb963a0ea24f7a064ec1588e651e84e1b7ad5ed8253c0b4a2", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:phoenix_pubsub, "~> 1.1", [hex: :phoenix_pubsub, repo: "hexpm", optional: false]}, {:plug, "~> 1.8.1 or ~> 1.9", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 1.0 or ~> 2.0", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm"},
"phoenix_ecto": {:hex, :phoenix_ecto, "4.0.0", "c43117a136e7399ea04ecaac73f8f23ee0ffe3e07acfcb8062fe5f4c9f0f6531", [:mix], [{:ecto, "~> 3.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:phoenix_html, "~> 2.9", [hex: :phoenix_html, repo: "hexpm", optional: true]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm"},
"phoenix_html": {:hex, :phoenix_html, "2.13.2", "f5d27c9b10ce881a60177d2b5227314fc60881e6b66b41dfe3349db6ed06cf57", [:mix], [{:plug, "~> 1.5", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm"},
"phoenix_live_reload": {:hex, :phoenix_live_reload, "1.2.0", "3bb31a9fbd40ffe8652e60c8660dffd72dd231efcdf49b744fb75b9ef7db5dd2", [:mix], [{:file_system, "~> 0.2.1 or ~> 0.3", [hex: :file_system, repo: "hexpm", optional: false]}, {:phoenix, "~> 1.4", [hex: :phoenix, repo: "hexpm", optional: false]}], "hexpm"},
"phoenix_html": {:hex, :phoenix_html, "2.13.3", "850e292ff6e204257f5f9c4c54a8cb1f6fbc16ed53d360c2b780a3d0ba333867", [:mix], [{:plug, "~> 1.5", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm"},
"phoenix_live_reload": {:hex, :phoenix_live_reload, "1.2.1", "274a4b07c4adbdd7785d45a8b0bb57634d0b4f45b18d2c508b26c0344bd59b8f", [:mix], [{:file_system, "~> 0.2.1 or ~> 0.3", [hex: :file_system, repo: "hexpm", optional: false]}, {:phoenix, "~> 1.4", [hex: :phoenix, repo: "hexpm", optional: false]}], "hexpm"},
"phoenix_pubsub": {:hex, :phoenix_pubsub, "1.1.2", "496c303bdf1b2e98a9d26e89af5bba3ab487ba3a3735f74bf1f4064d2a845a3e", [:mix], [], "hexpm"},
"plug": {:hex, :plug, "1.8.0", "9d2685cb007fe5e28ed9ac27af2815bc262b7817a00929ac10f56f169f43b977", [:mix], [{:mime, "~> 1.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: true]}], "hexpm"},
"plug": {:hex, :plug, "1.8.2", "0bcce1daa420f189a6491f3940cc77ea7fb1919761175c9c3b59800d897440fc", [:mix], [{:mime, "~> 1.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: true]}], "hexpm"},
"plug_cowboy": {:hex, :plug_cowboy, "2.0.2", "6055f16868cc4882b24b6e1d63d2bada94fb4978413377a3b32ac16c18dffba2", [:mix], [{:cowboy, "~> 2.5", [hex: :cowboy, repo: "hexpm", optional: false]}, {:plug, "~> 1.7", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm"},
"plug_crypto": {:hex, :plug_crypto, "1.0.0", "18e49317d3fa343f24620ed22795ec29d4a5e602d52d1513ccea0b07d8ea7d4d", [:mix], [], "hexpm"},
"postgrex": {:hex, :postgrex, "0.14.2", "6680591bbce28d92f043249205e8b01b36cab9ef2a7911abc43649242e1a3b78", [:mix], [{:connection, "~> 1.0", [hex: :connection, repo: "hexpm", optional: false]}, {:db_connection, "~> 2.0", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 1.5", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm"},
"postgrex": {:hex, :postgrex, "0.14.3", "5754dee2fdf6e9e508cbf49ab138df964278700b764177e8f3871e658b345a1e", [:mix], [{:connection, "~> 1.0", [hex: :connection, repo: "hexpm", optional: false]}, {:db_connection, "~> 2.0", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 1.5", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm"},
"ranch": {:hex, :ranch, "1.7.1", "6b1fab51b49196860b733a49c07604465a47bdb78aa10c1c16a3d199f7f8c881", [:rebar3], [], "hexpm"},
"ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.4", "f0eafff810d2041e93f915ef59899c923f4568f4585904d010387ed74988e77b", [:make, :mix, :rebar3], [], "hexpm"},
"telemetry": {:hex, :telemetry, "0.4.0", "8339bee3fa8b91cb84d14c2935f8ecf399ccd87301ad6da6b71c09553834b2ab", [:rebar3], [], "hexpm"},
"timex": {:hex, :timex, "3.5.0", "b0a23167da02d0fe4f1a4e104d1f929a00d348502b52432c05de875d0b9cffa5", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:gettext, "~> 0.10", [hex: :gettext, repo: "hexpm", optional: false]}, {:tzdata, "~> 0.1.8 or ~> 0.5", [hex: :tzdata, repo: "hexpm", optional: false]}], "hexpm"},
"tzdata": {:hex, :tzdata, "0.5.20", "304b9e98a02840fb32a43ec111ffbe517863c8566eb04a061f1c4dbb90b4d84c", [:mix], [{:hackney, "~> 1.0", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm"},
"unicode_util_compat": {:hex, :unicode_util_compat, "0.4.1", "d869e4c68901dd9531385bb0c8c40444ebf624e60b6962d95952775cac5e90cd", [:rebar3], [], "hexpm"},
}

View File

@ -0,0 +1,15 @@
defmodule Backend.Repo.Migrations.CreateInstances do
  @moduledoc false
  use Ecto.Migration

  # Creates the instances table; all stat columns are nullable so a row can
  # exist before a crawl fills it in.
  def change do
    create table(:instances) do
      add(:domain, :string)
      add(:version, :string)
      add(:num_users, :integer)
      add(:num_statuses, :integer)

      timestamps()
    end
  end
end

View File

@ -0,0 +1,17 @@
defmodule Backend.Repo.Migrations.CreateInteractions do
  @moduledoc false
  use Ecto.Migration

  # Creates the interactions table linking two instances, plus lookup indexes
  # on both foreign keys.
  def change do
    create table(:interactions) do
      add(:type, :string)
      add(:time, :utc_datetime)
      add(:source_id, references(:instances, on_delete: :nothing))
      add(:target_id, references(:instances, on_delete: :nothing))

      timestamps()
    end

    create(index(:interactions, [:source_id]))
    create(index(:interactions, [:target_id]))
  end
end

View File

@ -0,0 +1,70 @@
defmodule Backend.CrawlerTest do
  @moduledoc false
  use Backend.DataCase

  alias Backend.Crawler

  describe "instances" do
    alias Backend.Crawler.Instance

    @valid_attrs %{domain: "some domain", num_statuses: 42, num_users: 42, version: "some version"}
    @update_attrs %{domain: "some updated domain", num_statuses: 43, num_users: 43, version: "some updated version"}
    @invalid_attrs %{domain: nil, num_statuses: nil, num_users: nil, version: nil}

    # Inserts an Instance built from @valid_attrs, optionally overridden by `attrs`.
    def instance_fixture(attrs \\ %{}) do
      merged = Enum.into(attrs, @valid_attrs)
      {:ok, instance} = Crawler.create_instance(merged)
      instance
    end

    test "list_instances/0 returns all instances" do
      inserted = instance_fixture()
      assert Crawler.list_instances() == [inserted]
    end

    test "get_instance!/1 returns the instance with given id" do
      inserted = instance_fixture()
      assert Crawler.get_instance!(inserted.id) == inserted
    end

    test "create_instance/1 with valid data creates a instance" do
      assert {:ok, %Instance{} = created} = Crawler.create_instance(@valid_attrs)
      assert created.domain == "some domain"
      assert created.num_statuses == 42
      assert created.num_users == 42
      assert created.version == "some version"
    end

    test "create_instance/1 with invalid data returns error changeset" do
      assert {:error, %Ecto.Changeset{}} = Crawler.create_instance(@invalid_attrs)
    end

    test "update_instance/2 with valid data updates the instance" do
      inserted = instance_fixture()
      assert {:ok, %Instance{} = updated} = Crawler.update_instance(inserted, @update_attrs)
      assert updated.domain == "some updated domain"
      assert updated.num_statuses == 43
      assert updated.num_users == 43
      assert updated.version == "some updated version"
    end

    test "update_instance/2 with invalid data returns error changeset" do
      inserted = instance_fixture()
      assert {:error, %Ecto.Changeset{}} = Crawler.update_instance(inserted, @invalid_attrs)
      # The stored row must be untouched after a failed update.
      assert inserted == Crawler.get_instance!(inserted.id)
    end

    test "delete_instance/1 deletes the instance" do
      inserted = instance_fixture()
      assert {:ok, %Instance{}} = Crawler.delete_instance(inserted)
      assert_raise Ecto.NoResultsError, fn -> Crawler.get_instance!(inserted.id) end
    end

    test "change_instance/1 returns a instance changeset" do
      inserted = instance_fixture()
      assert %Ecto.Changeset{} = Crawler.change_instance(inserted)
    end
  end
end