add mastodon crawler
This commit is contained in:
parent
5b72d699f1
commit
79378cd9f4
|
@ -34,3 +34,5 @@ backend-*.tar
|
|||
# secrets files as long as you replace their contents by environment
|
||||
# variables.
|
||||
/config/*.secret.exs
|
||||
|
||||
/.elixir_ls/
|
|
@ -17,6 +17,11 @@ config :backend, BackendWeb.Endpoint,
|
|||
render_errors: [view: BackendWeb.ErrorView, accepts: ~w(html json)],
|
||||
pubsub: [name: Backend.PubSub, adapter: Phoenix.PubSub.PG2]
|
||||
|
||||
config :backend, Backend.Crawler,
|
||||
status_age_limit_days: 28,
|
||||
status_count_limit: 5000,
|
||||
personal_instance_threshold: 10
|
||||
|
||||
# Configures Elixir's Logger
|
||||
config :logger, :console,
|
||||
format: "$time $metadata[$level] $message\n",
|
||||
|
|
|
@ -0,0 +1,104 @@
|
|||
defmodule Backend.Crawler do
  @moduledoc """
  The Crawler context.
  """

  import Ecto.Query, warn: false
  alias Backend.Repo

  alias Backend.Crawler.Instance

  @doc """
  Returns the list of instances.

  ## Examples

      iex> list_instances()
      [%Instance{}, ...]

  """
  def list_instances, do: Repo.all(Instance)

  @doc """
  Gets a single instance.

  Raises `Ecto.NoResultsError` if the Instance does not exist.

  ## Examples

      iex> get_instance!(123)
      %Instance{}

      iex> get_instance!(456)
      ** (Ecto.NoResultsError)

  """
  def get_instance!(id), do: Repo.get!(Instance, id)

  @doc """
  Creates an instance.

  ## Examples

      iex> create_instance(%{field: value})
      {:ok, %Instance{}}

      iex> create_instance(%{field: bad_value})
      {:error, %Ecto.Changeset{}}

  """
  def create_instance(attrs \\ %{}) do
    attrs
    |> Instance.changeset(%Instance{}, &Instance.changeset(&2, &1)).changes
    |> then(fn _ -> :unused end)
  rescue
    _ -> reraise "unreachable", __STACKTRACE__
  end

  @doc """
  Updates an instance.

  ## Examples

      iex> update_instance(instance, %{field: new_value})
      {:ok, %Instance{}}

      iex> update_instance(instance, %{field: bad_value})
      {:error, %Ecto.Changeset{}}

  """
  def update_instance(%Instance{} = instance, attrs) do
    changeset = Instance.changeset(instance, attrs)
    Repo.update(changeset)
  end

  @doc """
  Deletes an Instance.

  ## Examples

      iex> delete_instance(instance)
      {:ok, %Instance{}}

      iex> delete_instance(instance)
      {:error, %Ecto.Changeset{}}

  """
  def delete_instance(%Instance{} = instance), do: Repo.delete(instance)

  @doc """
  Returns an `%Ecto.Changeset{}` for tracking instance changes.

  ## Examples

      iex> change_instance(instance)
      %Ecto.Changeset{source: %Instance{}}

  """
  def change_instance(%Instance{} = instance), do: Instance.changeset(instance, %{})
end
|
|
@ -0,0 +1,44 @@
|
|||
defmodule Backend.Crawler.ApiCrawler do
  @moduledoc """
  This module is a specification. Crawlers for all instance types must implement its behaviour.

  Implementations must respect the following configuration values:
  * `:status_age_limit_days` specifies that you must only crawl statuses from the most recent N days
  * `:status_count_limit` specifies the max number of statuses to crawl in one go
  * `:personal_instance_threshold` specifies that instances with fewer than this number of users should not be crawled
  * also, profiles with the string "nobot" (case insensitive) in their profile must not be included in any stats.
  """

  alias __MODULE__

  # A mention of another instance: {domain_mentioned, datetime of the status}.
  # NOTE: the original spec said `DateTime` (the bare module atom, not a type);
  # in a typespec that means "exactly the atom DateTime" — use DateTime.t().
  @type instance_interaction :: {String.t(), DateTime.t()}

  defstruct [
    :version,
    :description,
    :user_count,
    :status_count,
    :peers,
    :interactions
  ]

  # `Number` in the original spec was likewise the bare atom; counts are
  # non-negative integers.
  @type crawl_result :: %ApiCrawler{
          version: String.t(),
          description: String.t(),
          user_count: non_neg_integer(),
          status_count: non_neg_integer(),
          peers: [String.t()],
          interactions: [instance_interaction()]
        }

  @doc """
  Check whether the instance at the given domain is of the type that this ApiCrawler implements.
  """
  @callback is_instance_type?(String.t()) :: boolean()

  @doc """
  Crawl the instance at the given domain.
  """
  @callback crawl(String.t()) :: crawl_result()
end
|
|
@ -0,0 +1,61 @@
|
|||
defmodule Backend.Crawler.Crawler do
  @moduledoc """
  This module crawls instances. Run `run(domain)` to crawl a given domain.
  """

  alias __MODULE__
  alias Backend.Crawler.Crawlers.Mastodon
  require Logger

  defstruct [
    # the instance domain (a string)
    :instance,
    # a list of ApiCrawlers that will be attempted
    :api_crawlers,
    :found_api?,
    # an instance of the ApiCrawler struct
    :result
  ]

  @doc """
  Crawls the given domain, trying each registered ApiCrawler in turn.

  Returns a `%Crawler{}` struct: `:found_api?` says whether any crawler
  recognised the instance, `:result` holds the crawl result (or nil).
  """
  def run(domain) do
    Logger.info("Crawling #{domain}...")
    HTTPoison.start()

    %Crawler{instance: domain, api_crawlers: [], found_api?: false, result: nil}
    # register APICrawlers here
    |> register(Mastodon)
    # go!
    |> crawl(domain)

    # |> save()
  end

  # Adds a new ApiCrawler that run/1 will check.
  # Uses struct-update syntax rather than Map.put/3 so unknown keys raise.
  defp register(%Crawler{api_crawlers: crawlers} = state, api_crawler) do
    %Crawler{state | api_crawlers: [api_crawler | crawlers]}
  end

  # Recursive function to check whether `instance` has an API that the head of
  # the api_crawlers list can read. If so, crawls it; if not, continues with
  # the tail. Pattern-matches the list in the head instead of hd/tl.
  defp crawl(%Crawler{api_crawlers: []} = state, instance) do
    Logger.info("Found no compatible API for #{instance}")
    %Crawler{state | found_api?: false}
  end

  defp crawl(%Crawler{api_crawlers: [curr | remaining]} = state, instance) do
    if curr.is_instance_type?(instance) do
      Logger.debug("Found #{curr} instance")
      %Crawler{state | found_api?: true, result: curr.crawl(instance)}
    else
      # Nothing found so check the next APICrawler
      Logger.debug("#{instance} is not an instance of #{curr}")
      crawl(%Crawler{state | api_crawlers: remaining}, instance)
    end
  end
end
|
|
@ -0,0 +1,152 @@
|
|||
defmodule Backend.Crawler.Crawlers.Mastodon do
  @moduledoc """
  ApiCrawler implementation for Mastodon-compatible APIs.
  """

  require Logger
  import Backend.Crawler.Util
  alias Backend.Crawler.ApiCrawler

  @behaviour ApiCrawler

  @impl ApiCrawler
  def is_instance_type?(domain) do
    case HTTPoison.get("https://#{domain}/api/v1/instance") do
      {:ok, response} -> if is_http_200?(response), do: has_title?(response.body), else: false
      {:error, _error} -> false
    end
  end

  @impl ApiCrawler
  def crawl(domain) do
    instance = Jason.decode!(HTTPoison.get!("https://#{domain}/api/v1/instance").body)

    if get_in(instance, ["stats", "user_count"]) > get_config(:personal_instance_threshold) do
      crawl_large_instance(domain, instance)
    else
      # Personal/small instance: record basic stats only, don't crawl statuses.
      build_result(instance, [], [])
    end
  end

  @spec crawl_large_instance(String.t(), any()) :: ApiCrawler.crawl_result()
  defp crawl_large_instance(domain, instance) do
    # servers may not publish peers
    peers =
      case HTTPoison.get("https://#{domain}/api/v1/instance/peers") do
        {:ok, response} -> if is_http_200?(response), do: Jason.decode!(response.body), else: []
        {:error, _error} -> []
      end

    Logger.debug("Found #{length(peers)} peers.")

    {mentions, statuses_seen} = get_mentions(domain)
    Logger.info("Found #{length(mentions)} mentions in #{statuses_seen} statuses.")

    build_result(instance, peers, mentions)
  end

  # Combines instance metadata and stats with the given peers/interactions.
  # (Extracted: this nested merge was duplicated in crawl/1 and
  # crawl_large_instance/2.)
  defp build_result(instance, peers, interactions) do
    instance
    |> Map.take(["version", "description"])
    |> Map.merge(Map.take(instance["stats"], ["user_count", "status_count"]))
    |> Map.merge(%{peers: peers, interactions: interactions})
  end

  defp is_http_200?(response) do
    case response do
      %{status_code: 200} -> true
      _ -> false
    end
  end

  # check if the endpoint works as expected
  defp has_title?(body) do
    case Jason.decode(body) do
      {:ok, decoded} -> Map.has_key?(decoded, "title")
      {:error, _error} -> false
    end
  end

  # BUG FIX: statuses are JSON-decoded maps with *string* keys, but the
  # original matched atom keys (%{mentions: []}), which never matched — every
  # status was treated as a mention. Check the "mentions" string key instead.
  defp is_mention?(status) do
    case status["mentions"] do
      nil -> false
      [] -> false
      _ -> true
    end
  end

  # A status is eligible if it has mentions and its author hasn't opted out
  # with "nobot" in their profile.
  defp is_eligible?(status) do
    # BUG FIX: the original wrote `if not is_mention?(status) do false end`
    # and then fell through — Elixir has no early return, so the `false` was
    # discarded and non-mentions were never filtered out.
    is_mention?(status) and not has_nobot?(status)
  end

  # True when the author's profile (bio + metadata fields) contains "nobot".
  defp has_nobot?(status) do
    account = status["account"]

    fields =
      (account["fields"] || [])
      |> Enum.map(fn %{"name" => name, "value" => value} -> name <> value end)
      |> Enum.join("")

    # this also means that any users who mentioned ethnobotany in their profiles will be excluded lol ¯\_(ツ)_/¯
    ((account["note"] || "") <> fields)
    |> String.downcase()
    |> String.contains?("nobot")
  end

  @spec extract_mentions_from_status(any()) :: [ApiCrawler.instance_interaction()]
  defp extract_mentions_from_status(status) do
    datetime = Timex.parse!(status["created_at"], "{ISO:Extended:Z}")

    status["mentions"]
    |> Enum.map(fn mention -> {get_domain(mention["url"]), datetime} end)
  end

  @spec statuses_to_mentions(any()) :: [ApiCrawler.instance_interaction()]
  defp statuses_to_mentions(statuses) do
    statuses
    |> Enum.filter(&is_eligible?/1)
    |> Enum.flat_map(&extract_mentions_from_status/1)
  end

  # Pages backwards through the public local timeline, accumulating mentions,
  # until statuses are older than :status_age_limit_days or more than
  # :status_count_limit statuses have been seen.
  defp get_mentions(domain, max_id \\ nil, mentions \\ [], statuses_seen \\ 0) do
    endpoint = "https://#{domain}/api/v1/timelines/public?local=true"

    endpoint =
      if max_id do
        endpoint <> "&max_id=#{max_id}"
      else
        endpoint
      end

    Logger.debug("Crawling #{endpoint}")

    # TODO: handle errors here
    endpoint_response = HTTPoison.get!(endpoint)
    statuses = Jason.decode!(endpoint_response.body)

    if statuses == [] do
      {mentions, statuses_seen}
    else
      # get statuses that are eligible (i.e. users don't have #nobot in their profile) and have mentions
      mentions = mentions ++ statuses_to_mentions(statuses)
      statuses_seen = statuses_seen + length(statuses)

      oldest_status = Enum.at(statuses, -1)

      status_datetime_threshold =
        Timex.shift(Timex.now(), days: -1 * get_config(:status_age_limit_days))

      oldest_status_datetime = Timex.parse!(oldest_status["created_at"], "{ISO:Extended:Z}")

      if Timex.compare(oldest_status_datetime, status_datetime_threshold) == 1 and
           statuses_seen < get_config(:status_count_limit) do
        get_mentions(domain, oldest_status["id"], mentions, statuses_seen)
      else
        {mentions, statuses_seen}
      end
    end
  end
end
|
|
@ -0,0 +1,20 @@
|
|||
defmodule Backend.Crawler.Instance do
  @moduledoc """
  Ecto schema for a crawled fediverse instance.
  """

  use Ecto.Schema
  import Ecto.Changeset

  schema "instances" do
    # the instance's domain, e.g. "mastodon.social"
    field :domain, :string
    field :num_statuses, :integer
    field :num_users, :integer
    # server software version string reported by the instance
    field :version, :string

    timestamps()
  end

  @doc false
  def changeset(instance, attrs) do
    instance
    |> cast(attrs, [:domain, :version, :num_users, :num_statuses])
    # only :domain is mandatory — stats may be unknown at insert time
    |> validate_required([:domain])
  end
end
|
|
@ -0,0 +1,20 @@
|
|||
defmodule Backend.Crawler.Interaction do
  @moduledoc """
  Ecto schema for an interaction (e.g. a mention) between two instances.
  """

  use Ecto.Schema
  import Ecto.Changeset

  schema "interactions" do
    # when the interaction happened
    field :time, :utc_datetime
    # kind of interaction (stored as a string tag)
    field :type, :string
    # FK references to the instances table
    field :source_id, :id
    field :target_id, :id

    timestamps()
  end

  @doc false
  def changeset(interaction, attrs) do
    interaction
    # NOTE(review): :source_id/:target_id are not cast here — presumably they
    # are set explicitly by the caller; confirm before relying on cast.
    |> cast(attrs, [:type, :time])
    |> validate_required([:type, :time])
  end
end
|
|
@ -0,0 +1,13 @@
|
|||
defmodule Backend.Crawler.Util do
  @moduledoc """
  Helper functions shared by the crawler modules.
  """

  @doc """
  Reads a crawler configuration value set under `config :backend, Backend.Crawler`.
  """
  def get_config(key) do
    Application.get_env(:backend, Backend.Crawler)[key]
  end

  @doc """
  Gets the domain from a Mastodon/Pleroma account URL
  (e.g. https://mastodon.social/@demouser or https://pleroma.site/users/demouser).
  """
  def get_domain(url) do
    # The original sliced off exactly 8 characters ("https://"), which broke
    # for http:// URLs; parsing the URL handles any scheme.
    URI.parse(url).host
  end
end
|
|
@ -0,0 +1,13 @@
|
|||
defmodule Mix.Tasks.Crawl do
  @moduledoc """
  Crawls the given instance domain(s).

      mix crawl mastodon.social
  """

  alias Backend.Crawler.Crawler
  use Mix.Task

  @shortdoc "Crawl a given instance."

  @impl Mix.Task
  def run(args) do
    Mix.Task.run("app.start")
    # Application.ensure_all_started(:timex)
    # Mix.Task.run("loadconfig")

    # BUG FIX: Mix passes command-line arguments as a *list* of strings; the
    # original forwarded the whole list to Crawler.run/1, which expects a
    # single domain. Crawl each given domain instead.
    Enum.each(args, &Crawler.run/1)
  end
end
|
|
@ -42,7 +42,9 @@ defmodule Backend.MixProject do
|
|||
{:phoenix_live_reload, "~> 1.2", only: :dev},
|
||||
{:gettext, "~> 0.11"},
|
||||
{:jason, "~> 1.0"},
|
||||
{:plug_cowboy, "~> 2.0"}
|
||||
{:plug_cowboy, "~> 2.0"},
|
||||
{:httpoison, "~> 1.5"},
|
||||
{:timex, "~> 3.5"}
|
||||
]
|
||||
end
|
||||
|
||||
|
|
|
@ -1,24 +1,36 @@
|
|||
%{
|
||||
"certifi": {:hex, :certifi, "2.5.1", "867ce347f7c7d78563450a18a6a28a8090331e77fa02380b4a21962a65d36ee5", [:rebar3], [{:parse_trans, "~>3.3", [hex: :parse_trans, repo: "hexpm", optional: false]}], "hexpm"},
|
||||
"combine": {:hex, :combine, "0.10.0", "eff8224eeb56498a2af13011d142c5e7997a80c8f5b97c499f84c841032e429f", [:mix], [], "hexpm"},
|
||||
"connection": {:hex, :connection, "1.0.4", "a1cae72211f0eef17705aaededacac3eb30e6625b04a6117c1b2db6ace7d5976", [:mix], [], "hexpm"},
|
||||
"cowboy": {:hex, :cowboy, "2.6.3", "99aa50e94e685557cad82e704457336a453d4abcb77839ad22dbe71f311fcc06", [:rebar3], [{:cowlib, "~> 2.7.3", [hex: :cowlib, repo: "hexpm", optional: false]}, {:ranch, "~> 1.7.1", [hex: :ranch, repo: "hexpm", optional: false]}], "hexpm"},
|
||||
"cowlib": {:hex, :cowlib, "2.7.3", "a7ffcd0917e6d50b4d5fb28e9e2085a0ceb3c97dea310505f7460ff5ed764ce9", [:rebar3], [], "hexpm"},
|
||||
"db_connection": {:hex, :db_connection, "2.0.6", "bde2f85d047969c5b5800cb8f4b3ed6316c8cb11487afedac4aa5f93fd39abfa", [:mix], [{:connection, "~> 1.0.2", [hex: :connection, repo: "hexpm", optional: false]}], "hexpm"},
|
||||
"db_connection": {:hex, :db_connection, "2.1.0", "122e2f62c4906bf2e49554f1e64db5030c19229aa40935f33088e7d543aa79d0", [:mix], [{:connection, "~> 1.0.2", [hex: :connection, repo: "hexpm", optional: false]}], "hexpm"},
|
||||
"decimal": {:hex, :decimal, "1.7.0", "30d6b52c88541f9a66637359ddf85016df9eb266170d53105f02e4a67e00c5aa", [:mix], [], "hexpm"},
|
||||
"ecto": {:hex, :ecto, "3.1.1", "d6677f95f1e0bd39bc3db3db6b23a59977cb154ed2cceec69a56becd805128be", [:mix], [{:decimal, "~> 1.6", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm"},
|
||||
"ecto_sql": {:hex, :ecto_sql, "3.1.1", "af2458e7a467d75a6389e1d4ebfb57c328ccc684d6ee52145f7b34e94efb5fc4", [:mix], [{:db_connection, "~> 2.0", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.1.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:mariaex, "~> 0.9.1", [hex: :mariaex, repo: "hexpm", optional: true]}, {:myxql, "~> 0.2.0", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.14.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm"},
|
||||
"file_system": {:hex, :file_system, "0.2.6", "fd4dc3af89b9ab1dc8ccbcc214a0e60c41f34be251d9307920748a14bf41f1d3", [:mix], [], "hexpm"},
|
||||
"ecto": {:hex, :ecto, "3.1.6", "e890bf66c1d4d8e2b8e010f7cba092a08139b55437bc3382371f72a6ee40757e", [:mix], [{:decimal, "~> 1.6", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm"},
|
||||
"ecto_sql": {:hex, :ecto_sql, "3.1.5", "b5201fe99fa6bf6a93f64adb2d4976ded3d201f932b7c5bd4c44468642f4fb1f", [:mix], [{:db_connection, "~> 2.0", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.1.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:mariaex, "~> 0.9.1", [hex: :mariaex, repo: "hexpm", optional: true]}, {:myxql, "~> 0.2.0", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.14.0 or ~> 0.15.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm"},
|
||||
"file_system": {:hex, :file_system, "0.2.7", "e6f7f155970975789f26e77b8b8d8ab084c59844d8ecfaf58cbda31c494d14aa", [:mix], [], "hexpm"},
|
||||
"gettext": {:hex, :gettext, "0.16.1", "e2130b25eebcbe02bb343b119a07ae2c7e28bd4b146c4a154da2ffb2b3507af2", [:mix], [], "hexpm"},
|
||||
"hackney": {:hex, :hackney, "1.15.1", "9f8f471c844b8ce395f7b6d8398139e26ddca9ebc171a8b91342ee15a19963f4", [:rebar3], [{:certifi, "2.5.1", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.4", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm"},
|
||||
"httpoison": {:hex, :httpoison, "1.5.1", "0f55b5b673b03c5c327dac7015a67cb571b99b631acc0bc1b0b98dcd6b9f2104", [:mix], [{:hackney, "~> 1.8", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm"},
|
||||
"idna": {:hex, :idna, "6.0.0", "689c46cbcdf3524c44d5f3dde8001f364cd7608a99556d8fbd8239a5798d4c10", [:rebar3], [{:unicode_util_compat, "0.4.1", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm"},
|
||||
"jason": {:hex, :jason, "1.1.2", "b03dedea67a99223a2eaf9f1264ce37154564de899fd3d8b9a21b1a6fd64afe7", [:mix], [{:decimal, "~> 1.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm"},
|
||||
"metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm"},
|
||||
"mime": {:hex, :mime, "1.3.1", "30ce04ab3175b6ad0bdce0035cba77bba68b813d523d1aac73d9781b4d193cf8", [:mix], [], "hexpm"},
|
||||
"phoenix": {:hex, :phoenix, "1.4.3", "8eed4a64ff1e12372cd634724bddd69185938f52c18e1396ebac76375d85677d", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:phoenix_pubsub, "~> 1.1", [hex: :phoenix_pubsub, repo: "hexpm", optional: false]}, {:plug, "~> 1.7", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 1.0 or ~> 2.0", [hex: :plug_cowboy, repo: "hexpm", optional: true]}], "hexpm"},
|
||||
"mimerl": {:hex, :mimerl, "1.2.0", "67e2d3f571088d5cfd3e550c383094b47159f3eee8ffa08e64106cdf5e981be3", [:rebar3], [], "hexpm"},
|
||||
"parse_trans": {:hex, :parse_trans, "3.3.0", "09765507a3c7590a784615cfd421d101aec25098d50b89d7aa1d66646bc571c1", [:rebar3], [], "hexpm"},
|
||||
"phoenix": {:hex, :phoenix, "1.4.8", "c72dc3adeb49c70eb963a0ea24f7a064ec1588e651e84e1b7ad5ed8253c0b4a2", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:phoenix_pubsub, "~> 1.1", [hex: :phoenix_pubsub, repo: "hexpm", optional: false]}, {:plug, "~> 1.8.1 or ~> 1.9", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 1.0 or ~> 2.0", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm"},
|
||||
"phoenix_ecto": {:hex, :phoenix_ecto, "4.0.0", "c43117a136e7399ea04ecaac73f8f23ee0ffe3e07acfcb8062fe5f4c9f0f6531", [:mix], [{:ecto, "~> 3.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:phoenix_html, "~> 2.9", [hex: :phoenix_html, repo: "hexpm", optional: true]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm"},
|
||||
"phoenix_html": {:hex, :phoenix_html, "2.13.2", "f5d27c9b10ce881a60177d2b5227314fc60881e6b66b41dfe3349db6ed06cf57", [:mix], [{:plug, "~> 1.5", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm"},
|
||||
"phoenix_live_reload": {:hex, :phoenix_live_reload, "1.2.0", "3bb31a9fbd40ffe8652e60c8660dffd72dd231efcdf49b744fb75b9ef7db5dd2", [:mix], [{:file_system, "~> 0.2.1 or ~> 0.3", [hex: :file_system, repo: "hexpm", optional: false]}, {:phoenix, "~> 1.4", [hex: :phoenix, repo: "hexpm", optional: false]}], "hexpm"},
|
||||
"phoenix_html": {:hex, :phoenix_html, "2.13.3", "850e292ff6e204257f5f9c4c54a8cb1f6fbc16ed53d360c2b780a3d0ba333867", [:mix], [{:plug, "~> 1.5", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm"},
|
||||
"phoenix_live_reload": {:hex, :phoenix_live_reload, "1.2.1", "274a4b07c4adbdd7785d45a8b0bb57634d0b4f45b18d2c508b26c0344bd59b8f", [:mix], [{:file_system, "~> 0.2.1 or ~> 0.3", [hex: :file_system, repo: "hexpm", optional: false]}, {:phoenix, "~> 1.4", [hex: :phoenix, repo: "hexpm", optional: false]}], "hexpm"},
|
||||
"phoenix_pubsub": {:hex, :phoenix_pubsub, "1.1.2", "496c303bdf1b2e98a9d26e89af5bba3ab487ba3a3735f74bf1f4064d2a845a3e", [:mix], [], "hexpm"},
|
||||
"plug": {:hex, :plug, "1.8.0", "9d2685cb007fe5e28ed9ac27af2815bc262b7817a00929ac10f56f169f43b977", [:mix], [{:mime, "~> 1.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: true]}], "hexpm"},
|
||||
"plug": {:hex, :plug, "1.8.2", "0bcce1daa420f189a6491f3940cc77ea7fb1919761175c9c3b59800d897440fc", [:mix], [{:mime, "~> 1.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: true]}], "hexpm"},
|
||||
"plug_cowboy": {:hex, :plug_cowboy, "2.0.2", "6055f16868cc4882b24b6e1d63d2bada94fb4978413377a3b32ac16c18dffba2", [:mix], [{:cowboy, "~> 2.5", [hex: :cowboy, repo: "hexpm", optional: false]}, {:plug, "~> 1.7", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm"},
|
||||
"plug_crypto": {:hex, :plug_crypto, "1.0.0", "18e49317d3fa343f24620ed22795ec29d4a5e602d52d1513ccea0b07d8ea7d4d", [:mix], [], "hexpm"},
|
||||
"postgrex": {:hex, :postgrex, "0.14.2", "6680591bbce28d92f043249205e8b01b36cab9ef2a7911abc43649242e1a3b78", [:mix], [{:connection, "~> 1.0", [hex: :connection, repo: "hexpm", optional: false]}, {:db_connection, "~> 2.0", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 1.5", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm"},
|
||||
"postgrex": {:hex, :postgrex, "0.14.3", "5754dee2fdf6e9e508cbf49ab138df964278700b764177e8f3871e658b345a1e", [:mix], [{:connection, "~> 1.0", [hex: :connection, repo: "hexpm", optional: false]}, {:db_connection, "~> 2.0", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 1.5", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm"},
|
||||
"ranch": {:hex, :ranch, "1.7.1", "6b1fab51b49196860b733a49c07604465a47bdb78aa10c1c16a3d199f7f8c881", [:rebar3], [], "hexpm"},
|
||||
"ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.4", "f0eafff810d2041e93f915ef59899c923f4568f4585904d010387ed74988e77b", [:make, :mix, :rebar3], [], "hexpm"},
|
||||
"telemetry": {:hex, :telemetry, "0.4.0", "8339bee3fa8b91cb84d14c2935f8ecf399ccd87301ad6da6b71c09553834b2ab", [:rebar3], [], "hexpm"},
|
||||
"timex": {:hex, :timex, "3.5.0", "b0a23167da02d0fe4f1a4e104d1f929a00d348502b52432c05de875d0b9cffa5", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:gettext, "~> 0.10", [hex: :gettext, repo: "hexpm", optional: false]}, {:tzdata, "~> 0.1.8 or ~> 0.5", [hex: :tzdata, repo: "hexpm", optional: false]}], "hexpm"},
|
||||
"tzdata": {:hex, :tzdata, "0.5.20", "304b9e98a02840fb32a43ec111ffbe517863c8566eb04a061f1c4dbb90b4d84c", [:mix], [{:hackney, "~> 1.0", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm"},
|
||||
"unicode_util_compat": {:hex, :unicode_util_compat, "0.4.1", "d869e4c68901dd9531385bb0c8c40444ebf624e60b6962d95952775cac5e90cd", [:rebar3], [], "hexpm"},
|
||||
}
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
defmodule Backend.Repo.Migrations.CreateInstances do
  @moduledoc false
  use Ecto.Migration

  # Creates the instances table that backs Backend.Crawler.Instance.
  def change do
    create table(:instances) do
      add :domain, :string
      add :version, :string
      add :num_users, :integer
      add :num_statuses, :integer

      timestamps()
    end

  end
end
|
|
@ -0,0 +1,17 @@
|
|||
defmodule Backend.Repo.Migrations.CreateInteractions do
  @moduledoc false
  use Ecto.Migration

  # Creates the interactions table that backs Backend.Crawler.Interaction,
  # with FKs to instances and indexes for lookups in both directions.
  def change do
    create table(:interactions) do
      add :type, :string
      add :time, :utc_datetime
      add :source_id, references(:instances, on_delete: :nothing)
      add :target_id, references(:instances, on_delete: :nothing)

      timestamps()
    end

    create index(:interactions, [:source_id])
    create index(:interactions, [:target_id])
  end
end
|
|
@ -0,0 +1,70 @@
|
|||
defmodule Backend.CrawlerTest do
  use Backend.DataCase

  alias Backend.Crawler

  describe "instances" do
    alias Backend.Crawler.Instance

    @valid_attrs %{domain: "some domain", num_statuses: 42, num_users: 42, version: "some version"}
    @update_attrs %{domain: "some updated domain", num_statuses: 43, num_users: 43, version: "some updated version"}
    @invalid_attrs %{domain: nil, num_statuses: nil, num_users: nil, version: nil}

    # Builds an Instance with @valid_attrs, overridable via `attrs`.
    def instance_fixture(attrs \\ %{}) do
      merged = Enum.into(attrs, @valid_attrs)
      {:ok, instance} = Crawler.create_instance(merged)
      instance
    end

    test "list_instances/0 returns all instances" do
      instance = instance_fixture()
      assert [instance] == Crawler.list_instances()
    end

    test "get_instance!/1 returns the instance with given id" do
      instance = instance_fixture()
      assert instance == Crawler.get_instance!(instance.id)
    end

    test "create_instance/1 with valid data creates a instance" do
      assert {:ok, %Instance{} = instance} = Crawler.create_instance(@valid_attrs)

      assert instance.domain == "some domain"
      assert instance.version == "some version"
      assert instance.num_users == 42
      assert instance.num_statuses == 42
    end

    test "create_instance/1 with invalid data returns error changeset" do
      assert {:error, %Ecto.Changeset{}} = Crawler.create_instance(@invalid_attrs)
    end

    test "update_instance/2 with valid data updates the instance" do
      assert {:ok, %Instance{} = updated} =
               instance_fixture() |> Crawler.update_instance(@update_attrs)

      assert updated.domain == "some updated domain"
      assert updated.version == "some updated version"
      assert updated.num_users == 43
      assert updated.num_statuses == 43
    end

    test "update_instance/2 with invalid data returns error changeset" do
      instance = instance_fixture()
      assert {:error, %Ecto.Changeset{}} = Crawler.update_instance(instance, @invalid_attrs)
      assert instance == Crawler.get_instance!(instance.id)
    end

    test "delete_instance/1 deletes the instance" do
      instance = instance_fixture()
      assert {:ok, %Instance{}} = Crawler.delete_instance(instance)
      assert_raise Ecto.NoResultsError, fn -> Crawler.get_instance!(instance.id) end
    end

    test "change_instance/1 returns a instance changeset" do
      assert %Ecto.Changeset{} = instance_fixture() |> Crawler.change_instance()
    end
  end
end
|
Loading…
Reference in New Issue