handle weirdly-formatted friendica peers

This commit is contained in:
Tao Bror Bojlén 2019-08-21 22:30:32 +02:00
parent c2124468a7
commit 2f1a654520
No known key found for this signature in database
GPG key ID: C6EC7AAB905F9E6F
6 changed files with 30 additions and 11 deletions

View file

@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added ON DELETE to `most_recent_crawl` table, such that it can handle previously-crawled but now-dead instances.
- You can now login to the admin view by clicking, not just by pressing enter.
- Added handling for weirdly-formatted Friendica peers.
## [2.7.0 - 2018-08-18]

View file

@ -76,13 +76,15 @@ defmodule Backend.Crawler.Crawlers.Friendica do
end
defp to_domain(url) do
url =
cond do
String.starts_with?(url, "https://") -> String.slice(url, 8..-1)
String.starts_with?(url, "http://") -> String.slice(url, 7..-1)
true -> url
url
|> strip_prefix("http://")
|> strip_prefix("https://")
|> strip_username()
end
url
# Sometimes a url at the poco/@server endpoint has the form username@domain.tld,
# in which case we only want domain.tld.
#
# Returns the first run of domain characters (word characters, dots and hyphens)
# in `string`, skipping an optional leading `username@` part.
# Raises a MatchError when `string` contains no such run (e.g. an empty string).
defp strip_username(string) do
  # The hyphen is placed last in each character class so it is a literal. Writing
  # `.-_` would instead declare the range "." through "_", which also admits "/",
  # ":" and "@" and lets paths or the username leak into the captured domain.
  # The username class includes "." so that "first.last@host" is handled as well.
  [_match, _username, domain] = Regex.run(~r/([\w.-]+@)?([\w.-]+)/, string)
  domain
end
end

View file

@ -205,7 +205,7 @@ defmodule Backend.Crawler.Crawlers.Misskey do
defp extract_mentions_from_status(status) do
status_content = Map.get(status, "text")
Regex.scan(~r/@\w+@([\w.-]+)/, status_content)
Regex.scan(~r/@\w+@([\w._-]+)/, status_content)
|> Enum.map(fn [_match, domain] -> domain end)
|> Enum.reduce(%{}, fn domain, acc ->
Map.update(acc, domain, 1, &(&1 + 1))

View file

@ -9,7 +9,7 @@ defmodule Backend.Crawler.Util do
# (e.g. https://mastodon.social/@demouser or https://pleroma.site/users/demouser)
@spec get_domain(String.t()) :: String.t()
def get_domain(url) do
  # Capture everything between the scheme and the first "/". The hyphen must stay
  # last in the character class: `.-_` would be the range "." through "_", which
  # includes "/" and would make the greedy capture swallow path segments
  # (e.g. "pleroma.site/users"). Underscores are already covered by `\w`.
  # Raises a MatchError if `url` is not an http(s) URL with a path separator.
  [_match, domain] = Regex.run(~r/https?:\/\/([\w.-]+)\/.*/, url)
  domain
end

View file

@ -202,4 +202,22 @@ defmodule Backend.Util do
{:error, error} -> raise error
end
end
@doc """
Strips `prefix` from the start of `string`.

If `string` does not start with `prefix`, it is returned unchanged. Only a single
leading occurrence is removed.

## Examples

    iex> strip_prefix("https://example.com", "https://")
    "example.com"

    iex> strip_prefix("example.com", "https://")
    "example.com"
"""
@spec strip_prefix(String.t(), String.t()) :: String.t()
def strip_prefix(string, prefix) do
  # String.replace_prefix/3 matches and removes the prefix at the byte level, so
  # the check and the removal always agree. Combining a byte-based starts_with?
  # with a grapheme-based length/slice could drop extra characters when the
  # prefix ends in the middle of a grapheme cluster.
  String.replace_prefix(string, prefix, "")
end
@doc """
Returns `true` if `domain` consists solely of word characters, dots and hyphens.

The whole string must match, so URLs, `user@host` strings, strings containing
whitespace and the empty string are all rejected.
"""
@spec is_valid_domain?(String.t()) :: boolean
def is_valid_domain?(domain) do
  # Anchored with \A and \z so the entire string is checked rather than any
  # substring. The hyphen is last in the class so it is a literal and not the
  # start of a "." through "_" range (which would admit "/", ":" and "@").
  Regex.match?(~r/\A[\w.-]+\z/, domain)
end
end

View file

@ -1,7 +1,5 @@
defmodule BackendWeb.InstanceController do
use BackendWeb, :controller
import Backend.Util
alias Graph.Cache
action_fallback(BackendWeb.FallbackController)