handle weirdly-formatted friendica peers
This commit is contained in:
parent
c2124468a7
commit
2f1a654520
|
@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||||
|
|
||||||
- Added ON DELETE to `most_recent_crawl` table, such that it can handle previously-crawled but now-dead instances.
|
- Added ON DELETE to `most_recent_crawl` table, such that it can handle previously-crawled but now-dead instances.
|
||||||
- You can now log in to the admin view by clicking, not just by pressing enter.
|
- You can now log in to the admin view by clicking, not just by pressing enter.
|
||||||
|
- Added handling for weirdly-formatted Friendica peers.
|
||||||
|
|
||||||
## [2.7.0 - 2018-08-18]
|
## [2.7.0 - 2018-08-18]
|
||||||
|
|
||||||
|
|
|
@ -76,13 +76,15 @@ defmodule Backend.Crawler.Crawlers.Friendica do
|
||||||
end
|
end
|
||||||
|
|
||||||
defp to_domain(url) do
|
defp to_domain(url) do
|
||||||
url =
|
url
|
||||||
cond do
|
|> strip_prefix("http://")
|
||||||
String.starts_with?(url, "https://") -> String.slice(url, 8..-1)
|
|> strip_prefix("https://")
|
||||||
String.starts_with?(url, "http://") -> String.slice(url, 7..-1)
|
|> strip_username()
|
||||||
true -> url
|
|
||||||
end
|
end
|
||||||
|
|
||||||
url
|
# Sometimes a url at the poco/@server endpoint has the form username@domain.tld, in which case we only want domain.tld
|
||||||
|
defp strip_username(string) do
|
||||||
|
[_match, _username, domain] = Regex.run(~r/([\w-_]+@)?([\w.-_]+)/, string)
|
||||||
|
domain
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -205,7 +205,7 @@ defmodule Backend.Crawler.Crawlers.Misskey do
|
||||||
defp extract_mentions_from_status(status) do
|
defp extract_mentions_from_status(status) do
|
||||||
status_content = Map.get(status, "text")
|
status_content = Map.get(status, "text")
|
||||||
|
|
||||||
Regex.scan(~r/@\w+@([\w.-]+)/, status_content)
|
Regex.scan(~r/@\w+@([\w._-]+)/, status_content)
|
||||||
|> Enum.map(fn [_match, domain] -> domain end)
|
|> Enum.map(fn [_match, domain] -> domain end)
|
||||||
|> Enum.reduce(%{}, fn domain, acc ->
|
|> Enum.reduce(%{}, fn domain, acc ->
|
||||||
Map.update(acc, domain, 1, &(&1 + 1))
|
Map.update(acc, domain, 1, &(&1 + 1))
|
||||||
|
|
|
@ -9,7 +9,7 @@ defmodule Backend.Crawler.Util do
|
||||||
# (e.g. https://mastodon.social/@demouser or https://pleroma.site/users/demouser)
|
# (e.g. https://mastodon.social/@demouser or https://pleroma.site/users/demouser)
|
||||||
@spec get_domain(String.t()) :: String.t()
|
@spec get_domain(String.t()) :: String.t()
|
||||||
def get_domain(url) do
|
def get_domain(url) do
|
||||||
[_match, domain] = Regex.run(~r/https?:\/\/([\w.-]+)\/.*/, url)
|
[_match, domain] = Regex.run(~r/https?:\/\/([\w.-_]+)\/.*/, url)
|
||||||
domain
|
domain
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -202,4 +202,22 @@ defmodule Backend.Util do
|
||||||
{:error, error} -> raise error
|
{:error, error} -> raise error
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@doc """
|
||||||
|
Strips `prefix` from `string`. If it doesn't start with that prefix, just returns the string.
|
||||||
|
"""
|
||||||
|
@spec strip_prefix(String.t(), String.t()) :: String.t()
|
||||||
|
def strip_prefix(string, prefix) do
|
||||||
|
if String.starts_with?(string, prefix) do
|
||||||
|
prefix_length = String.length(prefix)
|
||||||
|
String.slice(string, prefix_length..-1)
|
||||||
|
else
|
||||||
|
string
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
@spec is_valid_domain?(String.t()) :: boolean
|
||||||
|
def is_valid_domain?(domain) do
|
||||||
|
Regex.match?(~r/[\w.-_]+/, domain)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -1,7 +1,5 @@
|
||||||
defmodule BackendWeb.InstanceController do
|
defmodule BackendWeb.InstanceController do
|
||||||
use BackendWeb, :controller
|
use BackendWeb, :controller
|
||||||
|
|
||||||
import Backend.Util
|
|
||||||
alias Graph.Cache
|
alias Graph.Cache
|
||||||
|
|
||||||
action_fallback(BackendWeb.FallbackController)
|
action_fallback(BackendWeb.FallbackController)
|
||||||
|
|
Loading…
Reference in a new issue