handle weirdly-formatted friendica peers
This commit is contained in:
parent
c2124468a7
commit
2f1a654520
|
@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||
|
||||
- Added ON DELETE to `most_recent_crawl` table, such that it can handle previously-crawled but now-dead instances.
|
||||
- You can now log in to the admin view by clicking, not just by pressing enter.
|
||||
- Add handling for weirdly-formatted Friendica peers
|
||||
|
||||
## [2.7.0 - 2018-08-18]
|
||||
|
||||
|
|
|
@ -76,13 +76,15 @@ defmodule Backend.Crawler.Crawlers.Friendica do
|
|||
end
|
||||
|
||||
# Reduces a peer URL to its bare domain: drops a leading "http://" or "https://" scheme,
# then drops any leading "username@" part via strip_username/1.
defp to_domain(url) do
  url
  |> strip_prefix("http://")
  |> strip_prefix("https://")
  |> strip_username()
end
|
||||
|
||||
# Sometimes a url at the poco/@server endpoint has the form username@domain.tld, in which
# case we only want domain.tld.
#
# Raises a MatchError when `string` contains no run of word characters, dots or hyphens.
defp strip_username(string) do
  # `\w` already covers "_", and the hyphen is kept last in each class so it is a literal.
  # Written as "[\w.-_]" the ".-_" part is a range from "." to "_" that also admits
  # "/", ":" and "@", which let paths and usernames leak into the captured domain.
  [_match, domain] = Regex.run(~r/(?:[\w.-]+@)?([\w.-]+)/, string)
  domain
end
|
||||
end
|
||||
|
|
|
@ -205,7 +205,7 @@ defmodule Backend.Crawler.Crawlers.Misskey do
|
|||
defp extract_mentions_from_status(status) do
|
||||
status_content = Map.get(status, "text")
|
||||
|
||||
Regex.scan(~r/@\w+@([\w.-]+)/, status_content)
|
||||
Regex.scan(~r/@\w+@([\w._-]+)/, status_content)
|
||||
|> Enum.map(fn [_match, domain] -> domain end)
|
||||
|> Enum.reduce(%{}, fn domain, acc ->
|
||||
Map.update(acc, domain, 1, &(&1 + 1))
|
||||
|
|
|
@ -9,7 +9,7 @@ defmodule Backend.Crawler.Util do
|
|||
# (e.g. https://mastodon.social/@demouser or https://pleroma.site/users/demouser)
|
||||
# Raises a MatchError when `url` does not start with http:// or https:// or has no "/"
# after the host.
@spec get_domain(String.t()) :: String.t()
def get_domain(url) do
  # The hyphen is kept last in the class so it stays a literal. Written as "[\w.-_]" the
  # ".-_" part is a range that includes "/", which lets the greedy capture swallow path
  # segments ("https://host/users/demouser" would yield "host/users").
  [_match, domain] = Regex.run(~r/https?:\/\/([\w.-]+)\/.*/, url)
  domain
end
|
||||
|
||||
|
|
|
@ -202,4 +202,22 @@ defmodule Backend.Util do
|
|||
{:error, error} -> raise error
|
||||
end
|
||||
end
|
||||
|
||||
@doc """
Strips `prefix` from `string`. If it doesn't start with that prefix, just returns the string.

Only a single leading occurrence of `prefix` is removed.

## Examples

    iex> Backend.Util.strip_prefix("https://example.com", "https://")
    "example.com"

    iex> Backend.Util.strip_prefix("example.com", "https://")
    "example.com"
"""
@spec strip_prefix(String.t(), String.t()) :: String.t()
def strip_prefix(string, prefix) do
  # Delegates to the standard library instead of a manual starts_with?/length/slice; this
  # also avoids String.slice/2 with an `n..-1` range, which newer Elixir versions warn about.
  String.replace_prefix(string, prefix, "")
end
|
||||
|
||||
@doc """
Returns `true` when `domain` is non-empty and consists solely of word characters (`\\w`),
dots and hyphens; otherwise returns `false`.
"""
@spec is_valid_domain?(String.t()) :: boolean
def is_valid_domain?(domain) do
  # Anchored with \A and \z so the whole string must match, not just some substring.
  # The hyphen is last in the class so it is a literal; "[\w.-_]" would form the range
  # "." through "_" and accept "/", ":", "@" and similar characters.
  Regex.match?(~r/\A[\w.-]+\z/, domain)
end
|
||||
end
|
||||
|
|
|
@ -1,7 +1,5 @@
|
|||
defmodule BackendWeb.InstanceController do
|
||||
use BackendWeb, :controller
|
||||
|
||||
import Backend.Util
|
||||
alias Graph.Cache
|
||||
|
||||
action_fallback(BackendWeb.FallbackController)
|
||||
|
|
Loading…
Reference in a new issue