From a37452f13868f6e4f14acf0bec3149584558717a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tao=20Bojl=C3=A9n?= <2803708-taobojlen@users.noreply.gitlab.com> Date: Sun, 14 Jul 2019 11:47:06 +0000 Subject: [PATCH] refactor/elixir backend --- .dokku-monorepo | 2 + .gitignore | 173 +- backend/.dockerignore | 9 + backend/.formatter.exs | 5 + backend/Dockerfile | 61 +- backend/README.md | 33 + backend/apiv1/__init__.py | 0 backend/apiv1/_util.py | 8 - backend/apiv1/apps.py | 5 - backend/apiv1/serializers.py | 105 - backend/apiv1/views.py | 37 - backend/backend/__init__.py | 0 backend/backend/settings/base.py | 124 - backend/backend/settings/development.py | 7 - backend/backend/settings/production.py | 10 - backend/backend/urls.py | 37 - backend/backend/wsgi.py | 13 - backend/config/config.exs | 51 + backend/config/dev.exs | 72 + backend/config/prod.exs | 57 + backend/config/releases.exs | 27 + backend/config/test.exs | 18 + backend/lib/backend.ex | 9 + backend/lib/backend/api.ex | 68 + backend/lib/backend/application.ex | 46 + backend/lib/backend/crawl.ex | 26 + backend/lib/backend/crawl_interaction.ex | 29 + backend/lib/backend/crawler/api_crawler.ex | 45 + backend/lib/backend/crawler/crawler.ex | 196 + .../lib/backend/crawler/crawlers/mastodon.ex | 193 + .../backend/crawler/stale_instance_manager.ex | 84 + backend/lib/backend/crawler/util.ex | 63 + backend/lib/backend/edge.ex | 25 + backend/lib/backend/instance.ex | 41 + backend/lib/backend/instance_peer.ex | 27 + backend/lib/backend/release.ex | 18 + backend/lib/backend/repo.ex | 5 + backend/lib/backend/scheduler.ex | 116 + backend/lib/backend/util.ex | 129 + backend/lib/backend_web.ex | 66 + .../lib/backend_web/channels/user_socket.ex | 33 + .../controllers/fallback_controller.ex | 15 + .../controllers/graph_controller.ex | 13 + .../controllers/instance_controller.ex | 27 + backend/lib/backend_web/endpoint.ex | 51 + backend/lib/backend_web/gettext.ex | 24 + backend/lib/backend_web/router.ex | 14 + .../lib/backend_web/views/changeset_view.ex | 19 + .../lib/backend_web/views/error_helpers.ex | 33 + backend/lib/backend_web/views/error_view.ex | 16 + backend/lib/backend_web/views/graph_view.ex | 36 + .../lib/backend_web/views/instance_view.ex | 45 + backend/lib/mix/tasks/crawl.ex | 13 + backend/manage.py | 15 - backend/mix.exs | 65 + backend/mix.lock | 43 + backend/priv/gettext/en/LC_MESSAGES/errors.po | 97 + backend/priv/repo/migrations/.formatter.exs | 4 + .../20190624090436_create_instances.exs | 29 + .../20190710133755_create_edges.exs | 15 + .../20190710155001_create_crawls.exs | 20 + ...190710155112_create_crawl_interactions.exs | 16 + .../20190712133009_add_instance_coords.exs | 10 + backend/priv/repo/seeds.exs | 11 + backend/rel/rel/vm.args.eex | 1 + backend/requirements.txt | 28 - backend/scraper/__init__.py | 0 backend/scraper/admin.py | 3 - backend/scraper/apps.py | 5 - backend/scraper/management/commands/_util.py | 84 - .../management/commands/build_edges.py | 38 - backend/scraper/management/commands/scrape.py | 276 - backend/scraper/migrations/0001_initial.py | 67 - .../migrations/0002_auto_20190419_1346.py | 24 - backend/scraper/migrations/__init__.py | 0 backend/scraper/models.py | 59 - backend/scripts/docker-entrypoint.sh | 26 - .../controllers/graph_controller_test.exs | 104 + .../controllers/instance_controller_test.exs | 88 + .../backend_web/views/error_view_test.exs | 15 + backend/test/support/channel_case.ex | 37 + backend/test/support/conn_case.ex | 38 + backend/test/support/data_case.ex | 53 + backend/test/test_helper.exs | 2 + 
config/Caddyfile | 13 - config/gunicorn.conf.py | 196 - docker-compose.production.yml | 41 +- docker-compose.yml | 26 +- example.env | 10 +- frontend/.gitignore | 21 - frontend/package.json | 60 +- frontend/src/components/CytoscapeGraph.tsx | 206 + frontend/src/components/FloatingCard.tsx | 14 + .../src/components/FloatingLayoutSelect.tsx | 53 + .../src/components/FloatingResetButton.tsx | 12 + frontend/src/components/Sidebar.tsx | 2 +- .../src/components/screens/GraphScreen.tsx | 4 +- frontend/src/constants.tsx | 3 + frontend/src/redux/actions.ts | 8 +- frontend/src/typings/cytoscape-cola.d.ts | 3 + frontend/src/util.ts | 3 +- frontend/tsconfig.json | 15 +- frontend/yarn.lock | 4544 +++++++++-------- gephi/.classpath | 12 + gephi/.gitignore | 19 - gephi/.project | 23 + .../org.eclipse.buildship.core.prefs | 2 + .../space/fediverse/graph/GraphBuilder.class | Bin 0 -> 8570 bytes .../space/fediverse/graph/GraphBuilder.java | 72 +- 109 files changed, 5339 insertions(+), 3675 deletions(-) create mode 100644 .dokku-monorepo create mode 100644 backend/.dockerignore create mode 100644 backend/.formatter.exs create mode 100644 backend/README.md delete mode 100644 backend/apiv1/__init__.py delete mode 100644 backend/apiv1/_util.py delete mode 100644 backend/apiv1/apps.py delete mode 100644 backend/apiv1/serializers.py delete mode 100644 backend/apiv1/views.py delete mode 100644 backend/backend/__init__.py delete mode 100644 backend/backend/settings/base.py delete mode 100644 backend/backend/settings/development.py delete mode 100644 backend/backend/settings/production.py delete mode 100644 backend/backend/urls.py delete mode 100644 backend/backend/wsgi.py create mode 100644 backend/config/config.exs create mode 100644 backend/config/dev.exs create mode 100644 backend/config/prod.exs create mode 100644 backend/config/releases.exs create mode 100644 backend/config/test.exs create mode 100644 backend/lib/backend.ex create mode 100644 backend/lib/backend/api.ex create mode 100644 backend/lib/backend/application.ex create mode 100644 backend/lib/backend/crawl.ex create mode 100644 backend/lib/backend/crawl_interaction.ex create mode 100644 backend/lib/backend/crawler/api_crawler.ex create mode 100644 backend/lib/backend/crawler/crawler.ex create mode 100644 backend/lib/backend/crawler/crawlers/mastodon.ex create mode 100644 backend/lib/backend/crawler/stale_instance_manager.ex create mode 100644 backend/lib/backend/crawler/util.ex create mode 100644 backend/lib/backend/edge.ex create mode 100644 backend/lib/backend/instance.ex create mode 100644 backend/lib/backend/instance_peer.ex create mode 100644 backend/lib/backend/release.ex create mode 100644 backend/lib/backend/repo.ex create mode 100644 backend/lib/backend/scheduler.ex create mode 100644 backend/lib/backend/util.ex create mode 100644 backend/lib/backend_web.ex create mode 100644 backend/lib/backend_web/channels/user_socket.ex create mode 100644 backend/lib/backend_web/controllers/fallback_controller.ex create mode 100644 backend/lib/backend_web/controllers/graph_controller.ex create mode 100644 backend/lib/backend_web/controllers/instance_controller.ex create mode 100644 backend/lib/backend_web/endpoint.ex create mode 100644 backend/lib/backend_web/gettext.ex create mode 100644 backend/lib/backend_web/router.ex create mode 100644 backend/lib/backend_web/views/changeset_view.ex create mode 100644 backend/lib/backend_web/views/error_helpers.ex create mode 100644 backend/lib/backend_web/views/error_view.ex create mode 100644 
backend/lib/backend_web/views/graph_view.ex create mode 100644 backend/lib/backend_web/views/instance_view.ex create mode 100644 backend/lib/mix/tasks/crawl.ex delete mode 100755 backend/manage.py create mode 100644 backend/mix.exs create mode 100644 backend/mix.lock create mode 100644 backend/priv/gettext/en/LC_MESSAGES/errors.po create mode 100644 backend/priv/repo/migrations/.formatter.exs create mode 100644 backend/priv/repo/migrations/20190624090436_create_instances.exs create mode 100644 backend/priv/repo/migrations/20190710133755_create_edges.exs create mode 100644 backend/priv/repo/migrations/20190710155001_create_crawls.exs create mode 100644 backend/priv/repo/migrations/20190710155112_create_crawl_interactions.exs create mode 100644 backend/priv/repo/migrations/20190712133009_add_instance_coords.exs create mode 100644 backend/priv/repo/seeds.exs create mode 100644 backend/rel/rel/vm.args.eex delete mode 100644 backend/requirements.txt delete mode 100644 backend/scraper/__init__.py delete mode 100644 backend/scraper/admin.py delete mode 100644 backend/scraper/apps.py delete mode 100644 backend/scraper/management/commands/_util.py delete mode 100644 backend/scraper/management/commands/build_edges.py delete mode 100644 backend/scraper/management/commands/scrape.py delete mode 100644 backend/scraper/migrations/0001_initial.py delete mode 100644 backend/scraper/migrations/0002_auto_20190419_1346.py delete mode 100644 backend/scraper/migrations/__init__.py delete mode 100644 backend/scraper/models.py delete mode 100644 backend/scripts/docker-entrypoint.sh create mode 100644 backend/test/backend_web/controllers/graph_controller_test.exs create mode 100644 backend/test/backend_web/controllers/instance_controller_test.exs create mode 100644 backend/test/backend_web/views/error_view_test.exs create mode 100644 backend/test/support/channel_case.ex create mode 100644 backend/test/support/conn_case.ex create mode 100644 backend/test/support/data_case.ex create mode 100644 backend/test/test_helper.exs delete mode 100644 config/Caddyfile delete mode 100644 config/gunicorn.conf.py delete mode 100644 frontend/.gitignore create mode 100644 frontend/src/components/CytoscapeGraph.tsx create mode 100644 frontend/src/components/FloatingCard.tsx create mode 100644 frontend/src/components/FloatingLayoutSelect.tsx create mode 100644 frontend/src/components/FloatingResetButton.tsx create mode 100644 frontend/src/typings/cytoscape-cola.d.ts create mode 100644 gephi/.classpath delete mode 100644 gephi/.gitignore create mode 100644 gephi/.project create mode 100644 gephi/.settings/org.eclipse.buildship.core.prefs create mode 100644 gephi/bin/main/space/fediverse/graph/GraphBuilder.class diff --git a/.dokku-monorepo b/.dokku-monorepo new file mode 100644 index 0000000..3f4f098 --- /dev/null +++ b/.dokku-monorepo @@ -0,0 +1,2 @@ +backend=backend +gephi=gephi \ No newline at end of file diff --git a/.gitignore b/.gitignore index bb59864..745528a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,93 +1,9 @@ *.csv .idea/ -backend/backend/static/ -backend/static/ *.gexf backend/whitelist.txt data/ - -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -./lib/ -./lib64/ -parts/ -sdist/ -var/ -wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to 
inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# pyenv -.python-version - -# celery beat schedule file -celerybeat-schedule - -# SageMath parsed files -*.sage.py +.vscode/ # Environments .env @@ -99,15 +15,84 @@ ENV/ env.bak/ venv.bak/ -# Spyder project settings -.spyderproject -.spyproject +# The directory Mix will write compiled artifacts to. +/backend/_build/ -# Rope project settings -.ropeproject +# If you run "mix test --cover", coverage assets end up here. +/backend/cover/ -# mkdocs documentation -/site +# The directory Mix downloads your dependencies sources to. +/backend/deps/ -# mypy -.mypy_cache/ +# Where 3rd-party dependencies like ExDoc output generated docs. +/backend/doc/ + +# Ignore .fetch files in case you like to edit your project deps locally. +/backend/.fetch + +# If the VM crashes, it generates a dump, let's ignore it too. +erl_crash.dump + +# Also ignore archive artifacts (built via "mix archive.build"). +*.ez + +# Ignore package tarball (built via "mix hex.build"). +backend-*.tar + +# Since we are building assets from assets/, +# we ignore priv/static. You may want to comment +# this depending on your deployment strategy. +/backend/priv/static/ + +# Files matching config/*.secret.exs pattern contain sensitive +# data and you should not commit them into version control. +# +# Alternatively, you may comment the line below and commit the +# secrets files as long as you replace their contents by environment +# variables. +/backend/config/*.secret.exs + +/backend/.elixir_ls/ + +*.pot +*.po + +# dependencies +/frontend/node_modules + +# testing +/frontend/coverage + +# production +/frontend/build + +# misc +.DS_Store +.env.local +.env.development.local +.env.test.local +.env.production.local + +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +/gephi/.gradle/ +/gephi/build/ +/gephi/lib/* +/gephi/!lib/.gitkeep +# 64MB file but I don't have much faith that it'll remain available... 
+!/gephi/lib/gephi-toolkit-0.9.2.jar + +*/.idea/ + +# Ignore Gradle GUI config +/gephi/gradle-app.setting + +# Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored) +!/gephi/gradle-wrapper.jar + +# Cache of project +/gephi/.gradletasknamecache + +*.javac diff --git a/backend/.dockerignore b/backend/.dockerignore new file mode 100644 index 0000000..7222781 --- /dev/null +++ b/backend/.dockerignore @@ -0,0 +1,9 @@ +_build/ +deps/ +.git/ +.gitignore +Dockerfile +Makefile +README* +test/ +priv/static/ diff --git a/backend/.formatter.exs b/backend/.formatter.exs new file mode 100644 index 0000000..8a6391c --- /dev/null +++ b/backend/.formatter.exs @@ -0,0 +1,5 @@ +[ + import_deps: [:ecto, :phoenix], + inputs: ["*.{ex,exs}", "priv/*/seeds.exs", "{config,lib,test}/**/*.{ex,exs}"], + subdirectories: ["priv/*/migrations"] +] diff --git a/backend/Dockerfile b/backend/Dockerfile index b1a0094..5a751c8 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -1,12 +1,53 @@ -FROM python:3 -ENV PYTHONUNBUFFERED 1 +FROM elixir:1.9.0-alpine as build -RUN apt-get update && \ - apt-get install -qqy --no-install-recommends \ - postgresql-client-9.6=9.6.10-0+deb9u1 +# install build dependencies +RUN apk add --update git build-base -RUN mkdir /code -WORKDIR /code -COPY requirements.txt /code/ -RUN pip install -r requirements.txt -COPY . /code/ +# prepare build dir +RUN mkdir /app +WORKDIR /app + +# install hex + rebar +RUN mix local.hex --force && \ + mix local.rebar --force + +# set build ENV +ENV MIX_ENV=prod + +# install mix dependencies +COPY mix.exs mix.lock ./ +COPY config config +RUN mix deps.get +RUN mix deps.compile + +# build assets +# COPY assets assets +# RUN cd assets && npm install && npm run deploy +# RUN mix phx.digest + +# build project +COPY priv priv +COPY lib lib +RUN mix compile + +# build release +COPY rel rel +RUN mix release + +# prepare release image +FROM alpine:3.9 AS app +RUN apk add --update bash openssl + +RUN mkdir /app +WORKDIR /app + +ENV APP_NAME=backend + +COPY --from=build /app/_build/prod/rel/${APP_NAME} ./ +RUN chown -R nobody: /app +USER nobody + +ENV HOME=/app + +# The command to start the backend +CMD trap 'exit' INT; ${HOME}/bin/${APP_NAME} start diff --git a/backend/README.md b/backend/README.md new file mode 100644 index 0000000..73a5fea --- /dev/null +++ b/backend/README.md @@ -0,0 +1,33 @@ +# fediverse.space backend + +## Notes + +- This project requires Elixir >= 1.9. +- Run with `SKIP_CRAWL=true` to just run the server (useful for working on the API without also crawling) + +## Deployment + +Deployment with Docker is handled as per the [Distillery docs](https://hexdocs.pm/distillery/guides/working_with_docker.html). + +- To build a new version, run `make build` in this directory. +- To migrate a released version, run `./backend eval "Backend.Release.migrate"` + +# Default README + +To start your Phoenix server: + +- Install dependencies with `mix deps.get` +- Create and migrate your database with `mix ecto.setup` +- Start Phoenix endpoint with `mix phx.server` + +Now you can visit [`localhost:4000`](http://localhost:4000) from your browser. + +Ready to run in production? Please [check our deployment guides](https://hexdocs.pm/phoenix/deployment.html). 
+ +## Learn more + +- Official website: http://www.phoenixframework.org/ +- Guides: https://hexdocs.pm/phoenix/overview.html +- Docs: https://hexdocs.pm/phoenix +- Mailing list: http://groups.google.com/group/phoenix-talk +- Source: https://github.com/phoenixframework/phoenix diff --git a/backend/apiv1/__init__.py b/backend/apiv1/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/backend/apiv1/_util.py b/backend/apiv1/_util.py deleted file mode 100644 index 329199c..0000000 --- a/backend/apiv1/_util.py +++ /dev/null @@ -1,8 +0,0 @@ -def to_representation(self, instance): - """ - Object instance -> Dict of primitive datatypes. - We use a custom to_representation function to exclude empty fields in the serialized JSON. - """ - ret = super(InstanceListSerializer, self).to_representation(instance) - ret = OrderedDict(list(filter(lambda x: x[1], ret.items()))) - return ret diff --git a/backend/apiv1/apps.py b/backend/apiv1/apps.py deleted file mode 100644 index d64dab4..0000000 --- a/backend/apiv1/apps.py +++ /dev/null @@ -1,5 +0,0 @@ -from django.apps import AppConfig - - -class Apiv1Config(AppConfig): - name = 'apiv1' diff --git a/backend/apiv1/serializers.py b/backend/apiv1/serializers.py deleted file mode 100644 index 2c4e08d..0000000 --- a/backend/apiv1/serializers.py +++ /dev/null @@ -1,105 +0,0 @@ -from rest_framework import serializers -import math -from collections import OrderedDict -from scraper.models import Instance, Edge - - -class InstanceListSerializer(serializers.ModelSerializer): - """ - Minimal instance details used in the full list of instances. - """ - class Meta: - model = Instance - fields = ('name', 'user_count') - - def to_representation(self, instance): - """ - Object instance -> Dict of primitive datatypes. - We use a custom to_representation function to exclude empty fields in the serialized JSON. - """ - ret = super(InstanceListSerializer, self).to_representation(instance) - ret = OrderedDict(list(filter(lambda x: x[1], ret.items()))) - return ret - - -class InstanceDetailSerializer(serializers.ModelSerializer): - """ - Detailed instance view. - """ - userCount = serializers.SerializerMethodField() - statusCount = serializers.SerializerMethodField() - domainCount = serializers.SerializerMethodField() - lastUpdated = serializers.SerializerMethodField() - peers = InstanceListSerializer(many=True, read_only=True) - - def get_userCount(self, obj): - return obj.user_count - - def get_statusCount(self, obj): - return obj.status_count - - def get_domainCount(self, obj): - return obj.domain_count - - def get_lastUpdated(self, obj): - return obj.last_updated - - class Meta: - model = Instance - fields = ('name', 'description', 'version', 'userCount', - 'statusCount', 'domainCount', 'peers', 'lastUpdated', - 'status') - - -class EdgeSerializer(serializers.ModelSerializer): - """ - Used for displaying the graph. - """ - id = serializers.SerializerMethodField('get_pk') - size = serializers.SerializerMethodField('get_weight') - - class Meta: - model = Edge - fields = ('source', 'target', 'id', 'size') - - def get_pk(self, obj): - return obj.pk - - def get_weight(self, obj): - return obj.weight - - -class NodeSerializer(serializers.ModelSerializer): - """ - Used for displaying the graph. 
- """ - id = serializers.SerializerMethodField('get_name') - label = serializers.SerializerMethodField('get_name') - size = serializers.SerializerMethodField() - x = serializers.SerializerMethodField() - y = serializers.SerializerMethodField() - - class Meta: - model = Instance - fields = ('id', 'label', 'size', 'x', 'y') - - def get_name(self, obj): - return obj.name - - def get_size(self, obj): - return math.log(obj.user_count) if (obj.user_count and (obj.user_count > 1)) else 1 - - def get_x(self, obj): - return obj.x_coord - - def get_y(self, obj): - return obj.y_coord - - def to_representation(self, instance): - """ - Object instance -> Dict of primitive datatypes. - We use a custom to_representation function to exclude empty fields in the serialized JSON. - """ - ret = super(NodeSerializer, self).to_representation(instance) - ret = OrderedDict(list(filter(lambda x: x[1], ret.items()))) - return ret diff --git a/backend/apiv1/views.py b/backend/apiv1/views.py deleted file mode 100644 index eeedb45..0000000 --- a/backend/apiv1/views.py +++ /dev/null @@ -1,37 +0,0 @@ -from rest_framework import viewsets -from scraper.models import Instance, Edge -from apiv1.serializers import InstanceListSerializer, InstanceDetailSerializer, NodeSerializer, EdgeSerializer - - -class InstanceViewSet(viewsets.ReadOnlyModelViewSet): - """API endpoint to view stats for, and the peers of, an instance""" - - lookup_field = 'name' - lookup_value_regex = '[a-zA-Z0-9-_\.]+' - - queryset = Instance.objects.all() - serializer_class = InstanceListSerializer - detail_serializer_class = InstanceDetailSerializer # this serializer also includes stats and a list of peers - - def get_serializer_class(self): - if self.action == 'retrieve': - if hasattr(self, 'detail_serializer_class'): - return self.detail_serializer_class - return self.serializer_class - - -class EdgeView(viewsets.ReadOnlyModelViewSet): - """ - Endpoint to get a list of the graph's edges in a SigmaJS-friendly format. - """ - queryset = Edge.objects.all() - serializer_class = EdgeSerializer - - -class NodeView(viewsets.ReadOnlyModelViewSet): - """ - Endpoint to get a list of the graph's nodes in a SigmaJS-friendly format. - """ - queryset = Instance.objects.filter(status='success', x_coord__isnull=False, y_coord__isnull=False, user_count__isnull=False)\ - .exclude(sources__isnull=True, targets__isnull=True) - serializer_class = NodeSerializer diff --git a/backend/backend/__init__.py b/backend/backend/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/backend/backend/settings/base.py b/backend/backend/settings/base.py deleted file mode 100644 index c24fcff..0000000 --- a/backend/backend/settings/base.py +++ /dev/null @@ -1,124 +0,0 @@ -""" -Django settings for backend project. - -Generated by 'django-admin startproject' using Django 2.1. - -For more information on this file, see -https://docs.djangoproject.com/en/2.1/topics/settings/ - -For the full list of settings and their values, see -https://docs.djangoproject.com/en/2.1/ref/settings/ -""" - -import os -import json -from django.core.exceptions import ImproperlyConfigured - -SECRET_KEY = os.getenv("SECRET_KEY") - -# Build paths inside the project like this: os.path.join(BASE_DIR, ...) 
-BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - -# Application definition - -INSTALLED_APPS = [ - 'django.contrib.admin', - 'django.contrib.auth', - 'django.contrib.contenttypes', - 'django.contrib.sessions', - 'django.contrib.messages', - 'django.contrib.staticfiles', - 'rest_framework', - 'silk', - 'corsheaders', - 'scraper.apps.ScraperConfig', - 'apiv1.apps.Apiv1Config', -] - -MIDDLEWARE = [ - 'corsheaders.middleware.CorsMiddleware', - 'django.middleware.security.SecurityMiddleware', - 'django.contrib.sessions.middleware.SessionMiddleware', - 'django.middleware.common.CommonMiddleware', - 'django.middleware.csrf.CsrfViewMiddleware', - 'django.contrib.auth.middleware.AuthenticationMiddleware', - 'django.contrib.messages.middleware.MessageMiddleware', - 'django.middleware.clickjacking.XFrameOptionsMiddleware', - 'silk.middleware.SilkyMiddleware', -] - -ROOT_URLCONF = 'backend.urls' - -TEMPLATES = [ - { - 'BACKEND': 'django.template.backends.django.DjangoTemplates', - 'DIRS': [os.path.join(BASE_DIR, '../../frontend/build')], - 'APP_DIRS': True, - 'OPTIONS': { - 'context_processors': [ - 'django.template.context_processors.debug', - 'django.template.context_processors.request', - 'django.contrib.auth.context_processors.auth', - 'django.contrib.messages.context_processors.messages', - ], - }, - }, -] - -WSGI_APPLICATION = 'backend.wsgi.application' - - -# Database -# https://docs.djangoproject.com/en/2.1/ref/settings/#databases - -DATABASES = { - 'default': { - 'ENGINE': 'django.db.backends.postgresql', - 'NAME': os.getenv("POSTGRES_DB"), - 'USER': os.getenv("POSTGRES_USER"), - 'PASSWORD': os.getenv("POSTGRES_PASSWORD"), - 'HOST': 'db', - 'PORT': 5432, - } -} - - -# Password validation -# https://docs.djangoproject.com/en/2.1/ref/settings/#auth-password-validators - -AUTH_PASSWORD_VALIDATORS = [ - { - 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', - }, - { - 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', - }, - { - 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', - }, - { - 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', - }, -] - - -# Internationalization -# https://docs.djangoproject.com/en/2.1/topics/i18n/ - -LANGUAGE_CODE = 'en-us' - -TIME_ZONE = 'UTC' - -USE_I18N = True - -USE_L10N = True - -USE_TZ = False - - -# Static files (CSS, JavaScript, Images) -# https://docs.djangoproject.com/en/2.1/howto/static-files/ - -STATIC_URL = '/static/' -STATICFILES_DIRS = [] -STATIC_ROOT = os.path.join(BASE_DIR, 'static') diff --git a/backend/backend/settings/development.py b/backend/backend/settings/development.py deleted file mode 100644 index 4c781c9..0000000 --- a/backend/backend/settings/development.py +++ /dev/null @@ -1,7 +0,0 @@ -from .base import * - -DEBUG = True - -ALLOWED_HOSTS = ['localhost'] - -CORS_ORIGIN_ALLOW_ALL = True \ No newline at end of file diff --git a/backend/backend/settings/production.py b/backend/backend/settings/production.py deleted file mode 100644 index 3e0f690..0000000 --- a/backend/backend/settings/production.py +++ /dev/null @@ -1,10 +0,0 @@ -from .base import * - -DEBUG = False - -ALLOWED_HOSTS = ['backend.fediverse.space'] - -CORS_ORIGIN_REGEX_WHITELIST = [ - r'^(https?:\/\/)?(\w+\.)?(.*)?fediverse-space\.netlify\.com\/?$', - r'^(https?:\/\/)?(\w+\.)?(.*)?fediverse\.space\/?$', -] diff --git a/backend/backend/urls.py b/backend/backend/urls.py deleted file mode 100644 index b77c246..0000000 --- a/backend/backend/urls.py +++ 
/dev/null @@ -1,37 +0,0 @@ -"""backend URL Configuration - -The `urlpatterns` list routes URLs to views. For more information please see: - https://docs.djangoproject.com/en/2.1/topics/http/urls/ -Examples: -Function views - 1. Add an import: from my_app import views - 2. Add a URL to urlpatterns: path('', views.home, name='home') -Class-based views - 1. Add an import: from other_app.views import Home - 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home') -Including another URLconf - 1. Import the include() function: from django.urls import include, path - 2. Add a URL to urlpatterns: path('blog/', include('blog.urls')) -""" -from django.urls import path, include -from django.views.generic import TemplateView -from rest_framework import routers -from apiv1 import views - - -class OptionalTrailingSlashRouter(routers.DefaultRouter): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.trailing_slash = r'/?' - - -router = OptionalTrailingSlashRouter() -router.register(r'instances', views.InstanceViewSet) -router.register(r'graph/nodes', views.NodeView) -router.register(r'graph/edges', views.EdgeView, base_name='edge') - -urlpatterns = [ - path('api/v1/', include(router.urls)), - path('silk/', include('silk.urls', namespace='silk')), -] diff --git a/backend/backend/wsgi.py b/backend/backend/wsgi.py deleted file mode 100644 index 238f221..0000000 --- a/backend/backend/wsgi.py +++ /dev/null @@ -1,13 +0,0 @@ -""" -WSGI config for backend project. - -It exposes the WSGI callable as a module-level variable named ``application``. - -For more information on this file, see -https://docs.djangoproject.com/en/2.1/howto/deployment/wsgi/ -""" - -import os -from django.core.wsgi import get_wsgi_application - -application = get_wsgi_application() diff --git a/backend/config/config.exs b/backend/config/config.exs new file mode 100644 index 0000000..a739a89 --- /dev/null +++ b/backend/config/config.exs @@ -0,0 +1,51 @@ +# This file is responsible for configuring your application +# and its dependencies with the aid of the Mix.Config module. +# +# This configuration file is loaded before any dependency and +# is restricted to this project. + +# General application configuration +import Config + +config :backend, + ecto_repos: [Backend.Repo] + +# Configures the endpoint +config :backend, BackendWeb.Endpoint, + url: [host: "localhost"], + secret_key_base: "XL4NKGBN9lZMrQbMEI1KJOlwAt8S7younVJl90TdAgzmwyapr3g7BRYSNYvX0sZ9", + render_errors: [view: BackendWeb.ErrorView, accepts: ~w(json)], + pubsub: [name: Backend.PubSub, adapter: Phoenix.PubSub.PG2] + +config :backend, Backend.Repo, queue_target: 5000 + +# Configures Elixir's Logger +config :logger, :console, + format: "$time $metadata[$level] $message\n", + metadata: [:request_id] + +# Use Jason for JSON parsing in Phoenix +config :phoenix, :json_library, Jason + +config :backend, :crawler, + status_age_limit_days: 28, + status_count_limit: 5000, + personal_instance_threshold: 10, + crawl_interval_mins: 30, + crawl_workers: 50, + blacklist: [ + "gab.best" + ], + user_agent: "fediverse.space crawler" + +config :backend, Backend.Scheduler, + jobs: [ + # At midnight every day + {"@daily", {Backend.Scheduler, :prune_crawls, [1, "month"]}}, + # 00.15 daily + {"15 0 * * *", {Backend.Scheduler, :generate_edges, []}} + ] + +# Import environment specific config. This must remain at the bottom +# of this file so it overrides the configuration defined above. 
+import_config "#{Mix.env()}.exs" diff --git a/backend/config/dev.exs b/backend/config/dev.exs new file mode 100644 index 0000000..e47c9b5 --- /dev/null +++ b/backend/config/dev.exs @@ -0,0 +1,72 @@ +import Config + +# For development, we disable any cache and enable +# debugging and code reloading. +# +# The watchers configuration can be used to run external +# watchers to your application. For example, we use it +# with webpack to recompile .js and .css sources. +config :backend, BackendWeb.Endpoint, + http: [port: 4000], + debug_errors: true, + code_reloader: true, + check_origin: false, + watchers: [] + +# ## SSL Support +# +# In order to use HTTPS in development, a self-signed +# certificate can be generated by running the following +# Mix task: +# +# mix phx.gen.cert +# +# Note that this task requires Erlang/OTP 20 or later. +# Run `mix help phx.gen.cert` for more information. +# +# The `http:` config above can be replaced with: +# +# https: [ +# port: 4001, +# cipher_suite: :strong, +# keyfile: "priv/cert/selfsigned_key.pem", +# certfile: "priv/cert/selfsigned.pem" +# ], +# +# If desired, both `http:` and `https:` keys can be +# configured to run both http and https servers on +# different ports. + +# Do not include metadata nor timestamps in development logs +config :logger, :console, format: "[$level] $message\n" + +# Set a higher stacktrace during development. Avoid configuring such +# in production as building large stacktraces may be expensive. +config :phoenix, :stacktrace_depth, 20 + +# Initialize plugs at runtime for faster development compilation +config :phoenix, :plug_init_mode, :runtime + +# Configure your database +config :backend, Backend.Repo, + username: "postgres", + password: "postgres", + database: "backend_dev", + hostname: "localhost", + pool_size: 10 + +config :backend, :crawler, + status_age_limit_days: 28, + status_count_limit: 100, + personal_instance_threshold: 1, + crawl_interval_mins: 1, + crawl_workers: 10, + blacklist: [ + "gab.best" + ] + +config :backend, Backend.Scheduler, + jobs: [ + # Every 15 minutes + {"*/15 * * * *", {Backend.Scheduler, :prune_crawls, [12, "hour"]}} + ] diff --git a/backend/config/prod.exs b/backend/config/prod.exs new file mode 100644 index 0000000..0197c65 --- /dev/null +++ b/backend/config/prod.exs @@ -0,0 +1,57 @@ +import Config + +# Do not print debug messages in production +config :logger, level: :info + +# ## SSL Support +# +# To get SSL working, you will need to add the `https` key +# to the previous section and set your `:url` port to 443: +# +# config :backend, BackendWeb.Endpoint, +# ... +# url: [host: "example.com", port: 443], +# https: [ +# :inet6, +# port: 443, +# cipher_suite: :strong, +# keyfile: System.get_env("SOME_APP_SSL_KEY_PATH"), +# certfile: System.get_env("SOME_APP_SSL_CERT_PATH") +# ] +# +# The `cipher_suite` is set to `:strong` to support only the +# latest and more secure SSL ciphers. This means old browsers +# and clients may not be supported. You can set it to +# `:compatible` for wider support. +# +# `:keyfile` and `:certfile` expect an absolute path to the key +# and cert in disk or a relative path inside priv, for example +# "priv/ssl/server.key". 
For all supported SSL configuration # options, see https://hexdocs.pm/plug/Plug.SSL.html#configure/1 # # We also recommend setting `force_ssl` in your endpoint, ensuring # no data is ever sent via http, always redirecting to https: # # config :backend, BackendWeb.Endpoint, # force_ssl: [hsts: true] # # Check `Plug.SSL` for all available options in `force_ssl`. # ## Using releases (distillery) # # If you are doing OTP releases, you need to instruct Phoenix # to start the server for all endpoints: # # config :phoenix, :serve_endpoints, true # # Alternatively, you can configure exactly which server to # start per endpoint: # # config :backend, BackendWeb.Endpoint, server: true # # Note you can't rely on `System.get_env/1` when using releases. # See the releases documentation accordingly. # Finally import the config/prod.secret.exs which should be versioned # separately. # import_config "prod.secret.exs" diff --git a/backend/config/releases.exs b/backend/config/releases.exs new file mode 100644 index 0000000..3317772 --- /dev/null +++ b/backend/config/releases.exs @@ -0,0 +1,27 @@ +# This file is for *runtime configuration in releases* only. +# https://hexdocs.pm/phoenix/releases.html#runtime-configuration + +import Config + +# For production, don't forget to configure the url host # to something meaningful, Phoenix uses this information # when generating URLs. +config :backend, Backend.Repo, + # username: System.get_env("POSTGRES_USER"), + # password: System.get_env("POSTGRES_PASSWORD"), + # database: System.get_env("POSTGRES_DB"), + # hostname: System.get_env("POSTGRES_HOSTNAME"), + url: System.get_env("DATABASE_URL"), + pool_size: String.to_integer(System.get_env("POOL_SIZE") || "10"), + ssl: true + +# show_sensitive_data_on_connection_error: true + +port = String.to_integer(System.get_env("PORT") || "4000") + +config :backend, BackendWeb.Endpoint, + http: [:inet6, port: port], + url: [host: System.get_env("BACKEND_HOSTNAME"), port: port], + root: ".", + secret_key_base: System.get_env("SECRET_KEY_BASE"), + server: true diff --git a/backend/config/test.exs b/backend/config/test.exs new file mode 100644 index 0000000..221df5a --- /dev/null +++ b/backend/config/test.exs @@ -0,0 +1,18 @@ +import Config + +# We don't run a server during test. If one is required, # you can enable the server option below. +config :backend, BackendWeb.Endpoint, + http: [port: 4002], + server: false + +# Print only warnings and errors during test +config :logger, level: :warn + +# Configure your database +config :backend, Backend.Repo, + username: "postgres", + password: "postgres", + database: "backend_test", + hostname: "localhost", + pool: Ecto.Adapters.SQL.Sandbox diff --git a/backend/lib/backend.ex b/backend/lib/backend.ex new file mode 100644 index 0000000..d059a26 --- /dev/null +++ b/backend/lib/backend.ex @@ -0,0 +1,9 @@ +defmodule Backend do + @moduledoc """ + Backend keeps the contexts that define your domain + and business logic. + + Contexts are also responsible for managing your data, regardless + if it comes from the database, an external API or others. 
+ """ +end diff --git a/backend/lib/backend/api.ex b/backend/lib/backend/api.ex new file mode 100644 index 0000000..be436d3 --- /dev/null +++ b/backend/lib/backend/api.ex @@ -0,0 +1,68 @@ +defmodule Backend.Api do + alias Backend.{Crawl, Edge, Instance, Repo} + import Ecto.Query + + @spec list_instances() :: [Instance.t()] + def list_instances() do + Instance + |> Repo.all() + end + + @spec get_instance!(String.t()) :: Instance.t() + def get_instance!(domain) do + Instance + |> preload(:peers) + |> Repo.get_by!(domain: domain) + end + + @doc """ + Returns a list of instances that + * have at least one successful crawl + * have a user count (required to give the instance a size on the graph) + """ + @spec list_nodes() :: [Instance.t()] + def list_nodes() do + crawl_subquery = + Crawl + |> select([c], %{ + instance_domain: c.instance_domain, + crawl_count: count(c.id) + }) + |> where([c], is_nil(c.error)) + |> group_by([c], c.instance_domain) + + Instance + |> join(:inner, [i], c in subquery(crawl_subquery), on: i.domain == c.instance_domain) + |> where( + [i, c], + c.crawl_count > 0 and not is_nil(i.user_count) and not is_nil(i.x) and not is_nil(i.y) + ) + |> select([c], [:domain, :user_count, :x, :y]) + |> Repo.all() + end + + @spec list_edges() :: [Edge.t()] + def list_edges() do + crawl_subquery = + Crawl + |> select([c], %{ + instance_domain: c.instance_domain, + crawl_count: count(c.id) + }) + |> where([c], is_nil(c.error)) + |> group_by([c], c.instance_domain) + + Edge + |> join(:inner, [e], c1 in subquery(crawl_subquery), on: e.source_domain == c1.instance_domain) + |> join(:inner, [e], c2 in subquery(crawl_subquery), on: e.target_domain == c2.instance_domain) + |> join(:inner, [e], i1 in Instance, on: e.source_domain == i1.domain) + |> join(:inner, [e], i2 in Instance, on: e.target_domain == i2.domain) + |> select([e], [:id, :source_domain, :target_domain, :weight]) + |> where( + [e, c1, c2, i1, i2], + c1.crawl_count > 0 and c2.crawl_count > 0 and not is_nil(i1.x) and not is_nil(i1.y) and + not is_nil(i2.x) and not is_nil(i2.y) and e.source_domain != e.target_domain + ) + |> Repo.all() + end +end diff --git a/backend/lib/backend/application.ex b/backend/lib/backend/application.ex new file mode 100644 index 0000000..8880fe3 --- /dev/null +++ b/backend/lib/backend/application.ex @@ -0,0 +1,46 @@ +defmodule Backend.Application do + # See https://hexdocs.pm/elixir/Application.html + # for more information on OTP Applications + @moduledoc false + + use Application + require Logger + import Backend.Util + + def start(_type, _args) do + crawl_worker_count = get_config(:crawl_workers) + + children = [ + # Start the Ecto repository + Backend.Repo, + # Start the endpoint when the application starts + BackendWeb.Endpoint, + # Crawler children + :hackney_pool.child_spec(:crawler, timeout: 15000, max_connections: crawl_worker_count), + {Task, + fn -> + Honeydew.start_queue(:crawl_queue, failure_mode: Honeydew.FailureMode.Abandon) + Honeydew.start_workers(:crawl_queue, Backend.Crawler, num: crawl_worker_count) + end}, + Backend.Scheduler + ] + + children = + case Enum.member?(["true", 1, "1"], System.get_env("SKIP_CRAWL")) do + true -> children + false -> children ++ [Backend.Crawler.StaleInstanceManager] + end + + # See https://hexdocs.pm/elixir/Supervisor.html + # for other strategies and supported options + opts = [strategy: :one_for_one, name: Backend.Supervisor] + Supervisor.start_link(children, opts) + end + + # Tell Phoenix to update the endpoint configuration + # whenever the application 
is updated. + def config_change(changed, _new, removed) do + BackendWeb.Endpoint.config_change(changed, removed) + :ok + end +end diff --git a/backend/lib/backend/crawl.ex b/backend/lib/backend/crawl.ex new file mode 100644 index 0000000..bedd4af --- /dev/null +++ b/backend/lib/backend/crawl.ex @@ -0,0 +1,26 @@ +defmodule Backend.Crawl do + use Ecto.Schema + import Ecto.Changeset + + schema "crawls" do + belongs_to :instance, Backend.Instance, + references: :domain, + type: :string, + foreign_key: :instance_domain + + field :interactions_seen, :integer + field :statuses_seen, :integer + + # if something went wrong, otherwise null + field :error, :string + + timestamps() + end + + @doc false + def changeset(crawl, attrs) do + crawl + |> cast(attrs, [:instance, :statuses_seen, :interactions_seen, :error]) + |> validate_required([:instance]) + end +end diff --git a/backend/lib/backend/crawl_interaction.ex b/backend/lib/backend/crawl_interaction.ex new file mode 100644 index 0000000..cb29122 --- /dev/null +++ b/backend/lib/backend/crawl_interaction.ex @@ -0,0 +1,29 @@ +defmodule Backend.CrawlInteraction do + use Ecto.Schema + import Ecto.Changeset + + schema "crawl_interactions" do + belongs_to :crawl, Backend.Crawl + + belongs_to :source, Backend.Instance, + references: :domain, + type: :string, + foreign_key: :source_domain + + belongs_to :target, Backend.Instance, + references: :domain, + type: :string, + foreign_key: :target_domain + + field :mentions, :integer + + timestamps() + end + + @doc false + def changeset(crawl_interaction, attrs) do + crawl_interaction + |> cast(attrs, [:crawl, :source, :target, :mentions]) + |> validate_required([:crawl, :source, :target, :mentions]) + end +end diff --git a/backend/lib/backend/crawler/api_crawler.ex b/backend/lib/backend/crawler/api_crawler.ex new file mode 100644 index 0000000..059914b --- /dev/null +++ b/backend/lib/backend/crawler/api_crawler.ex @@ -0,0 +1,45 @@ +defmodule Backend.Crawler.ApiCrawler do + @moduledoc """ + This module is a specification. Crawlers for all instance types must implement its behaviour. + + Make sure to respect the following: + * You must adhere to the following configuration values: + * `:status_age_limit_days` specifies that you must only crawl statuses from the most recent N days + * `:status_count_limit` specifies the max number of statuses to crawl in one go + * `:personal_instance_threshold` specifies that instances with fewer than this number of users should not be crawled + * profiles with the string "nobot" (case insensitive) in their profile must not be included in any stats + * Make sure to check the most recent crawl of the instance so you don't re-crawl old statuses + """ + + # {domain_mentioned, count} + @type instance_interactions :: %{String.t() => integer} + + defstruct [ + :version, + :description, + :user_count, + :status_count, + :peers, + :interactions, + :statuses_seen + ] + + @type t() :: %__MODULE__{ + version: String.t(), + description: String.t(), + user_count: integer, + status_count: integer, + peers: [String.t()], + interactions: instance_interactions, + statuses_seen: integer + } + + @doc """ + Check whether the instance at the given domain is of the type that this ApiCrawler implements. + """ + @callback is_instance_type?(String.t()) :: boolean() + @doc """ + Crawl the instance at the given domain. 
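+
+  For illustration, a hypothetical implementation of this behaviour might look
+  like the following sketch (`Backend.Crawler.Crawlers.Mastodon` is the one
+  real implementation in this patch):
+
+      defmodule Backend.Crawler.Crawlers.Example do
+        alias Backend.Crawler.ApiCrawler
+        @behaviour ApiCrawler
+
+        @impl ApiCrawler
+        def is_instance_type?(_domain), do: false
+
+        @impl ApiCrawler
+        def crawl(_domain) do
+          %ApiCrawler{
+            version: "0.0.0",
+            description: "a made-up instance",
+            user_count: 1,
+            status_count: 0,
+            peers: [],
+            interactions: %{},
+            statuses_seen: 0
+          }
+        end
+      end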
+ """ + @callback crawl(String.t()) :: t() +end diff --git a/backend/lib/backend/crawler/crawler.ex b/backend/lib/backend/crawler/crawler.ex new file mode 100644 index 0000000..1538a77 --- /dev/null +++ b/backend/lib/backend/crawler/crawler.ex @@ -0,0 +1,196 @@ +defmodule Backend.Crawler do + @moduledoc """ + This module crawls instances. Run `run(domain)` to crawl a given domain. + """ + + alias __MODULE__ + alias Backend.Crawler.Crawlers.Mastodon + alias Backend.Crawler.ApiCrawler + alias Backend.{Crawl, CrawlInteraction, Repo, Instance, InstancePeer} + import Ecto.Query + import Backend.Util + require Logger + + defstruct [ + # the instance domain (a string) + :domain, + # a list of ApiCrawlers that will be attempted + :api_crawlers, + :found_api?, + :result, + :error + ] + + @type t() :: %__MODULE__{ + domain: String.t(), + api_crawlers: [ApiCrawler.t()], + found_api?: boolean, + result: ApiCrawler.t() | nil, + error: String.t() | nil + } + + def run(domain) do + Logger.info("Crawling #{domain}...") + HTTPoison.start() + state = %Crawler{domain: domain, api_crawlers: [], found_api?: false, result: nil, error: nil} + + state + # register APICrawlers here + |> register(Mastodon) + # go! + |> crawl() + |> save() + end + + # Adds a new ApiCrawler that run/1 will check. + defp register(%Crawler{api_crawlers: crawlers} = state, api_crawler) do + Map.put(state, :api_crawlers, [api_crawler | crawlers]) + end + + # Recursive function to check whether `domain` has an API that the head of the api_crawlers list can read. + # If so, crawls it. If not, continues with the tail of the api_crawlers list. + defp crawl(%Crawler{api_crawlers: [], domain: domain} = state) do + Logger.debug("Found no compatible API for #{domain}") + Map.put(state, :found_api?, false) + end + + defp crawl(%Crawler{domain: domain, api_crawlers: [curr | remaining_crawlers]} = state) do + if curr.is_instance_type?(domain) do + Logger.debug("Found #{curr} instance") + state = Map.put(state, :found_api?, true) + + try do + %Crawler{state | result: curr.crawl(domain), api_crawlers: []} + rescue + e in HTTPoison.Error -> + Map.put(state, :error, "HTTPoison error: " <> HTTPoison.Error.message(e)) + + e in Jason.DecodeError -> + Map.put(state, :error, "Jason DecodeError: " <> Jason.DecodeError.message(e)) + + e in _ -> + Map.put(state, :error, "Unknown error: " <> inspect(e)) + end + else + # Nothing found so check the next APICrawler + Logger.debug("#{domain} is not an instance of #{curr}") + crawl(%Crawler{state | api_crawlers: remaining_crawlers}) + end + end + + # Save the state (after crawling) to the database. 
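+ # For a successful crawl this happens in several steps:
+ #   1. upsert the instance row itself (insert with on_conflict/conflict_target);
+ #   2. record the crawl, with the number of statuses and interactions seen;
+ #   3. upsert any newly discovered peers and, inside a transaction, diff the
+ #      stored peer relationships against the ones seen in this crawl;
+ #   4. bulk-insert the interactions observed in this crawl.
+ # Failed crawls (the second clause below) just record the error.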
+ defp save(%Crawler{domain: domain, result: result, found_api?: true, error: nil}) do + now = NaiveDateTime.truncate(NaiveDateTime.utc_now(), :second) + + ## Update the instance we crawled ## + Repo.insert!( + %Instance{ + domain: domain, + description: result.description, + version: result.version, + user_count: result.user_count, + status_count: result.status_count + }, + on_conflict: [ + set: [ + description: result.description, + version: result.version, + user_count: result.user_count, + status_count: result.status_count, + updated_at: now + ] + ], + conflict_target: :domain + ) + + # Save details of a new crawl + curr_crawl = + Repo.insert!(%Crawl{ + instance_domain: domain, + interactions_seen: + result.interactions |> Map.values() |> Enum.reduce(0, fn count, acc -> count + acc end), + statuses_seen: result.statuses_seen + }) + + # We get a list of peers from two places: + # * the official peers endpoint (which may be disabled) + # * the interactions + peers_domains = + result.interactions + |> Map.keys() + |> list_union(result.peers) + |> Enum.filter(fn domain -> not is_blacklisted?(domain) end) + + peers = + peers_domains + |> Enum.map(&%{domain: &1, inserted_at: now, updated_at: now}) + + Instance + |> Repo.insert_all(peers, on_conflict: :nothing, conflict_target: :domain) + + Repo.transaction(fn -> + ## Save peer relationships ## + # get current peers (a list of strings) + current_peers = + InstancePeer + |> where(source_domain: ^domain) + |> select([p], p.target_domain) + |> Repo.all() + + wanted_peers_set = MapSet.new(peers_domains) + current_peers_set = MapSet.new(current_peers) + + # delete the peers we don't want + dont_want = current_peers_set |> MapSet.difference(wanted_peers_set) |> MapSet.to_list() + + if length(dont_want) > 0 do + InstancePeer + |> where(source_domain: ^domain) + |> where([p], p.target_domain in ^dont_want) + |> Repo.delete_all([]) + end + + # insert the ones we don't have yet + new_instance_peers = + wanted_peers_set + |> MapSet.difference(current_peers_set) + |> MapSet.to_list() + |> Enum.map( + &%{ + source_domain: domain, + target_domain: &1, + inserted_at: now, + updated_at: now + } + ) + + InstancePeer + |> Repo.insert_all(new_instance_peers) + end) + + ## Save interactions ## + interactions = + result.interactions + |> Enum.filter(fn {target_domain, _count} -> not is_blacklisted?(target_domain) end) + |> Enum.map(fn {target_domain, count} -> + %{ + crawl_id: curr_crawl.id, + source_domain: domain, + target_domain: target_domain, + mentions: count, + inserted_at: now, + updated_at: now + } + end) + + CrawlInteraction + |> Repo.insert_all(interactions) + end + + defp save(%{domain: domain, error: error}) do + Repo.insert!(%Crawl{ + instance_domain: domain, + error: error + }) + end +end diff --git a/backend/lib/backend/crawler/crawlers/mastodon.ex b/backend/lib/backend/crawler/crawlers/mastodon.ex new file mode 100644 index 0000000..3476256 --- /dev/null +++ b/backend/lib/backend/crawler/crawlers/mastodon.ex @@ -0,0 +1,193 @@ +defmodule Backend.Crawler.Crawlers.Mastodon do + require Logger + import Backend.Crawler.Util + alias Backend.Crawler.ApiCrawler + + @behaviour ApiCrawler + + @impl ApiCrawler + def is_instance_type?(domain) do + case get("https://#{domain}/api/v1/instance") do + {:ok, response} -> if is_http_200?(response), do: has_title?(response.body), else: false + {:error, _error} -> false + end + end + + @impl ApiCrawler + def crawl(domain) do + instance = Jason.decode!(get!("https://#{domain}/api/v1/instance").body) + + if 
get_in(instance, ["stats", "user_count"]) > get_config(:personal_instance_threshold) do + crawl_large_instance(domain, instance) + else + Map.merge( + Map.merge( + Map.take(instance, ["version", "description"]), + Map.take(instance["stats"], ["user_count", "status_count"]) + ) + |> Map.new(fn {k, v} -> {String.to_atom(k), v} end), + %{peers: [], interactions: %{}, statuses_seen: 0} + ) + end + end + + @spec crawl_large_instance(String.t(), any()) :: ApiCrawler.t() + defp crawl_large_instance(domain, instance) do + # servers may not publish peers + peers = + case get("https://#{domain}/api/v1/instance/peers") do + {:ok, response} -> if is_http_200?(response), do: Jason.decode!(response.body), else: [] + {:error, _error} -> [] + end + + Logger.debug("Found #{length(peers)} peers.") + + {interactions, statuses_seen} = get_interactions(domain) + + Logger.debug( + "#{domain}: found #{ + interactions |> Map.values() |> Enum.reduce(0, fn count, acc -> count + acc end) + } mentions in #{statuses_seen} statuses." + ) + + Map.merge( + Map.merge( + Map.take(instance, ["version", "description"]), + Map.take(instance["stats"], ["user_count", "status_count"]) + ) + |> Map.new(fn {k, v} -> {String.to_atom(k), v} end), + %{peers: peers, interactions: interactions, statuses_seen: statuses_seen} + ) + end + + @spec get_interactions( + String.t(), + String.t() | nil, + Calendar.naive_datetime() | nil, + ApiCrawler.instance_interactions(), + integer + ) :: {ApiCrawler.instance_interactions(), integer} + defp get_interactions( + domain, + max_id \\ nil, + min_timestamp \\ nil, + interactions \\ %{}, + statuses_seen \\ 0 + ) do + # If `statuses_seen == 0`, it's the first call of this function, which means we want to query the database for the + # most recent status we have. + min_timestamp = + if statuses_seen == 0 do + get_last_successful_crawl_timestamp(domain) + else + min_timestamp + end + + endpoint = "https://#{domain}/api/v1/timelines/public?local=true" + + endpoint = + if max_id do + endpoint <> "&max_id=#{max_id}" + else + endpoint + end + + Logger.debug("Crawling #{endpoint}") + + statuses = + endpoint + |> get!() + |> Map.get(:body) + |> Jason.decode!() + + filtered_statuses = + statuses + |> Enum.filter(fn s -> is_after?(s["created_at"], min_timestamp) end) + + if length(filtered_statuses) > 0 do + # get statuses that are eligible (i.e. 
users don't have #nobot in their profile) and have mentions + interactions = Map.merge(interactions, statuses_to_interactions(filtered_statuses)) + statuses_seen = statuses_seen + length(filtered_statuses) + + status_datetime_threshold = + NaiveDateTime.utc_now() + |> NaiveDateTime.add(get_config(:status_age_limit_days) * 24 * 3600 * -1, :second) + + oldest_status = Enum.at(filtered_statuses, -1) + + oldest_status_datetime = + oldest_status + |> (fn s -> s["created_at"] end).() + |> NaiveDateTime.from_iso8601!() + + if NaiveDateTime.compare(oldest_status_datetime, status_datetime_threshold) == :gt and + statuses_seen < get_config(:status_count_limit) and + length(filtered_statuses) == length(statuses) do + get_interactions(domain, oldest_status["id"], min_timestamp, interactions, statuses_seen) + else + {interactions, statuses_seen} + end + else + {interactions, statuses_seen} + end + end + + # To check if the endpoint works as expected + @spec has_title?(String.t()) :: boolean + defp has_title?(body) do + case Jason.decode(body) do + {:ok, decoded} -> Map.has_key?(decoded, "title") + {:error, _error} -> false + end + end + + # Checks whether the status contains one or more mentions + defp is_mention?(status) do + case status["mentions"] do + [] -> false + nil -> false + _ -> true + end + end + + # Checks if the author of the status has "nobot" in their profile + defp has_nobot?(status) do + account = status["account"] + + fields = + account["fields"] + |> Enum.map(fn %{"name" => name, "value" => value} -> name <> value end) + |> Enum.join("") + + # this also means that any users who mentioned ethnobotany in their profiles will be excluded lol ¯\_(ツ)_/¯ + (account["note"] <> fields) + |> String.downcase() + |> String.contains?("nobot") + end + + # This checks if the status + # a) contains one or more mentions, and + # b) that the person posting doesn't have "nobot" in their profile + defp is_eligible?(status) do + is_mention?(status) and not has_nobot?(status) + end + + @spec extract_mentions_from_status(any()) :: ApiCrawler.instance_interactions() + defp extract_mentions_from_status(status) do + status["mentions"] + |> Enum.map(fn mention -> get_domain(mention["url"]) end) + |> Enum.reduce(%{}, fn domain, acc -> + Map.update(acc, domain, 1, &(&1 + 1)) + end) + end + + @spec statuses_to_interactions(any()) :: ApiCrawler.instance_interactions() + defp statuses_to_interactions(statuses) do + statuses + |> Enum.filter(fn status -> is_eligible?(status) end) + |> Enum.map(fn status -> extract_mentions_from_status(status) end) + |> Enum.reduce(%{}, fn map, acc -> + Map.merge(acc, map) + end) + end +end diff --git a/backend/lib/backend/crawler/stale_instance_manager.ex b/backend/lib/backend/crawler/stale_instance_manager.ex new file mode 100644 index 0000000..79bf15f --- /dev/null +++ b/backend/lib/backend/crawler/stale_instance_manager.ex @@ -0,0 +1,84 @@ +defmodule Backend.Crawler.StaleInstanceManager do + use GenServer + alias Backend.{Crawl, Instance, Repo} + import Ecto.Query + import Backend.Util + require Logger + + @moduledoc """ + This module regularly finds stale instances (i.e. instances that haven't been updated for longer than the crawl + interval) and adds them to the job queue. It runs once a minute. 
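+
+  "Stale" covers both instances whose most recent successful crawl is older
+  than the :crawl_interval_mins config value and instances that have never
+  been crawled successfully at all.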
+ """ + + def start_link(_opts) do + GenServer.start_link(__MODULE__, [], name: __MODULE__) + end + + @impl true + def init(_opts) do + instance_count = + Instance + |> where([i], not is_nil(i.version)) + |> select([i], count(i.domain)) + |> Repo.one() + + case instance_count do + # Add m.s. as the seed and schedule the next add + 0 -> + add_to_queue("mastodon.social") + schedule_add() + + # Start immediately + _ -> + Process.send(self(), :queue_stale_domains, []) + end + + {:ok, []} + end + + @impl true + def handle_info(:queue_stale_domains, state) do + queue_stale_domains() + schedule_add() + {:noreply, state} + end + + defp schedule_add() do + Process.send_after(self(), :queue_stale_domains, 60_000) + end + + defp queue_stale_domains() do + interval = -1 * get_config(:crawl_interval_mins) + + # Get domains that have never been crawled and where the last crawl is past the threshold + crawls_subquery = + Crawl + |> select([c], %{ + instance_domain: c.instance_domain, + most_recent_crawl: max(c.inserted_at), + crawl_count: count(c.id) + }) + |> where([c], is_nil(c.error)) + |> group_by([c], c.instance_domain) + + stale_domains = + Instance + |> join(:left, [i], c in subquery(crawls_subquery), on: i.domain == c.instance_domain) + |> where( + [i, c], + c.most_recent_crawl < datetime_add(^NaiveDateTime.utc_now(), ^interval, "minute") or + is_nil(c.crawl_count) + ) + |> select([i], i.domain) + |> Repo.all() + + Logger.debug("Adding #{length(stale_domains)} stale domains to queue.") + + stale_domains + |> Enum.each(fn domain -> add_to_queue(domain) end) + end + + defp add_to_queue(domain) do + {:run, [domain]} |> Honeydew.async(:crawl_queue) + end +end diff --git a/backend/lib/backend/crawler/util.ex b/backend/lib/backend/crawler/util.ex new file mode 100644 index 0000000..c5013f1 --- /dev/null +++ b/backend/lib/backend/crawler/util.ex @@ -0,0 +1,63 @@ +defmodule Backend.Crawler.Util do + require Logger + import Backend.Util + + # Gets the domain from a Mastodon/Pleroma account URL + # (e.g. https://mastodon.social/@demouser or https://pleroma.site/users/demouser) + @spec get_domain(String.t()) :: String.t() + def get_domain(url) do + String.slice(url, 8..-1) + |> String.split("/") + |> Enum.at(0) + end + + @spec is_http_200?(HTTPoison.Response.t()) :: boolean + def is_http_200?(%{status_code: 200}) do + true + end + + def is_http_200?(_) do + false + end + + @spec is_after?(String.t(), NaiveDateTime.t() | nil) :: boolean() + def is_after?(timestamp, threshold) do + if threshold == nil do + true + else + timestamp + |> NaiveDateTime.from_iso8601!() + # :second is the granularity used in the database + |> NaiveDateTime.truncate(:second) + |> NaiveDateTime.compare(threshold) + |> Kernel.===(:gt) + end + end + + def get(url) do + # TODO: add version number to user agent? + HTTPoison.get(url, [{"User-Agent", get_config(:user_agent)}], + hackney: [pool: :crawler], + recv_timeout: 15000, + timeout: 15000 + ) + end + + @spec get!(binary) :: %{ + :__struct__ => HTTPoison.AsyncResponse | HTTPoison.Response, + optional(:body) => any, + optional(:headers) => [any], + optional(:id) => reference, + optional(:request) => HTTPoison.Request.t(), + optional(:request_url) => any, + optional(:status_code) => integer + } + def get!(url) do + # TODO: add version number to user agent? 
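+ # Like get/1 above, this goes through the shared :crawler hackney pool
+ # (sized to the :crawl_workers config in Backend.Application), so concurrent
+ # crawl jobs reuse connections and share the same 15-second timeouts.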
diff --git a/backend/lib/backend/crawler/util.ex b/backend/lib/backend/crawler/util.ex
new file mode 100644
index 0000000..c5013f1
--- /dev/null
+++ b/backend/lib/backend/crawler/util.ex
@@ -0,0 +1,63 @@
+defmodule Backend.Crawler.Util do
+  require Logger
+  import Backend.Util
+
+  # Gets the domain from a Mastodon/Pleroma account URL
+  # (e.g. https://mastodon.social/@demouser or https://pleroma.site/users/demouser)
+  @spec get_domain(String.t()) :: String.t()
+  def get_domain(url) do
+    url
+    # Strip the leading "https://" (8 characters)
+    |> String.slice(8..-1)
+    |> String.split("/")
+    |> Enum.at(0)
+  end
+
+  @spec is_http_200?(HTTPoison.Response.t()) :: boolean
+  def is_http_200?(%{status_code: 200}) do
+    true
+  end
+
+  def is_http_200?(_) do
+    false
+  end
+
+  # A nil threshold means there's no lower bound, so every timestamp passes
+  @spec is_after?(String.t(), NaiveDateTime.t() | nil) :: boolean()
+  def is_after?(_timestamp, nil), do: true
+
+  def is_after?(timestamp, threshold) do
+    timestamp
+    |> NaiveDateTime.from_iso8601!()
+    # :second is the granularity used in the database
+    |> NaiveDateTime.truncate(:second)
+    |> NaiveDateTime.compare(threshold)
+    |> Kernel.===(:gt)
+  end
+
+  def get(url) do
+    # TODO: add version number to user agent?
+    HTTPoison.get(url, [{"User-Agent", get_config(:user_agent)}],
+      hackney: [pool: :crawler],
+      recv_timeout: 15000,
+      timeout: 15000
+    )
+  end
+
+  @spec get!(binary) :: HTTPoison.Response.t()
+  def get!(url) do
+    # TODO: add version number to user agent?
+    HTTPoison.get!(url, [{"User-Agent", get_config(:user_agent)}],
+      hackney: [pool: :crawler],
+      recv_timeout: 15000,
+      timeout: 15000
+    )
+  end
+end
diff --git a/backend/lib/backend/edge.ex b/backend/lib/backend/edge.ex
new file mode 100644
index 0000000..f808271
--- /dev/null
+++ b/backend/lib/backend/edge.ex
@@ -0,0 +1,25 @@
+defmodule Backend.Edge do
+  use Ecto.Schema
+  import Ecto.Changeset
+
+  schema "edges" do
+    belongs_to :source, Backend.Instance,
+      references: :domain,
+      type: :string,
+      foreign_key: :source_domain
+
+    belongs_to :target, Backend.Instance,
+      references: :domain,
+      type: :string,
+      foreign_key: :target_domain
+
+    timestamps()
+  end
+
+  @doc false
+  def changeset(edge, attrs) do
+    edge
+    # :source and :target are associations; cast their underlying foreign-key fields
+    |> cast(attrs, [:source_domain, :target_domain])
+    |> validate_required([:source_domain, :target_domain])
+  end
+end
diff --git a/backend/lib/backend/instance.ex b/backend/lib/backend/instance.ex
new file mode 100644
index 0000000..656a668
--- /dev/null
+++ b/backend/lib/backend/instance.ex
@@ -0,0 +1,41 @@
+defmodule Backend.Instance do
+  use Ecto.Schema
+  import Ecto.Changeset
+
+  schema "instances" do
+    field :domain, :string
+    field :description, :string
+    field :user_count, :integer
+    field :status_count, :integer
+    field :version, :string
+    field :insularity, :float
+    # Graph layout coordinates; the columns are added in the add_instance_coords migration
+    field :x, :float
+    field :y, :float
+
+    many_to_many :peers, Backend.Instance,
+      join_through: Backend.InstancePeer,
+      join_keys: [source_domain: :domain, target_domain: :domain]
+
+    # This may look like it's duplicating :peers above, but it allows us to insert
+    # peer relationships quickly.
+    # https://stackoverflow.com/a/56764241/3697202
+    has_many :instance_peers, Backend.InstancePeer,
+      foreign_key: :source_domain,
+      references: :domain
+
+    timestamps()
+  end
+
+  @doc false
+  def changeset(instance, attrs) do
+    instance
+    |> cast(attrs, [
+      :domain,
+      :description,
+      :user_count,
+      :status_count,
+      :version,
+      :insularity,
+      :updated_at
+    ])
+    |> validate_required([:domain])
+    # Callers must always supply a :peers list (possibly empty)
+    |> put_assoc(:peers, attrs.peers)
+  end
+end
diff --git a/backend/lib/backend/instance_peer.ex b/backend/lib/backend/instance_peer.ex
new file mode 100644
index 0000000..fbe2731
--- /dev/null
+++ b/backend/lib/backend/instance_peer.ex
@@ -0,0 +1,27 @@
+defmodule Backend.InstancePeer do
+  use Ecto.Schema
+  import Ecto.Changeset
+
+  schema "instance_peers" do
+    belongs_to :source, Backend.Instance,
+      references: :domain,
+      type: :string,
+      foreign_key: :source_domain
+
+    belongs_to :target, Backend.Instance,
+      references: :domain,
+      type: :string,
+      foreign_key: :target_domain
+
+    field :weight, :float, default: 0.0
+
+    timestamps()
+  end
+
+  @doc false
+  def changeset(instance_peer, attrs) do
+    instance_peer
+    |> cast(attrs, [])
+    |> validate_required([])
+  end
+end
diff --git a/backend/lib/backend/release.ex b/backend/lib/backend/release.ex
new file mode 100644
index 0000000..3bb1e62
--- /dev/null
+++ b/backend/lib/backend/release.ex
@@ -0,0 +1,18 @@
+defmodule Backend.Release do
+  @app :backend
+
+  def migrate do
+    for repo <- repos() do
+      {:ok, _, _} = Ecto.Migrator.with_repo(repo, &Ecto.Migrator.run(&1, :up, all: true))
+    end
+  end
+
+  def rollback(repo, version) do
+    {:ok, _, _} = Ecto.Migrator.with_repo(repo, &Ecto.Migrator.run(&1, :down, to: version))
+  end
+
+  defp repos do
+    Application.load(@app)
+    Application.fetch_env!(@app, :ecto_repos)
+  end
+end
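Backend.Release exists so that a compiled release, which has no Mix, can still run migrations (typically via something like `bin/backend eval "Backend.Release.migrate()"`, assuming the release is named after the app). From a remote console the same functions can be called directly; the rollback target below is one of this patch's own migration versions:

    Backend.Release.migrate()
    # Roll back to just after the instances table was created:
    Backend.Release.rollback(Backend.Repo, 20190624090436)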
diff --git a/backend/lib/backend/repo.ex b/backend/lib/backend/repo.ex
new file mode 100644
index 0000000..5f5b578
--- /dev/null
+++ b/backend/lib/backend/repo.ex
@@ -0,0 +1,5 @@
+defmodule Backend.Repo do
+  use Ecto.Repo,
+    otp_app: :backend,
+    adapter: Ecto.Adapters.Postgres
+end
diff --git a/backend/lib/backend/scheduler.ex b/backend/lib/backend/scheduler.ex
new file mode 100644
index 0000000..81b1ad5
--- /dev/null
+++ b/backend/lib/backend/scheduler.ex
@@ -0,0 +1,116 @@
+defmodule Backend.Scheduler do
+  @moduledoc """
+  This module runs recurring tasks.
+  """
+
+  use Quantum.Scheduler, otp_app: :backend
+
+  alias Backend.{Crawl, CrawlInteraction, Edge, Instance, Repo}
+  import Ecto.Query
+  require Logger
+
+  @doc """
+  Prunes all crawls that are more than `amount` `unit`s old.
+  For example, to delete crawls older than one month, call `prune_crawls(1, "month")`.
+
+  `unit` must be singular, e.g. "second", "minute", "hour", "month", "year", etc.
+  """
+  @spec prune_crawls(integer, String.t()) :: any
+  def prune_crawls(amount, unit) do
+    {deleted_num, _} =
+      Crawl
+      |> where(
+        [i],
+        i.inserted_at <
+          datetime_add(^NaiveDateTime.utc_now(), -1 * ^amount, ^unit)
+      )
+      |> Repo.delete_all()
+
+    Logger.info("Pruned #{deleted_num} old crawls.")
+  end
+
+  @doc """
+  This function aggregates statistics from the interactions in the database.
+  It calculates the strength of edges between nodes.
+
+  TODO: generate edge weights. The weight of an edge between two instances will be
+  (number of mentions of each other) / (total number of statuses crawled).
+  This requires us to keep track of how many statuses we've seen.
+  """
+  def generate_edges() do
+    interactions =
+      CrawlInteraction
+      |> select([inter], {inter.source_domain, inter.target_domain})
+      |> join(:left, [inter], i_source in Instance, on: inter.source_domain == i_source.domain)
+      |> join(:left, [inter], i_target in Instance, on: inter.target_domain == i_target.domain)
+      # Only consider instances that have been crawled successfully (i.e. have a version set)
+      |> where(
+        [inter, i_source, i_target],
+        not is_nil(i_source.version) and not is_nil(i_target.version)
+      )
+      # Repo.all() returns a list of tuples like {"mastodon.social", "cursed.technology"}
+      |> Repo.all()
+      # Create a map of %{source_domain => [target_domains]}
+      |> Enum.group_by(fn tuple -> Kernel.elem(tuple, 0) end, fn tuple ->
+        Kernel.elem(tuple, 1)
+      end)
+
+    Repo.transaction(fn ->
+      # Calculate insularity score
+      interactions
+      |> Enum.each(fn {source, targets} ->
+        total_mentions = length(targets)
+        self_mentions = Enum.count(targets, fn t -> t == source end)
+
+        insularity = self_mentions / total_mentions
+
+        Repo.insert!(
+          %Instance{
+            domain: source,
+            insularity: insularity
+          },
+          on_conflict: [set: [insularity: insularity]],
+          conflict_target: :domain
+        )
+      end)
+
+      # Get the set of undirected edges. MapSet is immutable, so we collect into it
+      # with flat_map/new rather than calling MapSet.put/2 in a loop (whose return
+      # value would be discarded).
+      edges =
+        interactions
+        |> Enum.flat_map(fn {source, targets} ->
+          Enum.map(targets, fn target ->
+            # Sort the pair so that A->B and B->A map to the same edge
+            [key_a, key_b] = Enum.sort([source, target])
+            %{source_domain: key_a, target_domain: key_b}
+          end)
+        end)
+        |> MapSet.new()
+
+      Logger.debug(inspect(edges))
+
+      now = NaiveDateTime.truncate(NaiveDateTime.utc_now(), :second)
+
+      Repo.delete_all(Edge)
+
+      # insert_all/2 expects plain maps (not %Edge{} structs) and doesn't fill in
+      # timestamps, so set them explicitly
+      edges =
+        edges
+        |> MapSet.to_list()
+        |> Enum.map(fn %{source_domain: source_domain, target_domain: target_domain} ->
+          %{
+            source_domain: source_domain,
+            target_domain: target_domain,
+            updated_at: now,
+            inserted_at: now
+          }
+        end)
+
+      Repo.insert_all(Edge, edges)
+    end)
+  end
+end
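Nothing in this module schedules anything by itself; with `use Quantum.Scheduler`, the cron entries are read from the :backend app config. A hedged sketch of what that configuration might look like (the actual schedule lives in config/config.exs and may differ):

    config :backend, Backend.Scheduler,
      jobs: [
        # Hypothetical schedule: nightly, prune crawls older than a month
        {"0 4 * * *", {Backend.Scheduler, :prune_crawls, [1, "month"]}},
        # Rebuild the edge list hourly
        {"0 * * * *", {Backend.Scheduler, :generate_edges, []}}
      ]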
diff --git a/backend/lib/backend/util.ex b/backend/lib/backend/util.ex
new file mode 100644
index 0000000..6b7abca
--- /dev/null
+++ b/backend/lib/backend/util.ex
@@ -0,0 +1,129 @@
+defmodule Backend.Util do
+  import Ecto.Query
+  alias Backend.{Crawl, Repo}
+
+  @doc """
+  Returns the given key from :backend, :crawler in the config.
+  """
+  @spec get_config(atom) :: any
+  def get_config(key) do
+    Application.get_env(:backend, :crawler)[key]
+  end
+
+  @doc """
+  Takes two lists and returns a list of their union (without duplicates).
+  """
+  def list_union(list_one, list_two) do
+    list_one
+    |> MapSet.new()
+    |> MapSet.union(MapSet.new(list_two))
+    |> MapSet.to_list()
+  end
+
+  @doc """
+  Returns `true` if `domain` ends with a blacklisted domain.
+  If e.g. "masto.host" is blacklisted, all of its subdomains will return `true`.
+  """
+  @spec is_blacklisted?(String.t()) :: boolean
+  def is_blacklisted?(domain) do
+    blacklist = get_config(:blacklist) || []
+
+    blacklist
+    |> Enum.any?(fn blacklisted_domain ->
+      String.ends_with?(domain, blacklisted_domain)
+    end)
+  end
+
+  @doc """
+  Returns the key to use for non-directed edges:
+  a tuple of the two domains, sorted alphabetically.
+  """
+  @spec get_interaction_key(String.t(), String.t()) :: {String.t(), String.t()}
+  def get_interaction_key(source, target) do
+    [source, target]
+    |> Enum.sort()
+    |> List.to_tuple()
+  end
+
+  @doc """
+  Gets the current UTC time as a NaiveDateTime in a format that can be inserted into the database.
+  """
+  def get_now() do
+    NaiveDateTime.truncate(NaiveDateTime.utc_now(), :second)
+  end
+
+  @doc """
+  Returns the later of two NaiveDateTimes.
+  """
+  @spec max_datetime(NaiveDateTime.t() | nil, NaiveDateTime.t() | nil) :: NaiveDateTime.t()
+  def max_datetime(datetime_one, nil) do
+    datetime_one
+  end
+
+  def max_datetime(nil, datetime_two) do
+    datetime_two
+  end
+
+  def max_datetime(datetime_one, datetime_two) do
+    case NaiveDateTime.compare(datetime_one, datetime_two) do
+      :gt -> datetime_one
+      _ -> datetime_two
+    end
+  end
+
+  @spec get_last_crawl(String.t()) :: Crawl.t() | nil
+  def get_last_crawl(domain) do
+    Crawl
+    |> where([c], c.instance_domain == ^domain)
+    |> order_by(desc: :id)
+    |> limit(1)
+    |> Repo.one()
+  end
+
+  @spec get_last_successful_crawl(String.t()) :: Crawl.t() | nil
+  def get_last_successful_crawl(domain) do
+    Crawl
+    |> where([c], is_nil(c.error) and c.instance_domain == ^domain)
+    |> order_by(desc: :id)
+    |> limit(1)
+    |> Repo.one()
+  end
+
+  @spec get_last_successful_crawl_timestamp(String.t()) :: NaiveDateTime.t() | nil
+  def get_last_successful_crawl_timestamp(domain) do
+    # Use the last *successful* crawl, as the function name promises
+    case get_last_successful_crawl(domain) do
+      nil -> nil
+      crawl -> crawl.inserted_at
+    end
+  end
+
+  @doc """
+  Takes two maps with numeric values and merges them, adding the values of duplicate keys.
+  """
+  def merge_count_maps(map1, map2) do
+    map1
+    |> Enum.reduce(map2, fn {key, val}, acc ->
+      Map.update(acc, key, val, &(&1 + val))
+    end)
+  end
+end
diff --git a/backend/lib/backend_web.ex b/backend/lib/backend_web.ex
new file mode 100644
index 0000000..f0dd1ac
--- /dev/null
+++ b/backend/lib/backend_web.ex
@@ -0,0 +1,66 @@
+defmodule BackendWeb do
+  @moduledoc """
+  The entrypoint for defining your web interface, such
+  as controllers, views, channels and so on.
+
+  This can be used in your application as:
+
+      use BackendWeb, :controller
+      use BackendWeb, :view
+
+  The definitions below will be executed for every view,
+  controller, etc., so keep them short and clean, focused
+  on imports, uses and aliases.
+
+  Do NOT define functions inside the quoted expressions
+  below. Instead, define any helper function in modules
+  and import those modules here.
+  """
+
+  def controller do
+    quote do
+      use Phoenix.Controller, namespace: BackendWeb
+
+      import Plug.Conn
+      import BackendWeb.Gettext
+      alias BackendWeb.Router.Helpers, as: Routes
+    end
+  end
+
+  def view do
+    quote do
+      use Phoenix.View,
+        root: "lib/backend_web/templates",
+        namespace: BackendWeb
+
+      # Import convenience functions from controllers
+      import Phoenix.Controller, only: [get_flash: 1, get_flash: 2, view_module: 1]
+
+      import BackendWeb.ErrorHelpers
+      import BackendWeb.Gettext
+      alias BackendWeb.Router.Helpers, as: Routes
+    end
+  end
+
+  def router do
+    quote do
+      use Phoenix.Router
+      import Plug.Conn
+      import Phoenix.Controller
+    end
+  end
+
+  def channel do
+    quote do
+      use Phoenix.Channel
+      import BackendWeb.Gettext
+    end
+  end
+
+  @doc """
+  When used, dispatch to the appropriate controller/view/etc.
+  """
+  defmacro __using__(which) when is_atom(which) do
+    apply(__MODULE__, which, [])
+  end
+end
diff --git a/backend/lib/backend_web/channels/user_socket.ex b/backend/lib/backend_web/channels/user_socket.ex
new file mode 100644
index 0000000..081c476
--- /dev/null
+++ b/backend/lib/backend_web/channels/user_socket.ex
@@ -0,0 +1,33 @@
+defmodule BackendWeb.UserSocket do
+  use Phoenix.Socket
+
+  ## Channels
+  # channel "room:*", BackendWeb.RoomChannel
+
+  # Socket params are passed from the client and can
+  # be used to verify and authenticate a user. After
+  # verification, you can put default assigns into
+  # the socket that will be set for all channels, i.e.
+  #
+  #     {:ok, assign(socket, :user_id, verified_user_id)}
+  #
+  # To deny connection, return `:error`.
+  #
+  # See `Phoenix.Token` documentation for examples in
+  # performing token verification on connect.
+  def connect(_params, socket, _connect_info) do
+    {:ok, socket}
+  end
+
+  # Socket IDs are topics that allow you to identify all sockets for a given user:
+  #
+  #     def id(socket), do: "user_socket:#{socket.assigns.user_id}"
+  #
+  # Would allow you to broadcast a "disconnect" event and terminate
+  # all active sockets and channels for a given user:
+  #
+  #     BackendWeb.Endpoint.broadcast("user_socket:#{user.id}", "disconnect", %{})
+  #
+  # Returning `nil` makes this socket anonymous.
+  def id(_socket), do: nil
+end
diff --git a/backend/lib/backend_web/controllers/fallback_controller.ex b/backend/lib/backend_web/controllers/fallback_controller.ex
new file mode 100644
index 0000000..cc4cc12
--- /dev/null
+++ b/backend/lib/backend_web/controllers/fallback_controller.ex
@@ -0,0 +1,15 @@
+defmodule BackendWeb.FallbackController do
+  @moduledoc """
+  Translates controller action results into valid `Plug.Conn` responses.
+
+  See `Phoenix.Controller.action_fallback/1` for more details.
+ """ + use BackendWeb, :controller + + def call(conn, {:error, :not_found}) do + conn + |> put_status(:not_found) + |> put_view(BackendWeb.ErrorView) + |> render(:"404") + end +end diff --git a/backend/lib/backend_web/controllers/graph_controller.ex b/backend/lib/backend_web/controllers/graph_controller.ex new file mode 100644 index 0000000..106b8cc --- /dev/null +++ b/backend/lib/backend_web/controllers/graph_controller.ex @@ -0,0 +1,13 @@ +defmodule BackendWeb.GraphController do + use BackendWeb, :controller + + alias Backend.Api + + action_fallback BackendWeb.FallbackController + + def index(conn, _params) do + nodes = Api.list_nodes() + edges = Api.list_edges() + render(conn, "index.json", nodes: nodes, edges: edges) + end +end diff --git a/backend/lib/backend_web/controllers/instance_controller.ex b/backend/lib/backend_web/controllers/instance_controller.ex new file mode 100644 index 0000000..cbf292a --- /dev/null +++ b/backend/lib/backend_web/controllers/instance_controller.ex @@ -0,0 +1,27 @@ +defmodule BackendWeb.InstanceController do + use BackendWeb, :controller + + import Backend.Util + alias Backend.Api + + action_fallback BackendWeb.FallbackController + + def index(conn, _params) do + instances = Api.list_instances() + render(conn, "index.json", instances: instances) + end + + def show(conn, %{"id" => domain}) do + instance = Api.get_instance!(domain) + last_crawl = get_last_crawl(domain) + render(conn, "show.json", instance: instance, crawl: last_crawl) + end + + # def update(conn, %{"id" => id, "instance" => instance_params}) do + # instance = Api.get_instance!(id) + + # with {:ok, %Instance{} = instance} <- Api.update_instance(instance, instance_params) do + # render(conn, "show.json", instance: instance) + # end + # end +end diff --git a/backend/lib/backend_web/endpoint.ex b/backend/lib/backend_web/endpoint.ex new file mode 100644 index 0000000..a795eb8 --- /dev/null +++ b/backend/lib/backend_web/endpoint.ex @@ -0,0 +1,51 @@ +defmodule BackendWeb.Endpoint do + use Phoenix.Endpoint, otp_app: :backend + + socket("/socket", BackendWeb.UserSocket, + websocket: true, + longpoll: false + ) + + # Serve at "/" the static files from "priv/static" directory. + # + # You should set gzip to true if you are running phx.digest + # when deploying your static files in production. + plug(Plug.Static, + at: "/", + from: :backend, + gzip: false, + only: ~w(css fonts images js favicon.ico robots.txt) + ) + + # Code reloading can be explicitly enabled under the + # :code_reloader configuration of your endpoint. + if code_reloading? do + plug(Phoenix.CodeReloader) + end + + plug(Plug.RequestId) + plug(Plug.Logger) + + plug(Plug.Parsers, + parsers: [:urlencoded, :multipart, :json], + pass: ["*/*"], + json_decoder: Phoenix.json_library() + ) + + plug(Plug.MethodOverride) + plug(Plug.Head) + + # The session will be stored in the cookie and signed, + # this means its contents can be read but not tampered with. + # Set :encryption_salt if you would also like to encrypt it. + plug(Plug.Session, + store: :cookie, + key: "_backend_key", + signing_salt: "HJa1j4FI" + ) + + # TODO + plug(Corsica, origins: "*") + + plug(BackendWeb.Router) +end diff --git a/backend/lib/backend_web/gettext.ex b/backend/lib/backend_web/gettext.ex new file mode 100644 index 0000000..0ff499f --- /dev/null +++ b/backend/lib/backend_web/gettext.ex @@ -0,0 +1,24 @@ +defmodule BackendWeb.Gettext do + @moduledoc """ + A module providing Internationalization with a gettext-based API. 
+ + By using [Gettext](https://hexdocs.pm/gettext), + your module gains a set of macros for translations, for example: + + import BackendWeb.Gettext + + # Simple translation + gettext("Here is the string to translate") + + # Plural translation + ngettext("Here is the string to translate", + "Here are the strings to translate", + 3) + + # Domain-based translation + dgettext("errors", "Here is the error message to translate") + + See the [Gettext Docs](https://hexdocs.pm/gettext) for detailed usage. + """ + use Gettext, otp_app: :backend +end diff --git a/backend/lib/backend_web/router.ex b/backend/lib/backend_web/router.ex new file mode 100644 index 0000000..93b0668 --- /dev/null +++ b/backend/lib/backend_web/router.ex @@ -0,0 +1,14 @@ +defmodule BackendWeb.Router do + use BackendWeb, :router + + pipeline :api do + plug :accepts, ["json"] + end + + scope "/api", BackendWeb do + pipe_through :api + + resources "/instances", InstanceController, only: [:index, :show] + resources "/graph", GraphController, only: [:index] + end +end diff --git a/backend/lib/backend_web/views/changeset_view.ex b/backend/lib/backend_web/views/changeset_view.ex new file mode 100644 index 0000000..1205b39 --- /dev/null +++ b/backend/lib/backend_web/views/changeset_view.ex @@ -0,0 +1,19 @@ +defmodule BackendWeb.ChangesetView do + use BackendWeb, :view + + @doc """ + Traverses and translates changeset errors. + + See `Ecto.Changeset.traverse_errors/2` and + `BackendWeb.ErrorHelpers.translate_error/1` for more details. + """ + def translate_errors(changeset) do + Ecto.Changeset.traverse_errors(changeset, &translate_error/1) + end + + def render("error.json", %{changeset: changeset}) do + # When encoded, the changeset returns its errors + # as a JSON object. So we just pass it forward. + %{errors: translate_errors(changeset)} + end +end diff --git a/backend/lib/backend_web/views/error_helpers.ex b/backend/lib/backend_web/views/error_helpers.ex new file mode 100644 index 0000000..59d4acd --- /dev/null +++ b/backend/lib/backend_web/views/error_helpers.ex @@ -0,0 +1,33 @@ +defmodule BackendWeb.ErrorHelpers do + @moduledoc """ + Conveniences for translating and building error messages. + """ + + @doc """ + Translates an error message using gettext. + """ + def translate_error({msg, opts}) do + # When using gettext, we typically pass the strings we want + # to translate as a static argument: + # + # # Translate "is invalid" in the "errors" domain + # dgettext("errors", "is invalid") + # + # # Translate the number of files with plural rules + # dngettext("errors", "1 file", "%{count} files", count) + # + # Because the error messages we show in our forms and APIs + # are defined inside Ecto, we need to translate them dynamically. + # This requires us to call the Gettext module passing our gettext + # backend as first argument. + # + # Note we use the "errors" domain, which means translations + # should be written to the errors.po file. The :count option is + # set by Ecto and indicates we should also apply plural rules. 
+    if count = opts[:count] do
+      Gettext.dngettext(BackendWeb.Gettext, "errors", msg, msg, count, opts)
+    else
+      Gettext.dgettext(BackendWeb.Gettext, "errors", msg, opts)
+    end
+  end
+end
diff --git a/backend/lib/backend_web/views/error_view.ex b/backend/lib/backend_web/views/error_view.ex
new file mode 100644
index 0000000..7fde2c4
--- /dev/null
+++ b/backend/lib/backend_web/views/error_view.ex
@@ -0,0 +1,16 @@
+defmodule BackendWeb.ErrorView do
+  use BackendWeb, :view
+
+  # If you want to customize a particular status code
+  # for a certain format, you may uncomment below.
+  # def render("500.json", _assigns) do
+  #   %{errors: %{detail: "Internal Server Error"}}
+  # end
+
+  # By default, Phoenix returns the status message from
+  # the template name. For example, "404.json" becomes
+  # "Not Found".
+  def template_not_found(template, _assigns) do
+    %{errors: %{detail: Phoenix.Controller.status_message_from_template(template)}}
+  end
+end
diff --git a/backend/lib/backend_web/views/graph_view.ex b/backend/lib/backend_web/views/graph_view.ex
new file mode 100644
index 0000000..b978e4b
--- /dev/null
+++ b/backend/lib/backend_web/views/graph_view.ex
@@ -0,0 +1,36 @@
+defmodule BackendWeb.GraphView do
+  use BackendWeb, :view
+  alias BackendWeb.GraphView
+
+  def render("index.json", %{nodes: nodes, edges: edges}) do
+    %{
+      nodes: render_many(nodes, GraphView, "node.json"),
+      edges: render_many(edges, GraphView, "edge.json")
+    }
+  end
+
+  def render("node.json", %{graph: node}) do
+    # Scale node size logarithmically with the user count; 1 is the minimum size
+    size =
+      if node.user_count > 1 do
+        :math.log(node.user_count)
+      else
+        1
+      end
+
+    %{
+      id: node.domain,
+      label: node.domain,
+      size: size,
+      x: node.x,
+      y: node.y
+    }
+  end
+
+  def render("edge.json", %{graph: edge}) do
+    %{
+      id: edge.id,
+      source: edge.source_domain,
+      target: edge.target_domain,
+      size: edge.weight
+    }
+  end
+end
diff --git a/backend/lib/backend_web/views/instance_view.ex b/backend/lib/backend_web/views/instance_view.ex
new file mode 100644
index 0000000..1282c52
--- /dev/null
+++ b/backend/lib/backend_web/views/instance_view.ex
@@ -0,0 +1,45 @@
+defmodule BackendWeb.InstanceView do
+  use BackendWeb, :view
+  alias BackendWeb.InstanceView
+
+  def render("index.json", %{instances: instances}) do
+    render_many(instances, InstanceView, "instance.json")
+  end
+
+  def render("show.json", %{instance: instance, crawl: crawl}) do
+    render_one(instance, InstanceView, "instance_detail.json", crawl: crawl)
+  end
+
+  def render("instance.json", %{instance: instance}) do
+    %{name: instance.domain}
+  end
+
+  def render("instance_detail.json", %{instance: instance, crawl: crawl}) do
+    {status, last_updated} =
+      case crawl do
+        nil ->
+          {"not crawled", nil}
+
+        _ ->
+          case crawl.error do
+            nil -> {"success", crawl.inserted_at}
+            err -> {err, crawl.inserted_at}
+          end
+      end
+
+    %{
+      name: instance.domain,
+      description: instance.description,
+      version: instance.version,
+      userCount: instance.user_count,
+      statusCount: instance.status_count,
+      domainCount: length(instance.peers),
+      peers: render_many(instance.peers, InstanceView, "instance.json"),
+      lastUpdated: last_updated,
+      status: status
+    }
+  end
+end
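The log scaling in GraphView keeps flagship instances from dwarfing everything else in the rendered graph; since :math.log/1 is the natural logarithm:

    :math.log(100)        # => ~4.6   (a hundred-user instance)
    :math.log(1_000_000)  # => ~13.8  (a million-user instance)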
diff --git a/backend/lib/mix/tasks/crawl.ex b/backend/lib/mix/tasks/crawl.ex
new file mode 100644
index 0000000..c9495c8
--- /dev/null
+++ b/backend/lib/mix/tasks/crawl.ex
@@ -0,0 +1,13 @@
+defmodule Mix.Tasks.Crawl do
+  alias Backend.Crawler
+  use Mix.Task
+
+  @shortdoc "Crawl a given instance."
+  # Mix passes task arguments as a list, so `mix crawl example.com` yields ["example.com"]
+  def run([domain]) do
+    Mix.Task.run("app.start")
+    Crawler.run(domain)
+  end
+end
diff --git a/backend/manage.py b/backend/manage.py
deleted file mode 100755
index bb33428..0000000
--- a/backend/manage.py
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/usr/bin/env python
-import os
-import sys
-
-if __name__ == '__main__':
-    try:
-        from django.core.management import execute_from_command_line
-
-    except ImportError as exc:
-        raise ImportError(
-            "Couldn't import Django. Are you sure it's installed and "
-            "available on your PYTHONPATH environment variable? Did you "
-            "forget to activate a virtual environment?"
-        ) from exc
-    execute_from_command_line(sys.argv)
diff --git a/backend/mix.exs b/backend/mix.exs
new file mode 100644
index 0000000..68ae703
--- /dev/null
+++ b/backend/mix.exs
@@ -0,0 +1,65 @@
+defmodule Backend.MixProject do
+  use Mix.Project
+
+  def project do
+    [
+      app: :backend,
+      version: "2.0.0-beta.1",
+      elixir: "~> 1.5",
+      elixirc_paths: elixirc_paths(Mix.env()),
+      compilers: [:phoenix, :gettext] ++ Mix.compilers(),
+      start_permanent: Mix.env() == :prod,
+      aliases: aliases(),
+      deps: deps()
+    ]
+  end
+
+  # Configuration for the OTP application.
+  #
+  # Type `mix help compile.app` for more information.
+  def application do
+    [
+      mod: {Backend.Application, []},
+      extra_applications: [:logger, :runtime_tools, :mnesia]
+    ]
+  end
+
+  # Specifies which paths to compile per environment.
+  defp elixirc_paths(:test), do: ["lib", "test/support"]
+  defp elixirc_paths(_), do: ["lib"]
+
+  # Specifies your project dependencies.
+  #
+  # Type `mix help deps` for examples and options.
+  defp deps do
+    [
+      {:phoenix, "~> 1.4.3"},
+      {:phoenix_pubsub, "~> 1.1"},
+      {:phoenix_ecto, "~> 4.0"},
+      {:ecto_sql, "~> 3.0"},
+      {:postgrex, ">= 0.0.0"},
+      {:gettext, "~> 0.11"},
+      {:jason, "~> 1.0"},
+      {:plug_cowboy, "~> 2.0"},
+      {:httpoison, "~> 1.5"},
+      {:timex, "~> 3.5"},
+      {:honeydew, "~> 1.4.3"},
+      {:quantum, "~> 2.3"},
+      {:corsica, "~> 1.1.2"}
+    ]
+  end
+
+  # Aliases are shortcuts or tasks specific to the current project.
+  # For example, to create, migrate and run the seeds file at once:
+  #
+  #     $ mix ecto.setup
+  #
+  # See the documentation for `Mix` for more info on aliases.
+ defp aliases do + [ + "ecto.setup": ["ecto.create", "ecto.migrate", "run priv/repo/seeds.exs"], + "ecto.reset": ["ecto.drop", "ecto.setup"], + test: ["ecto.create --quiet", "ecto.migrate", "test"] + ] + end +end diff --git a/backend/mix.lock b/backend/mix.lock new file mode 100644 index 0000000..54fb5cb --- /dev/null +++ b/backend/mix.lock @@ -0,0 +1,43 @@ +%{ + "artificery": {:hex, :artificery, "0.4.2", "3ded6e29e13113af52811c72f414d1e88f711410cac1b619ab3a2666bbd7efd4", [:mix], [], "hexpm"}, + "certifi": {:hex, :certifi, "2.5.1", "867ce347f7c7d78563450a18a6a28a8090331e77fa02380b4a21962a65d36ee5", [:rebar3], [{:parse_trans, "~>3.3", [hex: :parse_trans, repo: "hexpm", optional: false]}], "hexpm"}, + "combine": {:hex, :combine, "0.10.0", "eff8224eeb56498a2af13011d142c5e7997a80c8f5b97c499f84c841032e429f", [:mix], [], "hexpm"}, + "connection": {:hex, :connection, "1.0.4", "a1cae72211f0eef17705aaededacac3eb30e6625b04a6117c1b2db6ace7d5976", [:mix], [], "hexpm"}, + "corsica": {:hex, :corsica, "1.1.2", "5ad8b9dcbeeda4762d78a57c0c8c2f88e1eef8741508517c98cb79e0db1f107d", [:mix], [{:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm"}, + "cowboy": {:hex, :cowboy, "2.6.3", "99aa50e94e685557cad82e704457336a453d4abcb77839ad22dbe71f311fcc06", [:rebar3], [{:cowlib, "~> 2.7.3", [hex: :cowlib, repo: "hexpm", optional: false]}, {:ranch, "~> 1.7.1", [hex: :ranch, repo: "hexpm", optional: false]}], "hexpm"}, + "cowlib": {:hex, :cowlib, "2.7.3", "a7ffcd0917e6d50b4d5fb28e9e2085a0ceb3c97dea310505f7460ff5ed764ce9", [:rebar3], [], "hexpm"}, + "crontab": {:hex, :crontab, "1.1.7", "b9219f0bdc8678b94143655a8f229716c5810c0636a4489f98c0956137e53985", [:mix], [{:ecto, "~> 1.0 or ~> 2.0 or ~> 3.0", [hex: :ecto, repo: "hexpm", optional: true]}], "hexpm"}, + "db_connection": {:hex, :db_connection, "2.1.0", "122e2f62c4906bf2e49554f1e64db5030c19229aa40935f33088e7d543aa79d0", [:mix], [{:connection, "~> 1.0.2", [hex: :connection, repo: "hexpm", optional: false]}], "hexpm"}, + "decimal": {:hex, :decimal, "1.8.0", "ca462e0d885f09a1c5a342dbd7c1dcf27ea63548c65a65e67334f4b61803822e", [:mix], [], "hexpm"}, + "distillery": {:hex, :distillery, "2.1.1", "f9332afc2eec8a1a2b86f22429e068ef35f84a93ea1718265e740d90dd367814", [:mix], [{:artificery, "~> 0.2", [hex: :artificery, repo: "hexpm", optional: false]}], "hexpm"}, + "ecto": {:hex, :ecto, "3.1.7", "fa21d06ef56cdc2fdaa62574e8c3ba34a2751d44ea34c30bc65f0728421043e5", [:mix], [{:decimal, "~> 1.6", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm"}, + "ecto_sql": {:hex, :ecto_sql, "3.1.6", "1e80e30d16138a729c717f73dcb938590bcdb3a4502f3012414d0cbb261045d8", [:mix], [{:db_connection, "~> 2.0", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.1.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:mariaex, "~> 0.9.1", [hex: :mariaex, repo: "hexpm", optional: true]}, {:myxql, "~> 0.2.0", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.14.0 or ~> 0.15.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm"}, + "gen_stage": {:hex, :gen_stage, "0.14.2", "6a2a578a510c5bfca8a45e6b27552f613b41cf584b58210f017088d3d17d0b14", [:mix], [], "hexpm"}, + "gen_state_machine": {:hex, :gen_state_machine, "2.0.5", "9ac15ec6e66acac994cc442dcc2c6f9796cf380ec4b08267223014be1c728a95", [:mix], [], "hexpm"}, + "gettext": {:hex, :gettext, "0.17.0", 
"abe21542c831887a2b16f4c94556db9c421ab301aee417b7c4fbde7fbdbe01ec", [:mix], [], "hexpm"}, + "hackney": {:hex, :hackney, "1.15.1", "9f8f471c844b8ce395f7b6d8398139e26ddca9ebc171a8b91342ee15a19963f4", [:rebar3], [{:certifi, "2.5.1", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.4", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm"}, + "honeydew": {:hex, :honeydew, "1.4.3", "f2d976aaf8b9b914a635d2d483f1a71d2f6d8651809474dd5db581953cbebb30", [:mix], [{:ecto, "~> 3.0", [hex: :ecto, repo: "hexpm", optional: true]}], "hexpm"}, + "httpoison": {:hex, :httpoison, "1.5.1", "0f55b5b673b03c5c327dac7015a67cb571b99b631acc0bc1b0b98dcd6b9f2104", [:mix], [{:hackney, "~> 1.8", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm"}, + "idna": {:hex, :idna, "6.0.0", "689c46cbcdf3524c44d5f3dde8001f364cd7608a99556d8fbd8239a5798d4c10", [:rebar3], [{:unicode_util_compat, "0.4.1", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm"}, + "jason": {:hex, :jason, "1.1.2", "b03dedea67a99223a2eaf9f1264ce37154564de899fd3d8b9a21b1a6fd64afe7", [:mix], [{:decimal, "~> 1.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm"}, + "libring": {:hex, :libring, "1.4.0", "41246ba2f3fbc76b3971f6bce83119dfec1eee17e977a48d8a9cfaaf58c2a8d6", [:mix], [], "hexpm"}, + "metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm"}, + "mime": {:hex, :mime, "1.3.1", "30ce04ab3175b6ad0bdce0035cba77bba68b813d523d1aac73d9781b4d193cf8", [:mix], [], "hexpm"}, + "mimerl": {:hex, :mimerl, "1.2.0", "67e2d3f571088d5cfd3e550c383094b47159f3eee8ffa08e64106cdf5e981be3", [:rebar3], [], "hexpm"}, + "parse_trans": {:hex, :parse_trans, "3.3.0", "09765507a3c7590a784615cfd421d101aec25098d50b89d7aa1d66646bc571c1", [:rebar3], [], "hexpm"}, + "phoenix": {:hex, :phoenix, "1.4.9", "746d098e10741c334d88143d3c94cab1756435f94387a63441792e66ec0ee974", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:phoenix_pubsub, "~> 1.1", [hex: :phoenix_pubsub, repo: "hexpm", optional: false]}, {:plug, "~> 1.8.1 or ~> 1.9", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 1.0 or ~> 2.0", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm"}, + "phoenix_ecto": {:hex, :phoenix_ecto, "4.0.0", "c43117a136e7399ea04ecaac73f8f23ee0ffe3e07acfcb8062fe5f4c9f0f6531", [:mix], [{:ecto, "~> 3.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:phoenix_html, "~> 2.9", [hex: :phoenix_html, repo: "hexpm", optional: true]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm"}, + "phoenix_pubsub": {:hex, :phoenix_pubsub, "1.1.2", "496c303bdf1b2e98a9d26e89af5bba3ab487ba3a3735f74bf1f4064d2a845a3e", [:mix], [], "hexpm"}, + "plug": {:hex, :plug, "1.8.2", "0bcce1daa420f189a6491f3940cc77ea7fb1919761175c9c3b59800d897440fc", [:mix], [{:mime, "~> 1.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: true]}], "hexpm"}, + "plug_cowboy": {:hex, :plug_cowboy, "2.1.0", "b75768153c3a8a9e8039d4b25bb9b14efbc58e9c4a6e6a270abff1cd30cbe320", [:mix], [{:cowboy, "~> 2.5", [hex: :cowboy, repo: 
"hexpm", optional: false]}, {:plug, "~> 1.7", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm"}, + "plug_crypto": {:hex, :plug_crypto, "1.0.0", "18e49317d3fa343f24620ed22795ec29d4a5e602d52d1513ccea0b07d8ea7d4d", [:mix], [], "hexpm"}, + "postgrex": {:hex, :postgrex, "0.14.3", "5754dee2fdf6e9e508cbf49ab138df964278700b764177e8f3871e658b345a1e", [:mix], [{:connection, "~> 1.0", [hex: :connection, repo: "hexpm", optional: false]}, {:db_connection, "~> 2.0", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 1.5", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm"}, + "quantum": {:hex, :quantum, "2.3.4", "72a0e8855e2adc101459eac8454787cb74ab4169de6ca50f670e72142d4960e9", [:mix], [{:calendar, "~> 0.17", [hex: :calendar, repo: "hexpm", optional: true]}, {:crontab, "~> 1.1", [hex: :crontab, repo: "hexpm", optional: false]}, {:gen_stage, "~> 0.12", [hex: :gen_stage, repo: "hexpm", optional: false]}, {:swarm, "~> 3.3", [hex: :swarm, repo: "hexpm", optional: false]}, {:timex, "~> 3.1", [hex: :timex, repo: "hexpm", optional: true]}], "hexpm"}, + "ranch": {:hex, :ranch, "1.7.1", "6b1fab51b49196860b733a49c07604465a47bdb78aa10c1c16a3d199f7f8c881", [:rebar3], [], "hexpm"}, + "ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.4", "f0eafff810d2041e93f915ef59899c923f4568f4585904d010387ed74988e77b", [:make, :mix, :rebar3], [], "hexpm"}, + "swarm": {:hex, :swarm, "3.4.0", "64f8b30055d74640d2186c66354b33b999438692a91be275bb89cdc7e401f448", [:mix], [{:gen_state_machine, "~> 2.0", [hex: :gen_state_machine, repo: "hexpm", optional: false]}, {:libring, "~> 1.0", [hex: :libring, repo: "hexpm", optional: false]}], "hexpm"}, + "telemetry": {:hex, :telemetry, "0.4.0", "8339bee3fa8b91cb84d14c2935f8ecf399ccd87301ad6da6b71c09553834b2ab", [:rebar3], [], "hexpm"}, + "timex": {:hex, :timex, "3.6.1", "efdf56d0e67a6b956cc57774353b0329c8ab7726766a11547e529357ffdc1d56", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:gettext, "~> 0.10", [hex: :gettext, repo: "hexpm", optional: false]}, {:tzdata, "~> 0.1.8 or ~> 0.5 or ~> 1.0.0", [hex: :tzdata, repo: "hexpm", optional: false]}], "hexpm"}, + "tzdata": {:hex, :tzdata, "1.0.1", "f6027a331af7d837471248e62733c6ebee86a72e57c613aa071ebb1f750fc71a", [:mix], [{:hackney, "~> 1.0", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm"}, + "unicode_util_compat": {:hex, :unicode_util_compat, "0.4.1", "d869e4c68901dd9531385bb0c8c40444ebf624e60b6962d95952775cac5e90cd", [:rebar3], [], "hexpm"}, +} diff --git a/backend/priv/gettext/en/LC_MESSAGES/errors.po b/backend/priv/gettext/en/LC_MESSAGES/errors.po new file mode 100644 index 0000000..a589998 --- /dev/null +++ b/backend/priv/gettext/en/LC_MESSAGES/errors.po @@ -0,0 +1,97 @@ +## `msgid`s in this file come from POT (.pot) files. +## +## Do not add, change, or remove `msgid`s manually here as +## they're tied to the ones in the corresponding POT file +## (with the same domain). +## +## Use `mix gettext.extract --merge` or `mix gettext.merge` +## to merge POT files into PO files. 
+msgid "" +msgstr "" +"Language: en\n" + +## From Ecto.Changeset.cast/4 +msgid "can't be blank" +msgstr "" + +## From Ecto.Changeset.unique_constraint/3 +msgid "has already been taken" +msgstr "" + +## From Ecto.Changeset.put_change/3 +msgid "is invalid" +msgstr "" + +## From Ecto.Changeset.validate_acceptance/3 +msgid "must be accepted" +msgstr "" + +## From Ecto.Changeset.validate_format/3 +msgid "has invalid format" +msgstr "" + +## From Ecto.Changeset.validate_subset/3 +msgid "has an invalid entry" +msgstr "" + +## From Ecto.Changeset.validate_exclusion/3 +msgid "is reserved" +msgstr "" + +## From Ecto.Changeset.validate_confirmation/3 +msgid "does not match confirmation" +msgstr "" + +## From Ecto.Changeset.no_assoc_constraint/3 +msgid "is still associated with this entry" +msgstr "" + +msgid "are still associated with this entry" +msgstr "" + +## From Ecto.Changeset.validate_length/3 +msgid "should be %{count} character(s)" +msgid_plural "should be %{count} character(s)" +msgstr[0] "" +msgstr[1] "" + +msgid "should have %{count} item(s)" +msgid_plural "should have %{count} item(s)" +msgstr[0] "" +msgstr[1] "" + +msgid "should be at least %{count} character(s)" +msgid_plural "should be at least %{count} character(s)" +msgstr[0] "" +msgstr[1] "" + +msgid "should have at least %{count} item(s)" +msgid_plural "should have at least %{count} item(s)" +msgstr[0] "" +msgstr[1] "" + +msgid "should be at most %{count} character(s)" +msgid_plural "should be at most %{count} character(s)" +msgstr[0] "" +msgstr[1] "" + +msgid "should have at most %{count} item(s)" +msgid_plural "should have at most %{count} item(s)" +msgstr[0] "" +msgstr[1] "" + +## From Ecto.Changeset.validate_number/3 +msgid "must be less than %{number}" +msgstr "" + +msgid "must be greater than %{number}" +msgstr "" + +msgid "must be less than or equal to %{number}" +msgstr "" + +msgid "must be greater than or equal to %{number}" +msgstr "" + +msgid "must be equal to %{number}" +msgstr "" diff --git a/backend/priv/repo/migrations/.formatter.exs b/backend/priv/repo/migrations/.formatter.exs new file mode 100644 index 0000000..49f9151 --- /dev/null +++ b/backend/priv/repo/migrations/.formatter.exs @@ -0,0 +1,4 @@ +[ + import_deps: [:ecto_sql], + inputs: ["*.exs"] +] diff --git a/backend/priv/repo/migrations/20190624090436_create_instances.exs b/backend/priv/repo/migrations/20190624090436_create_instances.exs new file mode 100644 index 0000000..fa27d26 --- /dev/null +++ b/backend/priv/repo/migrations/20190624090436_create_instances.exs @@ -0,0 +1,29 @@ +defmodule Backend.Repo.Migrations.CreateInstances do + use Ecto.Migration + + def change do + create table(:instances) do + add :domain, :string, null: false + add :description, :text + add :user_count, :integer + add :status_count, :integer + add :version, :string + add :insularity, :float + + timestamps() + end + + create unique_index(:instances, [:domain]) + + create table(:instance_peers) do + add :source_domain, references(:instances, column: :domain, type: :string) + add :target_domain, references(:instances, column: :domain, type: :string) + + add :weight, :float + + timestamps() + end + + create unique_index(:instance_peers, [:source_domain, :target_domain]) + end +end diff --git a/backend/priv/repo/migrations/20190710133755_create_edges.exs b/backend/priv/repo/migrations/20190710133755_create_edges.exs new file mode 100644 index 0000000..1bfd546 --- /dev/null +++ b/backend/priv/repo/migrations/20190710133755_create_edges.exs @@ -0,0 +1,15 @@ +defmodule 
Backend.Repo.Migrations.CreateEdges do + use Ecto.Migration + + def change do + create table(:edges) do + add :source_domain, references(:instances, column: :domain, type: :string), null: false + add :target_domain, references(:instances, column: :domain, type: :string), null: false + + timestamps() + end + + create index(:edges, [:source_domain]) + create index(:edges, [:target_domain]) + end +end diff --git a/backend/priv/repo/migrations/20190710155001_create_crawls.exs b/backend/priv/repo/migrations/20190710155001_create_crawls.exs new file mode 100644 index 0000000..79b321e --- /dev/null +++ b/backend/priv/repo/migrations/20190710155001_create_crawls.exs @@ -0,0 +1,20 @@ +defmodule Backend.Repo.Migrations.CreateCrawls do + use Ecto.Migration + + def change do + create table(:crawls) do + add :instance_domain, references(:instances, column: :domain, type: :string), null: false + + add :statuses_seen, :integer + add :interactions_seen, :integer + + add :error, :text + + timestamps() + end + + # TODO: does this actually make WHERE error IS NULL queries faster? if not, drop it + create index(:crawls, [:error]) + create index(:crawls, [:inserted_at]) + end +end diff --git a/backend/priv/repo/migrations/20190710155112_create_crawl_interactions.exs b/backend/priv/repo/migrations/20190710155112_create_crawl_interactions.exs new file mode 100644 index 0000000..061654d --- /dev/null +++ b/backend/priv/repo/migrations/20190710155112_create_crawl_interactions.exs @@ -0,0 +1,16 @@ +defmodule Backend.Repo.Migrations.CreateCrawlInteractions do + use Ecto.Migration + + def change do + create table(:crawl_interactions) do + add :crawl_id, references(:crawls, on_delete: :delete_all), null: false + + add :source_domain, references(:instances, column: :domain, type: :string), null: false + add :target_domain, references(:instances, column: :domain, type: :string), null: false + + add :mentions, :integer + + timestamps() + end + end +end diff --git a/backend/priv/repo/migrations/20190712133009_add_instance_coords.exs b/backend/priv/repo/migrations/20190712133009_add_instance_coords.exs new file mode 100644 index 0000000..8d68453 --- /dev/null +++ b/backend/priv/repo/migrations/20190712133009_add_instance_coords.exs @@ -0,0 +1,10 @@ +defmodule Backend.Repo.Migrations.AddInstanceCoords do + use Ecto.Migration + + def change do + alter table(:instances) do + add :x, :float + add :y, :float + end + end +end diff --git a/backend/priv/repo/seeds.exs b/backend/priv/repo/seeds.exs new file mode 100644 index 0000000..effcb9c --- /dev/null +++ b/backend/priv/repo/seeds.exs @@ -0,0 +1,11 @@ +# Script for populating the database. You can run it as: +# +# mix run priv/repo/seeds.exs +# +# Inside the script, you can read and write to any of your +# repositories directly: +# +# Backend.Repo.insert!(%Backend.SomeSchema{}) +# +# We recommend using the bang functions (`insert!`, `update!` +# and so on) as they will fail if something goes wrong. 
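The seeds file is left empty here; in practice the StaleInstanceManager above already seeds mastodon.social at first boot. If seed data were wanted anyway, it would follow the pattern from the comment above (a hypothetical example using this patch's Instance schema):

    # Hypothetical: ensure the crawler's seed instance exists up front
    Backend.Repo.insert!(
      %Backend.Instance{domain: "mastodon.social"},
      on_conflict: :nothing,
      conflict_target: :domain
    )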
diff --git a/backend/rel/rel/vm.args.eex b/backend/rel/rel/vm.args.eex new file mode 100644 index 0000000..b4d02e8 --- /dev/null +++ b/backend/rel/rel/vm.args.eex @@ -0,0 +1 @@ ++C multi_time_warp \ No newline at end of file diff --git a/backend/requirements.txt b/backend/requirements.txt deleted file mode 100644 index 0365321..0000000 --- a/backend/requirements.txt +++ /dev/null @@ -1,28 +0,0 @@ -autopep8==1.3.5 -certifi==2018.8.24 -chardet==3.0.4 -dill==0.2.5 -Django==2.1.7 -django-bulk-update==2.2.0 -django-cors-headers==2.4.0 -django-letsencrypt==3.0.1 -django-silk==3.0.1 -djangorestframework==3.8.2 -future==0.16.0 -gprof2dot==2016.10.13 -gunicorn==19.9.0 -idna==2.7 -Jinja2==2.10 -MarkupSafe==1.0 -psycopg2-binary==2.7.5 -pycodestyle==2.4.0 -PyFunctional==1.1.3 -Pygments==2.2.0 -python-dateutil==2.7.3 -pytz==2018.5 -requests==2.20.1 -six==1.10.0 -sqlparse==0.2.4 -tabulate==0.7.7 -tqdm==4.25.0 -urllib3==1.23 diff --git a/backend/scraper/__init__.py b/backend/scraper/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/backend/scraper/admin.py b/backend/scraper/admin.py deleted file mode 100644 index 8c38f3f..0000000 --- a/backend/scraper/admin.py +++ /dev/null @@ -1,3 +0,0 @@ -from django.contrib import admin - -# Register your models here. diff --git a/backend/scraper/apps.py b/backend/scraper/apps.py deleted file mode 100644 index 5145a0e..0000000 --- a/backend/scraper/apps.py +++ /dev/null @@ -1,5 +0,0 @@ -from django.apps import AppConfig - - -class ScraperConfig(AppConfig): - name = 'scraper' diff --git a/backend/scraper/management/commands/_util.py b/backend/scraper/management/commands/_util.py deleted file mode 100644 index 7bdb743..0000000 --- a/backend/scraper/management/commands/_util.py +++ /dev/null @@ -1,84 +0,0 @@ -from datetime import datetime - -LOCK_MODES = ( - 'ACCESS SHARE', - 'ROW SHARE', - 'ROW EXCLUSIVE', - 'SHARE UPDATE EXCLUSIVE', - 'SHARE', - 'SHARE ROW EXCLUSIVE', - 'EXCLUSIVE', - 'ACCESS EXCLUSIVE', -) - - -def require_lock(model, lock): - """ - Decorator for PostgreSQL's table-level lock functionality - - Example: - @transaction.commit_on_success - @require_lock(MyModel, 'ACCESS EXCLUSIVE') - def myview(request) - ... - - PostgreSQL's LOCK Documentation: - http://www.postgresql.org/docs/8.3/interactive/sql-lock.html - """ - - def require_lock_decorator(view_func): - def wrapper(*args, **kwargs): - if lock not in LOCK_MODES: - raise ValueError('%s is not a PostgreSQL supported lock mode.') - from django.db import connection - cursor = connection.cursor() - cursor.execute( - 'LOCK TABLE %s IN %s MODE' % (model._meta.db_table, lock) - ) - return view_func(*args, **kwargs) - - return wrapper - - return require_lock_decorator - - -class InvalidResponseException(Exception): - """Used for all responses other than HTTP 200""" - pass - - -class PersonalInstanceException(Exception): - """ - Used for instances that we don't want to scrape because there are too few users. - We don't want information on individuals, but aggregate statistics on instances and how they interact. - """ - pass - -class BlacklistedDomainException(Exception): - """ - Used for instances whose domain is blacklisted. 
- """ - pass - -def get_key(data, keys: list): - try: - val = data[keys.pop(0)] - while keys: - val = val[keys.pop(0)] - return val - except (KeyError, TypeError): - return '' - - -def validate_int(integer): - return integer if (isinstance(integer, int) and 0 <= integer < 2147483647) else None - - -def log(obj, text, success=False, error=False): - text = "{} - {}".format(datetime.now().isoformat(), text) - if success: - text = obj.style.SUCCESS(text) - if error: - obj.stderr.write(text) - else: - obj.stdout.write(text) diff --git a/backend/scraper/management/commands/build_edges.py b/backend/scraper/management/commands/build_edges.py deleted file mode 100644 index 0b311eb..0000000 --- a/backend/scraper/management/commands/build_edges.py +++ /dev/null @@ -1,38 +0,0 @@ -import subprocess -from django.core.management.base import BaseCommand -from django.conf import settings -from scraper.models import PeerRelationship, Edge - - -class Command(BaseCommand): - help = "Takes what's in the database and calls Gephi to create and layout a graph" - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def handle(self, *args, **options): - self.stdout.write("Creating Edges from PeerRelationships...") - # Turn symmetrical PeerRelationships into symmetrical Edges - relationships = PeerRelationship.objects.filter(source__status='success', target__status='success') - # Loop over once and put 'em into a dict for fast access - relationships = {(r.source_id, r.target_id): r for r in relationships} - - edges = [] - while relationships: - (source_id, target_id), outgoing = relationships.popitem() - total_statuses = outgoing.statuses_seen - mention_count = outgoing.mention_count - incoming = relationships.pop((target_id, source_id), None) - oldest_data = outgoing.last_updated - if incoming: - total_statuses += (incoming.statuses_seen) - mention_count += (incoming.mention_count) - oldest_data = min(oldest_data, incoming.last_updated) - if mention_count == 0 or total_statuses == 0: - # don't add edges with weight 0 - continue - ratio = float(mention_count)/total_statuses - edges.append(Edge(source_id=source_id, target_id=target_id, weight=ratio, last_updated=oldest_data)) - - Edge.objects.all().delete() - Edge.objects.bulk_create(edges) diff --git a/backend/scraper/management/commands/scrape.py b/backend/scraper/management/commands/scrape.py deleted file mode 100644 index 70c7748..0000000 --- a/backend/scraper/management/commands/scrape.py +++ /dev/null @@ -1,276 +0,0 @@ -""" -This script starts at a seed instance and loads the list of connected -peers. From there, it scrapes the peers of all instances it finds, -gradually mapping the fediverse. 
-""" -import json -import multiprocessing as mp -import requests -import time -import os -from dateutil.parser import parse as datetime_parser -from datetime import datetime, timedelta, timezone -from functional import seq -from django_bulk_update.helper import bulk_update -from django.core.management.base import BaseCommand -from django import db -from django.conf import settings -from django.utils import timezone -from scraper.models import Instance, PeerRelationship -from scraper.management.commands._util import require_lock, InvalidResponseException, get_key, log, validate_int, PersonalInstanceException, BlacklistedDomainException - -# TODO: use the /api/v1/server/followers and /api/v1/server/following endpoints in peertube instances - -SEED = 'mastodon.social' -TIMEOUT = 20 # seconds -NUM_THREADS = 16 # roughly 40MB each -PERSONAL_INSTANCE_THRESHOLD = 10 # instances with < this many users won't be crawled -MAX_STATUSES_PER_PAGE = 40 -STATUS_SCRAPE_LIMIT = 5000 -INSTANCE_SCRAPE_LIMIT = 50 # note: this does not include newly discovered instances! they will always be crawled. - - -class Command(BaseCommand): - help = "Scrapes the entire fediverse" - - def add_arguments(self, parser): - # Named (optional) arguments - parser.add_argument( - '--unlimited', - action='store_true', - dest='unlimited', - help="Crawl all stale instances rather than limiting to {}".format(INSTANCE_SCRAPE_LIMIT), - ) - parser.add_argument( - '--all', - action='store_true', - dest='all', - help="Crawl all instances rather than limiting to stale ones" - ) - parser.add_argument( - '--verbose', - action='store_true', - dest='verbose', - help="Verbose logging" - ) - parser.add_argument( - '--instance', - dest='instance', - help="Crawl a single instance" - ) - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.verbose = False - self.scraped_count = 0 - f = open(os.path.join(settings.BASE_DIR, '../whitelist.txt'), 'r') - self.whitelist = seq(f.readlines()).map(lambda i: i.lower().strip()).to_list() - f.close() - - def get_instance_info(self, instance_name: str): - """Collect info about instance""" - url = 'https://' + instance_name + '/api/v1/instance' - response = requests.get(url, timeout=TIMEOUT) - json = response.json() - if response.status_code != 200 or get_key(json, ['error']): - if self.verbose: - log(self, "Couldn't get instance info for {}: {}".format(instance_name, response), error=True) - raise InvalidResponseException("Could not get info for {}".format(instance_name)) - return json - - def get_instance_peers(self, instance_name: str): - """Collect connected instances""" - # The peers endpoint returns a "list of all domain names known to this instance" - # (https://github.com/tootsuite/mastodon/pull/6125) - url = 'https://' + instance_name + '/api/v1/instance/peers' - response = requests.get(url, timeout=TIMEOUT) - peers = response.json() - if response.status_code != 200 or not isinstance(peers, list) or get_key(peers, ['error']): - if self.verbose: - log(self, "Couldn't get peers for {}: {}".format(instance_name, response), error=True) - raise InvalidResponseException("Could not get peers for {}".format(instance_name)) - # Get rid of peers that just say "null" and the instance itself - # Also make sure to lowercase all instance names and remove duplicates - return list(set([peer.lower() for peer in peers if peer and peer != instance_name])) - - def get_statuses(self, instance_name: str): - """Collect all statuses that mention users on other instances""" - mentions = [] - 
datetime_threshold = datetime.now(timezone.utc) - timedelta(days=31) - statuses_seen = 0 - # We'll ask for lots of statuses, but Mastodon never returns more than 40. Some Pleroma instances will ignore - # the limit and return 20. - url = 'https://{}/api/v1/timelines/public?local=true&limit={}'.format(instance_name, MAX_STATUSES_PER_PAGE) - while True: - if self.verbose: - log(self, "({} posts seen)\tGetting {}".format(statuses_seen, url)) - response = requests.get(url, timeout=TIMEOUT) - statuses = response.json() - if response.status_code != 200 or get_key(statuses, ['error']): - if self.verbose: - log(self, "Couldn't get statuses for {}: {}".format(instance_name, response), error=True) - raise InvalidResponseException("Could not get statuses for {}".format(instance_name)) - elif len(statuses) == 0: - break - # Get mentions from this instance - mentions.extend((seq(statuses) - .filter(lambda s: datetime_parser(s['created_at']) > datetime_threshold) - .flat_map(lambda s: s['mentions']))) # map to mentions - - # Find out if we should stop here - earliest_status = statuses[-1] - earliest_time_seen = datetime_parser(earliest_status['created_at']) - statuses_seen += len(statuses) - # Mastodon returns max 40 statuses; if we ever see less than that we know there aren't any more - if earliest_time_seen < datetime_threshold or statuses_seen >= STATUS_SCRAPE_LIMIT: - break - # Continuing, so get url for next page - min_id = earliest_status['id'] - url = 'https://{}/api/v1/timelines/public?local=true&limit={}&max_id={}'.format(instance_name, MAX_STATUSES_PER_PAGE, min_id) - time.sleep(2) # Sleep to avoid overloading the instance - - mentions_seq = (seq(mentions) - .filter(lambda m: not m['acct'].endswith(instance_name) and '@' in m['acct']) - .map(lambda m: m['acct'].split('@')[-1]) # map to instance name - .map(lambda m: (m, 1)) - .reduce_by_key(lambda x, y: x+y)) # sequence of tuples (instance, count) - mentions_by_instance = {t[0]: t[1] for t in mentions_seq} # dict of instance -> number of mentions - - return mentions_by_instance, statuses_seen - - def process_instance(self, instance: Instance): - """Given an instance, get all the data we're interested in""" - data = dict() - try: - if instance.name.endswith("gab.best"): - raise BlacklistedDomainException - - data['instance_name'] = instance.name - data['info'] = self.get_instance_info(instance.name) - - # Check if this is a personal instance before continuing - user_count = get_key(data, ['info', 'stats', 'user_count']) - if isinstance(user_count, int)\ - and user_count < PERSONAL_INSTANCE_THRESHOLD\ - and instance.name not in self.whitelist: - raise PersonalInstanceException - - data['peers'] = self.get_instance_peers(instance.name) - if not data['info'] and not data['peers']: - # We got a response from the instance, but it didn't have any of the information we were expecting. - raise InvalidResponseException - - data['mentions'], data['statuses_seen'] = self.get_statuses(instance.name) - data['status'] = 'success' - return data - - except (InvalidResponseException, - PersonalInstanceException, - BlacklistedDomainException, - requests.exceptions.RequestException, - json.decoder.JSONDecodeError) as e: - data['instance_name'] = instance.name - data['status'] = type(e).__name__ - return data - - @db.transaction.atomic - @require_lock(Instance, 'ACCESS EXCLUSIVE') - def save_data(self, instance, data, queue, existing_instance_ids): - """Save data""" - # Validate the ints. Some servers that appear to be fake instances have e.g. 
negative numbers here. - instance.domain_count = validate_int(get_key(data, ['info', 'stats', 'domain_count'])) - instance.status_count = validate_int(get_key(data, ['info', 'stats', 'status_count'])) - instance.user_count = validate_int(get_key(data, ['info', 'stats', 'user_count'])) - instance.description = get_key(data, ['info', 'description']) - instance.version = get_key(data, ['info', 'version']) - instance.status = get_key(data, ['status']) - instance.last_updated = timezone.now() - instance.save() - if data['status'] == 'success' and data['peers']: - # TODO: handle a peer disappeer-ing - # Create instances for the peers we haven't seen before and add them to the queue - new_instance_ids = [peer_id for peer_id in data['peers'] if peer_id not in existing_instance_ids] - # bulk_create doesn't call save(), so the auto_now_add field won't get set automatically - new_instances = [Instance(name=id, first_seen=datetime.now(), last_updated=datetime.utcfromtimestamp(0)) - for id in new_instance_ids] - existing_instance_ids.extend(new_instance_ids) - Instance.objects.bulk_create(new_instances) - for new_instance in new_instances: - queue.put(new_instance) - - # Create relationships we haven't seen before - existing_peer_ids = PeerRelationship.objects.filter(source=instance).values_list('target', flat=True) - new_peer_ids = [peer_id for peer_id in data['peers'] if peer_id not in existing_peer_ids] - if new_peer_ids: - # new_peers = Instance.objects.filter(name__in=new_peer_ids) - new_relationships = [PeerRelationship(source=instance, target_id=new_peer, first_seen=datetime.now()) - for new_peer in new_peer_ids] - PeerRelationship.objects.bulk_create(new_relationships) - - if data['status'] == 'success' and data['mentions']: - # At this point, we can assume that a relationship exists for every peer that's mentioned in statuses - mentions = data['mentions'] - relationships = PeerRelationship.objects.filter(source=instance, - target_id__in=list(mentions.keys())) - for relationship in relationships: - relationship.mention_count = mentions[relationship.target_id] - relationship.statuses_seen = data['statuses_seen'] - relationship.last_updated = datetime.now() - bulk_update(relationships, update_fields=['mention_count', 'statuses_seen', 'last_updated']) - - log(self, "Processed {}: {}".format(data['instance_name'], data['status'])) - - def worker(self, queue: mp.JoinableQueue, existing_instance_ids, scraped_ids): - """The main worker that processes instances""" - db.connections.close_all() # https://stackoverflow.com/a/38356519/3697202 - while True: - instance = queue.get() - if instance.name in scraped_ids: - # If we hit this branch, it's indicative of a bug - log(self, "Skipping {}, already done. 
This should not have been added to the queue!".format(instance), - error=True) - queue.task_done() - else: - # Fetch data on instance - log(self, "Processing {}".format(instance.name)) - data = self.process_instance(instance) - self.save_data(instance, data, queue, existing_instance_ids) - scraped_ids[instance.name] = 1 - queue.task_done() - - def handle(self, *args, **options): - start_time = time.time() - - self.verbose = options['verbose'] - - if options['instance']: - stale_instance, _ = Instance.objects.get_or_create(name=options['instance']) - stale_instances = [stale_instance] - elif options['all']: - stale_instances = Instance.objects.all() - else: - stale_instances = Instance.objects.filter(last_updated__lte=datetime.now()-timedelta(days=1)) - - if not options['unlimited']: - stale_instances = stale_instances[:INSTANCE_SCRAPE_LIMIT] - - with mp.Manager() as manager: - # Share the list of existing instances amongst all threads (to avoid each thread having to query - # for it on every instance it scrapes) - existing_instance_ids = manager.list(list(Instance.objects.values_list('name', flat=True))) - scraped_ids = manager.dict() - queue = mp.JoinableQueue() - if stale_instances: - for instance in stale_instances: - queue.put(instance) - elif not Instance.objects.exists(): - instance, _ = Instance.objects.get_or_create(name=SEED) - existing_instance_ids.append(instance.name) - queue.put(instance) - - pool = mp.Pool(NUM_THREADS, initializer=self.worker, initargs=(queue, existing_instance_ids, scraped_ids)) - queue.join() - self.scraped_count = len(scraped_ids.keys()) - - end_time = time.time() - log(self, "Scraped {} instances in {:.0f}s".format(self.scraped_count, end_time - start_time), True) diff --git a/backend/scraper/migrations/0001_initial.py b/backend/scraper/migrations/0001_initial.py deleted file mode 100644 index 3733cb5..0000000 --- a/backend/scraper/migrations/0001_initial.py +++ /dev/null @@ -1,67 +0,0 @@ -# Generated by Django 2.1.7 on 2019-02-21 12:27 - -from django.db import migrations, models -import django.db.models.deletion -import django.utils.timezone - - -class Migration(migrations.Migration): - - initial = True - - dependencies = [ - ] - - operations = [ - migrations.CreateModel( - name='Edge', - fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('weight', models.FloatField(blank=True, null=True)), - ('last_updated', models.DateTimeField(default=django.utils.timezone.now)), - ], - ), - migrations.CreateModel( - name='Instance', - fields=[ - ('name', models.CharField(max_length=200, primary_key=True, serialize=False)), - ('description', models.TextField(blank=True)), - ('domain_count', models.IntegerField(blank=True, null=True)), - ('status_count', models.IntegerField(blank=True, null=True)), - ('user_count', models.IntegerField(blank=True, null=True)), - ('version', models.CharField(blank=True, max_length=1000)), - ('status', models.CharField(max_length=100)), - ('x_coord', models.FloatField(blank=True, null=True)), - ('y_coord', models.FloatField(blank=True, null=True)), - ('first_seen', models.DateTimeField(auto_now_add=True)), - ('last_updated', models.DateTimeField(default=django.utils.timezone.now)), - ], - ), - migrations.CreateModel( - name='PeerRelationship', - fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('mention_count', models.IntegerField(default=0)), - ('statuses_seen', models.IntegerField(default=0)), - ('first_seen', 
models.DateTimeField(auto_now_add=True)), - ('last_updated', models.DateTimeField(default=django.utils.timezone.now)), - ('source', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='following_relationship', to='scraper.Instance')), - ('target', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='follower_relationships', to='scraper.Instance')), - ], - ), - migrations.AddField( - model_name='instance', - name='peers', - field=models.ManyToManyField(through='scraper.PeerRelationship', to='scraper.Instance'), - ), - migrations.AddField( - model_name='edge', - name='source', - field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='+', to='scraper.Instance'), - ), - migrations.AddField( - model_name='edge', - name='target', - field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='+', to='scraper.Instance'), - ), - ] diff --git a/backend/scraper/migrations/0002_auto_20190419_1346.py b/backend/scraper/migrations/0002_auto_20190419_1346.py deleted file mode 100644 index 07d9486..0000000 --- a/backend/scraper/migrations/0002_auto_20190419_1346.py +++ /dev/null @@ -1,24 +0,0 @@ -# Generated by Django 2.1.7 on 2019-04-19 13:46 - -from django.db import migrations, models -import django.db.models.deletion - - -class Migration(migrations.Migration): - - dependencies = [ - ('scraper', '0001_initial'), - ] - - operations = [ - migrations.AlterField( - model_name='edge', - name='source', - field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='targets', to='scraper.Instance'), - ), - migrations.AlterField( - model_name='edge', - name='target', - field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='sources', to='scraper.Instance'), - ), - ] diff --git a/backend/scraper/migrations/__init__.py b/backend/scraper/migrations/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/backend/scraper/models.py b/backend/scraper/models.py deleted file mode 100644 index 1387630..0000000 --- a/backend/scraper/models.py +++ /dev/null @@ -1,59 +0,0 @@ -from django.db import models -from django.utils import timezone - - -class Instance(models.Model): - """ - The main model that saves details of an instance and links between them in the peers - property. - - Don't change the schema without verifying that the gephi script can still read the data. 
- """ - # Primary key - name = models.CharField(max_length=200, primary_key=True) - - # Details - description = models.TextField(blank=True) - domain_count = models.IntegerField(blank=True, null=True) - status_count = models.IntegerField(blank=True, null=True) - user_count = models.IntegerField(blank=True, null=True) - version = models.CharField(max_length=1000, blank=True) # In Django CharField is never stored as NULL in the db - status = models.CharField(max_length=100) - - # Foreign keys - peers = models.ManyToManyField('self', symmetrical=False, through='PeerRelationship') - - # Graph - x_coord = models.FloatField(blank=True, null=True) - y_coord = models.FloatField(blank=True, null=True) - - # Automatic fields - first_seen = models.DateTimeField(auto_now_add=True) - last_updated = models.DateTimeField(default=timezone.now) - - -class PeerRelationship(models.Model): - source = models.ForeignKey(Instance, related_name="following_relationship", on_delete=models.CASCADE) - target = models.ForeignKey(Instance, related_name="follower_relationships", on_delete=models.CASCADE) - - # Interaction stats - mention_count = models.IntegerField(default=0) - statuses_seen = models.IntegerField(default=0) # because we want mention_count as a ratio - - # Metadata - first_seen = models.DateTimeField(auto_now_add=True) - last_updated = models.DateTimeField(default=timezone.now) - - -class Edge(models.Model): - """ - This class is automatically generated from PeerRelationship using the build_edges command. - It aggregates stats from the asymmetrical PeerRelationship to a symmetrical one that's suitable for serving - to the front-end. - """ - source = models.ForeignKey(Instance, related_name='targets', on_delete=models.CASCADE) - target = models.ForeignKey(Instance, related_name='sources', on_delete=models.CASCADE) - weight = models.FloatField(blank=True, null=True) - - # Metadata - last_updated = models.DateTimeField(default=timezone.now) diff --git a/backend/scripts/docker-entrypoint.sh b/backend/scripts/docker-entrypoint.sh deleted file mode 100644 index 5232253..0000000 --- a/backend/scripts/docker-entrypoint.sh +++ /dev/null @@ -1,26 +0,0 @@ -#! /bin/bash - -SLEEP_SECONDS=3 - ->&2 echo "Checking Postgres status..." 
diff --git a/backend/scripts/docker-entrypoint.sh b/backend/scripts/docker-entrypoint.sh
deleted file mode 100644
index 5232253..0000000
--- a/backend/scripts/docker-entrypoint.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-#! /bin/bash
-
-SLEEP_SECONDS=3
-
->&2 echo "Checking Postgres status..."
-
-# https://docs.docker.com/compose/startup-order/
-export PGPASSWORD=$POSTGRES_PASSWORD
-until psql -h db -U "$POSTGRES_USER" -p 5432 -d "$POSTGRES_DB" -c '\q'
-do
-    >&2 echo "Postgres is unavailable - sleeping"
-    sleep $SLEEP_SECONDS
-done
->&2 echo "Postgres is up"
-
-python manage.py collectstatic --noinput
-python manage.py migrate --noinput
-
-if [[ $ENVIRONMENT == "development" ]]
-then
-    >&2 echo "Running Django server on port 8000 for development"
-    python manage.py runserver 0.0.0.0:8000
-else
-    >&2 echo "Running gunicorn server"
-    gunicorn backend.wsgi -c /config/gunicorn.conf.py
-fi
diff --git a/backend/test/backend_web/controllers/graph_controller_test.exs b/backend/test/backend_web/controllers/graph_controller_test.exs
new file mode 100644
index 0000000..c3dda96
--- /dev/null
+++ b/backend/test/backend_web/controllers/graph_controller_test.exs
@@ -0,0 +1,102 @@
+defmodule BackendWeb.GraphControllerTest do
+  use BackendWeb.ConnCase
+
+  alias Backend.Api
+  alias Backend.Api.Graph
+
+  @create_attrs %{
+    id: "some id",
+    label: "some label",
+    size: 120.5,
+    x: 120.5,
+    y: 120.5
+  }
+  @update_attrs %{
+    id: "some updated id",
+    label: "some updated label",
+    size: 456.7,
+    x: 456.7,
+    y: 456.7
+  }
+  @invalid_attrs %{id: nil, label: nil, size: nil, x: nil, y: nil}
+
+  def fixture(:graph) do
+    {:ok, graph} = Api.create_graph(@create_attrs)
+    graph
+  end
+
+  setup %{conn: conn} do
+    {:ok, conn: put_req_header(conn, "accept", "application/json")}
+  end
+
+  describe "index" do
+    test "lists all nodes", %{conn: conn} do
+      conn = get(conn, Routes.graph_path(conn, :index))
+      assert json_response(conn, 200)["data"] == []
+    end
+  end
+
+  describe "create graph" do
+    test "renders graph when data is valid", %{conn: conn} do
+      conn = post(conn, Routes.graph_path(conn, :create), graph: @create_attrs)
+      assert %{"id" => id} = json_response(conn, 201)["data"]
+
+      conn = get(conn, Routes.graph_path(conn, :show, id))
+
+      assert %{
+               "id" => "some id",
+               "label" => "some label",
+               "size" => 120.5,
+               "x" => 120.5,
+               "y" => 120.5
+             } = json_response(conn, 200)["data"]
+    end
+
+    test "renders errors when data is invalid", %{conn: conn} do
+      conn = post(conn, Routes.graph_path(conn, :create), graph: @invalid_attrs)
+      assert json_response(conn, 422)["errors"] != %{}
+    end
+  end
+
+  describe "update graph" do
+    setup [:create_graph]
+
+    test "renders graph when data is valid", %{conn: conn, graph: %Graph{id: id} = graph} do
+      conn = put(conn, Routes.graph_path(conn, :update, graph), graph: @update_attrs)
+      assert %{"id" => ^id} = json_response(conn, 200)["data"]
+
+      conn = get(conn, Routes.graph_path(conn, :show, id))
+
+      assert %{
+               "id" => "some updated id",
+               "label" => "some updated label",
+               "size" => 456.7,
+               "x" => 456.7,
+               "y" => 456.7
+             } = json_response(conn, 200)["data"]
+    end
+
+    test "renders errors when data is invalid", %{conn: conn, graph: graph} do
+      conn = put(conn, Routes.graph_path(conn, :update, graph), graph: @invalid_attrs)
+      assert json_response(conn, 422)["errors"] != %{}
+    end
+  end
+
+  describe "delete graph" do
+    setup [:create_graph]
+
+    test "deletes chosen graph", %{conn: conn, graph: graph} do
+      conn = delete(conn, Routes.graph_path(conn, :delete, graph))
+      assert response(conn, 204)
+
+      assert_error_sent 404, fn ->
+        get(conn, Routes.graph_path(conn, :show, graph))
+      end
+    end
+  end
+
+  defp create_graph(_) do
+    graph = fixture(:graph)
+    {:ok, graph: graph}
+  end
+end
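These controller tests lean on `json_response(conn, 422)["errors"]`, which assumes invalid changesets are rendered as a JSON error map rather than raising. A minimal sketch of the kind of fallback controller that behaviour implies (the modules this patch actually ships may differ in detail):

```elixir
# Sketch of a Phoenix fallback controller rendering changeset errors as
# a 422 JSON body; illustrative, not necessarily this patch's exact code.
defmodule BackendWeb.FallbackControllerSketch do
  use BackendWeb, :controller

  def call(conn, {:error, %Ecto.Changeset{} = changeset}) do
    conn
    |> put_status(:unprocessable_entity)
    |> put_view(BackendWeb.ChangesetView)
    |> render("error.json", changeset: changeset)
  end
end
```

diff --git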
a/backend/test/backend_web/controllers/instance_controller_test.exs b/backend/test/backend_web/controllers/instance_controller_test.exs new file mode 100644 index 0000000..3225376 --- /dev/null +++ b/backend/test/backend_web/controllers/instance_controller_test.exs @@ -0,0 +1,88 @@ +defmodule BackendWeb.InstanceControllerTest do + use BackendWeb.ConnCase + + alias Backend.Api + alias Backend.Api.Instance + + @create_attrs %{ + name: "some name" + } + @update_attrs %{ + name: "some updated name" + } + @invalid_attrs %{name: nil} + + def fixture(:instance) do + {:ok, instance} = Api.create_instance(@create_attrs) + instance + end + + setup %{conn: conn} do + {:ok, conn: put_req_header(conn, "accept", "application/json")} + end + + describe "index" do + test "lists all instances", %{conn: conn} do + conn = get(conn, Routes.instance_path(conn, :index)) + assert json_response(conn, 200)["data"] == [] + end + end + + describe "create instance" do + test "renders instance when data is valid", %{conn: conn} do + conn = post(conn, Routes.instance_path(conn, :create), instance: @create_attrs) + assert %{"id" => id} = json_response(conn, 201)["data"] + + conn = get(conn, Routes.instance_path(conn, :show, id)) + + assert %{ + "id" => id, + "name" => "some name" + } = json_response(conn, 200)["data"] + end + + test "renders errors when data is invalid", %{conn: conn} do + conn = post(conn, Routes.instance_path(conn, :create), instance: @invalid_attrs) + assert json_response(conn, 422)["errors"] != %{} + end + end + + describe "update instance" do + setup [:create_instance] + + test "renders instance when data is valid", %{conn: conn, instance: %Instance{id: id} = instance} do + conn = put(conn, Routes.instance_path(conn, :update, instance), instance: @update_attrs) + assert %{"id" => ^id} = json_response(conn, 200)["data"] + + conn = get(conn, Routes.instance_path(conn, :show, id)) + + assert %{ + "id" => id, + "name" => "some updated name" + } = json_response(conn, 200)["data"] + end + + test "renders errors when data is invalid", %{conn: conn, instance: instance} do + conn = put(conn, Routes.instance_path(conn, :update, instance), instance: @invalid_attrs) + assert json_response(conn, 422)["errors"] != %{} + end + end + + describe "delete instance" do + setup [:create_instance] + + test "deletes chosen instance", %{conn: conn, instance: instance} do + conn = delete(conn, Routes.instance_path(conn, :delete, instance)) + assert response(conn, 204) + + assert_error_sent 404, fn -> + get(conn, Routes.instance_path(conn, :show, instance)) + end + end + end + + defp create_instance(_) do + instance = fixture(:instance) + {:ok, instance: instance} + end +end diff --git a/backend/test/backend_web/views/error_view_test.exs b/backend/test/backend_web/views/error_view_test.exs new file mode 100644 index 0000000..b49bfa5 --- /dev/null +++ b/backend/test/backend_web/views/error_view_test.exs @@ -0,0 +1,15 @@ +defmodule BackendWeb.ErrorViewTest do + use BackendWeb.ConnCase, async: true + + # Bring render/3 and render_to_string/3 for testing custom views + import Phoenix.View + + test "renders 404.json" do + assert render(BackendWeb.ErrorView, "404.json", []) == %{errors: %{detail: "Not Found"}} + end + + test "renders 500.json" do + assert render(BackendWeb.ErrorView, "500.json", []) == + %{errors: %{detail: "Internal Server Error"}} + end +end diff --git a/backend/test/support/channel_case.ex b/backend/test/support/channel_case.ex new file mode 100644 index 0000000..29cb07f --- /dev/null +++ 
b/backend/test/support/channel_case.ex @@ -0,0 +1,37 @@ +defmodule BackendWeb.ChannelCase do + @moduledoc """ + This module defines the test case to be used by + channel tests. + + Such tests rely on `Phoenix.ChannelTest` and also + import other functionality to make it easier + to build common data structures and query the data layer. + + Finally, if the test case interacts with the database, + it cannot be async. For this reason, every test runs + inside a transaction which is reset at the beginning + of the test unless the test case is marked as async. + """ + + use ExUnit.CaseTemplate + + using do + quote do + # Import conveniences for testing with channels + use Phoenix.ChannelTest + + # The default endpoint for testing + @endpoint BackendWeb.Endpoint + end + end + + setup tags do + :ok = Ecto.Adapters.SQL.Sandbox.checkout(Backend.Repo) + + unless tags[:async] do + Ecto.Adapters.SQL.Sandbox.mode(Backend.Repo, {:shared, self()}) + end + + :ok + end +end diff --git a/backend/test/support/conn_case.ex b/backend/test/support/conn_case.ex new file mode 100644 index 0000000..d211e49 --- /dev/null +++ b/backend/test/support/conn_case.ex @@ -0,0 +1,38 @@ +defmodule BackendWeb.ConnCase do + @moduledoc """ + This module defines the test case to be used by + tests that require setting up a connection. + + Such tests rely on `Phoenix.ConnTest` and also + import other functionality to make it easier + to build common data structures and query the data layer. + + Finally, if the test case interacts with the database, + it cannot be async. For this reason, every test runs + inside a transaction which is reset at the beginning + of the test unless the test case is marked as async. + """ + + use ExUnit.CaseTemplate + + using do + quote do + # Import conveniences for testing with connections + use Phoenix.ConnTest + alias BackendWeb.Router.Helpers, as: Routes + + # The default endpoint for testing + @endpoint BackendWeb.Endpoint + end + end + + setup tags do + :ok = Ecto.Adapters.SQL.Sandbox.checkout(Backend.Repo) + + unless tags[:async] do + Ecto.Adapters.SQL.Sandbox.mode(Backend.Repo, {:shared, self()}) + end + + {:ok, conn: Phoenix.ConnTest.build_conn()} + end +end diff --git a/backend/test/support/data_case.ex b/backend/test/support/data_case.ex new file mode 100644 index 0000000..a073792 --- /dev/null +++ b/backend/test/support/data_case.ex @@ -0,0 +1,53 @@ +defmodule Backend.DataCase do + @moduledoc """ + This module defines the setup for tests requiring + access to the application's data layer. + + You may define functions here to be used as helpers in + your tests. + + Finally, if the test case interacts with the database, + it cannot be async. For this reason, every test runs + inside a transaction which is reset at the beginning + of the test unless the test case is marked as async. + """ + + use ExUnit.CaseTemplate + + using do + quote do + alias Backend.Repo + + import Ecto + import Ecto.Changeset + import Ecto.Query + import Backend.DataCase + end + end + + setup tags do + :ok = Ecto.Adapters.SQL.Sandbox.checkout(Backend.Repo) + + unless tags[:async] do + Ecto.Adapters.SQL.Sandbox.mode(Backend.Repo, {:shared, self()}) + end + + :ok + end + + @doc """ + A helper that transforms changeset errors into a map of messages. 
+ + assert {:error, changeset} = Accounts.create_user(%{password: "short"}) + assert "password is too short" in errors_on(changeset).password + assert %{password: ["password is too short"]} = errors_on(changeset) + + """ + def errors_on(changeset) do + Ecto.Changeset.traverse_errors(changeset, fn {message, opts} -> + Enum.reduce(opts, message, fn {key, value}, acc -> + String.replace(acc, "%{#{key}}", to_string(value)) + end) + end) + end +end diff --git a/backend/test/test_helper.exs b/backend/test/test_helper.exs new file mode 100644 index 0000000..a7d0703 --- /dev/null +++ b/backend/test/test_helper.exs @@ -0,0 +1,2 @@ +ExUnit.start() +Ecto.Adapters.SQL.Sandbox.mode(Backend.Repo, :manual) diff --git a/config/Caddyfile b/config/Caddyfile deleted file mode 100644 index e1a144c..0000000 --- a/config/Caddyfile +++ /dev/null @@ -1,13 +0,0 @@ -backend.fediverse.space { - tls tao@btao.org - gzip - - cors - - root /srv - proxy / django:8000 { - transparent - except /static - } -} - diff --git a/config/gunicorn.conf.py b/config/gunicorn.conf.py deleted file mode 100644 index dfe5cb5..0000000 --- a/config/gunicorn.conf.py +++ /dev/null @@ -1,196 +0,0 @@ -# -# Server socket -# -# bind - The socket to bind. -# -# A string of the form: 'HOST', 'HOST:PORT', 'unix:PATH'. -# An IP is a valid HOST. -# -# backlog - The number of pending connections. This refers -# to the number of clients that can be waiting to be -# served. Exceeding this number results in the client -# getting an error when attempting to connect. It should -# only affect servers under significant load. -# -# Must be a positive integer. Generally set in the 64-2048 -# range. -# - -bind = [':8000'] - -# -# Worker processes -# -# workers - The number of worker processes that this server -# should keep alive for handling requests. -# -# A positive integer generally in the 2-4 x $(NUM_CORES) -# range. You'll want to vary this a bit to find the best -# for your particular application's work load. -# -# worker_class - The type of workers to use. The default -# sync class should handle most 'normal' types of work -# loads. You'll want to read -# http://docs.gunicorn.org/en/latest/design.html#choosing-a-worker-type -# for information on when you might want to choose one -# of the other worker classes. -# -# A string referring to a Python path to a subclass of -# gunicorn.workers.base.Worker. The default provided values -# can be seen at -# http://docs.gunicorn.org/en/latest/settings.html#worker-class -# -# worker_connections - For the eventlet and gevent worker classes -# this limits the maximum number of simultaneous clients that -# a single process can handle. -# -# A positive integer generally set to around 1000. -# -# timeout - If a worker does not notify the master process in this -# number of seconds it is killed and a new worker is spawned -# to replace it. -# -# Generally set to thirty seconds. Only set this noticeably -# higher if you're sure of the repercussions for sync workers. -# For the non sync workers it just means that the worker -# process is still communicating and is not tied to the length -# of time required to handle a single request. -# -# keepalive - The number of seconds to wait for the next request -# on a Keep-Alive HTTP connection. -# -# A positive integer. Generally set in the 1-5 seconds range. 
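These worker/timeout/keepalive knobs have no direct counterpart in the Elixir deployment, since the BEAM schedules its own lightweight processes. The closest analogue is Cowboy tuning on the Phoenix endpoint; a sketch with illustrative values, not this patch's actual config:

```elixir
# Illustrative only -- rough Cowboy analogues of gunicorn's
# bind / workers / keepalive settings; values are not from this patch.
use Mix.Config

config :backend, BackendWeb.Endpoint,
  http: [
    port: String.to_integer(System.get_env("PORT") || "4000"),
    # ranch acceptor pool, loosely comparable to gunicorn workers
    transport_options: [num_acceptors: 100],
    # time to wait for the next request on a keep-alive connection
    protocol_options: [request_timeout: 5_000]
  ]
```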
-# - -# try: -# # fail 'successfully' if either of these modules aren't installed -# from gevent import monkey -# from psycogreen.gevent import patch_psycopg - - -# # setting this inside the 'try' ensures that we only -# # activate the gevent worker pool if we have gevent installed -# worker_class = 'gevent' -# workers = 4 -# # this ensures forked processes are patched with gevent/gevent-psycopg2 -# def do_post_fork(server, worker): -# monkey.patch_all() -# patch_psycopg() - -# # you should see this text in your gunicorn logs if it was successful -# worker.log.info("Made Psycopg2 Green") - -# post_fork = do_post_fork -# except ImportError: -# pass - -workers = 4 -# worker_connections = 1000 -# timeout = 30 -# keepalive = 2 - -# -# spew - Install a trace function that spews every line of Python -# that is executed when running the server. This is the -# nuclear option. -# -# True or False -# - -spew = False - -# -# Server mechanics -# -# daemon - Detach the main Gunicorn process from the controlling -# terminal with a standard fork/fork sequence. -# -# True or False -# -# pidfile - The path to a pid file to write -# -# A path string or None to not write a pid file. -# -# user - Switch worker processes to run as this user. -# -# A valid user id (as an integer) or the name of a user that -# can be retrieved with a call to pwd.getpwnam(value) or None -# to not change the worker process user. -# -# group - Switch worker process to run as this group. -# -# A valid group id (as an integer) or the name of a user that -# can be retrieved with a call to pwd.getgrnam(value) or None -# to change the worker processes group. -# -# umask - A mask for file permissions written by Gunicorn. Note that -# this affects unix socket permissions. -# -# A valid value for the os.umask(mode) call or a string -# compatible with int(value, 0) (0 means Python guesses -# the base, so values like "0", "0xFF", "0022" are valid -# for decimal, hex, and octal representations) -# -# tmp_upload_dir - A directory to store temporary request data when -# requests are read. This will most likely be disappearing soon. -# -# A path to a directory where the process owner can write. Or -# None to signal that Python should choose one on its own. -# - -daemon = False -pidfile = '/var/gunicorn/.pid' -umask = 0 -user = None -group = None -tmp_upload_dir = None - -# -# Logging -# -# logfile - The path to a log file to write to. -# -# A path string. "-" means log to stdout. -# -# loglevel - The granularity of log output -# -# A string of "debug", "info", "warning", "error", "critical" -# - -errorlog = '-' -loglevel = 'warning' -accesslog = '-' -access_log_format = '%(h)s %(t)s %(m)s %(U)s %(q)s %(H)s %(s)s %(B)s %(f)s %(a)s %(L)s' - -# -# Process naming -# -# proc_name - A base to use with setproctitle to change the way -# that Gunicorn processes are reported in the system process -# table. This affects things like 'ps' and 'top'. If you're -# going to be running more than one instance of Gunicorn you'll -# probably want to set a name to tell them apart. This requires -# that you install the setproctitle module. -# -# A string or None to choose a default of something like 'gunicorn'. -# - -proc_name = None - -# -# Server hooks -# -# post_fork - Called just after a worker has been forked. -# -# A callable that takes a server and worker instance -# as arguments. -# -# pre_fork - Called just prior to forking the worker subprocess. 
-# -# A callable that accepts the same arguments as after_fork -# -# pre_exec - Called just prior to forking off a secondary -# master process during things like config reloading. -# -# A callable that takes a server instance as the sole argument. -# diff --git a/docker-compose.production.yml b/docker-compose.production.yml index 6701f37..0aaf813 100644 --- a/docker-compose.production.yml +++ b/docker-compose.production.yml @@ -1,38 +1,23 @@ -version: '3' +version: "3" services: db: restart: always networks: - database_network - django: + phoenix: restart: always - volumes: - - ./config/gunicorn.conf.py:/config/gunicorn.conf.py - - gunicorn-socket:/var/gunicorn - - staticfiles:/code/backend/static + build: ./backend networks: - database_network - - server_network - environment: - - ENVIRONMENT=production - - DJANGO_SETTINGS_MODULE=backend.settings.production - caddy: - restart: always - image: abiosoft/caddy:0.11.4-no-stats - ports: - - "80:80" - - "443:443" - volumes: - - ./config/Caddyfile:/etc/Caddyfile - - staticfiles:/srv/static - - caddycerts:/etc/caddycerts - networks: - - server_network depends_on: - - django + - db + ports: + - "${PORT}:${PORT}" environment: - - ACME_AGREE - - CADDYPATH=/etc/caddycerts + - DATABASE_URL + - SECRET_KEY_BASE + - PORT + - BACKEND_HOSTNAME gephi: networks: - database_network @@ -41,9 +26,3 @@ services: networks: database_network: driver: bridge - server_network: - driver: bridge -volumes: - gunicorn-socket: - caddycerts: - staticfiles: diff --git a/docker-compose.yml b/docker-compose.yml index fad3ce4..42628bc 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,38 +1,18 @@ -version: '3' +version: "3" services: db: image: postgres environment: - - POSTGRES_USER - - POSTGRES_PASSWORD - - POSTGRES_DB + - DATABASE_URL ports: - "5432:5432" volumes: - pgdata:/var/lib/postgresql/data - django: - environment: - - SECRET_KEY - - POSTGRES_USER - - POSTGRES_PASSWORD - - POSTGRES_DB - - DJANGO_SETTINGS_MODULE - - ENVIRONMENT=development - build: ./backend - command: bash scripts/docker-entrypoint.sh - volumes: - - ./backend:/code - ports: - - "8000:8000" - depends_on: - - db # This is for running the occasional graph layout task. It's in docker-compose.yml so that it's built at the same time # as everything else, but it should be run regularly with a cron job or similar. gephi: environment: - - POSTGRES_USER - - POSTGRES_PASSWORD - - POSTGRES_DB + - DATABASE_URL build: ./gephi volumes: - gradle-cache:/code/.gradle diff --git a/example.env b/example.env index 6b74cf4..3ae6590 100644 --- a/example.env +++ b/example.env @@ -1,6 +1,4 @@ -SECRET_KEY=a-long-secret-key -POSTGRES_USER=postgres -POSTGRES_PASSWORD=postgres -POSTGRES_DB=fediverse -DJANGO_SETTINGS_MODULE=backend.settings.development -ACME_AGREE=true +DATABASE_URL="postgres://postgres:postgres@localhost:5432/backend_dev" +PORT=4000 +BACKEND_HOSTNAME=localhost +SECRET_KEY_BASE=jLqbBjtQTyZj+1yLwDV8xgZYvZKIBx1MBWbcC2a0mZqB5ivYKQ7GOqNR91g6YnR8 \ No newline at end of file diff --git a/frontend/.gitignore b/frontend/.gitignore deleted file mode 100644 index d30f40e..0000000 --- a/frontend/.gitignore +++ /dev/null @@ -1,21 +0,0 @@ -# See https://help.github.com/ignore-files/ for more about ignoring files. 
- -# dependencies -/node_modules - -# testing -/coverage - -# production -/build - -# misc -.DS_Store -.env.local -.env.development.local -.env.test.local -.env.production.local - -npm-debug.log* -yarn-debug.log* -yarn-error.log* diff --git a/frontend/package.json b/frontend/package.json index 4c5b9c8..cc0947d 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -5,7 +5,8 @@ "scripts": { "start": "NODE_ENV=development react-scripts start", "build": "react-scripts build", - "lint": "tslint -p tsconfig.json -c tslint.json \"src/**/*.{ts,tsx}\"", + "typecheck": "tsc --noemit", + "lint": "yarn typecheck && tslint -p tsconfig.json -c tslint.json \"src/**/*.{ts,tsx}\"", "lint:fix": "yarn lint --fix", "pretty": "prettier --write \"src/**/*.{ts,tsx}\"", "test": "yarn lint && react-scripts test", @@ -27,44 +28,47 @@ "printWidth": 120 }, "dependencies": { - "@blueprintjs/core": "^3.4.0", - "@blueprintjs/icons": "^3.1.0", - "@blueprintjs/select": "^3.1.0", + "@blueprintjs/core": "^3.17.1", + "@blueprintjs/icons": "^3.9.1", + "@blueprintjs/select": "^3.9.0", "classnames": "^2.2.6", - "cross-fetch": "^3.0.2", - "lodash": "^4.17.10", + "cross-fetch": "^3.0.4", + "cytoscape": "^3.8.1", + "cytoscape-cola": "^2.3.0", + "lodash": "^4.17.14", "moment": "^2.22.2", "normalize.css": "^8.0.0", "react": "^16.4.2", "react-dom": "^16.4.2", - "react-redux": "^7.0.2", - "react-router-dom": "^5.0.0", - "react-scripts": "^2.1.8", + "react-redux": "^7.1.0", + "react-router-dom": "^5.0.1", + "react-scripts": "^3.0.1", "react-sigma": "^1.2.30", - "react-virtualized": "^9.20.1", - "redux": "^4.0.0", + "react-virtualized": "^9.21.1", + "redux": "^4.0.4", "redux-thunk": "^2.3.0", - "sanitize-html": "^1.18.4", - "styled-components": "^4.2.0" + "sanitize-html": "^1.20.1", + "styled-components": "^4.3.2" }, "devDependencies": { - "@blueprintjs/tslint-config": "^1.8.0", - "@types/classnames": "^2.2.6", - "@types/jest": "^24.0.11", - "@types/lodash": "^4.14.116", - "@types/node": "^11.13.4", - "@types/react": "^16.8.13", + "@blueprintjs/tslint-config": "^1.8.1", + "@types/classnames": "^2.2.9", + "@types/cytoscape": "^3.4.3", + "@types/jest": "^24.0.15", + "@types/lodash": "^4.14.136", + "@types/node": "^12.6.2", + "@types/react": "^16.8.23", "@types/react-dom": "^16.8.4", - "@types/react-redux": "^7.0.6", - "@types/react-router-dom": "^4.3.2", - "@types/react-virtualized": "^9.18.7", - "@types/sanitize-html": "^1.18.3", - "@types/styled-components": "4.1.8", - "husky": "^1.3.1", - "lint-staged": "^8.1.5", - "tslint": "^5.16.0", + "@types/react-redux": "^7.1.1", + "@types/react-router-dom": "^4.3.4", + "@types/react-virtualized": "^9.21.2", + "@types/sanitize-html": "^1.20.1", + "@types/styled-components": "4.1.18", + "husky": "^3.0.0", + "lint-staged": "^9.2.0", + "tslint": "^5.18.0", "tslint-eslint-rules": "^5.4.0", - "typescript": "^3.0.1" + "typescript": "^3.5.3" }, "browserslist": [ ">0.2%", diff --git a/frontend/src/components/CytoscapeGraph.tsx b/frontend/src/components/CytoscapeGraph.tsx new file mode 100644 index 0000000..453103a --- /dev/null +++ b/frontend/src/components/CytoscapeGraph.tsx @@ -0,0 +1,206 @@ +import cytoscape from "cytoscape"; +// import cola from "cytoscape-cola"; +import * as React from "react"; +import { connect } from "react-redux"; + +import { Dispatch } from "redux"; +import styled from "styled-components"; +import { DEFAULT_NODE_COLOR, SELECTED_NODE_COLOR } from "../constants"; +import { selectAndLoadInstance } from "../redux/actions"; +import { IAppState, IGraph } from 
"../redux/types"; +import { ErrorState } from "./ErrorState"; +// import { FloatingLayoutSelect } from "./FloatingLayoutSelect"; +import { FloatingResetButton } from "./FloatingResetButton"; + +interface IGraphProps { + graph?: IGraph; + currentInstanceName: string | null; + selectAndLoadInstance: (name: string) => void; +} +interface IGraphState { + layoutAlgorithm: string; + isLayouting: boolean; + didError: boolean; +} +class GraphImpl extends React.Component { + private cy?: cytoscape.Core; + // private layout?: cytoscape.Layouts; + private cytoscapeDiv: React.RefObject; + + public constructor(props: IGraphProps) { + super(props); + this.cytoscapeDiv = React.createRef(); + this.state = { layoutAlgorithm: "cola", isLayouting: false, didError: false }; + } + + public render() { + if (this.state.didError) { + return ; + } + + const FullDiv = styled.div` + position: absolute; + top: 50px; + bottom: 0; + right: 0; + left: 0; + `; + + return ( +
+      <FullDiv>
+        <div style={{ width: "100%", height: "100%" }} ref={this.cytoscapeDiv} />
+        {/* <FloatingLayoutSelect
+          currentLayoutKey={this.state.layoutAlgorithm}
+          onItemSelect={this.handleLayoutSelect}
+          startLayout={this.startLayout}
+          stopLayout={this.stopLayout}
+        /> */}
+        <FloatingResetButton onClick={this.resetGraph} />
+      </FullDiv>
+ ); + } + + public componentDidMount() { + let { graph } = this.props; + if (!graph) { + this.setState({ didError: true }); + return; + } + + // Check that all nodes have size & coordinates; otherwise the graph will look messed up + const lengthBeforeFilter = graph.nodes.length; + graph = { ...graph, nodes: graph.nodes.filter(n => n.size && n.x && n.y) }; + if (graph.nodes.length !== lengthBeforeFilter) { + // tslint:disable-next-line:no-console + console.error( + "Some nodes were missing details: " + graph.nodes.filter(n => !n.size || !n.x || !n.y).map(n => n.label) + ); + this.setState({ didError: true }); + } + + // cytoscape.use(cola as any); + this.initGraph(); + } + + public componentDidUpdate() { + this.initGraph(); + } + + // private handleLayoutSelect = (layout: string) => { + // this.setState({ layoutAlgorithm: layout }); + // }; + + // private startLayout = () => { + // if (!this.cy) { + // return; + // } + // const options = { + // cola: { + // animate: true, + // convergenceThreshold: 0.1, + // edgeLength: (edge: any) => 1 / edge.data("weight"), + // name: "cola" + // }, + // cose: { + // animate: false, + // idealEdgeLength: (edge: any) => 1 / edge.data("weight"), + // name: "cose", + // numIter: 100 + // } + // }; + // this.layout = this.cy.layout(options[this.state.layoutAlgorithm] as any); + // this.layout.run(); + // }; + + // private stopLayout = () => { + // if (!this.layout) { + // return; + // } + // this.layout.stop(); + // }; + + private initGraph = () => { + const { graph } = this.props; + if (this.state.didError || !graph) { + return; + } + this.cy = cytoscape({ + autoungrabify: true, + container: this.cytoscapeDiv.current, + elements: { + edges: graph.edges.map(edge => ({ + data: { + id: edge.id || `${edge.source}${edge.target}`, + source: edge.source, + target: edge.target, + weight: edge.size + }, + group: "edges" as "edges" + })), + nodes: graph.nodes.map(node => ({ + data: { + id: node.id + }, + group: "nodes" as "nodes", + position: { + x: node.x, + y: node.y + } + })) + }, + layout: { + name: "preset" + }, + selectionType: "single", + style: [ + { + selector: "node:selected", + style: { + "background-color": SELECTED_NODE_COLOR, + label: "data(id)" + } + }, + { + selector: "node", + style: { + "background-color": DEFAULT_NODE_COLOR + } + } + ] + }); + this.cy.nodes().on("select", e => { + const instanceId = e.target.data("id"); + if (instanceId) { + // console.log(`selecting ${instanceId}`); + // console.log(`now selected: ${this.cy && this.cy.$(":selected")}`); + this.props.selectAndLoadInstance(instanceId); + } + }); + this.cy.nodes().on("unselect", e => { + const instanceId = e.target.data("id"); + if (instanceId) { + // console.log(`unselecting ${instanceId}`); + this.props.selectAndLoadInstance(""); + } + }); + }; + + private resetGraph = () => { + if (!this.cy) { + return; + } + this.cy.reset(); + }; +} + +const mapStateToProps = (state: IAppState) => ({ + currentInstanceName: state.currentInstance.currentInstanceName, + graph: state.data.graph +}); +const mapDispatchToProps = (dispatch: Dispatch) => ({ + selectAndLoadInstance: (instanceName: string) => dispatch(selectAndLoadInstance(instanceName) as any) +}); +export const CytoscapeGraph = connect( + mapStateToProps, + mapDispatchToProps +)(GraphImpl); diff --git a/frontend/src/components/FloatingCard.tsx b/frontend/src/components/FloatingCard.tsx new file mode 100644 index 0000000..1b4fa80 --- /dev/null +++ b/frontend/src/components/FloatingCard.tsx @@ -0,0 +1,14 @@ +import { Card, Elevation, 
ICardProps } from "@blueprintjs/core";
+import * as React from "react";
+import styled from "styled-components";
+
+const FloatingCardElement = styled(Card)`
+  position: absolute;
+  bottom: 10px;
+  left: 10px;
+  z-index: 20;
+`;
+
+const FloatingCard: React.FC<ICardProps> = props => <FloatingCardElement elevation={Elevation.TWO} {...props} />;
+
+export default FloatingCard;
diff --git a/frontend/src/components/FloatingLayoutSelect.tsx b/frontend/src/components/FloatingLayoutSelect.tsx
new file mode 100644
index 0000000..f59f37d
--- /dev/null
+++ b/frontend/src/components/FloatingLayoutSelect.tsx
@@ -0,0 +1,53 @@
+import { Button, H6, MenuItem } from "@blueprintjs/core";
+import { IconNames } from "@blueprintjs/icons";
+import { ItemRenderer, Select } from "@blueprintjs/select";
+import * as React from "react";
+import FloatingCard from "./FloatingCard";
+
+interface ILayoutToDisplayName {
+  [key: string]: string;
+}
+const layouts: ILayoutToDisplayName = {
+  cola: "COLA",
+  cose: "CoSE"
+};
+const LayoutSelect = Select.ofType<string>();
+
+const LayoutItemRenderer: ItemRenderer<string> = (layout, { handleClick, modifiers }) => (
+  <MenuItem active={modifiers.active} key={layout} onClick={handleClick} text={layout} />
+);
+
+interface IFloatingLayoutSelectProps {
+  currentLayoutKey: string;
+  onItemSelect: (layout: string) => void;
+  startLayout: () => void;
+  stopLayout: () => void;
+}
+export const FloatingLayoutSelect: React.FC<IFloatingLayoutSelectProps> = ({
+  currentLayoutKey,
+  onItemSelect,
+  startLayout,
+  stopLayout
+}) => {
+  return (
+    <FloatingCard>
+      <H6>Layout</H6>
+      <LayoutSelect
+        items={Object.keys(layouts)}
+        itemRenderer={LayoutItemRenderer}
+        filterable={false}
+        onItemSelect={onItemSelect}
+      >
+        <Button text={layouts[currentLayoutKey]} rightIcon={IconNames.CARET_DOWN} />
+      </LayoutSelect>
+      <Button icon={IconNames.PLAY} onClick={startLayout} />
+      <Button icon={IconNames.STOP} onClick={stopLayout} />
+    </FloatingCard>
+  );
+};