diff --git a/.env b/.env index 8473a84c7a8..36aa5654325 100644 --- a/.env +++ b/.env @@ -22,6 +22,14 @@ MIGRATOR_PASSWORD=specify_migrator APP_USER_NAME=specify_user APP_USER_PASSWORD=specify_user +# Enabling this option allows administrators with access to the +# backend Specify instance to log in as any user for support +# purposes without knowing their password. +# https://discourse.specifysoftware.org/t/allow-support-login-documentation/2838 +ALLOW_SUPPORT_LOGIN=false +# The amount of time in seconds each token is valid for +SUPPORT_LOGIN_TTL = 180 + # Make sure to set the `SECRET_KEY` to a unique value SECRET_KEY=change_this_to_some_unique_random_string @@ -29,6 +37,10 @@ ASSET_SERVER_URL=http://host.docker.internal/web_asset_store.xml # Make sure to set the `ASSET_SERVER_KEY` to a unique value ASSET_SERVER_KEY=your_asset_server_access_key +REDIS_HOST=redis +REDIS_PORT=6379 +REDIS_DB_INDEX=0 + REPORT_RUNNER_HOST=report-runner REPORT_RUNNER_PORT=8080 diff --git a/Dockerfile b/Dockerfile index 6655b0cc524..7f05311a171 100644 --- a/Dockerfile +++ b/Dockerfile @@ -185,6 +185,9 @@ WEB_ATTACHMENT_KEY = os.getenv('ASSET_SERVER_KEY', None) WEB_ATTACHMENT_COLLECTION = os.getenv('ASSET_SERVER_COLLECTION', None) SEPARATE_WEB_ATTACHMENT_FOLDERS = os.getenv('SEPARATE_WEB_ATTACHMENT_FOLDERS', None) +REDIS_HOST = os.getenv('REDIS_HOST', 'redis') +REDIS_PORT = os.getenv('REDIS_PORT', 6379) +REDIS_DB_INDEX = os.getenv('REDIS_DB_INDEX', 0) CELERY_BROKER_URL = os.getenv('CELERY_BROKER_URL', None) CELERY_RESULT_BACKEND = os.getenv('CELERY_RESULT_BACKEND', None) CELERY_TASK_DEFAULT_QUEUE = os.getenv('CELERY_TASK_QUEUE', DATABASE_NAME) @@ -192,6 +195,8 @@ CELERY_TASK_DEFAULT_QUEUE = os.getenv('CELERY_TASK_QUEUE', DATABASE_NAME) ANONYMOUS_USER = os.getenv('ANONYMOUS_USER', None) SPECIFY_CONFIG_DIR = os.environ.get('SPECIFY_CONFIG_DIR', '/opt/Specify/config') TIME_ZONE = os.environ.get('TIME_ZONE', 'America/Chicago') +ALLOW_SUPPORT_LOGIN = os.environ.get('ALLOW_SUPPORT_LOGIN', False) 
+SUPPORT_LOGIN_TTL = int(os.environ.get('SUPPORT_LOGIN_TTL', 180)) # Resolve ALLOWED_HOSTS in the following precedence: # - Use the ALLOWED_HOSTS environment variable (if present) diff --git a/requirements.txt b/requirements.txt index 352783f27cd..bf78415aa47 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,8 @@ tzdata wheel # backports.zoneinfo==0.2.1 kombu==5.5.2 -celery[redis]==5.5.1 +redis==6.4.0 +celery==5.5.1 Django==4.2.24 mysqlclient==2.1.1 SQLAlchemy==1.4.54 diff --git a/specifyweb/backend/accounts/views.py b/specifyweb/backend/accounts/views.py index a6c57d744f2..84d13926b89 100644 --- a/specifyweb/backend/accounts/views.py +++ b/specifyweb/backend/accounts/views.py @@ -6,10 +6,11 @@ import logging import requests import time +from urllib.parse import unquote_plus from django import forms from django import http from django.conf import settings -from django.contrib.auth import login, logout +from django.contrib.auth import login from django.contrib.auth.models import AbstractBaseUser from django.db import connection from django.db.models import Max @@ -34,6 +35,7 @@ from specifyweb.specify.models import Specifyuser from django.views.decorators.http import require_POST from specifyweb.backend.permissions.permissions import check_permission_targets +from specifyweb.specify.auth.support_login import b64_url_to_bytes from django.db import transaction, connection logger = logging.getLogger(__name__) @@ -351,8 +353,10 @@ def support_login(request: http.HttpRequest) -> http.HttpResponse: return http.HttpResponseForbidden() from django.contrib.auth import login, authenticate + token = request.GET["token"] + key = b64_url_to_bytes(request.GET["key"]) - user = authenticate(token=request.GET['token']) + user = authenticate(token=token, key=key) if user is not None: login(request, user, backend='specifyweb.specify.auth.support_login.SupportLoginBackend') return http.HttpResponseRedirect('/') diff --git a/specifyweb/backend/redis_cache/__init__.py 
b/specifyweb/backend/redis_cache/__init__.py new file mode 100644 index 00000000000..bb6d693ce31 --- /dev/null +++ b/specifyweb/backend/redis_cache/__init__.py @@ -0,0 +1,6 @@ +from .store import ( + set_bytes, + set_string, + get_bytes, + get_string +) \ No newline at end of file diff --git a/specifyweb/backend/redis_cache/store.py b/specifyweb/backend/redis_cache/store.py new file mode 100644 index 00000000000..61ad5634d80 --- /dev/null +++ b/specifyweb/backend/redis_cache/store.py @@ -0,0 +1,17 @@ +from .utils import _set_string, _get_string + + +def set_string(key: str, value: str, time_to_live=None, override_existing=True): + return _set_string(key, value, time_to_live=time_to_live, override_existing=override_existing, decode_responses=True) + + +def set_bytes(key: str, value: bytes, time_to_live=None, override_existing=True): + return _set_string(key, value, time_to_live=time_to_live, override_existing=override_existing, decode_responses=False) + + +def get_string(key: str, delete_key=False) -> str: + return _get_string(key, delete_key=delete_key, decode_responses=True) + + +def get_bytes(key: str, delete_key=False) -> bytes: + return _get_string(key, delete_key=delete_key, decode_responses=False) diff --git a/specifyweb/backend/redis_cache/utils.py b/specifyweb/backend/redis_cache/utils.py new file mode 100644 index 00000000000..d8a709380aa --- /dev/null +++ b/specifyweb/backend/redis_cache/utils.py @@ -0,0 +1,39 @@ +from typing import overload + +from redis import Redis +from django.conf import settings + + +def redis_connection(decode_responses=True): + redis_host = getattr(settings, "REDIS_HOST", None) + redis_port = getattr(settings, "REDIS_PORT", None) + redis_db_index = getattr(settings, "REDIS_DB_INDEX", 0) + if None in (redis_host, redis_port, redis_db_index): + raise ValueError("Redis is not correctly configured", redis_host, redis_port) + return Redis(host=redis_host, port=redis_port, db=redis_db_index, decode_responses=decode_responses) + + +def 
_set_string(key: str, value: str, time_to_live=None, override_existing=True, decode_responses=True): + host = redis_connection(decode_responses=decode_responses) + # See https://redis.readthedocs.io/en/stable/commands.html#redis.commands.core.CoreCommands.set + flags = { + "ex": time_to_live, + "nx": not override_existing + } + host.set(key, value, **flags) + + +@overload +def _get_string(key: str, delete_key: bool, decode_responses: True) -> str | None: ... + + +@overload +def _get_string(key: str, delete_key: bool, decode_responses: False) -> bytes | None: ... + + +def _get_string(key: str, delete_key: bool=False, decode_responses=True) -> str | bytes | None: + host = redis_connection(decode_responses=decode_responses) + if delete_key: + return host.getdel(key) + + return host.get(key) diff --git a/specifyweb/backend/workbench/upload/auditlog.py b/specifyweb/backend/workbench/upload/auditlog.py index c56252b5d32..3900b412bf8 100644 --- a/specifyweb/backend/workbench/upload/auditlog.py +++ b/specifyweb/backend/workbench/upload/auditlog.py @@ -25,11 +25,15 @@ from . 
import auditcodes - -def truncate_str_to_bytes(string: str, bytes: int) -> str: + +def str_to_bytes(string: str, max_length: int) -> bytes: str_as_bytes = string.encode() + return str_as_bytes[:max_length] + +def truncate_str_to_bytes(string: str, max_length: int) -> str: + str_as_bytes = str_to_bytes(string, max_length) try: - return str_as_bytes[:bytes].decode() + return str_as_bytes.decode() except UnicodeDecodeError as err: return str_as_bytes[:err.start].decode() diff --git a/specifyweb/frontend/js_src/lib/components/InitialContext/systemInfo.ts b/specifyweb/frontend/js_src/lib/components/InitialContext/systemInfo.ts index da2414c3854..21518f41335 100644 --- a/specifyweb/frontend/js_src/lib/components/InitialContext/systemInfo.ts +++ b/specifyweb/frontend/js_src/lib/components/InitialContext/systemInfo.ts @@ -44,7 +44,7 @@ function buildStatsLambdaUrl(base: string | null | undefined): string | null { if (!hasRoute) { const stage = 'prod'; const route = 'AggrgatedSp7Stats'; - u = `${u.replace(/\/$/, '') }/${stage}/${route}`; + u = `${u.replace(/\/$/, '')}/${stage}/${route}`; } return u; } @@ -58,7 +58,10 @@ export const fetchContext = load( if (systemInfo.stats_url !== null) { let counts: StatsCounts | null = null; try { - counts = await load('/context/stats_counts.json', 'application/json'); + counts = await load( + '/context/stats_counts.json', + 'application/json' + ); } catch { // If counts fetch fails, proceed without them. 
counts = null; @@ -102,12 +105,13 @@ export const fetchContext = load( const lambdaUrl = buildStatsLambdaUrl(systemInfo.stats_2_url); if (lambdaUrl) { - await ping(formatUrl(lambdaUrl, parameters, false), { errorMode: 'silent' }) - .catch(softFail); + await ping(formatUrl(lambdaUrl, parameters, false), { + errorMode: 'silent', + }).catch(softFail); } } return systemInfo; }); -export const getSystemInfo = (): SystemInfo => systemInfo; \ No newline at end of file +export const getSystemInfo = (): SystemInfo => systemInfo; diff --git a/specifyweb/settings/specify_settings.py b/specifyweb/settings/specify_settings.py index 062d88e4927..bb110ffe892 100644 --- a/specifyweb/settings/specify_settings.py +++ b/specifyweb/settings/specify_settings.py @@ -93,6 +93,11 @@ REPORT_RUNNER_HOST = '' REPORT_RUNNER_PORT = '' +# Information to connect to a Redis database +REDIS_HOST="redis" +REDIS_PORT=6379 +REDIS_DB_INDEX=0 + # The message queue for the Specify 7 worker(s). # This should point to a Redis server for sending jobs # and retrieving results from the worker. @@ -106,9 +111,13 @@ # For exception logging using Sentry (https://github.com/getsentry/sentry). RAVEN_CONFIG = None -# Support login mechanism. -ALLOW_SUPPORT_LOGIN = False -SUPPORT_LOGIN_TTL = 300 +# Enabling this option allows administrators with access to the +# backend Specify instance to log in as any user for support +# purposes without knowing their password. +# https://discourse.specifysoftware.org/t/allow-support-login-documentation/2838 +ALLOW_SUPPORT_LOGIN = True +# The amount of time in seconds each token is valid for +SUPPORT_LOGIN_TTL = 180 # Usage stats are transmitted to the following address. # Set to None to disable. 
diff --git a/specifyweb/specify/auth/support_login.py b/specifyweb/specify/auth/support_login.py index 87b06c9a754..e00ca9759ce 100644 --- a/specifyweb/specify/auth/support_login.py +++ b/specifyweb/specify/auth/support_login.py @@ -1,41 +1,177 @@ +import time import logging import hmac +import jwt +from base64 import b64encode, b64decode, urlsafe_b64encode, urlsafe_b64decode +from collections.abc import Iterable from hashlib import sha256 -from time import time + +from jwt.exceptions import InvalidTokenError +from Crypto.Cipher import AES +from Crypto.Protocol.KDF import HKDF +from Crypto.Hash import SHA256 +from Crypto.Random import get_random_bytes from django.conf import settings from django.core.exceptions import PermissionDenied from specifyweb.specify.models import Specifyuser +from specifyweb.backend.workbench.upload.auditlog import str_to_bytes +from specifyweb.backend.redis_cache import set_bytes, get_bytes logger = logging.getLogger(__name__) + TTL = settings.SUPPORT_LOGIN_TTL -def make_digest(msg): - return hmac.new(settings.SECRET_KEY.encode(), msg.encode(), sha256).hexdigest() -def make_token(user): - msg = f"{user.id}-{int(time())}" - return msg + '-' + make_digest(msg) +def familiarize_digest(key: bytes): + """Given a bytes object that may be provided to the user, associate it with this instance + by hashing it with the SECRET_KEY of the server. + This should prevent malicious agents from just generating their own + token + key pair, as the encryption key and signing key both rely on this + value. + """ + return hmac.new(settings.SECRET_KEY.encode(), key, sha256).digest() + + +def derive_key_pair(nonce: bytes, key_length: int, salt: bytes = b'') -> tuple[bytes, bytes]: + """ Use a HKDF (HMAC-based Key Derivation Function) to deterministically + generate a cryptographically secure pair of keys of a fixed length given + some pseudo-random inputs. 
+ + See [PyCryptodome's HKDF](https://pycryptodome.readthedocs.io/en/latest/src/protocol/kdf.html#hkdf) + and the original spec, [RFC 5869](https://datatracker.ietf.org/doc/html/rfc5869) + + Parameters: + - nonce: An initial value of bytes to "seed" new key generation + - key_length: The number of bytes to generate for each key pair + - salt: a preferably 32 byte number used to increase randomness + + Returns: A two-tuple of byte strings, each of length key_length + """ + okm = HKDF(familiarize_digest(nonce), 2 * key_length, salt, SHA256) + return okm[:key_length], okm[key_length:2*key_length] + + +def bytes_to_b64_string(input_bytes: bytes): + return b64encode(input_bytes).decode("utf-8") + + +def bytes_to_b64_url(input_bytes: bytes) -> str: + return urlsafe_b64encode(input_bytes).decode("utf-8") + + +def b64_url_to_bytes(url: str) -> bytes: + # add padding back if needed + padding = '=' * (-len(url) % 4) + return urlsafe_b64decode(url + padding) + + +def make_token(user, key: bytes): + # We randomly generate the salt value for the key pair generation. 
+ # This is later stored in memory via Redis with the configured TTL to + # prevent attacks where the server SECRET_KEY is exposed + salt = get_random_bytes(32) + encryption_key, signing_key = derive_key_pair(key, 32, salt) + cipher = AES.new(encryption_key, AES.MODE_GCM) + issue_time = int(time.time()) + msg = f"{user.id}-{user.name}-{issue_time}" + + ciphertext, tag = cipher.encrypt_and_digest(msg.encode()) + payload = { + "nonce": bytes_to_b64_string(cipher.nonce), + "tag": bytes_to_b64_string(tag), + "text": bytes_to_b64_string(ciphertext), + + # JWT options --- + # Issued at time + "iat": issue_time, + # Expiry time + "exp": issue_time + TTL + } + token: str = jwt.encode(payload, signing_key, algorithm="HS256") + set_bytes(key, salt, time_to_live=TTL) + + return token + + +def decode_token(encoded_token: str, signing_key: bytes, required_keys: Iterable[str]) -> dict[str, str | bytes]: + algorithms = ["HS256"] + # See https://pyjwt.readthedocs.io/en/latest/api.html#jwt.decode + jwt_decode_options = { + "require": ["iat", "exp"], + "verify_signature": True, + "verify_iat": True, + "verify_exp": True + } + try: + decoded_payload = jwt.decode( + encoded_token, signing_key, options=jwt_decode_options, algorithms=algorithms) + # InvalidTokenError is a base class from which all decode exceptions + # are derived. 
+ # See: + # https://pyjwt.readthedocs.io/en/stable/api.html#jwt.exceptions.InvalidTokenError + # https://github.com/jpadilla/pyjwt/blob/6293865c82ef24af76455c2522806ac2b1c75d6b/jwt/exceptions.py#L9 + except InvalidTokenError: + raise PermissionDenied() + + payload = {k: b64decode(v) if k in required_keys + else v for k, v in decoded_payload.items()} + + if not all(required_key in payload for required_key in required_keys): + raise PermissionDenied() + return payload + + +def decrypt_ciphertext(encryption_key, cipher_text, nonce, tag): + cipher = AES.new(encryption_key, AES.MODE_GCM, nonce=nonce) + + try: + # decrypt_and_verify can also raise a ValueError if the MAC tag is + # invalid + # See https://pycryptodome.readthedocs.io/en/latest/src/cipher/modern.html#decrypt_and_verify + plain_text = cipher.decrypt_and_verify( + cipher_text, tag).decode("utf-8") + user_id, *user_name, timestamp = plain_text.split("-") + except ValueError: + return None, None, None + return user_id, "-".join(user_name), timestamp + -# TODO: is this in use somewhere? 
class SupportLoginBackend: - def authenticate(self, request, token=None): + def authenticate(self, request, token=None, key=None): logger.info("attempting support login") - try: - userid, timestamp, digest = token.split('-') - except ValueError: + + if token is None or key is None: + return None + + # A key should always be "consumed" and deleted from Redis once used: + # all support login tokens are one-time use + salt = get_bytes(key, delete_key=True) + if salt is None: return None - msg = f"{userid}-{timestamp}" - if digest == make_digest(msg) and int(timestamp) + TTL > time(): - return self.get_user(userid) + encryption_key, signing_key = derive_key_pair(key, 32, salt) + + required_keys = ["nonce", "tag", "text"] + payload = decode_token(token, signing_key, required_keys) + + user_id, user_name, timestamp = decrypt_ciphertext( + encryption_key, payload["text"], payload["nonce"], payload["tag"]) + + if user_id is None or user_name is None or timestamp is None: + raise PermissionDenied() + if (int(timestamp) + TTL > time.time()) and (time.time() - int(timestamp) <= TTL): + return self.get_user(user_id, name=user_name) else: raise PermissionDenied() - def get_user(self, user_id): + def get_user(self, user_id, name=None): + filters = {} + if name is not None: + filters["name"] = name try: - return Specifyuser.objects.get(pk=user_id) + return Specifyuser.objects.get(pk=user_id, **filters) except Specifyuser.DoesNotExist: return None diff --git a/specifyweb/specify/management/commands/support_login.py b/specifyweb/specify/management/commands/support_login.py index 44f29813216..98ed583c5b4 100644 --- a/specifyweb/specify/management/commands/support_login.py +++ b/specifyweb/specify/management/commands/support_login.py @@ -1,13 +1,16 @@ -from optparse import make_option +from urllib.parse import quote_plus +from Crypto.Random import get_random_bytes from django.core.management.base import BaseCommand, CommandError +from django.utils.http import urlencode from 
django.conf import settings -from specifyweb.specify.auth.support_login import make_token +from specifyweb.specify.auth.support_login import make_token, bytes_to_b64_url from specifyweb.specify.models import Specifyuser TTL = settings.SUPPORT_LOGIN_TTL + class Command(BaseCommand): help = 'Creates a token for support login as the given user.' @@ -30,7 +33,8 @@ def handle(self, **options): def admin(user): return 'admin' if user.is_legacy_admin() else '' for user in Specifyuser.objects.all(): - self.stdout.write('\t'.join((user.name, user.usertype, admin(user)))) + self.stdout.write( + '\t'.join((user.name, user.usertype, admin(user)))) return username = options['username'] @@ -41,10 +45,25 @@ def admin(user): return 'admin' if user.is_legacy_admin() else '' if username is None: raise CommandError('username must be supplied') + # The symmetric encryption algorithm (AES GCM - see make_token) expects + # one of three key sizes: + # - 128 bit (16 byte) + # - 192 bit (24 byte) + # - 256 bit (32 byte) + # We use the most cryptographically secure key here: doesn't seem to impact + # performance too much + # This key is essentially a private key, it is used in conjunction with + # the SECRET_KEY of the server to encrypt, decrypt, and sign information + # associated with the token + key = get_random_bytes(32) + try: user = Specifyuser.objects.get(name=username) except Specifyuser.DoesNotExist: raise CommandError('No user with name "%s"' % username) self.stdout.write("The following token is good for %d seconds:" % TTL) - self.stdout.write("/accounts/support_login/?token=" + make_token(user)) + self.stdout.write( + f"Append the token to your server domain to login as {username}") + self.stdout.write( + f"/accounts/support_login/?token={quote_plus(make_token(user, key))}&key={bytes_to_b64_url(key)}")