[Feature] Documentation for development setup

## Summary

This PR improves the local development setup experience by adding Docker Compose and Makefile for streamlined setup.

## Changes

- **Added `docker-compose.yml`**: for one-command PostgreSQL database setup
- **Added `Makefile`**: Convenient shortcuts for common dev tasks (\`make setup\`, \`make dev\`, etc.)
- **Updated `README.md`**: Quick development setup instructions using Make
-
- **Added**: RDkit installation pain point documentation
- **Fixed**: Made Java feature properly dependent

## Why these changes?

The application uses PostgreSQL-specific features (\`ArrayField\`) and requires an anonymous user created by the bootstrap command. This PR makes the setup process trivial for new developers:

```bash
cp .env.local.example .env
make setup  # Starts DB, runs migrations, bootstraps data
make dev    # Starts development server
```

Java fix:
Moved global Java import to inline to avoid everyone having to configure the Java path.

Numerous changes to view and settings.
- Applied ruff-formatting

## Testing

Verified complete setup from scratch works with:
- PostgreSQL running in Docker
- All migrations applied
- Bootstrap data loaded successfully
- Anonymous user created
- The development server starts correctly.

Co-authored-by: Tobias O <tobias.olenyi@tum.de>
Co-authored-by: Tobias O <tobias.olenyi@envipath.com>
Co-authored-by: Liam <62733830+limmooo@users.noreply.github.com>
Reviewed-on: enviPath/enviPy#143
Reviewed-by: jebus <lorsbach@envipath.com>
Reviewed-by: liambrydon <lbry121@aucklanduni.ac.nz>
Co-authored-by: t03i <mail+envipath@t03i.net>
Co-committed-by: t03i <mail+envipath@t03i.net>
This commit is contained in:
2025-10-08 18:51:50 +13:00
committed by jebus
parent c2c46fbfa7
commit 36879c266b
11 changed files with 1570 additions and 836 deletions

View File

@ -9,6 +9,7 @@ https://docs.djangoproject.com/en/4.2/topics/settings/
For the full list of settings and their values, see
https://docs.djangoproject.com/en/4.2/ref/settings/
"""
import os
from pathlib import Path
@ -20,33 +21,35 @@ from sklearn.tree import DecisionTreeClassifier
# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent
load_dotenv(BASE_DIR / '.env', override=False)
load_dotenv(BASE_DIR / ".env", override=False)
# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/4.2/howto/deployment/checklist/
# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = '7!VTW`aZqg/UBLsM.P=m)2]lWqg>{+:xUgG1"WO@bCyaHR2Up8XW&g<*3.F4l2gi9c.E3}dHyA0D`&z?u#U%^7HYbj],eP"g_MS|3BNMD[mI>s#<i/%2ngZ~Oy+/w&@]'
SECRET_KEY = os.environ.get("SECRET_KEY", "secret-key")
# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = os.environ.get('DEBUG', 'False') == 'True'
ALLOWED_HOSTS = os.environ['ALLOWED_HOSTS'].split(',')
DEBUG = os.environ.get("DEBUG", "False") == "True"
ALLOWED_HOSTS = os.environ["ALLOWED_HOSTS"].split(",")
# Application definition
INSTALLED_APPS = [
'django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
"django.contrib.admin",
"django.contrib.auth",
"django.contrib.contenttypes",
"django.contrib.sessions",
"django.contrib.messages",
"django.contrib.staticfiles",
# 3rd party
'django_extensions',
'oauth2_provider',
"django_extensions",
"oauth2_provider",
# Custom
'epdb',
'migration',
"epdb",
"migration",
]
AUTHENTICATION_BACKENDS = [
@ -54,42 +57,42 @@ AUTHENTICATION_BACKENDS = [
]
MIDDLEWARE = [
'django.middleware.security.SecurityMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
'oauth2_provider.middleware.OAuth2TokenMiddleware',
"django.middleware.security.SecurityMiddleware",
"django.contrib.sessions.middleware.SessionMiddleware",
"django.middleware.common.CommonMiddleware",
"django.middleware.csrf.CsrfViewMiddleware",
"django.contrib.auth.middleware.AuthenticationMiddleware",
"django.contrib.messages.middleware.MessageMiddleware",
"django.middleware.clickjacking.XFrameOptionsMiddleware",
"oauth2_provider.middleware.OAuth2TokenMiddleware",
]
OAUTH2_PROVIDER = {
"PKCE_REQUIRED": False, # Accept PKCE requests but dont require them
}
if os.environ.get('REGISTRATION_MANDATORY', False) == 'True':
MIDDLEWARE.append('epdb.middleware.login_required_middleware.LoginRequiredMiddleware')
if os.environ.get("REGISTRATION_MANDATORY", False) == "True":
MIDDLEWARE.append("epdb.middleware.login_required_middleware.LoginRequiredMiddleware")
ROOT_URLCONF = 'envipath.urls'
ROOT_URLCONF = "envipath.urls"
TEMPLATES = [
{
'BACKEND': 'django.template.backends.django.DjangoTemplates',
'DIRS': (os.path.join(BASE_DIR, 'templates'),),
'APP_DIRS': True,
'OPTIONS': {
'context_processors': [
'django.template.context_processors.debug',
'django.template.context_processors.request',
'django.contrib.auth.context_processors.auth',
'django.contrib.messages.context_processors.messages',
"BACKEND": "django.template.backends.django.DjangoTemplates",
"DIRS": (os.path.join(BASE_DIR, "templates"),),
"APP_DIRS": True,
"OPTIONS": {
"context_processors": [
"django.template.context_processors.debug",
"django.template.context_processors.request",
"django.contrib.auth.context_processors.auth",
"django.contrib.messages.context_processors.messages",
],
},
},
]
WSGI_APPLICATION = 'envipath.wsgi.application'
WSGI_APPLICATION = "envipath.wsgi.application"
# Database
# https://docs.djangoproject.com/en/4.2/ref/settings/#databases
@ -97,11 +100,11 @@ WSGI_APPLICATION = 'envipath.wsgi.application'
DATABASES = {
"default": {
"ENGINE": "django.db.backends.postgresql",
"USER": os.environ['POSTGRES_USER'],
"NAME": os.environ['POSTGRES_DB'],
"PASSWORD": os.environ['POSTGRES_PASSWORD'],
"HOST": os.environ['POSTGRES_SERVICE_NAME'],
"PORT": os.environ['POSTGRES_PORT']
"USER": os.environ["POSTGRES_USER"],
"NAME": os.environ["POSTGRES_DB"],
"PASSWORD": os.environ["POSTGRES_PASSWORD"],
"HOST": os.environ["POSTGRES_SERVICE_NAME"],
"PORT": os.environ["POSTGRES_PORT"],
}
}
@ -109,96 +112,84 @@ DATABASES = {
# https://docs.djangoproject.com/en/4.2/ref/settings/#auth-password-validators
AUTH_PASSWORD_VALIDATORS = [
{
'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
},
{"NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator"},
{"NAME": "django.contrib.auth.password_validation.MinimumLengthValidator"},
{"NAME": "django.contrib.auth.password_validation.CommonPasswordValidator"},
{"NAME": "django.contrib.auth.password_validation.NumericPasswordValidator"},
]
# Internationalization
# https://docs.djangoproject.com/en/4.2/topics/i18n/
LANGUAGE_CODE = 'en-us'
LANGUAGE_CODE = "en-us"
TIME_ZONE = 'UTC'
TIME_ZONE = "UTC"
USE_I18N = True
USE_TZ = True
# Default primary key field type
# https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
if DEBUG:
EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'
EMAIL_BACKEND = "django.core.mail.backends.console.EmailBackend"
else:
EMAIL_BACKEND = 'django.core.mail.backends.smtp.EmailBackend'
EMAIL_BACKEND = "django.core.mail.backends.smtp.EmailBackend"
EMAIL_USE_TLS = True
EMAIL_HOST = 'mail.gandi.net'
EMAIL_HOST_USER = os.environ['EMAIL_HOST_USER']
EMAIL_HOST_PASSWORD = os.environ['EMAIL_HOST_PASSWORD']
EMAIL_HOST = "mail.gandi.net"
EMAIL_HOST_USER = os.environ["EMAIL_HOST_USER"]
EMAIL_HOST_PASSWORD = os.environ["EMAIL_HOST_PASSWORD"]
EMAIL_PORT = 587
AUTH_USER_MODEL = "epdb.User"
ADMIN_APPROVAL_REQUIRED = os.environ.get('ADMIN_APPROVAL_REQUIRED', 'False') == 'True'
ADMIN_APPROVAL_REQUIRED = os.environ.get("ADMIN_APPROVAL_REQUIRED", "False") == "True"
# # SESAME
# SESAME_MAX_AGE = 300
# # TODO set to "home"
# LOGIN_REDIRECT_URL = "/"
LOGIN_URL = '/login/'
LOGIN_URL = "/login/"
SERVER_URL = os.environ.get('SERVER_URL', 'http://localhost:8000')
SERVER_URL = os.environ.get("SERVER_URL", "http://localhost:8000")
CSRF_TRUSTED_ORIGINS = [SERVER_URL]
AMBIT_URL = 'http://localhost:9001'
DEFAULT_VALUES = {
'description': 'no description'
}
AMBIT_URL = "http://localhost:9001"
DEFAULT_VALUES = {"description": "no description"}
EP_DATA_DIR = os.environ['EP_DATA_DIR']
MODEL_DIR = os.path.join(EP_DATA_DIR, 'models')
EP_DATA_DIR = os.environ["EP_DATA_DIR"]
if not os.path.exists(EP_DATA_DIR):
os.mkdir(EP_DATA_DIR)
MODEL_DIR = os.path.join(EP_DATA_DIR, "models")
if not os.path.exists(MODEL_DIR):
os.mkdir(MODEL_DIR)
STATIC_DIR = os.path.join(EP_DATA_DIR, 'static')
STATIC_DIR = os.path.join(EP_DATA_DIR, "static")
if not os.path.exists(STATIC_DIR):
os.mkdir(STATIC_DIR)
LOG_DIR = os.path.join(EP_DATA_DIR, 'log')
LOG_DIR = os.path.join(EP_DATA_DIR, "log")
if not os.path.exists(LOG_DIR):
os.mkdir(LOG_DIR)
PLUGIN_DIR = os.path.join(EP_DATA_DIR, 'plugins')
PLUGIN_DIR = os.path.join(EP_DATA_DIR, "plugins")
if not os.path.exists(PLUGIN_DIR):
os.mkdir(PLUGIN_DIR)
# Set this as our static root dir
STATIC_ROOT = STATIC_DIR
STATIC_URL = '/static/'
STATIC_URL = "/static/"
# Where the sources are stored...
STATICFILES_DIRS = (
BASE_DIR / 'static',
)
STATICFILES_DIRS = (BASE_DIR / "static",)
FIXTURE_DIRS = (
BASE_DIR / 'fixtures',
)
FIXTURE_DIRS = (BASE_DIR / "fixtures",)
# Logging
LOGGING = {
@ -206,8 +197,8 @@ LOGGING = {
"disable_existing_loggers": True,
"formatters": {
"simple": {
'format': '[%(asctime)s] %(levelname)s %(module)s - %(message)s',
'datefmt': '%Y-%m-%d %H:%M:%S',
"format": "[%(asctime)s] %(levelname)s %(module)s - %(message)s",
"datefmt": "%Y-%m-%d %H:%M:%S",
},
},
"handlers": {
@ -220,7 +211,7 @@ LOGGING = {
"level": "DEBUG", # Or higher
"class": "logging.FileHandler",
"filename": os.path.join(LOG_DIR, "debug.log"),
"formatter": "simple"
"formatter": "simple",
},
},
"loggers": {
@ -228,66 +219,66 @@ LOGGING = {
"epdb": {
"handlers": ["file"], # "console",
"propagate": True,
"level": os.environ.get('LOG_LEVEL', 'INFO')
"level": os.environ.get("LOG_LEVEL", "INFO"),
},
# For everything under envipath/ loaded via getlogger(__name__)
'envipath': {
'handlers': ['file', 'console'],
'propagate': True,
'level': os.environ.get('LOG_LEVEL', 'INFO')
"envipath": {
"handlers": ["file", "console"],
"propagate": True,
"level": os.environ.get("LOG_LEVEL", "INFO"),
},
# For everything under utilities/ loaded via getlogger(__name__)
'utilities': {
'handlers': ['file', 'console'],
'propagate': True,
'level': os.environ.get('LOG_LEVEL', 'INFO')
"utilities": {
"handlers": ["file", "console"],
"propagate": True,
"level": os.environ.get("LOG_LEVEL", "INFO"),
},
},
}
# Flags
ENVIFORMER_PRESENT = os.environ.get('ENVIFORMER_PRESENT', 'False') == 'True'
ENVIFORMER_DEVICE = os.environ.get('ENVIFORMER_DEVICE', 'cpu')
ENVIFORMER_PRESENT = os.environ.get("ENVIFORMER_PRESENT", "False") == "True"
ENVIFORMER_DEVICE = os.environ.get("ENVIFORMER_DEVICE", "cpu")
# If celery is not present set always eager to true which will cause delayed tasks to block until finished
FLAG_CELERY_PRESENT = os.environ.get('FLAG_CELERY_PRESENT', 'False') == 'True'
FLAG_CELERY_PRESENT = os.environ.get("FLAG_CELERY_PRESENT", "False") == "True"
if not FLAG_CELERY_PRESENT:
CELERY_TASK_ALWAYS_EAGER = True
# Celery Configuration Options
CELERY_TIMEZONE = "Europe/Berlin"
# Celery Configuration
CELERY_BROKER_URL = 'redis://localhost:6379/0' # Use Redis as message broker
CELERY_RESULT_BACKEND = 'redis://localhost:6379/1'
CELERY_ACCEPT_CONTENT = ['json']
CELERY_TASK_SERIALIZER = 'json'
CELERY_BROKER_URL = "redis://localhost:6379/0" # Use Redis as message broker
CELERY_RESULT_BACKEND = "redis://localhost:6379/1"
CELERY_ACCEPT_CONTENT = ["json"]
CELERY_TASK_SERIALIZER = "json"
MODEL_BUILDING_ENABLED = os.environ.get('MODEL_BUILDING_ENABLED', 'False') == 'True'
APPLICABILITY_DOMAIN_ENABLED = os.environ.get('APPLICABILITY_DOMAIN_ENABLED', 'False') == 'True'
MODEL_BUILDING_ENABLED = os.environ.get("MODEL_BUILDING_ENABLED", "False") == "True"
APPLICABILITY_DOMAIN_ENABLED = os.environ.get("APPLICABILITY_DOMAIN_ENABLED", "False") == "True"
DEFAULT_RF_MODEL_PARAMS = {
'base_clf': RandomForestClassifier(
"base_clf": RandomForestClassifier(
n_estimators=100,
max_features='log2',
max_features="log2",
random_state=42,
criterion='entropy',
criterion="entropy",
ccp_alpha=0.0,
max_depth=3,
min_samples_leaf=1
min_samples_leaf=1,
),
'num_chains': 10,
"num_chains": 10,
}
DEFAULT_MODEL_PARAMS = {
'base_clf': DecisionTreeClassifier(
criterion='entropy',
"base_clf": DecisionTreeClassifier(
criterion="entropy",
max_depth=3,
min_samples_split=5,
# min_samples_leaf=5,
max_features='sqrt',
max_features="sqrt",
# class_weight='balanced',
random_state=42
random_state=42,
),
'num_chains': 10,
"num_chains": 10,
}
DEFAULT_MAX_NUMBER_OF_NODES = 30
@ -295,9 +286,10 @@ DEFAULT_MAX_DEPTH = 5
DEFAULT_MODEL_THRESHOLD = 0.25
# Loading Plugins
PLUGINS_ENABLED = os.environ.get('PLUGINS_ENABLED', 'False') == 'True'
PLUGINS_ENABLED = os.environ.get("PLUGINS_ENABLED", "False") == "True"
if PLUGINS_ENABLED:
from utilities.plugin import discover_plugins
CLASSIFIER_PLUGINS = discover_plugins(_cls=Classifier)
PROPERTY_PLUGINS = discover_plugins(_cls=Property)
DESCRIPTOR_PLUGINS = discover_plugins(_cls=Descriptor)
@ -306,59 +298,59 @@ else:
PROPERTY_PLUGINS = {}
DESCRIPTOR_PLUGINS = {}
SENTRY_ENABLED = os.environ.get('SENTRY_ENABLED', 'False') == 'True'
SENTRY_ENABLED = os.environ.get("SENTRY_ENABLED", "False") == "True"
if SENTRY_ENABLED:
import sentry_sdk
def before_send(event, hint):
# Check if was a handled exception by one of our loggers
if event.get('logger'):
for log_path in LOGGING.get('loggers').keys():
if event['logger'].startswith(log_path):
if event.get("logger"):
for log_path in LOGGING.get("loggers").keys():
if event["logger"].startswith(log_path):
return None
return event
sentry_sdk.init(
dsn=os.environ.get('SENTRY_DSN'),
dsn=os.environ.get("SENTRY_DSN"),
# Add data like request headers and IP for users,
# see https://docs.sentry.io/platforms/python/data-management/data-collected/ for more info
send_default_pii=True,
environment=os.environ.get('SENTRY_ENVIRONMENT', 'development'),
environment=os.environ.get("SENTRY_ENVIRONMENT", "development"),
before_send=before_send,
)
# compile into digestible flags
FLAGS = {
'MODEL_BUILDING': MODEL_BUILDING_ENABLED,
'CELERY': FLAG_CELERY_PRESENT,
'PLUGINS': PLUGINS_ENABLED,
'SENTRY': SENTRY_ENABLED,
'ENVIFORMER': ENVIFORMER_PRESENT,
'APPLICABILITY_DOMAIN': APPLICABILITY_DOMAIN_ENABLED,
"MODEL_BUILDING": MODEL_BUILDING_ENABLED,
"CELERY": FLAG_CELERY_PRESENT,
"PLUGINS": PLUGINS_ENABLED,
"SENTRY": SENTRY_ENABLED,
"ENVIFORMER": ENVIFORMER_PRESENT,
"APPLICABILITY_DOMAIN": APPLICABILITY_DOMAIN_ENABLED,
}
# path of the URL are checked via "startswith"
# -> /password_reset/done is covered as well
LOGIN_EXEMPT_URLS = [
'/register',
'/api/legacy/',
'/o/token/',
'/o/userinfo/',
'/password_reset/',
'/reset/',
'/microsoft/',
"/register",
"/api/legacy/",
"/o/token/",
"/o/userinfo/",
"/password_reset/",
"/reset/",
"/microsoft/",
]
# MS AD/Entra
MS_ENTRA_ENABLED = os.environ.get('MS_ENTRA_ENABLED', 'False') == 'True'
MS_ENTRA_ENABLED = os.environ.get("MS_ENTRA_ENABLED", "False") == "True"
if MS_ENTRA_ENABLED:
# Add app to installed apps
INSTALLED_APPS.append('epauth')
INSTALLED_APPS.append("epauth")
# Set vars required by app
MS_ENTRA_CLIENT_ID = os.environ['MS_CLIENT_ID']
MS_ENTRA_CLIENT_SECRET = os.environ['MS_CLIENT_SECRET']
MS_ENTRA_TENANT_ID = os.environ['MS_TENANT_ID']
MS_ENTRA_CLIENT_ID = os.environ["MS_CLIENT_ID"]
MS_ENTRA_CLIENT_SECRET = os.environ["MS_CLIENT_SECRET"]
MS_ENTRA_TENANT_ID = os.environ["MS_TENANT_ID"]
MS_ENTRA_AUTHORITY = f"https://login.microsoftonline.com/{MS_ENTRA_TENANT_ID}"
MS_ENTRA_REDIRECT_URI = os.environ['MS_REDIRECT_URI']
MS_ENTRA_SCOPES = os.environ.get('MS_SCOPES', '').split(',')
MS_ENTRA_REDIRECT_URI = os.environ["MS_REDIRECT_URI"]
MS_ENTRA_SCOPES = os.environ.get("MS_SCOPES", "").split(",")