diff --git a/docker/security/README.md b/docker/security/README.md
new file mode 100644
index 0000000..01bdf80
--- /dev/null
+++ b/docker/security/README.md
@@ -0,0 +1,22 @@
+# Security Templates
+
+This folder contains host-side security templates for public launch hardening.
+
+## fail2ban (recommended baseline)
+- Config location:
+  - `docker/security/fail2ban/jail.d/fiddy-nginx.conf`
+  - `docker/security/fail2ban/filter.d/fiddy-nginx-auth.conf`
+- Purpose:
+  - ban repeated abusive requests against auth, join, and invite endpoints.
+
+## CrowdSec (optional alternative/complement)
+- Config location:
+  - `docker/security/crowdsec/acquis.yaml`
+- Purpose:
+  - ingest Nginx access/error logs with CrowdSec for broader behavior-based decisions.
+
+## Notes
+- Use either fail2ban or CrowdSec as your primary auto-ban control, or carefully run both with clear ownership of ban actions.
+- Validate log paths match your deployment:
+  - `/var/log/nginx/fiddy-access.log`
+  - `/var/log/nginx/fiddy-error.log`
diff --git a/docker/security/crowdsec/acquis.yaml b/docker/security/crowdsec/acquis.yaml
new file mode 100644
index 0000000..2fcaff6
--- /dev/null
+++ b/docker/security/crowdsec/acquis.yaml
@@ -0,0 +1,11 @@
+filenames:
+  - /var/log/nginx/fiddy-access.log
+labels:
+  type: nginx
+  service: fiddy
+---
+filenames:
+  - /var/log/nginx/fiddy-error.log
+labels:
+  type: nginx-error
+  service: fiddy
diff --git a/docker/security/fail2ban/filter.d/fiddy-nginx-auth.conf b/docker/security/fail2ban/filter.d/fiddy-nginx-auth.conf
new file mode 100644
index 0000000..b9c006b
--- /dev/null
+++ b/docker/security/fail2ban/filter.d/fiddy-nginx-auth.conf
@@ -0,0 +1,15 @@
+# Matches access-log entries for the auth, group-join and invite-link
+# endpoints that ended with one of the failure statuses listed per pattern.
+#
+# NOTE(review): fail2ban needs every failregex to capture the client address
+# (normally through the <HOST> tag). None of the patterns below contains one,
+# so as written a match cannot be attributed to an IP. Insert <HOST> where the
+# client-IP field sits in the log line -- TODO confirm that field's name and
+# position against the nginx log_format used for fiddy-access.log.
+# NOTE(review): the patterns appear to expect slashes logged as "\/"; verify
+# that the access log really escapes "/" this way, otherwise nothing matches.
+[Definition]
+failregex = ^.*"uri":"\\/api\\/auth\\/(login|register)".*"status":(401|403|429).*
+            ^.*"uri":"\\/api\\/groups\\/join".*"status":(400|403|404|409|429).*
+            ^.*"uri":"\\/api\\/invite-links\\/.*".*"status":(404|410|429).*
+ignoreregex =
diff --git a/docker/security/fail2ban/jail.d/fiddy-nginx.conf
b/docker/security/fail2ban/jail.d/fiddy-nginx.conf new file mode 100644 index 0000000..39fdfcc --- /dev/null +++ b/docker/security/fail2ban/jail.d/fiddy-nginx.conf @@ -0,0 +1,9 @@ +[fiddy-nginx-auth] +enabled = true +port = http,https +filter = fiddy-nginx-auth +logpath = /var/log/nginx/fiddy-access.log +backend = auto +maxretry = 20 +findtime = 10m +bantime = 1h diff --git a/docs/05_REFACTOR_2.md b/docs/05_REFACTOR_2.md index f91e503..e4bd492 100644 --- a/docs/05_REFACTOR_2.md +++ b/docs/05_REFACTOR_2.md @@ -123,6 +123,16 @@ Primary outcomes: - Added deploy health gate automation: - `scripts/wait-for-health.sh` polls ready endpoint and verifies `request_id` payload. - `.gitea/workflows/deploy-dokploy.yml` now runs post-deploy health verification using `DOKPLOY_HEALTHCHECK_URL`. +- Added DR + host-ban operational templates: + - `scripts/basebackup-postgres.sh` for periodic `pg_basebackup` snapshots. + - `docker/security/fail2ban/*` for auth/join/invite abuse bans from nginx JSON logs. + - `docker/security/crowdsec/acquis.yaml` as optional CrowdSec ingestion baseline. + - `docker/security/README.md` to document security template usage. +- Added restore drill logging artifacts: + - `docs/restore-drill-log.csv` as evidence log template. + - `scripts/log-restore-drill.sh` to append timestamped restore outcomes and measured RTO. +- Added consolidated execution checklist: + - `docs/07_PUBLIC_LAUNCH_CHECKLIST.md` for go-live gating across infra, deploy, security, observability, DR, and rollback. ### Risks / Notes to Revisit - Workspace is intentionally dirty; commits must be path-scoped to avoid mixing unrelated changes. 
diff --git a/docs/06_SECURITY_REVIEW.md b/docs/06_SECURITY_REVIEW.md
index 698d9cb..5ef46aa 100644
--- a/docs/06_SECURITY_REVIEW.md
+++ b/docs/06_SECURITY_REVIEW.md
@@ -69,3 +69,5 @@ This document tracks launch-critical security findings for app, data, users, and
 - [ ] Production host firewall rules verified (`scripts/harden-host-ufw.sh` + `scripts/check-host-security.sh`).
 - [ ] SSH restricted to VPN/allowlist.
-- [ ] Backup restore drill logged for current week (`scripts/restore-drill-postgres.sh`).
+- [ ] Backup restore drill logged for current week (`scripts/restore-drill-postgres.sh` + `scripts/log-restore-drill.sh`).
+- [ ] Base backup job configured and validated (`scripts/basebackup-postgres.sh`).
+- [ ] Auto-ban tooling configured (fail2ban or crowdsec) from `docker/security/`.
diff --git a/docs/07_PUBLIC_LAUNCH_CHECKLIST.md b/docs/07_PUBLIC_LAUNCH_CHECKLIST.md
new file mode 100644
index 0000000..e6b5535
--- /dev/null
+++ b/docs/07_PUBLIC_LAUNCH_CHECKLIST.md
@@ -0,0 +1,48 @@
+# Public Launch Checklist
+
+## A) Infrastructure Baseline
+- [ ] Domain DNS points to public IP.
+- [ ] Router forwards only `80` and `443`.
+- [ ] Host firewall denies all inbound except `80/443` and restricted `22`.
+- [ ] `SSH_ALLOW_CIDR` policy validated.
+- [ ] Postgres port `5432` is not public.
+
+## B) App and Deployment
+- [ ] Dokploy project connected to Gitea repo.
+- [ ] Secrets configured:
+  - [ ] `DATABASE_URL`
+  - [ ] `DATABASE_SSL`
+  - [ ] `ALLOWED_DB_NAMES`
+  - [ ] `SESSION_COOKIE_NAME`
+  - [ ] `SESSION_TTL_DAYS`
+  - [ ] `DEBUG_API=0`
+  - [ ] `DOKPLOY_DEPLOY_HOOK`
+  - [ ] `DOKPLOY_HEALTHCHECK_URL`
+- [ ] Deploy workflow passes build/test/push/deploy.
+- [ ] Post-deploy health gate passes (`scripts/wait-for-health.sh`).
+- [ ] Manual smoke passes (`scripts/smoke-public-launch.sh`).
+
+## C) Security Controls
+- [ ] Nginx TLS/headers/rate limits enabled (`docker/nginx/fiddy.conf`).
+- [ ] Request-id propagation enabled (`X-Request-Id` in responses).
+- [ ] Server-side rate limits active (auth/write/ip limiters). +- [ ] Fail2ban or CrowdSec configured from `docker/security/`. +- [ ] No secrets/full invite codes in logs. + +## D) Observability +- [ ] Loki, Promtail, Grafana, Uptime Kuma running. +- [ ] Promtail ingests `job="nginx"`. +- [ ] Dashboards show request IDs for incident triage. +- [ ] Alerts configured for 5xx/auth spikes/DB failures/resource pressure. + +## E) Backup and Recovery +- [ ] Daily logical backup scheduled (`scripts/backup-postgres.sh`). +- [ ] Periodic base backup scheduled (`scripts/basebackup-postgres.sh`). +- [ ] Latest restore drill succeeded (`scripts/restore-drill-postgres.sh`). +- [ ] Drill logged (`scripts/log-restore-drill.sh` -> `docs/restore-drill-log.csv`). +- [ ] Measured RTO is acceptable. + +## F) Rollback Readiness +- [ ] Previous stable release retained in Dokploy. +- [ ] Rollback runbook tested once in staging or low-risk window. +- [ ] Rollback smoke check verified. diff --git a/docs/public-launch-runbook.md b/docs/public-launch-runbook.md index e451112..5225866 100644 --- a/docs/public-launch-runbook.md +++ b/docs/public-launch-runbook.md @@ -49,6 +49,9 @@ - dry-run firewall apply: `SSH_ALLOW_CIDR= DRY_RUN=1 scripts/harden-host-ufw.sh` - real firewall apply: `SSH_ALLOW_CIDR= DRY_RUN=0 sudo scripts/harden-host-ufw.sh` - host status audit: `scripts/check-host-security.sh` +- Auto-ban templates: + - fail2ban: `docker/security/fail2ban/*` + - crowdsec (optional): `docker/security/crowdsec/acquis.yaml` ## 5) Observability - Bring up monitoring stack: @@ -71,11 +74,16 @@ ## 6) Backup + Restore - Daily backup command: - `scripts/backup-postgres.sh` +- Periodic base backup (for faster full recovery): + - `PRIMARY_DATABASE_URL= scripts/basebackup-postgres.sh` - Retention: - default 7 days (`RETENTION_DAYS=7`) - Restore drill: - `scripts/restore-drill-postgres.sh backups/postgres/.dump ` - Run restore drill on non-prod DB before public launch. 
+- Record drill outcome:
+  - `scripts/log-restore-drill.sh <environment> <backup_file> <restore_target> <status> <rto_minutes> <notes>`
+  - log file: `docs/restore-drill-log.csv`
 
 ## 7) Incident Response Quick Flow
 1. Identify failing request and `request_id`.
diff --git a/docs/restore-drill-log.csv b/docs/restore-drill-log.csv
new file mode 100644
index 0000000..31ecb1e
--- /dev/null
+++ b/docs/restore-drill-log.csv
@@ -0,0 +1 @@
+timestamp_utc,environment,backup_file,restore_target,status,rto_minutes,notes
diff --git a/scripts/basebackup-postgres.sh b/scripts/basebackup-postgres.sh
new file mode 100644
index 0000000..e83b57a
--- /dev/null
+++ b/scripts/basebackup-postgres.sh
@@ -0,0 +1,57 @@
+#!/usr/bin/env bash
+#
+# Take a physical base backup with pg_basebackup, pack it into a timestamped
+# .tar.gz under BACKUP_DIR, then prune archives older than RETENTION_DAYS.
+#
+# Environment:
+#   PRIMARY_DATABASE_URL  required; connection string handed to pg_basebackup
+#   BACKUP_DIR            optional; defaults to ./backups/postgres/base
+#   RETENTION_DAYS        optional; defaults to 7
+set -euo pipefail
+
+if [[ -z "${PRIMARY_DATABASE_URL:-}" ]]; then
+  echo "PRIMARY_DATABASE_URL is required (replication-capable connection string)." >&2
+  exit 1
+fi
+
+BACKUP_DIR="${BACKUP_DIR:-./backups/postgres/base}"
+RETENTION_DAYS="${RETENTION_DAYS:-7}"
+TIMESTAMP="$(date -u +%Y%m%dT%H%M%SZ)"
+WORK_DIR="${BACKUP_DIR}/fiddy_base_${TIMESTAMP}"
+ARCHIVE_PATH="${BACKUP_DIR}/fiddy_base_${TIMESTAMP}.tar.gz"
+
+mkdir -p "$BACKUP_DIR"
+
+# Failed runs must not leave a partial snapshot directory or a truncated
+# archive behind: the directory is always removed on exit, the archive only
+# when compression did not finish.
+archive_complete=0
+cleanup() {
+  rm -rf -- "${WORK_DIR:?}"
+  if [[ "$archive_complete" -eq 0 ]]; then
+    rm -f -- "${ARCHIVE_PATH:?}"
+  fi
+}
+trap cleanup EXIT
+
+echo "Creating base backup directory snapshot..."
+pg_basebackup \
+  --dbname="$PRIMARY_DATABASE_URL" \
+  --pgdata="$WORK_DIR" \
+  --format=plain \
+  --wal-method=stream \
+  --checkpoint=fast \
+  --write-recovery-conf \
+  --progress \
+  --verbose
+
+echo "Compressing base backup..."
+tar -C "$BACKUP_DIR" -czf "$ARCHIVE_PATH" "fiddy_base_${TIMESTAMP}"
+archive_complete=1
+# Free the uncompressed copy before pruning.
+rm -rf -- "${WORK_DIR:?}"
+
+echo "Pruning base backups older than ${RETENTION_DAYS} days..."
+find "$BACKUP_DIR" -type f -name "fiddy_base_*.tar.gz" -mtime "+${RETENTION_DAYS}" -delete
+
+echo "Base backup complete: $ARCHIVE_PATH"
diff --git a/scripts/log-restore-drill.sh b/scripts/log-restore-drill.sh
new file mode 100644
index 0000000..505576c
--- /dev/null
+++ b/scripts/log-restore-drill.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+#
+# Append one restore-drill result as a row of the CSV evidence log.
+#
+# Usage:
+#   log-restore-drill.sh <environment> <backup_file> <restore_target> \
+#     <status> <rto_minutes> <notes>
+#
+# Environment:
+#   LOG_FILE  optional; defaults to docs/restore-drill-log.csv
+set -euo pipefail
+
+if [[ $# -lt 6 ]]; then
+  echo "Usage: $0 <environment> <backup_file> <restore_target> <status> <rto_minutes> <notes>" >&2
+  exit 1
+fi
+
+ENVIRONMENT="$1"
+BACKUP_FILE="$2"
+RESTORE_TARGET="$3"
+STATUS="$4"
+RTO_MINUTES="$5"
+NOTES="$6"
+TIMESTAMP="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
+LOG_FILE="${LOG_FILE:-docs/restore-drill-log.csv}"
+
+# Print one CSV field. Line breaks become spaces so each drill stays on a
+# single row; a value holding a comma or double quote (or any value when $2 is
+# 1) is wrapped in double quotes with embedded double quotes doubled.
+csv_field() {
+  local value=$1 force_quotes=${2:-0} dq='"'
+  value=${value//$'\r'/ }
+  value=${value//$'\n'/ }
+  if [[ "$force_quotes" -eq 1 || "$value" == *"$dq"* || "$value" == *,* ]]; then
+    value=${value//$dq/$dq$dq}
+    value="${dq}${value}${dq}"
+  fi
+  printf '%s' "$value"
+}
+
+mkdir -p "$(dirname "$LOG_FILE")"
+# -s rather than -f: a missing file and an empty one both need the header row.
+if [[ ! -s "$LOG_FILE" ]]; then
+  echo "timestamp_utc,environment,backup_file,restore_target,status,rto_minutes,notes" > "$LOG_FILE"
+fi
+
+row="$TIMESTAMP"
+for field in "$ENVIRONMENT" "$BACKUP_FILE" "$RESTORE_TARGET" "$STATUS" "$RTO_MINUTES"; do
+  row+=",$(csv_field "$field")"
+done
+# The free-text notes column is always quoted.
+row+=",$(csv_field "$NOTES" 1)"
+printf '%s\n' "$row" >> "$LOG_FILE"
+echo "Logged restore drill to $LOG_FILE"