diff --git a/.gitea/workflows/deploy-dokploy.yml b/.gitea/workflows/deploy-dokploy.yml index ec1788b..ec90a6a 100644 --- a/.gitea/workflows/deploy-dokploy.yml +++ b/.gitea/workflows/deploy-dokploy.yml @@ -42,6 +42,9 @@ jobs: needs: build runs-on: ubuntu-latest steps: + - name: Checkout repo + uses: actions/checkout@v3 + - name: Trigger Dokploy Deploy env: DOKPLOY_DEPLOY_HOOK: ${{ secrets.DOKPLOY_DEPLOY_HOOK }} @@ -54,3 +57,15 @@ jobs: curl -fsS -X POST "$DOKPLOY_DEPLOY_HOOK" \ -H "Content-Type: application/json" \ -d "{\"imageTag\":\"$IMAGE_TAG\"}" + + - name: Wait for Ready Health Check + env: + HEALTH_URL: ${{ secrets.DOKPLOY_HEALTHCHECK_URL }} + MAX_ATTEMPTS: "30" + SLEEP_SECONDS: "10" + run: | + if [ -z "$HEALTH_URL" ]; then + echo "Missing DOKPLOY_HEALTHCHECK_URL secret" + exit 1 + fi + bash scripts/wait-for-health.sh diff --git a/docs/05_REFACTOR_2.md b/docs/05_REFACTOR_2.md index c1dbc2f..f91e503 100644 --- a/docs/05_REFACTOR_2.md +++ b/docs/05_REFACTOR_2.md @@ -113,6 +113,16 @@ Primary outcomes: - Added operational smoke tooling for deploy/rollback validation: - `scripts/smoke-public-launch.sh` checks health endpoints, `X-Request-Id`, and `request_id` response fields. - Expanded `docs/public-launch-runbook.md` with deployment smoke and rollback checklist sections. +- Added host hardening and DR drill scripts: + - `scripts/harden-host-ufw.sh` (UFW baseline with dry-run default). + - `scripts/check-host-security.sh` (ports/firewall/fail2ban/docker status snapshot). + - `scripts/restore-drill-postgres.sh` (restore + validation query workflow). +- Updated docs to use executable operational checks in: + - `docs/public-launch-runbook.md` + - `docs/06_SECURITY_REVIEW.md` +- Added deploy health gate automation: + - `scripts/wait-for-health.sh` polls ready endpoint and verifies `request_id` payload. + - `.gitea/workflows/deploy-dokploy.yml` now runs post-deploy health verification using `DOKPLOY_HEALTHCHECK_URL`. 
### Risks / Notes to Revisit - Workspace is intentionally dirty; commits must be path-scoped to avoid mixing unrelated changes. diff --git a/docs/06_SECURITY_REVIEW.md b/docs/06_SECURITY_REVIEW.md index d6cfd94..698d9cb 100644 --- a/docs/06_SECURITY_REVIEW.md +++ b/docs/06_SECURITY_REVIEW.md @@ -66,6 +66,6 @@ This document tracks launch-critical security findings for app, data, users, and - [x] `npm run lint` passes (warnings acceptable for now). - [x] `npm test` passes. - [x] `npm run build` passes. -- [ ] Production host firewall rules verified. +- [ ] Production host firewall rules verified (`scripts/harden-host-ufw.sh` + `scripts/check-host-security.sh`). - [ ] SSH restricted to VPN/allowlist. -- [ ] Backup restore drill logged for current week. +- [ ] Backup restore drill logged for current week (`scripts/restore-drill-postgres.sh`). diff --git a/docs/public-launch-runbook.md b/docs/public-launch-runbook.md index 6b4c785..e451112 100644 --- a/docs/public-launch-runbook.md +++ b/docs/public-launch-runbook.md @@ -28,6 +28,10 @@ - `REGISTRY_USER` - `REGISTRY_PASS` - `DOKPLOY_DEPLOY_HOOK` + - `DOKPLOY_HEALTHCHECK_URL` +- Health gate: + - workflow calls `scripts/wait-for-health.sh` against `DOKPLOY_HEALTHCHECK_URL` + - default retry window: 5 minutes (30 attempts x 10s) ## 4) Reverse Proxy + Network Hardening - Use `docker/nginx/fiddy.conf` as baseline. 
@@ -41,6 +45,10 @@ - Confirm Nginx writes JSON logs: - `/var/log/nginx/fiddy-access.log` - `/var/log/nginx/fiddy-error.log` +- Apply/verify host baseline using scripts: + - dry-run firewall apply: `SSH_ALLOW_CIDR=<admin-cidr> DRY_RUN=1 scripts/harden-host-ufw.sh` + - real firewall apply: `sudo SSH_ALLOW_CIDR=<admin-cidr> DRY_RUN=0 scripts/harden-host-ufw.sh` + - host status audit: `scripts/check-host-security.sh` ## 5) Observability - Bring up monitoring stack: @@ -66,7 +74,7 @@ - Retention: - default 7 days (`RETENTION_DAYS=7`) - Restore drill: - - `scripts/restore-postgres.sh backups/postgres/<file>.dump <target_database_url>` + - `scripts/restore-drill-postgres.sh backups/postgres/<file>.dump <target_database_url>` - Run restore drill on non-prod DB before public launch. ## 7) Incident Response Quick Flow diff --git a/scripts/check-host-security.sh b/scripts/check-host-security.sh new file mode 100755 index 0000000..7d6b71b --- /dev/null +++ b/scripts/check-host-security.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "== Host Security Check ==" +echo "Timestamp (UTC): $(date -u +%Y-%m-%dT%H:%M:%SZ)" +echo + +echo "== Listening TCP/UDP Ports ==" +if command -v ss >/dev/null 2>&1; then + ss -tulpn || true +else + echo "ss command not found" +fi +echo + +echo "== UFW Status ==" +if command -v ufw >/dev/null 2>&1; then + ufw status verbose || true +else + echo "ufw command not found" +fi +echo + +echo "== Fail2ban Status ==" +if command -v fail2ban-client >/dev/null 2>&1; then + fail2ban-client status || true +else + echo "fail2ban-client command not found" +fi +echo + +echo "== Docker Containers ==" +if command -v docker >/dev/null 2>&1; then + docker ps --format 'table {{.Names}}\t{{.Image}}\t{{.Status}}\t{{.Ports}}' || true +else + echo "docker command not found" +fi +echo + +echo "== Quick Expected Surface ==" +echo "- Public inbound expected: 80/tcp, 443/tcp only" +echo "- SSH expected: restricted source allowlist or VPN-only" +echo "- Postgres expected: private-only, not internet-exposed" diff --git 
a/scripts/harden-host-ufw.sh b/scripts/harden-host-ufw.sh
new file mode 100755
index 0000000..514761f
--- /dev/null
+++ b/scripts/harden-host-ufw.sh
@@ -0,0 +1,63 @@
+#!/usr/bin/env bash
+#
+# Reset UFW and apply a baseline policy: deny incoming, allow outgoing, allow
+# SSH (22/tcp) from SSH_ALLOW_CIDR only, and allow 80/tcp and 443/tcp.
+#
+# Environment:
+#   SSH_ALLOW_CIDR  Required. Source allowed to reach SSH, e.g. 203.0.113.10/32.
+#   DRY_RUN         1 (default) prints the ufw commands; 0 executes them.
+#
+# A real run needs root. sudo may not pass the caller's environment through,
+# so set the variables on the sudo command line:
+#   sudo SSH_ALLOW_CIDR=203.0.113.10/32 DRY_RUN=0 scripts/harden-host-ufw.sh
+set -euo pipefail
+
+if ! command -v ufw >/dev/null 2>&1; then
+  echo "ufw is not installed on this host." >&2
+  exit 1
+fi
+
+SSH_ALLOW_CIDR="${SSH_ALLOW_CIDR:-}"
+DRY_RUN="${DRY_RUN:-1}"
+
+if [[ -z "$SSH_ALLOW_CIDR" ]]; then
+  echo "SSH_ALLOW_CIDR is required (example: SSH_ALLOW_CIDR=203.0.113.10/32)." >&2
+  exit 1
+fi
+
+# Only 0 and 1 are accepted. Treating every value other than 1 as "apply for
+# real" would let DRY_RUN=true or DRY_RUN=yes reset the firewall.
+case "$DRY_RUN" in
+  0 | 1) ;;
+  *)
+    echo "DRY_RUN must be 0 or 1 (got: $DRY_RUN)." >&2
+    exit 1
+    ;;
+esac
+
+# Fail before the first command instead of inside "ufw --force reset".
+if [[ "$DRY_RUN" == "0" && "$EUID" -ne 0 ]]; then
+  echo "Applying firewall rules requires root; re-run with sudo." >&2
+  exit 1
+fi
+
+# Print the command in dry-run mode, otherwise execute it.
+run_cmd() {
+  if [[ "$DRY_RUN" == "1" ]]; then
+    echo "+ $*"
+  else
+    "$@"
+  fi
+}
+
+echo "Applying UFW baseline policy (DRY_RUN=$DRY_RUN)..."
+run_cmd ufw --force reset
+run_cmd ufw default deny incoming
+run_cmd ufw default allow outgoing
+run_cmd ufw allow from "$SSH_ALLOW_CIDR" to any port 22 proto tcp
+run_cmd ufw allow 80/tcp
+run_cmd ufw allow 443/tcp
+run_cmd ufw --force enable
+run_cmd ufw status verbose
+
+echo "Done."
diff --git a/scripts/restore-drill-postgres.sh b/scripts/restore-drill-postgres.sh
new file mode 100755
index 0000000..40e5f12
--- /dev/null
+++ b/scripts/restore-drill-postgres.sh
@@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+#
+# Restore a backup into a target database via restore-postgres.sh, then run
+# validation queries against the restored database.
+#
+# Usage: restore-drill-postgres.sh <backup_file> <target_database_url>
+#
+# Exits non-zero if the restore or any validation query fails.
+set -euo pipefail
+
+if [[ $# -lt 2 ]]; then
+  echo "Usage: $0 <backup_file> <target_database_url>" >&2
+  exit 1
+fi
+
+BACKUP_FILE="$1"
+TARGET_DATABASE_URL="$2"
+
+if [[ ! -f "$BACKUP_FILE" ]]; then
+  echo "Backup file not found: $BACKUP_FILE" >&2
+  exit 1
+fi
+
+if ! command -v psql >/dev/null 2>&1; then
+  echo "psql command not found." >&2
+  exit 1
+fi
+
+# Find the sibling script next to this file so the drill does not depend on
+# being started from the repository root.
+script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
+
+echo "Running restore drill..."
+bash "$script_dir/restore-postgres.sh" "$BACKUP_FILE" "$TARGET_DATABASE_URL"
+
+echo "Running post-restore validation queries..."
+# ON_ERROR_STOP makes psql exit non-zero on the first failing statement, e.g.
+# when the users table is missing.
+psql "$TARGET_DATABASE_URL" -v ON_ERROR_STOP=1 <<'SQL'
+select now() at time zone 'utc' as restore_checked_at_utc;
+select count(*) as public_tables
+from information_schema.tables
+where table_schema='public';
+select count(*) as users_count from users;
+SQL
+
+echo "Restore drill completed successfully."
diff --git a/scripts/wait-for-health.sh b/scripts/wait-for-health.sh
new file mode 100755
index 0000000..f564bfa
--- /dev/null
+++ b/scripts/wait-for-health.sh
@@ -0,0 +1,67 @@
+#!/usr/bin/env bash
+#
+# Poll a readiness endpoint until it returns HTTP 200 with a body containing
+# "request_id", or exit non-zero once the attempts are used up.
+#
+# Usage:
+#   HEALTH_URL=https://your-domain/api/health/ready scripts/wait-for-health.sh
+#   scripts/wait-for-health.sh https://your-domain/api/health/ready
+#
+# Environment:
+#   HEALTH_URL            URL to poll; the first argument takes precedence.
+#   MAX_ATTEMPTS          Number of polls before giving up (default 30).
+#   SLEEP_SECONDS         Pause between polls, passed to sleep (default 10).
+#   CURL_TIMEOUT_SECONDS  Time limit per request, passed to curl (default 10).
+#
+# Exit status: 0 when the endpoint is ready, 1 otherwise.
+set -euo pipefail
+
+HEALTH_URL="${1:-${HEALTH_URL:-}}"
+MAX_ATTEMPTS="${MAX_ATTEMPTS:-30}"
+SLEEP_SECONDS="${SLEEP_SECONDS:-10}"
+CURL_TIMEOUT_SECONDS="${CURL_TIMEOUT_SECONDS:-10}"
+
+if [[ -z "$HEALTH_URL" ]]; then
+  echo "Usage: HEALTH_URL=https://your-domain/api/health/ready scripts/wait-for-health.sh" >&2
+  echo "   or: scripts/wait-for-health.sh https://your-domain/api/health/ready" >&2
+  exit 1
+fi
+
+# The loop bound is evaluated arithmetically, where a non-numeric value would
+# silently count as 0 attempts, so reject it explicitly.
+if [[ ! "$MAX_ATTEMPTS" =~ ^[0-9]+$ ]]; then
+  echo "MAX_ATTEMPTS must be a non-negative integer (got: $MAX_ATTEMPTS)" >&2
+  exit 1
+fi
+# Force base 10 so a value such as 030 is not read as octal.
+MAX_ATTEMPTS=$((10#$MAX_ATTEMPTS))
+
+tmp_body="$(mktemp)"
+trap 'rm -f "$tmp_body"' EXIT
+
+for ((attempt = 1; attempt <= MAX_ATTEMPTS; attempt++)); do
+  # Empty the file first: curl may leave it untouched when the connection
+  # fails, and a stale body must not be reported as the last response.
+  : >"$tmp_body"
+  # --max-time keeps one hung request from stalling the whole gate. "|| true"
+  # stops set -e from aborting on transport errors; the status is then not 200.
+  status="$(curl -sS --max-time "$CURL_TIMEOUT_SECONDS" -o "$tmp_body" \
+    -w "%{http_code}" "$HEALTH_URL" || true)"
+  if [[ "$status" == "200" ]] && grep -q '"request_id"' "$tmp_body"; then
+    echo "Health check passed on attempt $attempt/$MAX_ATTEMPTS"
+    exit 0
+  fi
+
+  if ((attempt < MAX_ATTEMPTS)); then
+    echo "Attempt $attempt/$MAX_ATTEMPTS failed (status=$status), retrying in ${SLEEP_SECONDS}s..."
+    sleep "$SLEEP_SECONDS"
+  else
+    # Nothing left to retry, so do not claim a retry or waste a sleep.
+    echo "Attempt $attempt/$MAX_ATTEMPTS failed (status=$status)."
+  fi
+done
+
+echo "Health check failed after $MAX_ATTEMPTS attempts." >&2
+echo "Last response body:" >&2
+cat "$tmp_body" >&2 || true
+exit 1